summary refs log tree commit diff
path: root/src/leap/mail
diff options
context:
space:
mode:
author Tomás Touceda <chiiph@leap.se> 2013-10-02 17:08:21 -0300
committer Tomás Touceda <chiiph@leap.se> 2013-10-03 12:21:52 -0300
commit 5a4ea784c5ab7fd51de8d17cda4b550a4385d424 (patch)
tree 3d335a7d4389f492d659c6a0341a84a25e74c8df /src/leap/mail
parent 4012678895d045c4c43a4c89dac38430adbe3e8e (diff)
Improve charset handling for email
Diffstat (limited to 'src/leap/mail')
-rw-r--r-- src/leap/mail/imap/server.py 28
1 files changed, 26 insertions, 2 deletions
diff --git a/src/leap/mail/imap/server.py b/src/leap/mail/imap/server.py
index ae76833..10d338a 100644
--- a/src/leap/mail/imap/server.py
+++ b/src/leap/mail/imap/server.py
@@ -18,7 +18,9 @@
Soledad-backed IMAP Server.
"""
import copy
+import email
import logging
+import re
import StringIO
import cStringIO
import time
@@ -693,6 +695,26 @@ class LeapMessage(WithMsgFields):
the more complex MIME-based interface.
"""
+    def _get_charset(self, content):
+        """
+        Return the charset declared in the mail's Content-Type header.
+
+        :param content: mail contents
+        :type content: unicode
+
+        :returns: the declared charset (lowercased), or "UTF-8" when it is
+                  missing, cannot be parsed or is not a codec Python knows
+        :rtype: str
+        """
+        try:
+            em = email.message_from_string(content.encode("utf-8"))
+            # Unlike a bare charset=<value> regexp, this handles quoted values
+            charset = em.get_content_charset("UTF-8")
+            u"".encode(charset)  # callers encode with it: reject bad codecs
+        except Exception:
+            charset = "UTF-8"  # best effort: fall back, never raise
+        return charset
+
def open(self):
"""
Return an file-like object opened for reading.
@@ -704,7 +726,8 @@ class LeapMessage(WithMsgFields):
:rtype: StringIO
"""
fd = cStringIO.StringIO()
- fd.write(str(self._doc.content.get(self.RAW_KEY, '')))
+ charset = self._get_charset(self._doc.content.get(self.RAW_KEY, ''))
+ fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))
fd.seek(0)
return fd
@@ -723,7 +746,8 @@ class LeapMessage(WithMsgFields):
:rtype: StringIO
"""
fd = StringIO.StringIO()
- fd.write(str(self._doc.content.get(self.RAW_KEY, '')))
+ charset = self._get_charset(self._doc.content.get(self.RAW_KEY, ''))
+ fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))
# SHOULD use a separate BODY FIELD ...
fd.seek(0)
return fd