diff options
author | Tomás Touceda <chiiph@leap.se> | 2013-10-02 17:08:21 -0300 |
---|---|---|
committer | Tomás Touceda <chiiph@leap.se> | 2013-10-03 12:21:52 -0300 |
commit | 5a4ea784c5ab7fd51de8d17cda4b550a4385d424 (patch) | |
tree | 3d335a7d4389f492d659c6a0341a84a25e74c8df /src/leap/mail/imap/server.py | |
parent | 4012678895d045c4c43a4c89dac38430adbe3e8e (diff) |
Improve charset handling for email
Diffstat (limited to 'src/leap/mail/imap/server.py')
-rw-r--r-- | src/leap/mail/imap/server.py | 28 |
1 files changed, 26 insertions, 2 deletions
diff --git a/src/leap/mail/imap/server.py b/src/leap/mail/imap/server.py
index ae76833..10d338a 100644
--- a/src/leap/mail/imap/server.py
+++ b/src/leap/mail/imap/server.py
@@ -18,7 +18,9 @@
 Soledad-backed IMAP Server.
 """
 import copy
+import email
 import logging
+import re
 import StringIO
 import cStringIO
 import time
@@ -693,6 +695,26 @@ class LeapMessage(WithMsgFields):
     the more complex MIME-based interface.
     """
 
+    def _get_charset(self, content):
+        """
+        Mini parser to retrieve the charset of an email
+
+        :param content: mail contents
+        :type content: unicode
+
+        :returns: the charset as parsed from the contents
+        :rtype: str
+        """
+        charset = "UTF-8"
+        try:
+            em = email.message_from_string(content.encode("utf-8"))
+            # Miniparser for: Content-Type: <something>; charset=<charset>
+            charset_re = r'''charset=(?P<charset>[\w|\d|-]*)'''
+            charset = re.findall(charset_re, em["Content-Type"])[0]
+        except Exception:
+            pass
+        return charset
+
     def open(self):
         """
         Return an file-like object opened for reading.
@@ -704,7 +726,8 @@ class LeapMessage(WithMsgFields):
         :rtype: StringIO
         """
         fd = cStringIO.StringIO()
-        fd.write(str(self._doc.content.get(self.RAW_KEY, '')))
+        charset = self._get_charset(self._doc.content.get(self.RAW_KEY, ''))
+        fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))
         fd.seek(0)
         return fd
 
@@ -723,7 +746,8 @@ class LeapMessage(WithMsgFields):
         :rtype: StringIO
         """
         fd = StringIO.StringIO()
-        fd.write(str(self._doc.content.get(self.RAW_KEY, '')))
+        charset = self._get_charset(self._doc.content.get(self.RAW_KEY, ''))
+        fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))
         # SHOULD use a separate BODY FIELD ...
         fd.seek(0)
         return fd