summaryrefslogtreecommitdiff
path: root/mail/src
diff options
context:
space:
mode:
author Tomás Touceda <chiiph@leap.se> 2013-10-02 17:08:21 -0300
committer Tomás Touceda <chiiph@leap.se> 2013-10-03 12:21:52 -0300
commit 5a89e8d6c418030af772a216af92fad61082c5ab (patch)
tree aa8a79bd8a2161bfb2d43dd8ac14648d791093a5 /mail/src
parent c8227124c3c78f56ad8c6c1ed212140f7248a225 (diff)
Improve charset handling for email
Diffstat (limited to 'mail/src')
-rw-r--r-- mail/src/leap/mail/imap/server.py 28
1 files changed, 26 insertions, 2 deletions
diff --git a/mail/src/leap/mail/imap/server.py b/mail/src/leap/mail/imap/server.py
index ae76833b..10d338a1 100644
--- a/mail/src/leap/mail/imap/server.py
+++ b/mail/src/leap/mail/imap/server.py
@@ -18,7 +18,9 @@
Soledad-backed IMAP Server.
"""
import copy
+import email
import logging
+import re
import StringIO
import cStringIO
import time
@@ -693,6 +695,26 @@ class LeapMessage(WithMsgFields):
the more complex MIME-based interface.
"""
+    def _get_charset(self, content):
+        """
+        Retrieve the charset declared by an email, for encoding its contents.
+
+        Only the top-level Content-Type header is inspected. The value is
+        parsed by the email package, so quoted parameters
+        (charset="utf-8") and any casing of the parameter name are accepted.
+
+        :param content: mail contents
+        :type content: unicode
+
+        :returns: the declared charset, lower-cased, or "UTF-8" when none is
+                  declared, the contents cannot be parsed, or the declared
+                  name is not a codec Python can encode with
+        :rtype: str
+        """
+        default = "UTF-8"
+        try:
+            # message_from_string() needs a native str, so only non-str
+            # input (Python 2 unicode) is encoded first.
+            if not isinstance(content, str):
+                content = content.encode("utf-8")
+            em = email.message_from_string(content)
+            charset = em.get_content_charset()
+            if not charset:
+                return default
+            # Callers hand the result straight to .encode(); probing here
+            # turns an unknown codec name into the default instead of a
+            # LookupError at the call site.
+            "".encode(charset)
+        except (AttributeError, TypeError, LookupError, UnicodeError):
+            # Best effort: any unusable input falls back to the default.
+            return default
+        return charset
+
def open(self):
"""
Return an file-like object opened for reading.
@@ -704,7 +726,8 @@ class LeapMessage(WithMsgFields):
:rtype: StringIO
"""
fd = cStringIO.StringIO()
- fd.write(str(self._doc.content.get(self.RAW_KEY, '')))
+ charset = self._get_charset(self._doc.content.get(self.RAW_KEY, ''))
+ fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))
fd.seek(0)
return fd
@@ -723,7 +746,8 @@ class LeapMessage(WithMsgFields):
:rtype: StringIO
"""
fd = StringIO.StringIO()
- fd.write(str(self._doc.content.get(self.RAW_KEY, '')))
+ charset = self._get_charset(self._doc.content.get(self.RAW_KEY, ''))
+ fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))
# SHOULD use a separate BODY FIELD ...
fd.seek(0)
return fd