diff options
| -rw-r--r-- | mail/changes/bug-4000_support-non-ascii | 1 | ||||
| -rw-r--r-- | mail/src/leap/mail/imap/fetch.py | 21 | ||||
| -rw-r--r-- | mail/src/leap/mail/imap/server.py | 52 | ||||
| -rw-r--r-- | mail/src/leap/mail/utils.py | 44 | 
4 files changed, 84 insertions, 34 deletions
| diff --git a/mail/changes/bug-4000_support-non-ascii b/mail/changes/bug-4000_support-non-ascii new file mode 100644 index 00000000..8f6712d7 --- /dev/null +++ b/mail/changes/bug-4000_support-non-ascii @@ -0,0 +1 @@ +  o Add support for non-ascii characters in emails. Closes #4000. diff --git a/mail/src/leap/mail/imap/fetch.py b/mail/src/leap/mail/imap/fetch.py index 0a71f53b..a776ac70 100644 --- a/mail/src/leap/mail/imap/fetch.py +++ b/mail/src/leap/mail/imap/fetch.py @@ -40,6 +40,7 @@ from leap.common.events.events_pb2 import IMAP_MSG_DECRYPTED  from leap.common.events.events_pb2 import IMAP_MSG_SAVED_LOCALLY  from leap.common.events.events_pb2 import IMAP_MSG_DELETED_INCOMING  from leap.common.events.events_pb2 import IMAP_UNREAD_MAIL +from leap.mail.utils import get_email_charset  logger = logging.getLogger(__name__) @@ -296,12 +297,17 @@ class LeapIncomingMail(object):          Tries to decrypt a gpg message if data looks like one.          :param data: the text to be decrypted. -        :type data: str +        :type data: unicode          :return: data, possibly descrypted.          :rtype: str          """ +        leap_assert_type(data, unicode) +          parser = Parser() +        encoding = get_email_charset(data) +        data = data.encode(encoding)          origmsg = parser.parsestr(data) +          # handle multipart/encrypted messages          if origmsg.get_content_type() == 'multipart/encrypted':              # sanity check @@ -320,13 +326,21 @@ class LeapIncomingMail(object):                      "Multipart/encrypted messages' second body part should "                      "have content type equal to 'octet-stream' (instead of "                      "%s)." 
% payload[1].get_content_type()) +              # parse message and get encrypted content              pgpencmsg = origmsg.get_payload()[1]              encdata = pgpencmsg.get_payload() +              # decrypt and parse decrypted message              decrdata = self._keymanager.decrypt(                  encdata, self._pkey,                  passphrase=self._soledad.passphrase) +            try: +                decrdata = decrdata.encode(encoding) +            except (UnicodeEncodeError, UnicodeDecodeError) as e: +                logger.error("Unicode error {0}".format(e)) +                decrdata = decrdata.encode(encoding, 'replace') +              decrmsg = parser.parsestr(decrdata)              # replace headers back in original message              for hkey, hval in decrmsg.items(): @@ -335,6 +349,7 @@ class LeapIncomingMail(object):                      origmsg.replace_header(hkey, hval)                  except KeyError:                      origmsg[hkey] = hval +              # replace payload by unencrypted payload              origmsg.set_payload(decrmsg.get_payload())              return origmsg.as_string(unixfrom=False) @@ -352,6 +367,10 @@ class LeapIncomingMail(object):                  # replace encrypted by decrypted content                  data = data.replace(pgp_message, decrdata)          # if message is not encrypted, return raw data + +        if isinstance(data, unicode): +            data = data.encode(encoding, 'replace') +          return data      def _add_message_locally(self, msgtuple): diff --git a/mail/src/leap/mail/imap/server.py b/mail/src/leap/mail/imap/server.py index df510ce4..5a98315e 100644 --- a/mail/src/leap/mail/imap/server.py +++ b/mail/src/leap/mail/imap/server.py @@ -18,9 +18,7 @@  Soledad-backed IMAP Server.  
"""  import copy -import email  import logging -import re  import StringIO  import cStringIO  import time @@ -33,14 +31,11 @@ from twisted.mail import imap4  from twisted.internet import defer  from twisted.python import log -#from twisted import cred - -#import u1db -  from leap.common import events as leap_events  from leap.common.events.events_pb2 import IMAP_UNREAD_MAIL  from leap.common.check import leap_assert, leap_assert_type  from leap.soledad.client import Soledad +from leap.mail.utils import get_email_charset  logger = logging.getLogger(__name__) @@ -184,7 +179,8 @@ class SoledadBackedAccount(WithMsgFields, IndexedDB):          # messages          TYPE_MBOX_SEEN_IDX: [KTYPE, MBOX_VAL, 'bool(seen)'],          TYPE_MBOX_RECT_IDX: [KTYPE, MBOX_VAL, 'bool(recent)'], -        TYPE_MBOX_RECT_SEEN_IDX: [KTYPE, MBOX_VAL, 'bool(recent)', 'bool(seen)'], +        TYPE_MBOX_RECT_SEEN_IDX: [KTYPE, MBOX_VAL, +                                  'bool(recent)', 'bool(seen)'],      }      INBOX_NAME = "INBOX" @@ -695,28 +691,6 @@ class LeapMessage(WithMsgFields):      the more complex MIME-based interface.      """ -    def _get_charset(self, content): -        """ -        Mini parser to retrieve the charset of an email - -        :param content: mail contents -        :type content: unicode - -        :returns: the charset as parsed from the contents -        :rtype: str -        """ -        charset = "UTF-8" -        try: -            em = email.message_from_string(content.encode("utf-8")) -            # Miniparser for: Content-Type: <something>; charset=<charset> -            charset_re = r'''charset=(?P<charset>[\w|\d|-]*)''' -            charset = re.findall(charset_re, em["Content-Type"])[0] -            if charset is None or len(charset) == 0: -                charset = "UTF-8" -        except Exception: -            pass -        return charset -      def open(self):          """          Return an file-like object opened for reading. 
@@ -728,8 +702,14 @@ class LeapMessage(WithMsgFields):          :rtype: StringIO          """          fd = cStringIO.StringIO() -        charset = self._get_charset(self._doc.content.get(self.RAW_KEY, '')) -        fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset)) +        charset = get_email_charset(self._doc.content.get(self.RAW_KEY, '')) +        content = self._doc.content.get(self.RAW_KEY, '') +        try: +            content = content.encode(charset) +        except (UnicodeEncodeError, UnicodeDecodeError) as e: +            logger.error("Unicode error {0}".format(e)) +            content = content.encode(charset, 'replace') +        fd.write(content)          fd.seek(0)          return fd @@ -748,8 +728,14 @@ class LeapMessage(WithMsgFields):          :rtype: StringIO          """          fd = StringIO.StringIO() -        charset = self._get_charset(self._doc.content.get(self.RAW_KEY, '')) -        fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset)) +        charset = get_email_charset(self._doc.content.get(self.RAW_KEY, '')) +        content = self._doc.content.get(self.RAW_KEY, '') +        try: +            content = content.encode(charset) +        except (UnicodeEncodeError, UnicodeDecodeError) as e: +            logger.error("Unicode error {0}".format(e)) +            content = content.encode(charset, 'replace') +        fd.write(content)          # SHOULD use a separate BODY FIELD ...          fd.seek(0)          return fd diff --git a/mail/src/leap/mail/utils.py b/mail/src/leap/mail/utils.py new file mode 100644 index 00000000..22e16a75 --- /dev/null +++ b/mail/src/leap/mail/utils.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# utils.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
Utility functions for email.
"""
import codecs
import email
import re


def get_email_charset(content):
    """
    Retrieve the charset declared in the Content-Type header of an email.

    The header is parsed with the standard ``email`` package instead of a
    hand-written regular expression, so quoted values such as
    ``charset="iso-8859-1"`` are recognised. The result is validated against
    the codec registry because callers pass it straight to ``.encode()``,
    where an unknown name would raise ``LookupError``.

    :param content: mail contents
    :type content: unicode

    :returns: the charset as parsed from the contents (lower-cased), or
              "UTF-8" when none is declared, the declared one is not a
              known codec, or the contents cannot be parsed.
    :rtype: str
    """
    default_charset = "UTF-8"
    if not content:
        return default_charset

    # The email parser wants the interpreter's native ``str`` type: a byte
    # string on Python 2, a text string on Python 3.
    try:
        if isinstance(content, str):
            text = content
        elif isinstance(content, bytes):
            # Only reachable on Python 3, where bytes is not str.
            text = content.decode("utf-8", "replace")
        else:
            # Python 2 ``unicode`` object.
            text = content.encode("utf-8")
    except (AttributeError, UnicodeError):
        # Not string-like or not encodable: keep the best-effort default.
        return default_charset

    charset = email.message_from_string(text).get_content_charset()
    if not charset:
        return default_charset

    # Reject names that Python has no codec for.
    try:
        codecs.lookup(charset)
    except (LookupError, ValueError, TypeError):
        return default_charset
    return charset
