From c0834048d564dfdc3e2cedc1a0f81788d14f3cab Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 23 Oct 2013 10:17:21 -0300 Subject: Move charset parser to a utils module. --- src/leap/mail/imap/server.py | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) (limited to 'src/leap/mail/imap') diff --git a/src/leap/mail/imap/server.py b/src/leap/mail/imap/server.py index df510ce..b9a041d 100644 --- a/src/leap/mail/imap/server.py +++ b/src/leap/mail/imap/server.py @@ -18,9 +18,7 @@ Soledad-backed IMAP Server. """ import copy -import email import logging -import re import StringIO import cStringIO import time @@ -41,6 +39,7 @@ from leap.common import events as leap_events from leap.common.events.events_pb2 import IMAP_UNREAD_MAIL from leap.common.check import leap_assert, leap_assert_type from leap.soledad.client import Soledad +from leap.mail.utils import get_email_charset logger = logging.getLogger(__name__) @@ -695,28 +694,6 @@ class LeapMessage(WithMsgFields): the more complex MIME-based interface. """ - def _get_charset(self, content): - """ - Mini parser to retrieve the charset of an email - - :param content: mail contents - :type content: unicode - - :returns: the charset as parsed from the contents - :rtype: str - """ - charset = "UTF-8" - try: - em = email.message_from_string(content.encode("utf-8")) - # Miniparser for: Content-Type: ; charset= - charset_re = r'''charset=(?P[\w|\d|-]*)''' - charset = re.findall(charset_re, em["Content-Type"])[0] - if charset is None or len(charset) == 0: - charset = "UTF-8" - except Exception: - pass - return charset - def open(self): """ Return an file-like object opened for reading. @@ -728,7 +705,7 @@ class LeapMessage(WithMsgFields): :rtype: StringIO """ fd = cStringIO.StringIO() - charset = self._get_charset(self._doc.content.get(self.RAW_KEY, '')) + charset = get_email_charset(self._doc.content.get(self.RAW_KEY, '')) fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset)) fd.seek(0) return fd @@ -748,7 +725,7 @@ class LeapMessage(WithMsgFields): :rtype: StringIO """ fd = StringIO.StringIO() - charset = self._get_charset(self._doc.content.get(self.RAW_KEY, '')) + charset = get_email_charset(self._doc.content.get(self.RAW_KEY, '')) fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset)) # SHOULD use a separate BODY FIELD ... fd.seek(0) -- cgit v1.2.3 From f864f21b51b9f767afa8ccb3b3f39967b2f08f60 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 23 Oct 2013 10:18:34 -0300 Subject: Add encoding exception catch to avoid crashes. --- src/leap/mail/imap/server.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src/leap/mail/imap') diff --git a/src/leap/mail/imap/server.py b/src/leap/mail/imap/server.py index b9a041d..7ae3c45 100644 --- a/src/leap/mail/imap/server.py +++ b/src/leap/mail/imap/server.py @@ -706,7 +706,13 @@ class LeapMessage(WithMsgFields): """ fd = cStringIO.StringIO() charset = get_email_charset(self._doc.content.get(self.RAW_KEY, '')) - fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset)) + content = self._doc.content.get(self.RAW_KEY, '') + try: + content = content.encode(charset) + except (UnicodeEncodeError, UnicodeDecodeError) as e: + logger.error("Unicode error {0}".format(e)) + content = content.encode(charset, 'replace') + fd.write(content) fd.seek(0) return fd @@ -726,7 +732,13 @@ class LeapMessage(WithMsgFields): """ fd = StringIO.StringIO() charset = get_email_charset(self._doc.content.get(self.RAW_KEY, '')) - fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset)) + content = self._doc.content.get(self.RAW_KEY, '') + try: + content = content.encode(charset) + except (UnicodeEncodeError, UnicodeDecodeError) as e: + logger.error("Unicode error {0}".format(e)) + content = content.encode(charset, 'replace') + fd.write(content) # SHOULD use a separate BODY FIELD ... fd.seek(0) return fd -- cgit v1.2.3 From 7dd2e6ed4d1980626f486a6d5065a4fd5ffdfeb3 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 23 Oct 2013 10:19:44 -0300 Subject: Use correct encoding and data type in mails. --- src/leap/mail/imap/fetch.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'src/leap/mail/imap') diff --git a/src/leap/mail/imap/fetch.py b/src/leap/mail/imap/fetch.py index 0a71f53..a776ac7 100644 --- a/src/leap/mail/imap/fetch.py +++ b/src/leap/mail/imap/fetch.py @@ -40,6 +40,7 @@ from leap.common.events.events_pb2 import IMAP_MSG_DECRYPTED from leap.common.events.events_pb2 import IMAP_MSG_SAVED_LOCALLY from leap.common.events.events_pb2 import IMAP_MSG_DELETED_INCOMING from leap.common.events.events_pb2 import IMAP_UNREAD_MAIL +from leap.mail.utils import get_email_charset logger = logging.getLogger(__name__) @@ -296,12 +297,17 @@ class LeapIncomingMail(object): Tries to decrypt a gpg message if data looks like one. :param data: the text to be decrypted. - :type data: str + :type data: unicode :return: data, possibly descrypted. :rtype: str """ + leap_assert_type(data, unicode) + parser = Parser() + encoding = get_email_charset(data) + data = data.encode(encoding) origmsg = parser.parsestr(data) + # handle multipart/encrypted messages if origmsg.get_content_type() == 'multipart/encrypted': # sanity check @@ -320,13 +326,21 @@ class LeapIncomingMail(object): "Multipart/encrypted messages' second body part should " "have content type equal to 'octet-stream' (instead of " "%s)." % payload[1].get_content_type()) + # parse message and get encrypted content pgpencmsg = origmsg.get_payload()[1] encdata = pgpencmsg.get_payload() + # decrypt and parse decrypted message decrdata = self._keymanager.decrypt( encdata, self._pkey, passphrase=self._soledad.passphrase) + try: + decrdata = decrdata.encode(encoding) + except (UnicodeEncodeError, UnicodeDecodeError) as e: + logger.error("Unicode error {0}".format(e)) + decrdata = decrdata.encode(encoding, 'replace') + decrmsg = parser.parsestr(decrdata) # replace headers back in original message for hkey, hval in decrmsg.items(): @@ -335,6 +349,7 @@ class LeapIncomingMail(object): origmsg.replace_header(hkey, hval) except KeyError: origmsg[hkey] = hval + # replace payload by unencrypted payload origmsg.set_payload(decrmsg.get_payload()) return origmsg.as_string(unixfrom=False) @@ -352,6 +367,10 @@ class LeapIncomingMail(object): # replace encrypted by decrypted content data = data.replace(pgp_message, decrdata) # if message is not encrypted, return raw data + + if isinstance(data, unicode): + data = data.encode(encoding, 'replace') + return data def _add_message_locally(self, msgtuple): -- cgit v1.2.3 From 2e5dbaec5189603615c7b4e9e93bd6129af9c3b1 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 23 Oct 2013 10:22:47 -0300 Subject: Remove commented imports. --- src/leap/mail/imap/server.py | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/leap/mail/imap') diff --git a/src/leap/mail/imap/server.py b/src/leap/mail/imap/server.py index 7ae3c45..6fc4db3 100644 --- a/src/leap/mail/imap/server.py +++ b/src/leap/mail/imap/server.py @@ -31,10 +31,6 @@ from twisted.mail import imap4 from twisted.internet import defer from twisted.python import log -#from twisted import cred - -#import u1db - from leap.common import events as leap_events from leap.common.events.events_pb2 import IMAP_UNREAD_MAIL from leap.common.check import leap_assert, leap_assert_type -- cgit v1.2.3 From 755d4a18ce80db745118edb9b2b27d359ed839d2 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 23 Oct 2013 10:24:39 -0300 Subject: pep8 fix: line too long. --- src/leap/mail/imap/server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/leap/mail/imap') diff --git a/src/leap/mail/imap/server.py b/src/leap/mail/imap/server.py index 6fc4db3..5a98315 100644 --- a/src/leap/mail/imap/server.py +++ b/src/leap/mail/imap/server.py @@ -179,7 +179,8 @@ class SoledadBackedAccount(WithMsgFields, IndexedDB): # messages TYPE_MBOX_SEEN_IDX: [KTYPE, MBOX_VAL, 'bool(seen)'], TYPE_MBOX_RECT_IDX: [KTYPE, MBOX_VAL, 'bool(recent)'], - TYPE_MBOX_RECT_SEEN_IDX: [KTYPE, MBOX_VAL, 'bool(recent)', 'bool(seen)'], + TYPE_MBOX_RECT_SEEN_IDX: [KTYPE, MBOX_VAL, + 'bool(recent)', 'bool(seen)'], } INBOX_NAME = "INBOX" -- cgit v1.2.3