summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tomás Touceda <chiiph@leap.se> 2013-10-23 11:36:32 -0300
committer Tomás Touceda <chiiph@leap.se> 2013-10-23 11:36:32 -0300
commit d57436f84abaa5d3a14ca1002d2de90176e9cb52 (patch)
tree 7be7768872cf52888a84a74493b62e4eea82d6c7
parent dd8b6212072bb8db499e12468d9905a5cf8ce630 (diff)
parent bea7ee293c720b8b632a3b5149e4550c6409fd20 (diff)
Merge remote-tracking branch 'ivan/bug/4000_support-non-ascii' into develop
-rw-r--r-- changes/bug-4000_support-non-ascii 1
-rw-r--r-- src/leap/mail/imap/fetch.py 21
-rw-r--r-- src/leap/mail/imap/server.py 52
-rw-r--r-- src/leap/mail/utils.py 44
4 files changed, 84 insertions, 34 deletions
diff --git a/changes/bug-4000_support-non-ascii b/changes/bug-4000_support-non-ascii
new file mode 100644
index 0000000..8f6712d
--- /dev/null
+++ b/changes/bug-4000_support-non-ascii
@@ -0,0 +1 @@
+ o Add support for non-ascii characters in emails. Closes #4000.
diff --git a/src/leap/mail/imap/fetch.py b/src/leap/mail/imap/fetch.py
index 0a71f53..a776ac7 100644
--- a/src/leap/mail/imap/fetch.py
+++ b/src/leap/mail/imap/fetch.py
@@ -40,6 +40,7 @@ from leap.common.events.events_pb2 import IMAP_MSG_DECRYPTED
from leap.common.events.events_pb2 import IMAP_MSG_SAVED_LOCALLY
from leap.common.events.events_pb2 import IMAP_MSG_DELETED_INCOMING
from leap.common.events.events_pb2 import IMAP_UNREAD_MAIL
+from leap.mail.utils import get_email_charset
logger = logging.getLogger(__name__)
@@ -296,12 +297,17 @@ class LeapIncomingMail(object):
Tries to decrypt a gpg message if data looks like one.
:param data: the text to be decrypted.
- :type data: str
+ :type data: unicode
:return: data, possibly descrypted.
:rtype: str
"""
+ leap_assert_type(data, unicode)
+
parser = Parser()
+ encoding = get_email_charset(data)
+ data = data.encode(encoding)
origmsg = parser.parsestr(data)
+
# handle multipart/encrypted messages
if origmsg.get_content_type() == 'multipart/encrypted':
# sanity check
@@ -320,13 +326,21 @@ class LeapIncomingMail(object):
"Multipart/encrypted messages' second body part should "
"have content type equal to 'octet-stream' (instead of "
"%s)." % payload[1].get_content_type())
+
# parse message and get encrypted content
pgpencmsg = origmsg.get_payload()[1]
encdata = pgpencmsg.get_payload()
+
# decrypt and parse decrypted message
decrdata = self._keymanager.decrypt(
encdata, self._pkey,
passphrase=self._soledad.passphrase)
+ try:
+ decrdata = decrdata.encode(encoding)
+ except (UnicodeEncodeError, UnicodeDecodeError) as e:
+ logger.error("Unicode error {0}".format(e))
+ decrdata = decrdata.encode(encoding, 'replace')
+
decrmsg = parser.parsestr(decrdata)
# replace headers back in original message
for hkey, hval in decrmsg.items():
@@ -335,6 +349,7 @@ class LeapIncomingMail(object):
origmsg.replace_header(hkey, hval)
except KeyError:
origmsg[hkey] = hval
+
# replace payload by unencrypted payload
origmsg.set_payload(decrmsg.get_payload())
return origmsg.as_string(unixfrom=False)
@@ -352,6 +367,10 @@ class LeapIncomingMail(object):
# replace encrypted by decrypted content
data = data.replace(pgp_message, decrdata)
# if message is not encrypted, return raw data
+
+ if isinstance(data, unicode):
+ data = data.encode(encoding, 'replace')
+
return data
def _add_message_locally(self, msgtuple):
diff --git a/src/leap/mail/imap/server.py b/src/leap/mail/imap/server.py
index df510ce..5a98315 100644
--- a/src/leap/mail/imap/server.py
+++ b/src/leap/mail/imap/server.py
@@ -18,9 +18,7 @@
Soledad-backed IMAP Server.
"""
import copy
-import email
import logging
-import re
import StringIO
import cStringIO
import time
@@ -33,14 +31,11 @@ from twisted.mail import imap4
from twisted.internet import defer
from twisted.python import log
-#from twisted import cred
-
-#import u1db
-
from leap.common import events as leap_events
from leap.common.events.events_pb2 import IMAP_UNREAD_MAIL
from leap.common.check import leap_assert, leap_assert_type
from leap.soledad.client import Soledad
+from leap.mail.utils import get_email_charset
logger = logging.getLogger(__name__)
@@ -184,7 +179,8 @@ class SoledadBackedAccount(WithMsgFields, IndexedDB):
# messages
TYPE_MBOX_SEEN_IDX: [KTYPE, MBOX_VAL, 'bool(seen)'],
TYPE_MBOX_RECT_IDX: [KTYPE, MBOX_VAL, 'bool(recent)'],
- TYPE_MBOX_RECT_SEEN_IDX: [KTYPE, MBOX_VAL, 'bool(recent)', 'bool(seen)'],
+ TYPE_MBOX_RECT_SEEN_IDX: [KTYPE, MBOX_VAL,
+ 'bool(recent)', 'bool(seen)'],
}
INBOX_NAME = "INBOX"
@@ -695,28 +691,6 @@ class LeapMessage(WithMsgFields):
the more complex MIME-based interface.
"""
- def _get_charset(self, content):
- """
- Mini parser to retrieve the charset of an email
-
- :param content: mail contents
- :type content: unicode
-
- :returns: the charset as parsed from the contents
- :rtype: str
- """
- charset = "UTF-8"
- try:
- em = email.message_from_string(content.encode("utf-8"))
- # Miniparser for: Content-Type: <something>; charset=<charset>
- charset_re = r'''charset=(?P<charset>[\w|\d|-]*)'''
- charset = re.findall(charset_re, em["Content-Type"])[0]
- if charset is None or len(charset) == 0:
- charset = "UTF-8"
- except Exception:
- pass
- return charset
-
def open(self):
"""
Return an file-like object opened for reading.
@@ -728,8 +702,14 @@ class LeapMessage(WithMsgFields):
:rtype: StringIO
"""
fd = cStringIO.StringIO()
- charset = self._get_charset(self._doc.content.get(self.RAW_KEY, ''))
- fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))
+ charset = get_email_charset(self._doc.content.get(self.RAW_KEY, ''))
+ content = self._doc.content.get(self.RAW_KEY, '')
+ try:
+ content = content.encode(charset)
+ except (UnicodeEncodeError, UnicodeDecodeError) as e:
+ logger.error("Unicode error {0}".format(e))
+ content = content.encode(charset, 'replace')
+ fd.write(content)
fd.seek(0)
return fd
@@ -748,8 +728,14 @@ class LeapMessage(WithMsgFields):
:rtype: StringIO
"""
fd = StringIO.StringIO()
- charset = self._get_charset(self._doc.content.get(self.RAW_KEY, ''))
- fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))
+ charset = get_email_charset(self._doc.content.get(self.RAW_KEY, ''))
+ content = self._doc.content.get(self.RAW_KEY, '')
+ try:
+ content = content.encode(charset)
+ except (UnicodeEncodeError, UnicodeDecodeError) as e:
+ logger.error("Unicode error {0}".format(e))
+ content = content.encode(charset, 'replace')
+ fd.write(content)
# SHOULD use a separate BODY FIELD ...
fd.seek(0)
return fd
diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py
new file mode 100644
index 0000000..22e16a7
--- /dev/null
+++ b/src/leap/mail/utils.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+# utils.py
+# Copyright (C) 2013 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Utility functions for email.
+"""
+import email
+import re
+
+
def get_email_charset(content):
    """
    Return the charset declared in the top-level Content-Type of an email.

    The headers are parsed with the standard library ``email`` package and
    the ``charset`` parameter of the Content-Type header is returned with
    its original spelling. Both bare (``charset=utf-8``) and quoted
    (``charset="utf-8"``) values are understood.

    ``"UTF-8"`` is returned whenever no usable charset can be determined:
    empty content, content that cannot be parsed, a missing Content-Type
    header, a missing or empty charset parameter, or a charset name for
    which Python has no codec. The returned value is therefore always safe
    to pass to ``encode()`` / ``decode()``.

    :param content: mail contents
    :type content: unicode

    :returns: the charset as parsed from the contents, or "UTF-8"
    :rtype: str
    """
    # Imported locally so that this function does not depend on names
    # beyond the module-level ``email`` import.
    import codecs
    from email.errors import MessageError

    default = "UTF-8"
    if not content:
        return default

    # ``email.message_from_string`` expects the interpreter's native ``str``
    # type: a byte string on Python 2, a text string on Python 3. Convert
    # the "other" string type instead of unconditionally calling encode(),
    # which fails for byte strings holding non-ASCII data.
    if not isinstance(content, str):
        try:
            if str is bytes:
                content = content.encode("utf-8")
            else:
                content = content.decode("utf-8", "replace")
        except (AttributeError, UnicodeError):
            # Not a string at all, or not convertible: best effort default.
            return default

    try:
        message = email.message_from_string(content)
        declared = message.get_param("charset")
    except (TypeError, ValueError, LookupError, MessageError):
        # Charset detection is best effort; a malformed message must not
        # break the caller.
        return default

    if isinstance(declared, tuple):
        # RFC 2231 encoded parameter: (charset, language, value).
        declared = declared[2]
    if not declared:
        return default

    declared = declared.strip()
    if not declared:
        return default

    # Only hand back names Python can actually use; callers pass the result
    # straight to ``encode()`` and do not expect a LookupError.
    try:
        codecs.lookup(declared)
    except (LookupError, TypeError, ValueError):
        return default
    return declared