From 884ca20097f5c14d4b00553522292f051e3c097c Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 22 Jan 2014 11:01:05 -0300 Subject: Add find_charset helper and use where is needed. --- mail/src/leap/mail/imap/messages.py | 13 +++++-------- mail/src/leap/mail/utils.py | 25 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/mail/src/leap/mail/imap/messages.py b/mail/src/leap/mail/imap/messages.py index cd4d85f1..862a9f21 100644 --- a/mail/src/leap/mail/imap/messages.py +++ b/mail/src/leap/mail/imap/messages.py @@ -38,7 +38,7 @@ from leap.common.check import leap_assert, leap_assert_type from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset from leap.mail import walk -from leap.mail.utils import first +from leap.mail.utils import first, find_charset from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields @@ -92,10 +92,7 @@ def try_unique_query(curried): except Exception as exc: logger.exception("Unhandled error %r" % exc) -CHARSET_PATTERN = r"""charset=([\w-]+)""" MSGID_PATTERN = r"""<([\w@.]+)>""" - -CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) MSGID_RE = re.compile(MSGID_PATTERN) @@ -177,9 +174,9 @@ class MessagePart(object): if payload: content_type = self._get_ctype_from_document(phash) - charset = first(CHARSET_RE.findall(content_type)) + charset = find_charset(content_type) logger.debug("Got charset from header: %s" % (charset,)) - if not charset: + if charset is None: charset = self._get_charset(payload) try: payload = payload.encode(charset) @@ -527,8 +524,8 @@ class LeapMessage(fields, MailParser, MBoxParser): if bdoc: body = self._bdoc.content.get(self.RAW_KEY, "") content_type = bdoc.content.get('content-type', "") - charset = first(CHARSET_RE.findall(content_type)) - if not charset: + charset = find_charset(content_type) + if charset is None: charset = 
self._get_charset(body) try: body = body.encode(charset) diff --git a/mail/src/leap/mail/utils.py b/mail/src/leap/mail/utils.py index 93388d31..6c79227f 100644 --- a/mail/src/leap/mail/utils.py +++ b/mail/src/leap/mail/utils.py @@ -18,9 +18,14 @@ Mail utilities. """ import json +import re import traceback +CHARSET_PATTERN = r"""charset=([\w-]+)""" +CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) + + def first(things): """ Return the head of a collection. @@ -31,6 +36,26 @@ def first(things): return None +def find_charset(thing, default=None): + """ + Looks into the object 'thing' for a charset specification. + It searches the object's `repr`. + + :param thing: the object to look into. + :type thing: object + :param default: the default charset to return if no charset is found. + :type default: str + + :returns: the charset or 'default' + :rtype: str or None + """ + charset = first(CHARSET_RE.findall(repr(thing))) + if charset is None: + charset = default + + return charset + + class CustomJsonScanner(object): """ This class is a context manager definition used to monkey patch the default -- cgit v1.2.3 From 3017b9665438ddaf2ece1dd3cdfe81f0f0965146 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 22 Jan 2014 11:03:58 -0300 Subject: Handle non-ascii headers. Closes #5021. 
--- mail/src/leap/mail/imap/messages.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/mail/src/leap/mail/imap/messages.py b/mail/src/leap/mail/imap/messages.py index 862a9f21..5bb5f1cc 100644 --- a/mail/src/leap/mail/imap/messages.py +++ b/mail/src/leap/mail/imap/messages.py @@ -605,18 +605,26 @@ class LeapMessage(fields, MailParser, MBoxParser): if isinstance(headers, list): headers = dict(headers) + # default to most likely standard + charset = find_charset(headers, "utf-8") + # twisted imap server expects *some* headers to be lowercase # XXX refactor together with MessagePart method - headers = dict( - (str(key), str(value)) if key.lower() != "content-type" - else (str(key.lower()), str(value)) - for (key, value) in headers.items()) + headers2 = dict() + for key, value in headers.items(): + if key.lower() == "content-type": + key = key.lower() - # unpack and filter original dict by negate-condition - filter_by_cond = [(key, val) for key, val - in headers.items() if cond(key)] + if not isinstance(key, str): + key = key.encode(charset, 'replace') + if not isinstance(value, str): + value = value.encode(charset, 'replace') + + # filter original dict by negate-condition + if cond(key): + headers2[key] = value - return dict(filter_by_cond) + return headers2 def _get_headers(self): """ -- cgit v1.2.3 From e3d0b4ad75063b2a2565268d5091a4ed613d6c3d Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 22 Jan 2014 11:05:15 -0300 Subject: Add changes file for #5021. --- mail/changes/bug-5021_handle-non-ascii-headers | 1 + 1 file changed, 1 insertion(+) create mode 100644 mail/changes/bug-5021_handle-non-ascii-headers diff --git a/mail/changes/bug-5021_handle-non-ascii-headers b/mail/changes/bug-5021_handle-non-ascii-headers new file mode 100644 index 00000000..098cfa02 --- /dev/null +++ b/mail/changes/bug-5021_handle-non-ascii-headers @@ -0,0 +1 @@ + o Handle non-ascii headers. Closes #5021. -- cgit v1.2.3