diff options
| author | Tomás Touceda <chiiph@leap.se> | 2014-01-22 11:20:40 -0300 | 
|---|---|---|
| committer | Tomás Touceda <chiiph@leap.se> | 2014-01-22 11:20:40 -0300 | 
| commit | e844ac826b5e97cea2dd7c4bf240574f2cc57eb6 (patch) | |
| tree | cab2ce2d9018ab2d3b911572f9d104b6e70a3148 /mail/src | |
| parent | 19b7d01f3022a9dd8553ce4b260f8987f0eb9858 (diff) | |
| parent | e3d0b4ad75063b2a2565268d5091a4ed613d6c3d (diff) | |
Merge remote-tracking branch 'refs/remotes/ivan/bug/5021_handle-non-ascii-headers' into develop
Diffstat (limited to 'mail/src')
| -rw-r--r-- | mail/src/leap/mail/imap/messages.py | 37 | ||||
| -rw-r--r-- | mail/src/leap/mail/utils.py | 25 | 
2 files changed, 46 insertions, 16 deletions
| diff --git a/mail/src/leap/mail/imap/messages.py b/mail/src/leap/mail/imap/messages.py index cd4d85f..5bb5f1c 100644 --- a/mail/src/leap/mail/imap/messages.py +++ b/mail/src/leap/mail/imap/messages.py @@ -38,7 +38,7 @@ from leap.common.check import leap_assert, leap_assert_type  from leap.common.decorators import memoized_method  from leap.common.mail import get_email_charset  from leap.mail import walk -from leap.mail.utils import first +from leap.mail.utils import first, find_charset  from leap.mail.decorators import deferred  from leap.mail.imap.index import IndexedDB  from leap.mail.imap.fields import fields, WithMsgFields @@ -92,10 +92,7 @@ def try_unique_query(curried):      except Exception as exc:          logger.exception("Unhandled error %r" % exc) -CHARSET_PATTERN = r"""charset=([\w-]+)"""  MSGID_PATTERN = r"""<([\w@.]+)>""" - -CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE)  MSGID_RE = re.compile(MSGID_PATTERN) @@ -177,9 +174,9 @@ class MessagePart(object):          if payload:              content_type = self._get_ctype_from_document(phash) -            charset = first(CHARSET_RE.findall(content_type)) +            charset = find_charset(content_type)              logger.debug("Got charset from header: %s" % (charset,)) -            if not charset: +            if charset is None:                  charset = self._get_charset(payload)              try:                  payload = payload.encode(charset) @@ -527,8 +524,8 @@ class LeapMessage(fields, MailParser, MBoxParser):          if bdoc:              body = self._bdoc.content.get(self.RAW_KEY, "")              content_type = bdoc.content.get('content-type', "") -            charset = first(CHARSET_RE.findall(content_type)) -            if not charset: +            charset = find_charset(content_type) +            if charset is None:                  charset = self._get_charset(body)              try:                  body = body.encode(charset) @@ -608,18 +605,26 @@ class LeapMessage(fields, 
MailParser, MBoxParser):          if isinstance(headers, list):              headers = dict(headers) +        # default to most likely standard +        charset = find_charset(headers, "utf-8") +          # twisted imap server expects *some* headers to be lowercase          # XXX refactor together with MessagePart method -        headers = dict( -            (str(key), str(value)) if key.lower() != "content-type" -            else (str(key.lower()), str(value)) -            for (key, value) in headers.items()) +        headers2 = dict() +        for key, value in headers.items(): +            if key.lower() == "content-type": +                key = key.lower() -        # unpack and filter original dict by negate-condition -        filter_by_cond = [(key, val) for key, val -                          in headers.items() if cond(key)] +            if not isinstance(key, str): +                key = key.encode(charset, 'replace') +            if not isinstance(value, str): +                value = value.encode(charset, 'replace') + +            # filter original dict by negate-condition +            if cond(key): +                headers2[key] = value -        return dict(filter_by_cond) +        return headers2      def _get_headers(self):          """ diff --git a/mail/src/leap/mail/utils.py b/mail/src/leap/mail/utils.py index 93388d3..6c79227 100644 --- a/mail/src/leap/mail/utils.py +++ b/mail/src/leap/mail/utils.py @@ -18,9 +18,14 @@  Mail utilities.  """  import json +import re  import traceback +CHARSET_PATTERN = r"""charset=([\w-]+)""" +CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) + +  def first(things):      """      Return the head of a collection. @@ -31,6 +36,26 @@ def first(things):          return None +def find_charset(thing, default=None): +    """ +    Looks into the object 'thing' for a charset specification. +    It searches the object's `repr`. + +    :param thing: the object to look into. 
+    :type thing: object +    :param default: the default charset to return if no charset is found. +    :type default: str + +    :returns: the charset or 'default' +    :rtype: str or None +    """ +    charset = first(CHARSET_RE.findall(repr(thing))) +    if charset is None: +        charset = default + +    return charset + +  class CustomJsonScanner(object):      """      This class is a context manager definition used to monkey patch the default | 
