summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mail/changes/bug-5021_handle-non-ascii-headers1
-rw-r--r--mail/src/leap/mail/imap/messages.py37
-rw-r--r--mail/src/leap/mail/utils.py25
3 files changed, 47 insertions, 16 deletions
diff --git a/mail/changes/bug-5021_handle-non-ascii-headers b/mail/changes/bug-5021_handle-non-ascii-headers
new file mode 100644
index 0000000..098cfa0
--- /dev/null
+++ b/mail/changes/bug-5021_handle-non-ascii-headers
@@ -0,0 +1 @@
+ o Handle non-ascii headers. Closes #5021.
diff --git a/mail/src/leap/mail/imap/messages.py b/mail/src/leap/mail/imap/messages.py
index cd4d85f..5bb5f1c 100644
--- a/mail/src/leap/mail/imap/messages.py
+++ b/mail/src/leap/mail/imap/messages.py
@@ -38,7 +38,7 @@ from leap.common.check import leap_assert, leap_assert_type
from leap.common.decorators import memoized_method
from leap.common.mail import get_email_charset
from leap.mail import walk
-from leap.mail.utils import first
+from leap.mail.utils import first, find_charset
from leap.mail.decorators import deferred
from leap.mail.imap.index import IndexedDB
from leap.mail.imap.fields import fields, WithMsgFields
@@ -92,10 +92,7 @@ def try_unique_query(curried):
except Exception as exc:
logger.exception("Unhandled error %r" % exc)
-CHARSET_PATTERN = r"""charset=([\w-]+)"""
MSGID_PATTERN = r"""<([\w@.]+)>"""
-
-CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE)
MSGID_RE = re.compile(MSGID_PATTERN)
@@ -177,9 +174,9 @@ class MessagePart(object):
if payload:
content_type = self._get_ctype_from_document(phash)
- charset = first(CHARSET_RE.findall(content_type))
+ charset = find_charset(content_type)
logger.debug("Got charset from header: %s" % (charset,))
- if not charset:
+ if charset is None:
charset = self._get_charset(payload)
try:
payload = payload.encode(charset)
@@ -527,8 +524,8 @@ class LeapMessage(fields, MailParser, MBoxParser):
if bdoc:
body = self._bdoc.content.get(self.RAW_KEY, "")
content_type = bdoc.content.get('content-type', "")
- charset = first(CHARSET_RE.findall(content_type))
- if not charset:
+ charset = find_charset(content_type)
+ if charset is None:
charset = self._get_charset(body)
try:
body = body.encode(charset)
@@ -608,18 +605,26 @@ class LeapMessage(fields, MailParser, MBoxParser):
if isinstance(headers, list):
headers = dict(headers)
+ # default to most likely standard
+ charset = find_charset(headers, "utf-8")
+
# twisted imap server expects *some* headers to be lowercase
# XXX refactor together with MessagePart method
- headers = dict(
- (str(key), str(value)) if key.lower() != "content-type"
- else (str(key.lower()), str(value))
- for (key, value) in headers.items())
+ headers2 = dict()
+ for key, value in headers.items():
+ if key.lower() == "content-type":
+ key = key.lower()
- # unpack and filter original dict by negate-condition
- filter_by_cond = [(key, val) for key, val
- in headers.items() if cond(key)]
+ if not isinstance(key, str):
+ key = key.encode(charset, 'replace')
+ if not isinstance(value, str):
+ value = value.encode(charset, 'replace')
+
+ # filter original dict by negate-condition
+ if cond(key):
+ headers2[key] = value
- return dict(filter_by_cond)
+ return headers2
def _get_headers(self):
"""
diff --git a/mail/src/leap/mail/utils.py b/mail/src/leap/mail/utils.py
index 93388d3..6c79227 100644
--- a/mail/src/leap/mail/utils.py
+++ b/mail/src/leap/mail/utils.py
@@ -18,9 +18,14 @@
Mail utilities.
"""
import json
+import re
import traceback
+CHARSET_PATTERN = r"""charset=([\w-]+)"""
+CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE)
+
+
def first(things):
"""
Return the head of a collection.
@@ -31,6 +36,26 @@ def first(things):
return None
+def find_charset(thing, default=None):
+ """
+ Looks into the object 'thing' for a charset specification.
+ It searches the object's `repr`.
+
+ :param thing: the object to look into.
+ :type thing: object
+ :param default: the default charset to return if no charset is found.
+ :type default: str
+
+ :returns: the charset or 'default'
+ :rtype: str or None
+ """
+ charset = first(CHARSET_RE.findall(repr(thing)))
+ if charset is None:
+ charset = default
+
+ return charset
+
+
class CustomJsonScanner(object):
"""
This class is a context manager definition used to monkey patch the default