summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIvan Alejandro <ivanalejandro0@gmail.com>2014-01-22 11:01:05 -0300
committerIvan Alejandro <ivanalejandro0@gmail.com>2014-01-22 11:01:05 -0300
commitc2e052a08789057d550a0442caa28b27ebc4b416 (patch)
tree31355e0b85c058b759fb3e261cd095ed9632d79c
parent92c161b7dc970530252662af4636c0ec3cdb8595 (diff)
Add find_charset helper and use it where needed.
-rw-r--r--src/leap/mail/imap/messages.py13
-rw-r--r--src/leap/mail/utils.py25
2 files changed, 30 insertions, 8 deletions
diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py
index cd4d85f..862a9f2 100644
--- a/src/leap/mail/imap/messages.py
+++ b/src/leap/mail/imap/messages.py
@@ -38,7 +38,7 @@ from leap.common.check import leap_assert, leap_assert_type
from leap.common.decorators import memoized_method
from leap.common.mail import get_email_charset
from leap.mail import walk
-from leap.mail.utils import first
+from leap.mail.utils import first, find_charset
from leap.mail.decorators import deferred
from leap.mail.imap.index import IndexedDB
from leap.mail.imap.fields import fields, WithMsgFields
@@ -92,10 +92,7 @@ def try_unique_query(curried):
except Exception as exc:
logger.exception("Unhandled error %r" % exc)
-CHARSET_PATTERN = r"""charset=([\w-]+)"""
MSGID_PATTERN = r"""<([\w@.]+)>"""
-
-CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE)
MSGID_RE = re.compile(MSGID_PATTERN)
@@ -177,9 +174,9 @@ class MessagePart(object):
if payload:
content_type = self._get_ctype_from_document(phash)
- charset = first(CHARSET_RE.findall(content_type))
+ charset = find_charset(content_type)
logger.debug("Got charset from header: %s" % (charset,))
- if not charset:
+ if charset is None:
charset = self._get_charset(payload)
try:
payload = payload.encode(charset)
@@ -527,8 +524,8 @@ class LeapMessage(fields, MailParser, MBoxParser):
if bdoc:
body = self._bdoc.content.get(self.RAW_KEY, "")
content_type = bdoc.content.get('content-type', "")
- charset = first(CHARSET_RE.findall(content_type))
- if not charset:
+ charset = find_charset(content_type)
+ if charset is None:
charset = self._get_charset(body)
try:
body = body.encode(charset)
diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py
index 93388d3..6c79227 100644
--- a/src/leap/mail/utils.py
+++ b/src/leap/mail/utils.py
@@ -18,9 +18,14 @@
Mail utilities.
"""
import json
+import re
import traceback
+CHARSET_PATTERN = r"""charset=([\w-]+)"""
+CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE)
+
+
def first(things):
"""
Return the head of a collection.
@@ -31,6 +36,26 @@ def first(things):
return None
+def find_charset(thing, default=None):
+ """
+ Looks into the object 'thing' for a charset specification.
+ It searches the object's `repr`.
+
+ :param thing: the object to look into.
+ :type thing: object
+ :param default: the default charset to return if no charset is found.
+ :type default: str
+
+ :returns: the charset or 'default'
+ :rtype: str or None
+ """
+ charset = first(CHARSET_RE.findall(repr(thing)))
+ if charset is None:
+ charset = default
+
+ return charset
+
+
class CustomJsonScanner(object):
"""
This class is a context manager definition used to monkey patch the default