From 884ca20097f5c14d4b00553522292f051e3c097c Mon Sep 17 00:00:00 2001
From: Ivan Alejandro <ivanalejandro0@gmail.com>
Date: Wed, 22 Jan 2014 11:01:05 -0300
Subject: Add find_charset helper and use where is needed.

---
 mail/src/leap/mail/imap/messages.py | 13 +++++--------
 mail/src/leap/mail/utils.py         | 25 +++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 8 deletions(-)

(limited to 'mail/src')

diff --git a/mail/src/leap/mail/imap/messages.py b/mail/src/leap/mail/imap/messages.py
index cd4d85f..862a9f2 100644
--- a/mail/src/leap/mail/imap/messages.py
+++ b/mail/src/leap/mail/imap/messages.py
@@ -38,7 +38,7 @@ from leap.common.check import leap_assert, leap_assert_type
 from leap.common.decorators import memoized_method
 from leap.common.mail import get_email_charset
 from leap.mail import walk
-from leap.mail.utils import first
+from leap.mail.utils import first, find_charset
 from leap.mail.decorators import deferred
 from leap.mail.imap.index import IndexedDB
 from leap.mail.imap.fields import fields, WithMsgFields
@@ -92,10 +92,7 @@ def try_unique_query(curried):
     except Exception as exc:
         logger.exception("Unhandled error %r" % exc)
 
-CHARSET_PATTERN = r"""charset=([\w-]+)"""
 MSGID_PATTERN = r"""<([\w@.]+)>"""
-
-CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE)
 MSGID_RE = re.compile(MSGID_PATTERN)
 
 
@@ -177,9 +174,9 @@ class MessagePart(object):
 
         if payload:
             content_type = self._get_ctype_from_document(phash)
-            charset = first(CHARSET_RE.findall(content_type))
+            charset = find_charset(content_type)
             logger.debug("Got charset from header: %s" % (charset,))
-            if not charset:
+            if charset is None:
                 charset = self._get_charset(payload)
             try:
                 payload = payload.encode(charset)
@@ -527,8 +524,8 @@ class LeapMessage(fields, MailParser, MBoxParser):
         if bdoc:
             body = self._bdoc.content.get(self.RAW_KEY, "")
             content_type = bdoc.content.get('content-type', "")
-            charset = first(CHARSET_RE.findall(content_type))
-            if not charset:
+            charset = find_charset(content_type)
+            if charset is None:
                 charset = self._get_charset(body)
             try:
                 body = body.encode(charset)
diff --git a/mail/src/leap/mail/utils.py b/mail/src/leap/mail/utils.py
index 93388d3..6c79227 100644
--- a/mail/src/leap/mail/utils.py
+++ b/mail/src/leap/mail/utils.py
@@ -18,9 +18,14 @@
 Mail utilities.
 """
 import json
+import re
 import traceback
 
 
+CHARSET_PATTERN = r"""charset=([\w-]+)"""
+CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE)
+
+
 def first(things):
     """
     Return the head of a collection.
@@ -31,6 +36,26 @@ def first(things):
         return None
 
 
+def find_charset(thing, default=None):
+    """
+    Looks into the object 'thing' for a charset specification.
+    It searches the object's `repr`.
+
+    :param thing: the object to look into.
+    :type thing: object
+    :param default: the default charset to return if no charset is found.
+    :type default: str
+
+    :returns: the charset or 'default'
+    :rtype: str or None
+    """
+    charset = first(CHARSET_RE.findall(repr(thing)))
+    if charset is None:
+        charset = default
+
+    return charset
+
+
 class CustomJsonScanner(object):
     """
     This class is a context manager definition used to monkey patch the default
-- 
cgit v1.2.3