re-add support for basic multipart messages

author: Kali Kaneko <kali@leap.se> 2015-01-22 03:16:32 -0400
committer: Kali Kaneko <kali@leap.se> 2015-02-11 14:05:43 -0400
commit: 4c02942472be00486d04d854e987d6eda8c7df12 (patch)
tree: b4a7d95c3cadcccfdf4cee4eae9134b1e613e833 /src/leap/mail
parent: f8c07f66bfcd97e3ae7085071c1f1efbd80e0286 (diff)
4 files changed, 147 insertions, 113 deletions
diff --git a/src/leap/mail/adaptors/soledad.py b/src/leap/mail/adaptors/soledad.py
index 9f0bb30..d21638c 100644
--- a/src/leap/mail/adaptors/soledad.py
+++ b/src/leap/mail/adaptors/soledad.py
@@ -463,8 +463,9 @@ class MessageWrapper(object):
             cdocs = {}
         cdocs_keys = cdocs.keys()
         assert sorted(cdocs_keys) == range(1, len(cdocs_keys) + 1)
-        self.cdocs = dict([(key, ContentDocWrapper(**doc)) for (key, doc) in
-                           cdocs.items()])
+        self.cdocs = dict([
+            (key, ContentDocWrapper(**doc.content))
+            for (key, doc) in cdocs.items()])
         for doc_id, cdoc in zip(self.mdoc.cdocs, self.cdocs.values()):
             cdoc.set_future_doc_id(doc_id)
 
@@ -560,6 +561,9 @@ class MessageWrapper(object):
         """
         return self.hdoc.part_map[str(index)]
 
+    def get_subpart_indexes(self):
+        return self.hdoc.part_map.keys()
+
     def get_body(self, store):
         """
         :rtype: deferred
diff --git a/src/leap/mail/imap/mailbox.py b/src/leap/mail/imap/mailbox.py
index 52f4dd5..045636e 100644
--- a/src/leap/mail/imap/mailbox.py
+++ b/src/leap/mail/imap/mailbox.py
@@ -540,7 +540,11 @@ class IMAPMailbox(object):
 
             d_msg = []
             for msgid in msg_sequence:
-                d_msg.append(getmsg(msgid))
+                # XXX We want cdocs because we "probably" are asked for the
+                # body. We should be smarter at do_FETCH and pass a parameter
+                # to this method in order not to prefetch cdocs if they're not
+                # going to be used.
+                d_msg.append(getmsg(msgid, get_cdocs=True))
 
             d = defer.gatherResults(d_msg)
             d.addCallback(_get_imap_msg)
diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py
index 8f4c953..b7bb6ee 100644
--- a/src/leap/mail/imap/messages.py
+++ b/src/leap/mail/imap/messages.py
@@ -115,13 +115,6 @@ class IMAPMessage(object):
     #
     # IMessagePart
     #
-    def __prefetch_body_file(self):
-        def assign_body_fd(fd):
-            self.__body_fd = fd
-            return fd
-        d = self.getBodyFile()
-        d.addCallback(assign_body_fd)
-        return d
 
     def getBodyFile(self, store=None):
         """
@@ -139,25 +132,6 @@ class IMAPMessage(object):
             store = self.store
         return self.message.get_body_file(store)
 
-        # TODO refactor with getBodyFile in MessagePart
-
-        #body = bdoc_content.get(self.RAW_KEY, "")
-        #content_type = bdoc_content.get('content-type', "")
-        #charset = find_charset(content_type)
-        #if charset is None:
-            #charset = self._get_charset(body)
-        #try:
-            #if isinstance(body, unicode):
-                #body = body.encode(charset)
-        #except UnicodeError as exc:
-            #logger.error(
-                #"Unicode error, using 'replace'. {0!r}".format(exc))
-            #logger.debug("Attempted to encode with: %s" % charset)
-            #body = body.encode(charset, 'replace')
-        #finally:
-            #return write_fd(body)
-
-
     def getSize(self):
         """
         Return the total size, in octets, of this message.
@@ -182,48 +156,8 @@ class IMAPMessage(object):
         :return: A mapping of header field names to header field values
         :rtype: dict
         """
-        # TODO split in smaller methods -- format_headers()?
-        # XXX refactor together with MessagePart method
-
         headers = self.message.get_headers()
-
-        # XXX keep this in the imap imessage implementation,
-        # because the server impl. expects content-type to be present.
-        if not headers:
-            logger.warning("No headers found")
-            return {str('content-type'): str('')}
-
-        names = map(lambda s: s.upper(), names)
-        if negate:
-            cond = lambda key: key.upper() not in names
-        else:
-            cond = lambda key: key.upper() in names
-
-        if isinstance(headers, list):
-            headers = dict(headers)
-
-        # default to most likely standard
-        charset = find_charset(headers, "utf-8")
-        headers2 = dict()
-        for key, value in headers.items():
-            # twisted imap server expects *some* headers to be lowercase
-            # We could use a CaseInsensitiveDict here...
-            if key.lower() == "content-type":
-                key = key.lower()
-
-            if not isinstance(key, str):
-                key = key.encode(charset, 'replace')
-            if not isinstance(value, str):
-                value = value.encode(charset, 'replace')
-
-            if value.endswith(";"):
-                # bastards
-                value = value[:-1]
-
-            # filter original dict by negate-condition
-            if cond(key):
-                headers2[key] = value
-        return headers2
+        return _format_headers(headers, negate, *names)
 
     def isMultipart(self):
         """
@@ -242,7 +176,81 @@ class IMAPMessage(object):
         :rtype: Any object implementing C{IMessagePart}.
         :return: The specified sub-part.
         """
-        return self.message.get_subpart(part)
+        subpart = self.message.get_subpart(part)
+        return IMAPMessagePart(subpart)
+
+    def __prefetch_body_file(self):
+        def assign_body_fd(fd):
+            self.__body_fd = fd
+            return fd
+        d = self.getBodyFile()
+        d.addCallback(assign_body_fd)
+        return d
+
+
+class IMAPMessagePart(object):
+
+    def __init__(self, message_part):
+        self.message_part = message_part
+
+    def getBodyFile(self, store=None):
+        return self.message_part.get_body_file()
+
+    def getSize(self):
+        return self.message_part.get_size()
+
+    def getHeaders(self, negate, *names):
+        headers = self.message_part.get_headers()
+        return _format_headers(headers, negate, *names)
+
+    def isMultipart(self):
+        return self.message_part.is_multipart()
+
+    def getSubPart(self, part):
+        subpart = self.message_part.get_subpart(part)
+        return IMAPMessagePart(subpart)
+
+
+def _format_headers(headers, negate, *names):
+    # current server impl. expects content-type to be present, so if for
+    # some reason we do not have headers, we have to return at least that
+    # one
+    if not headers:
+        logger.warning("No headers found")
+        return {str('content-type'): str('')}
+
+    names = map(lambda s: s.upper(), names)
+    if negate:
+        cond = lambda key: key.upper() not in names
+    else:
+        cond = lambda key: key.upper() in names
+
+    if isinstance(headers, list):
+        headers = dict(headers)
+
+    # default to most likely standard
+    charset = find_charset(headers, "utf-8")
+
+    _headers = dict()
+    for key, value in headers.items():
+        # twisted imap server expects *some* headers to be lowercase
+        # We could use a CaseInsensitiveDict here...
+        if key.lower() == "content-type":
+            key = key.lower()
+
+        if not isinstance(key, str):
+            key = key.encode(charset, 'replace')
+        if not isinstance(value, str):
+            value = value.encode(charset, 'replace')
+
+        if value.endswith(";"):
+            # bastards
+            value = value[:-1]
+
+        # filter original dict by negate-condition
+        if cond(key):
+            _headers[key] = value
+    return _headers
 
 
 class IMAPMessageCollection(object):
diff --git a/src/leap/mail/mail.py b/src/leap/mail/mail.py
index 59fd57c..aa499c0 100644
--- a/src/leap/mail/mail.py
+++ b/src/leap/mail/mail.py
@@ -22,6 +22,7 @@ import logging
 import StringIO
 
 from twisted.internet import defer
+from twisted.python import log
 
 from leap.common.check import leap_assert_type
 from leap.common.mail import get_email_charset
@@ -30,7 +31,7 @@ from leap.mail.adaptors.soledad import SoledadMailAdaptor
 from leap.mail.constants import INBOX_NAME
 from leap.mail.constants import MessageFlags
 from leap.mail.mailbox_indexer import MailboxIndexer
-from leap.mail.utils import empty, find_charset
+from leap.mail.utils import empty  # find_charset
 
 logger = logging.getLogger(name=__name__)
 
@@ -57,61 +58,57 @@ def _write_and_rewind(payload):
 
 
 class MessagePart(object):
+    # TODO This class should be better abstracted from the data model.
+    # TODO support arbitrarily nested multiparts (right now we only support
+    #      the trivial case)
 
-    def __init__(self, part_map, cdocs={}):
+    def __init__(self, part_map, index=1, cdocs={}):
         """
         :param part_map: a dictionary mapping the subparts for
                          this MessagePart (1-indexed).
         :type part_map: dict
-        :param cdoc: optional, a dict of content documents
+
+        The format for the part_map is as follows:
+
+        {u'1': {u'ctype': u'text/plain',
+        u'headers': [[u'Content-Type', u'text/plain; charset="utf-8"'],
+                     [u'Content-Transfer-Encoding', u'8bit']],
+        u'multi': False,
+        u'parts': 1,
+        u'phash': u'02D82B29F6BB0C8612D1C',
+        u'size': 132}}
+
+        :param index: the index, within the content-docs dict, of the
+                      content document that this subpart represents.
+        :param cdocs: optional, a reference to the top-level dict of wrappers
+                      for content-docs (1-indexed).
         """
-        # TODO document the expected keys in the part_map dict.
-        # TODO add abstraction layer between the cdocs and this class. Only
-        # adaptor should know about the format of the cdocs.
+        # TODO: Pass only the cdoc wrapper for this part.
         self._pmap = part_map
+        self._index = index
         self._cdocs = cdocs
 
     def get_size(self):
         return self._pmap['size']
 
     def get_body_file(self):
+        payload = ""
         pmap = self._pmap
         multi = pmap.get('multi')
         if not multi:
-            phash = pmap.get("phash")
+            payload = self._get_payload(self._index)
         else:
-            pmap_ = pmap.get('part_map')
-            first_part = pmap_.get('1', None)
-            if not empty(first_part):
-                phash = first_part['phash']
-            else:
-                phash = ""
-
-        payload = self._get_payload(phash)
-
+            # XXX multipart parts are not handled here yet; should recurse
+            raise NotImplementedError
         if payload:
-            # FIXME
-            # content_type = self._get_ctype_from_document(phash)
-            # charset = find_charset(content_type)
-            charset = None
-            if charset is None:
-                charset = get_email_charset(payload)
-            try:
-                if isinstance(payload, unicode):
-                    payload = payload.encode(charset)
-            except UnicodeError as exc:
-                logger.error(
-                    "Unicode error, using 'replace'. {0!r}".format(exc))
-                payload = payload.encode(charset, 'replace')
-
+            payload = self._format_payload(payload)
         return _write_and_rewind(payload)
 
     def get_headers(self):
         return self._pmap.get("headers", [])
 
     def is_multipart(self):
-        multi = self._pmap.get("multi", False)
-        return multi
+        return self._pmap.get("multi", False)
 
     def get_subpart(self, part):
         if not self.is_multipart():
@@ -123,10 +120,30 @@ class MessagePart(object):
         except KeyError:
             logger.debug("getSubpart for %s: KeyError" % (part,))
             raise IndexError
-        return MessagePart(self._soledad, part_map)
-
-    def _get_payload(self, phash):
-        return self._cdocs.get(phash, "")
+        return MessagePart(part_map, cdocs={1: self._cdocs.get(1, {})})
+
+    def _get_payload(self, index):
+        cdoc_wrapper = self._cdocs.get(index, None)
+        if cdoc_wrapper:
+            return cdoc_wrapper.raw
+        return ""
+
+    def _format_payload(self, payload):
+        # FIXME -----------------------------------------------
+        # Test against unicode payloads...
+        # content_type = self._get_ctype_from_document(phash)
+        # charset = find_charset(content_type)
+        charset = None
+        if charset is None:
+            charset = get_email_charset(payload)
+        try:
+            if isinstance(payload, unicode):
+                payload = payload.encode(charset)
+        except UnicodeError as exc:
+            logger.error(
+                "Unicode error, using 'replace'. {0!r}".format(exc))
+            payload = payload.encode(charset, 'replace')
+        return payload
 
 
 class Message(object):
@@ -224,17 +241,18 @@ class Message(object):
             raise TypeError
         part_index = part + 1
         try:
-            subpart_dict = self._wrapper.get_subpart_dict(
-                part_index)
+            subpart_dict = self._wrapper.get_subpart_dict(part_index)
         except KeyError:
-            raise TypeError
-        # XXX pass cdocs
-        return MessagePart(subpart_dict)
+            raise IndexError
+
+        return MessagePart(
+            subpart_dict, index=part_index, cdocs=self._wrapper.cdocs)
 
     # Custom methods.
 
     def get_tags(self):
         """
+        Get the tags for this message.
         """
         return tuple(self._wrapper.fdoc.tags)
 
@@ -290,7 +308,7 @@ class MessageCollection(object):
         self.adaptor = adaptor
         self.store = store
 
-        # XXX I have to think about what to do when there is no mbox passed to
+        # XXX think about what to do when there is no mbox passed to
         # the initialization. We could still get the MetaMsg by index, instead
         # of by doc_id. See get_message_by_content_hash
         self.mbox_indexer = mbox_indexer
author	Kali Kaneko <kali@leap.se>	2015-01-22 03:16:32 -0400
committer	Kali Kaneko <kali@leap.se>	2015-02-11 14:05:43 -0400
commit	4c02942472be00486d04d854e987d6eda8c7df12 (patch)
tree	b4a7d95c3cadcccfdf4cee4eae9134b1e613e833 /src/leap/mail
parent	f8c07f66bfcd97e3ae7085071c1f1efbd80e0286 (diff)