1 files changed, 586 insertions, 0 deletions
diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py
new file mode 100644
index 0000000..257721c
--- /dev/null
+++ b/src/leap/mail/imap/messageparts.py
@@ -0,0 +1,586 @@
+# messageparts.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+MessagePart implementation. Used from LeapMessage.
+"""
+import logging
+import StringIO
+import weakref
+
+from collections import namedtuple
+
+from enum import Enum
+from zope.interface import implements
+from twisted.mail import imap4
+
+from leap.common.decorators import memoized_method
+from leap.common.mail import get_email_charset
+from leap.mail.imap import interfaces
+from leap.mail.imap.fields import fields
+from leap.mail.utils import empty, first, find_charset
+
+MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id")
+
+
+logger = logging.getLogger(__name__)
+
+
+"""
+A MessagePartDoc is a light wrapper around the dictionary-like
+data that we pass along for message parts. It can be used almost everywhere
+that you would expect a SoledadDocument, since it has a dict under the
+`content` attribute.
+
+We also keep some metadata on it, relative in part to the message as a whole,
+and sometimes to a part in particular only.
+
+* `new` indicates that the document has just been created. SoledadStore
+  should just create a new doc for all the related message parts.
+* `store` indicates the type of store a given MessagePartDoc lives in.
+  We currently use this to indicate that  the document comes from memeory,
+  but we should probably get rid of it as soon as we extend the use of the
+  SoledadStore interface along LeapMessage, MessageCollection and Mailbox.
+* `part` is one of the MessagePartType enums.
+
+* `dirty` indicates that, while we already have the document in Soledad,
+  we have modified its state in memory, so we need to put_doc instead while
+  dumping the MemoryStore contents.
+  `dirty` attribute would only apply to flags-docs and linkage-docs.
+* `doc_id` is the identifier for the document in the u1db database, if any.
+
+"""
+
+MessagePartDoc = namedtuple(
+    'MessagePartDoc',
+    ['new', 'dirty', 'part', 'store', 'content', 'doc_id'])
+
+"""
+A RecentFlagsDoc is used to send the recent-flags document payload to the
+SoledadWriter during dumps.
+"""
+RecentFlagsDoc = namedtuple(
+    'RecentFlagsDoc',
+    ['content', 'doc_id'])
+
+
+class ReferenciableDict(dict):
+    """
+    A dict that can be weak-referenced.
+
+    Some builtin objects are not weak-referenciable unless
+    subclassed. So we do.
+
+    Used to return pointers to the items in the MemoryStore.
+    """
+
+
+class MessageWrapper(object):
+    """
+    A simple nested dictionary container around the different message subparts.
+    """
+    implements(interfaces.IMessageContainer)
+
+    FDOC = "fdoc"
+    HDOC = "hdoc"
+    CDOCS = "cdocs"
+    DOCS_ID = "docs_id"
+
+    # Using slots to limit some the memory use,
+    # Add your attribute here.
+
+    __slots__ = ["_dict", "_new", "_dirty", "_storetype", "memstore"]
+
+    def __init__(self, fdoc=None, hdoc=None, cdocs=None,
+                 from_dict=None, memstore=None,
+                 new=True, dirty=False, docs_id={}):
+        """
+        Initialize a MessageWrapper.
+        """
+        # TODO add optional reference to original message in the incoming
+        self._dict = {}
+        self.memstore = memstore
+
+        self._new = new
+        self._dirty = dirty
+
+        self._storetype = "mem"
+
+        if from_dict is not None:
+            self.from_dict(from_dict)
+        else:
+            if fdoc is not None:
+                self._dict[self.FDOC] = ReferenciableDict(fdoc)
+            if hdoc is not None:
+                self._dict[self.HDOC] = ReferenciableDict(hdoc)
+            if cdocs is not None:
+                self._dict[self.CDOCS] = ReferenciableDict(cdocs)
+
+        # This will keep references to the doc_ids to be able to put
+        # messages to soledad. It will be populated during the walk() to avoid
+        # the overhead of reading from the db.
+
+        # XXX it really *only* make sense for the FDOC, the other parts
+        # should not be "dirty", just new...!!!
+        self._dict[self.DOCS_ID] = docs_id
+
+    # properties
+
+    # TODO Could refactor new and dirty properties together.
+
+    def _get_new(self):
+        """
+        Get the value for the `new` flag.
+
+        :rtype: bool
+        """
+        return self._new
+
+    def _set_new(self, value=False):
+        """
+        Set the value for the `new` flag, and propagate it
+        to the memory store if any.
+
+        :param value: the value to set
+        :type value: bool
+        """
+        self._new = value
+        if self.memstore:
+            mbox = self.fdoc.content.get('mbox', None)
+            uid = self.fdoc.content.get('uid', None)
+            if not mbox or not uid:
+                logger.warning("Malformed fdoc")
+                return
+            key = mbox, uid
+            fun = [self.memstore.unset_new_queued,
+                   self.memstore.set_new_queued][int(value)]
+            fun(key)
+        else:
+            logger.warning("Could not find a memstore referenced from this "
+                           "MessageWrapper. The value for new will not be "
+                           "propagated")
+
+    new = property(_get_new, _set_new,
+                   doc="The `new` flag for this MessageWrapper")
+
+    def _get_dirty(self):
+        """
+        Get the value for the `dirty` flag.
+
+        :rtype: bool
+        """
+        return self._dirty
+
+    def _set_dirty(self, value=True):
+        """
+        Set the value for the `dirty` flag, and propagate it
+        to the memory store if any.
+
+        :param value: the value to set
+        :type value: bool
+        """
+        self._dirty = value
+        if self.memstore:
+            mbox = self.fdoc.content.get('mbox', None)
+            uid = self.fdoc.content.get('uid', None)
+            if not mbox or not uid:
+                logger.warning("Malformed fdoc")
+                return
+            key = mbox, uid
+            fun = [self.memstore.unset_dirty_queued,
+                   self.memstore.set_dirty_queued][int(value)]
+            fun(key)
+        else:
+            logger.warning("Could not find a memstore referenced from this "
+                           "MessageWrapper. The value for new will not be "
+                           "propagated")
+
+    dirty = property(_get_dirty, _set_dirty)
+
+    # IMessageContainer
+
+    @property
+    def fdoc(self):
+        """
+        Return a MessagePartDoc wrapping around a weak reference to
+        the flags-document in this MemoryStore, if any.
+
+        :rtype: MessagePartDoc
+        """
+        _fdoc = self._dict.get(self.FDOC, None)
+        if _fdoc:
+            content_ref = weakref.proxy(_fdoc)
+        else:
+            logger.warning("NO FDOC!!!")
+            content_ref = {}
+
+        return MessagePartDoc(new=self.new, dirty=self.dirty,
+                              store=self._storetype,
+                              part=MessagePartType.fdoc,
+                              content=content_ref,
+                              doc_id=self._dict[self.DOCS_ID].get(
+                                  self.FDOC, None))
+
+    @property
+    def hdoc(self):
+        """
+        Return a MessagePartDoc wrapping around a weak reference to
+        the headers-document in this MemoryStore, if any.
+
+        :rtype: MessagePartDoc
+        """
+        _hdoc = self._dict.get(self.HDOC, None)
+        if _hdoc:
+            content_ref = weakref.proxy(_hdoc)
+        else:
+            content_ref = {}
+        return MessagePartDoc(new=self.new, dirty=self.dirty,
+                              store=self._storetype,
+                              part=MessagePartType.hdoc,
+                              content=content_ref,
+                              doc_id=self._dict[self.DOCS_ID].get(
+                                  self.HDOC, None))
+
+    @property
+    def cdocs(self):
+        """
+        Return a weak reference to a zero-indexed dict containing
+        the content-documents, or an empty dict if none found.
+        If you want access to the MessagePartDoc for the individual
+        parts, use the generator returned by `walk` instead.
+
+        :rtype: dict
+        """
+        _cdocs = self._dict.get(self.CDOCS, None)
+        if _cdocs:
+            return weakref.proxy(_cdocs)
+        else:
+            return {}
+
+    def walk(self):
+        """
+        Generator that iterates through all the parts, returning
+        MessagePartDoc. Used for writing to SoledadStore.
+
+        :rtype: generator
+        """
+        if self._dirty:
+            try:
+                mbox = self.fdoc.content[fields.MBOX_KEY]
+                uid = self.fdoc.content[fields.UID_KEY]
+                docid_dict = self._dict[self.DOCS_ID]
+                docid_dict[self.FDOC] = self.memstore.get_docid_for_fdoc(
+                    mbox, uid)
+            except Exception as exc:
+                logger.debug("Error while walking message...")
+                logger.exception(exc)
+
+        if not empty(self.fdoc.content) and 'uid' in self.fdoc.content:
+            yield self.fdoc
+        if not empty(self.hdoc.content):
+            yield self.hdoc
+        for cdoc in self.cdocs.values():
+            if not empty(cdoc):
+                content_ref = weakref.proxy(cdoc)
+                yield MessagePartDoc(new=self.new, dirty=self.dirty,
+                                     store=self._storetype,
+                                     part=MessagePartType.cdoc,
+                                     content=content_ref,
+                                     doc_id=None)
+
+    # i/o
+
+    def as_dict(self):
+        """
+        Return a dict representation of the parts contained.
+
+        :rtype: dict
+        """
+        return self._dict
+
+    def from_dict(self, msg_dict):
+        """
+        Populate MessageWrapper parts from a dictionary.
+        It expects the same format that we use in a
+        MessageWrapper.
+
+
+        :param msg_dict: a dictionary containing the parts to populate
+                         the MessageWrapper from
+        :type msg_dict: dict
+        """
+        fdoc, hdoc, cdocs = map(
+            lambda part: msg_dict.get(part, None),
+            [self.FDOC, self.HDOC, self.CDOCS])
+
+        for t, doc in ((self.FDOC, fdoc), (self.HDOC, hdoc),
+                       (self.CDOCS, cdocs)):
+            self._dict[t] = ReferenciableDict(doc) if doc else None
+
+
+class MessagePart(object):
+    """
+    IMessagePart implementor, to be passed to several methods
+    of the IMAP4Server.
+    It takes a subpart message and is able to find
+    the inner parts.
+
+    See the interface documentation.
+    """
+
+    implements(imap4.IMessagePart)
+
+    def __init__(self, soledad, part_map):
+        """
+        Initializes the MessagePart.
+
+        :param soledad: Soledad instance.
+        :type soledad: Soledad
+        :param part_map: a dictionary containing the parts map for this
+                         message
+        :type part_map: dict
+        """
+        # TODO
+        # It would be good to pass the uid/mailbox also
+        # for references while debugging.
+
+        # We have a problem on bulk moves, and is
+        # that when the fetch on the new mailbox is done
+        # the parts maybe are not complete.
+        # So we should be able to fail with empty
+        # docs until we solve that. The ideal would be
+        # to gather the results of the deferred operations
+        # to signal the operation is complete.
+        #leap_assert(part_map, "part map dict cannot be null")
+
+        self._soledad = soledad
+        self._pmap = part_map
+
+    def getSize(self):
+        """
+        Return the total size, in octets, of this message part.
+
+        :return: size of the message, in octets
+        :rtype: int
+        """
+        if empty(self._pmap):
+            return 0
+        size = self._pmap.get('size', None)
+        if size is None:
+            logger.error("Message part cannot find size in the partmap")
+            size = 0
+        return size
+
+    def getBodyFile(self):
+        """
+        Retrieve a file object containing only the body of this message.
+
+        :return: file-like object opened for reading
+        :rtype: StringIO
+        """
+        fd = StringIO.StringIO()
+        if not empty(self._pmap):
+            multi = self._pmap.get('multi')
+            if not multi:
+                phash = self._pmap.get("phash", None)
+            else:
+                pmap = self._pmap.get('part_map')
+                first_part = pmap.get('1', None)
+                if not empty(first_part):
+                    phash = first_part['phash']
+                else:
+                    phash = None
+
+            if phash is None:
+                logger.warning("Could not find phash for this subpart!")
+                payload = ""
+            else:
+                payload = self._get_payload_from_document_memoized(phash)
+                if empty(payload):
+                    payload = self._get_payload_from_document(phash)
+
+        else:
+            logger.warning("Message with no part_map!")
+            payload = ""
+
+        if payload:
+            content_type = self._get_ctype_from_document(phash)
+            charset = find_charset(content_type)
+            if charset is None:
+                charset = self._get_charset(payload)
+            try:
+                if isinstance(payload, unicode):
+                    payload = payload.encode(charset)
+            except UnicodeError as exc:
+                logger.error(
+                    "Unicode error, using 'replace'. {0!r}".format(exc))
+                payload = payload.encode(charset, 'replace')
+
+        fd.write(payload)
+        fd.seek(0)
+        return fd
+
+    # TODO should memory-bound this memoize!!!
+    @memoized_method
+    def _get_payload_from_document_memoized(self, phash):
+        """
+        Memoized method call around the regular method, to be able
+        to call the non-memoized method in case we got a None.
+
+        :param phash: the payload hash to retrieve by.
+        :type phash: str or unicode
+        :rtype: str or unicode or None
+        """
+        return self._get_payload_from_document(phash)
+
+    def _get_payload_from_document(self, phash):
+        """
+        Return the message payload from the content document.
+
+        :param phash: the payload hash to retrieve by.
+        :type phash: str or unicode
+        :rtype: str or unicode or None
+        """
+        cdocs = self._soledad.get_from_index(
+            fields.TYPE_P_HASH_IDX,
+            fields.TYPE_CONTENT_VAL, str(phash))
+
+        cdoc = first(cdocs)
+        if cdoc is None:
+            logger.warning(
+                "Could not find the content doc "
+                "for phash %s" % (phash,))
+            payload = ""
+        else:
+            payload = cdoc.content.get(fields.RAW_KEY, "")
+        return payload
+
+    # TODO should memory-bound this memoize!!!
+    @memoized_method
+    def _get_ctype_from_document(self, phash):
+        """
+        Reeturn the content-type from the content document.
+
+        :param phash: the payload hash to retrieve by.
+        :type phash: str or unicode
+        :rtype: str or unicode
+        """
+        cdocs = self._soledad.get_from_index(
+            fields.TYPE_P_HASH_IDX,
+            fields.TYPE_CONTENT_VAL, str(phash))
+
+        cdoc = first(cdocs)
+        if not cdoc:
+            logger.warning(
+                "Could not find the content doc "
+                "for phash %s" % (phash,))
+        ctype = cdoc.content.get('ctype', "")
+        return ctype
+
+    @memoized_method
+    def _get_charset(self, stuff):
+        # TODO put in a common class with LeapMessage
+        """
+        Gets (guesses?) the charset of a payload.
+
+        :param stuff: the stuff to guess about.
+        :type stuff: str or unicode
+        :return: charset
+        :rtype: unicode
+        """
+        # XXX existential doubt 2. shouldn't we make the scope
+        # of the decorator somewhat more persistent?
+        # ah! yes! and put memory bounds.
+        return get_email_charset(stuff)
+
+    def getHeaders(self, negate, *names):
+        """
+        Retrieve a group of message headers.
+
+        :param names: The names of the headers to retrieve or omit.
+        :type names: tuple of str
+
+        :param negate: If True, indicates that the headers listed in names
+                       should be omitted from the return value, rather
+                       than included.
+        :type negate: bool
+
+        :return: A mapping of header field names to header field values
+        :rtype: dict
+        """
+        # XXX refactor together with MessagePart method
+        if not self._pmap:
+            logger.warning("No pmap in Subpart!")
+            return {}
+        headers = dict(self._pmap.get("headers", []))
+
+        names = map(lambda s: s.upper(), names)
+        if negate:
+            cond = lambda key: key.upper() not in names
+        else:
+            cond = lambda key: key.upper() in names
+
+        # default to most likely standard
+        charset = find_charset(headers, "utf-8")
+        headers2 = dict()
+        for key, value in headers.items():
+            # twisted imap server expects *some* headers to be lowercase
+            # We could use a CaseInsensitiveDict here...
+            if key.lower() == "content-type":
+                key = key.lower()
+
+            if not isinstance(key, str):
+                key = key.encode(charset, 'replace')
+            if not isinstance(value, str):
+                value = value.encode(charset, 'replace')
+
+            # filter original dict by negate-condition
+            if cond(key):
+                headers2[key] = value
+        return headers2
+
+    def isMultipart(self):
+        """
+        Return True if this message is multipart.
+        """
+        if empty(self._pmap):
+            logger.warning("Could not get part map!")
+            return False
+        multi = self._pmap.get("multi", False)
+        return multi
+
+    def getSubPart(self, part):
+        """
+        Retrieve a MIME submessage
+
+        :type part: C{int}
+        :param part: The number of the part to retrieve, indexed from 0.
+        :raise IndexError: Raised if the specified part does not exist.
+        :raise TypeError: Raised if this message is not multipart.
+        :rtype: Any object implementing C{IMessagePart}.
+        :return: The specified sub-part.
+        """
+        if not self.isMultipart():
+            raise TypeError
+
+        sub_pmap = self._pmap.get("part_map", {})
+        try:
+            part_map = sub_pmap[str(part + 1)]
+        except KeyError:
+            logger.debug("getSubpart for %s: KeyError" % (part,))
+            raise IndexError
+
+        # XXX check for validity
+        return MessagePart(self._soledad, part_map)