summaryrefslogtreecommitdiff
path: root/src/leap/mail/imap/messageparts.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/leap/mail/imap/messageparts.py')
-rw-r--r--src/leap/mail/imap/messageparts.py586
1 files changed, 586 insertions, 0 deletions
diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py
new file mode 100644
index 0000000..257721c
--- /dev/null
+++ b/src/leap/mail/imap/messageparts.py
@@ -0,0 +1,586 @@
+# messageparts.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+MessagePart implementation. Used from LeapMessage.
+"""
+import logging
+import StringIO
+import weakref
+
+from collections import namedtuple
+
+from enum import Enum
+from zope.interface import implements
+from twisted.mail import imap4
+
+from leap.common.decorators import memoized_method
+from leap.common.mail import get_email_charset
+from leap.mail.imap import interfaces
+from leap.mail.imap.fields import fields
+from leap.mail.utils import empty, first, find_charset
+
+MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id")
+
+
+logger = logging.getLogger(__name__)
+
+
+"""
+A MessagePartDoc is a light wrapper around the dictionary-like
+data that we pass along for message parts. It can be used almost everywhere
+that you would expect a SoledadDocument, since it has a dict under the
+`content` attribute.
+
+We also keep some metadata on it, relative in part to the message as a whole,
+and sometimes to a part in particular only.
+
+* `new` indicates that the document has just been created. SoledadStore
+ should just create a new doc for all the related message parts.
+* `store` indicates the type of store a given MessagePartDoc lives in.
+ We currently use this to indicate that the document comes from memeory,
+ but we should probably get rid of it as soon as we extend the use of the
+ SoledadStore interface along LeapMessage, MessageCollection and Mailbox.
+* `part` is one of the MessagePartType enums.
+
+* `dirty` indicates that, while we already have the document in Soledad,
+ we have modified its state in memory, so we need to put_doc instead while
+ dumping the MemoryStore contents.
+ `dirty` attribute would only apply to flags-docs and linkage-docs.
+* `doc_id` is the identifier for the document in the u1db database, if any.
+
+"""
+
+MessagePartDoc = namedtuple(
+ 'MessagePartDoc',
+ ['new', 'dirty', 'part', 'store', 'content', 'doc_id'])
+
+"""
+A RecentFlagsDoc is used to send the recent-flags document payload to the
+SoledadWriter during dumps.
+"""
+RecentFlagsDoc = namedtuple(
+ 'RecentFlagsDoc',
+ ['content', 'doc_id'])
+
+
+class ReferenciableDict(dict):
+ """
+ A dict that can be weak-referenced.
+
+ Some builtin objects are not weak-referenciable unless
+ subclassed. So we do.
+
+ Used to return pointers to the items in the MemoryStore.
+ """
+
+
+class MessageWrapper(object):
+ """
+ A simple nested dictionary container around the different message subparts.
+ """
+ implements(interfaces.IMessageContainer)
+
+ FDOC = "fdoc"
+ HDOC = "hdoc"
+ CDOCS = "cdocs"
+ DOCS_ID = "docs_id"
+
+ # Using slots to limit some the memory use,
+ # Add your attribute here.
+
+ __slots__ = ["_dict", "_new", "_dirty", "_storetype", "memstore"]
+
+ def __init__(self, fdoc=None, hdoc=None, cdocs=None,
+ from_dict=None, memstore=None,
+ new=True, dirty=False, docs_id={}):
+ """
+ Initialize a MessageWrapper.
+ """
+ # TODO add optional reference to original message in the incoming
+ self._dict = {}
+ self.memstore = memstore
+
+ self._new = new
+ self._dirty = dirty
+
+ self._storetype = "mem"
+
+ if from_dict is not None:
+ self.from_dict(from_dict)
+ else:
+ if fdoc is not None:
+ self._dict[self.FDOC] = ReferenciableDict(fdoc)
+ if hdoc is not None:
+ self._dict[self.HDOC] = ReferenciableDict(hdoc)
+ if cdocs is not None:
+ self._dict[self.CDOCS] = ReferenciableDict(cdocs)
+
+ # This will keep references to the doc_ids to be able to put
+ # messages to soledad. It will be populated during the walk() to avoid
+ # the overhead of reading from the db.
+
+ # XXX it really *only* make sense for the FDOC, the other parts
+ # should not be "dirty", just new...!!!
+ self._dict[self.DOCS_ID] = docs_id
+
+ # properties
+
+ # TODO Could refactor new and dirty properties together.
+
+ def _get_new(self):
+ """
+ Get the value for the `new` flag.
+
+ :rtype: bool
+ """
+ return self._new
+
+ def _set_new(self, value=False):
+ """
+ Set the value for the `new` flag, and propagate it
+ to the memory store if any.
+
+ :param value: the value to set
+ :type value: bool
+ """
+ self._new = value
+ if self.memstore:
+ mbox = self.fdoc.content.get('mbox', None)
+ uid = self.fdoc.content.get('uid', None)
+ if not mbox or not uid:
+ logger.warning("Malformed fdoc")
+ return
+ key = mbox, uid
+ fun = [self.memstore.unset_new_queued,
+ self.memstore.set_new_queued][int(value)]
+ fun(key)
+ else:
+ logger.warning("Could not find a memstore referenced from this "
+ "MessageWrapper. The value for new will not be "
+ "propagated")
+
+ new = property(_get_new, _set_new,
+ doc="The `new` flag for this MessageWrapper")
+
+ def _get_dirty(self):
+ """
+ Get the value for the `dirty` flag.
+
+ :rtype: bool
+ """
+ return self._dirty
+
+ def _set_dirty(self, value=True):
+ """
+ Set the value for the `dirty` flag, and propagate it
+ to the memory store if any.
+
+ :param value: the value to set
+ :type value: bool
+ """
+ self._dirty = value
+ if self.memstore:
+ mbox = self.fdoc.content.get('mbox', None)
+ uid = self.fdoc.content.get('uid', None)
+ if not mbox or not uid:
+ logger.warning("Malformed fdoc")
+ return
+ key = mbox, uid
+ fun = [self.memstore.unset_dirty_queued,
+ self.memstore.set_dirty_queued][int(value)]
+ fun(key)
+ else:
+ logger.warning("Could not find a memstore referenced from this "
+ "MessageWrapper. The value for new will not be "
+ "propagated")
+
+ dirty = property(_get_dirty, _set_dirty)
+
+ # IMessageContainer
+
+ @property
+ def fdoc(self):
+ """
+ Return a MessagePartDoc wrapping around a weak reference to
+ the flags-document in this MemoryStore, if any.
+
+ :rtype: MessagePartDoc
+ """
+ _fdoc = self._dict.get(self.FDOC, None)
+ if _fdoc:
+ content_ref = weakref.proxy(_fdoc)
+ else:
+ logger.warning("NO FDOC!!!")
+ content_ref = {}
+
+ return MessagePartDoc(new=self.new, dirty=self.dirty,
+ store=self._storetype,
+ part=MessagePartType.fdoc,
+ content=content_ref,
+ doc_id=self._dict[self.DOCS_ID].get(
+ self.FDOC, None))
+
+ @property
+ def hdoc(self):
+ """
+ Return a MessagePartDoc wrapping around a weak reference to
+ the headers-document in this MemoryStore, if any.
+
+ :rtype: MessagePartDoc
+ """
+ _hdoc = self._dict.get(self.HDOC, None)
+ if _hdoc:
+ content_ref = weakref.proxy(_hdoc)
+ else:
+ content_ref = {}
+ return MessagePartDoc(new=self.new, dirty=self.dirty,
+ store=self._storetype,
+ part=MessagePartType.hdoc,
+ content=content_ref,
+ doc_id=self._dict[self.DOCS_ID].get(
+ self.HDOC, None))
+
+ @property
+ def cdocs(self):
+ """
+ Return a weak reference to a zero-indexed dict containing
+ the content-documents, or an empty dict if none found.
+ If you want access to the MessagePartDoc for the individual
+ parts, use the generator returned by `walk` instead.
+
+ :rtype: dict
+ """
+ _cdocs = self._dict.get(self.CDOCS, None)
+ if _cdocs:
+ return weakref.proxy(_cdocs)
+ else:
+ return {}
+
+ def walk(self):
+ """
+ Generator that iterates through all the parts, returning
+ MessagePartDoc. Used for writing to SoledadStore.
+
+ :rtype: generator
+ """
+ if self._dirty:
+ try:
+ mbox = self.fdoc.content[fields.MBOX_KEY]
+ uid = self.fdoc.content[fields.UID_KEY]
+ docid_dict = self._dict[self.DOCS_ID]
+ docid_dict[self.FDOC] = self.memstore.get_docid_for_fdoc(
+ mbox, uid)
+ except Exception as exc:
+ logger.debug("Error while walking message...")
+ logger.exception(exc)
+
+ if not empty(self.fdoc.content) and 'uid' in self.fdoc.content:
+ yield self.fdoc
+ if not empty(self.hdoc.content):
+ yield self.hdoc
+ for cdoc in self.cdocs.values():
+ if not empty(cdoc):
+ content_ref = weakref.proxy(cdoc)
+ yield MessagePartDoc(new=self.new, dirty=self.dirty,
+ store=self._storetype,
+ part=MessagePartType.cdoc,
+ content=content_ref,
+ doc_id=None)
+
+ # i/o
+
+ def as_dict(self):
+ """
+ Return a dict representation of the parts contained.
+
+ :rtype: dict
+ """
+ return self._dict
+
+ def from_dict(self, msg_dict):
+ """
+ Populate MessageWrapper parts from a dictionary.
+ It expects the same format that we use in a
+ MessageWrapper.
+
+
+ :param msg_dict: a dictionary containing the parts to populate
+ the MessageWrapper from
+ :type msg_dict: dict
+ """
+ fdoc, hdoc, cdocs = map(
+ lambda part: msg_dict.get(part, None),
+ [self.FDOC, self.HDOC, self.CDOCS])
+
+ for t, doc in ((self.FDOC, fdoc), (self.HDOC, hdoc),
+ (self.CDOCS, cdocs)):
+ self._dict[t] = ReferenciableDict(doc) if doc else None
+
+
+class MessagePart(object):
+ """
+ IMessagePart implementor, to be passed to several methods
+ of the IMAP4Server.
+ It takes a subpart message and is able to find
+ the inner parts.
+
+ See the interface documentation.
+ """
+
+ implements(imap4.IMessagePart)
+
+ def __init__(self, soledad, part_map):
+ """
+ Initializes the MessagePart.
+
+ :param soledad: Soledad instance.
+ :type soledad: Soledad
+ :param part_map: a dictionary containing the parts map for this
+ message
+ :type part_map: dict
+ """
+ # TODO
+ # It would be good to pass the uid/mailbox also
+ # for references while debugging.
+
+ # We have a problem on bulk moves, and is
+ # that when the fetch on the new mailbox is done
+ # the parts maybe are not complete.
+ # So we should be able to fail with empty
+ # docs until we solve that. The ideal would be
+ # to gather the results of the deferred operations
+ # to signal the operation is complete.
+ #leap_assert(part_map, "part map dict cannot be null")
+
+ self._soledad = soledad
+ self._pmap = part_map
+
+ def getSize(self):
+ """
+ Return the total size, in octets, of this message part.
+
+ :return: size of the message, in octets
+ :rtype: int
+ """
+ if empty(self._pmap):
+ return 0
+ size = self._pmap.get('size', None)
+ if size is None:
+ logger.error("Message part cannot find size in the partmap")
+ size = 0
+ return size
+
+ def getBodyFile(self):
+ """
+ Retrieve a file object containing only the body of this message.
+
+ :return: file-like object opened for reading
+ :rtype: StringIO
+ """
+ fd = StringIO.StringIO()
+ if not empty(self._pmap):
+ multi = self._pmap.get('multi')
+ if not multi:
+ phash = self._pmap.get("phash", None)
+ else:
+ pmap = self._pmap.get('part_map')
+ first_part = pmap.get('1', None)
+ if not empty(first_part):
+ phash = first_part['phash']
+ else:
+ phash = None
+
+ if phash is None:
+ logger.warning("Could not find phash for this subpart!")
+ payload = ""
+ else:
+ payload = self._get_payload_from_document_memoized(phash)
+ if empty(payload):
+ payload = self._get_payload_from_document(phash)
+
+ else:
+ logger.warning("Message with no part_map!")
+ payload = ""
+
+ if payload:
+ content_type = self._get_ctype_from_document(phash)
+ charset = find_charset(content_type)
+ if charset is None:
+ charset = self._get_charset(payload)
+ try:
+ if isinstance(payload, unicode):
+ payload = payload.encode(charset)
+ except UnicodeError as exc:
+ logger.error(
+ "Unicode error, using 'replace'. {0!r}".format(exc))
+ payload = payload.encode(charset, 'replace')
+
+ fd.write(payload)
+ fd.seek(0)
+ return fd
+
+ # TODO should memory-bound this memoize!!!
+ @memoized_method
+ def _get_payload_from_document_memoized(self, phash):
+ """
+ Memoized method call around the regular method, to be able
+ to call the non-memoized method in case we got a None.
+
+ :param phash: the payload hash to retrieve by.
+ :type phash: str or unicode
+ :rtype: str or unicode or None
+ """
+ return self._get_payload_from_document(phash)
+
+ def _get_payload_from_document(self, phash):
+ """
+ Return the message payload from the content document.
+
+ :param phash: the payload hash to retrieve by.
+ :type phash: str or unicode
+ :rtype: str or unicode or None
+ """
+ cdocs = self._soledad.get_from_index(
+ fields.TYPE_P_HASH_IDX,
+ fields.TYPE_CONTENT_VAL, str(phash))
+
+ cdoc = first(cdocs)
+ if cdoc is None:
+ logger.warning(
+ "Could not find the content doc "
+ "for phash %s" % (phash,))
+ payload = ""
+ else:
+ payload = cdoc.content.get(fields.RAW_KEY, "")
+ return payload
+
+ # TODO should memory-bound this memoize!!!
+ @memoized_method
+ def _get_ctype_from_document(self, phash):
+ """
+ Reeturn the content-type from the content document.
+
+ :param phash: the payload hash to retrieve by.
+ :type phash: str or unicode
+ :rtype: str or unicode
+ """
+ cdocs = self._soledad.get_from_index(
+ fields.TYPE_P_HASH_IDX,
+ fields.TYPE_CONTENT_VAL, str(phash))
+
+ cdoc = first(cdocs)
+ if not cdoc:
+ logger.warning(
+ "Could not find the content doc "
+ "for phash %s" % (phash,))
+ ctype = cdoc.content.get('ctype', "")
+ return ctype
+
+ @memoized_method
+ def _get_charset(self, stuff):
+ # TODO put in a common class with LeapMessage
+ """
+ Gets (guesses?) the charset of a payload.
+
+ :param stuff: the stuff to guess about.
+ :type stuff: str or unicode
+ :return: charset
+ :rtype: unicode
+ """
+ # XXX existential doubt 2. shouldn't we make the scope
+ # of the decorator somewhat more persistent?
+ # ah! yes! and put memory bounds.
+ return get_email_charset(stuff)
+
+ def getHeaders(self, negate, *names):
+ """
+ Retrieve a group of message headers.
+
+ :param names: The names of the headers to retrieve or omit.
+ :type names: tuple of str
+
+ :param negate: If True, indicates that the headers listed in names
+ should be omitted from the return value, rather
+ than included.
+ :type negate: bool
+
+ :return: A mapping of header field names to header field values
+ :rtype: dict
+ """
+ # XXX refactor together with MessagePart method
+ if not self._pmap:
+ logger.warning("No pmap in Subpart!")
+ return {}
+ headers = dict(self._pmap.get("headers", []))
+
+ names = map(lambda s: s.upper(), names)
+ if negate:
+ cond = lambda key: key.upper() not in names
+ else:
+ cond = lambda key: key.upper() in names
+
+ # default to most likely standard
+ charset = find_charset(headers, "utf-8")
+ headers2 = dict()
+ for key, value in headers.items():
+ # twisted imap server expects *some* headers to be lowercase
+ # We could use a CaseInsensitiveDict here...
+ if key.lower() == "content-type":
+ key = key.lower()
+
+ if not isinstance(key, str):
+ key = key.encode(charset, 'replace')
+ if not isinstance(value, str):
+ value = value.encode(charset, 'replace')
+
+ # filter original dict by negate-condition
+ if cond(key):
+ headers2[key] = value
+ return headers2
+
+ def isMultipart(self):
+ """
+ Return True if this message is multipart.
+ """
+ if empty(self._pmap):
+ logger.warning("Could not get part map!")
+ return False
+ multi = self._pmap.get("multi", False)
+ return multi
+
+ def getSubPart(self, part):
+ """
+ Retrieve a MIME submessage
+
+ :type part: C{int}
+ :param part: The number of the part to retrieve, indexed from 0.
+ :raise IndexError: Raised if the specified part does not exist.
+ :raise TypeError: Raised if this message is not multipart.
+ :rtype: Any object implementing C{IMessagePart}.
+ :return: The specified sub-part.
+ """
+ if not self.isMultipart():
+ raise TypeError
+
+ sub_pmap = self._pmap.get("part_map", {})
+ try:
+ part_map = sub_pmap[str(part + 1)]
+ except KeyError:
+ logger.debug("getSubpart for %s: KeyError" % (part,))
+ raise IndexError
+
+ # XXX check for validity
+ return MessagePart(self._soledad, part_map)