summaryrefslogtreecommitdiff
path: root/src/leap/mail/imap/messageparts.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/leap/mail/imap/messageparts.py')
-rw-r--r--src/leap/mail/imap/messageparts.py586
1 files changed, 0 insertions, 586 deletions
diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py
deleted file mode 100644
index 257721c..0000000
--- a/src/leap/mail/imap/messageparts.py
+++ /dev/null
@@ -1,586 +0,0 @@
-# messageparts.py
-# Copyright (C) 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-MessagePart implementation. Used from LeapMessage.
-"""
-import logging
-import StringIO
-import weakref
-
-from collections import namedtuple
-
-from enum import Enum
-from zope.interface import implements
-from twisted.mail import imap4
-
-from leap.common.decorators import memoized_method
-from leap.common.mail import get_email_charset
-from leap.mail.imap import interfaces
-from leap.mail.imap.fields import fields
-from leap.mail.utils import empty, first, find_charset
-
-# Kinds of message part that a MessagePartDoc can be tagged with through its
-# `part` field. The names mirror the keys used inside MessageWrapper.
-MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id")
-
-
-# Module-level logger, named after this module.
-logger = logging.getLogger(__name__)
-
-
-"""
-A MessagePartDoc is a light wrapper around the dictionary-like
-data that we pass along for message parts. It can be used almost everywhere
-that you would expect a SoledadDocument, since it has a dict under the
-`content` attribute.
-
-We also keep some metadata on it, relative in part to the message as a whole,
-and sometimes to a part in particular only.
-
-* `new` indicates that the document has just been created. SoledadStore
- should just create a new doc for all the related message parts.
-* `store` indicates the type of store a given MessagePartDoc lives in.
- We currently use this to indicate that the document comes from memory,
- but we should probably get rid of it as soon as we extend the use of the
- SoledadStore interface along LeapMessage, MessageCollection and Mailbox.
-* `part` is one of the MessagePartType enums.
-
-* `dirty` indicates that, while we already have the document in Soledad,
- we have modified its state in memory, so we need to put_doc instead while
- dumping the MemoryStore contents.
- `dirty` attribute would only apply to flags-docs and linkage-docs.
-* `doc_id` is the identifier for the document in the u1db database, if any.
-
-"""
-
-# Light, immutable record for one message part: the dict-like payload lives
-# under `content`, the remaining fields are metadata about that payload.
-MessagePartDoc = namedtuple(
-    'MessagePartDoc', 'new dirty part store content doc_id')
-
-"""
-A RecentFlagsDoc is used to send the recent-flags document payload to the
-SoledadWriter during dumps.
-"""
-# Immutable (content, doc_id) pair carrying the recent-flags payload.
-RecentFlagsDoc = namedtuple('RecentFlagsDoc', 'content doc_id')
-
-
-class ReferenciableDict(dict):
-    """
-    Plain ``dict`` subclass whose instances can be weakly referenced.
-
-    Instances of the builtin ``dict`` cannot be the target of a weak
-    reference, but instances of a subclass can. No behaviour is added
-    or changed: the subclass exists only for that purpose.
-
-    The MemoryStore items are handed out as weak proxies to instances
-    of this class.
-    """
-
-
-class MessageWrapper(object):
-    """
-    A simple nested dictionary container around the different message subparts.
-
-    The wrapper keeps a flags-document (``fdoc``), a headers-document
-    (``hdoc``), a dict of content-documents (``cdocs``) and a dict with the
-    known document ids (``docs_id``). The parts are handed out as
-    MessagePartDoc tuples whose ``content`` is a weak proxy to the stored
-    dict, so holding a MessagePartDoc does not keep the part alive.
-    """
-    implements(interfaces.IMessageContainer)
-
-    FDOC = "fdoc"
-    HDOC = "hdoc"
-    CDOCS = "cdocs"
-    DOCS_ID = "docs_id"
-
-    # Slots are used to limit the memory use of each instance.
-    # Any new instance attribute has to be declared here.
-
-    __slots__ = ["_dict", "_new", "_dirty", "_storetype", "memstore"]
-
-    def __init__(self, fdoc=None, hdoc=None, cdocs=None,
-                 from_dict=None, memstore=None,
-                 new=True, dirty=False, docs_id=None):
-        """
-        Initialize a MessageWrapper.
-
-        :param fdoc: the content of the flags-document, if any.
-        :type fdoc: dict
-        :param hdoc: the content of the headers-document, if any.
-        :type hdoc: dict
-        :param cdocs: the content-documents, if any.
-        :type cdocs: dict
-        :param from_dict: a dictionary in the format returned by `as_dict`.
-                          When given, `fdoc`, `hdoc` and `cdocs` are ignored.
-        :type from_dict: dict
-        :param memstore: the memory store that will be notified when the
-                         `new` or `dirty` flags are set through the
-                         properties.
-        :param new: initial value for the `new` flag.
-        :type new: bool
-        :param dirty: initial value for the `dirty` flag.
-        :type dirty: bool
-        :param docs_id: a mapping from part name to document id. The dict
-                        is stored by reference; a fresh one is created when
-                        it is omitted.
-        :type docs_id: dict
-        """
-        # TODO add optional reference to original message in the incoming
-        self._dict = {}
-        self.memstore = memstore
-
-        # The private attributes are assigned directly so that nothing is
-        # propagated to the memstore during initialization.
-        self._new = new
-        self._dirty = dirty
-
-        self._storetype = "mem"
-
-        if from_dict is not None:
-            self.from_dict(from_dict)
-        else:
-            if fdoc is not None:
-                self._dict[self.FDOC] = ReferenciableDict(fdoc)
-            if hdoc is not None:
-                self._dict[self.HDOC] = ReferenciableDict(hdoc)
-            if cdocs is not None:
-                self._dict[self.CDOCS] = ReferenciableDict(cdocs)
-
-        # This will keep references to the doc_ids to be able to put
-        # messages to soledad. It will be populated during the walk() to avoid
-        # the overhead of reading from the db.
-
-        # XXX it really *only* make sense for the FDOC, the other parts
-        # should not be "dirty", just new...!!!
-
-        # The default has to be created here and not in the signature:
-        # walk() writes into this dict, so a dict shared through a default
-        # argument would leak doc ids from one message into another.
-        self._dict[self.DOCS_ID] = {} if docs_id is None else docs_id
-
-    # properties
-
-    def _propagate_flag(self, flag, value, setter_name, unsetter_name):
-        """
-        Notify the memory store, if any, that a flag changed for this message.
-
-        :param flag: the name of the flag, only used for logging.
-        :type flag: str
-        :param value: the new value of the flag.
-        :type value: bool
-        :param setter_name: name of the memstore method to call with the
-                            (mbox, uid) key when `value` is true.
-        :type setter_name: str
-        :param unsetter_name: name of the memstore method to call with the
-                              (mbox, uid) key when `value` is false.
-        :type unsetter_name: str
-        """
-        if not self.memstore:
-            logger.warning("Could not find a memstore referenced from this "
-                           "MessageWrapper. The value for %s will not be "
-                           "propagated", flag)
-            return
-
-        content = self.fdoc.content
-        mbox = content.get('mbox', None)
-        uid = content.get('uid', None)
-        if not mbox or not uid:
-            logger.warning("Malformed fdoc")
-            return
-
-        # Truthiness is used instead of indexing by int(value), so that
-        # any value that is not exactly 0 or 1 cannot raise here.
-        method_name = setter_name if value else unsetter_name
-        getattr(self.memstore, method_name)((mbox, uid))
-
-    def _get_new(self):
-        """
-        Get the value for the `new` flag.
-
-        :rtype: bool
-        """
-        return self._new
-
-    def _set_new(self, value=False):
-        """
-        Set the value for the `new` flag, and propagate it
-        to the memory store if any.
-
-        :param value: the value to set
-        :type value: bool
-        """
-        self._new = value
-        self._propagate_flag(
-            "new", value, "set_new_queued", "unset_new_queued")
-
-    new = property(_get_new, _set_new,
-                   doc="The `new` flag for this MessageWrapper")
-
-    def _get_dirty(self):
-        """
-        Get the value for the `dirty` flag.
-
-        :rtype: bool
-        """
-        return self._dirty
-
-    def _set_dirty(self, value=True):
-        """
-        Set the value for the `dirty` flag, and propagate it
-        to the memory store if any.
-
-        :param value: the value to set
-        :type value: bool
-        """
-        self._dirty = value
-        self._propagate_flag(
-            "dirty", value, "set_dirty_queued", "unset_dirty_queued")
-
-    dirty = property(_get_dirty, _set_dirty,
-                     doc="The `dirty` flag for this MessageWrapper")
-
-    # IMessageContainer
-
-    def _wrap_part(self, key, part_type):
-        """
-        Build the MessagePartDoc for one of the single-document parts.
-
-        :param key: the key of the part in the inner dict (FDOC or HDOC).
-        :type key: str
-        :param part_type: the MessagePartType to tag the document with.
-        :return: a MessagePartDoc whose content is a weak proxy to the
-                 stored part, or an empty dict if the part is missing.
-        :rtype: MessagePartDoc
-        """
-        part = self._dict.get(key, None)
-        content_ref = weakref.proxy(part) if part else {}
-        return MessagePartDoc(new=self.new, dirty=self.dirty,
-                              store=self._storetype,
-                              part=part_type,
-                              content=content_ref,
-                              doc_id=self._dict[self.DOCS_ID].get(key, None))
-
-    @property
-    def fdoc(self):
-        """
-        Return a MessagePartDoc wrapping around a weak reference to
-        the flags-document in this MemoryStore, if any.
-
-        A warning is logged if there is no flags-document, and the
-        returned MessagePartDoc has an empty dict as content.
-
-        :rtype: MessagePartDoc
-        """
-        if not self._dict.get(self.FDOC, None):
-            logger.warning("NO FDOC!!!")
-        return self._wrap_part(self.FDOC, MessagePartType.fdoc)
-
-    @property
-    def hdoc(self):
-        """
-        Return a MessagePartDoc wrapping around a weak reference to
-        the headers-document in this MemoryStore, if any.
-
-        :rtype: MessagePartDoc
-        """
-        return self._wrap_part(self.HDOC, MessagePartType.hdoc)
-
-    @property
-    def cdocs(self):
-        """
-        Return a weak reference to a zero-indexed dict containing
-        the content-documents, or an empty dict if none found.
-        If you want access to the MessagePartDoc for the individual
-        parts, use the generator returned by `walk` instead.
-
-        :rtype: dict
-        """
-        _cdocs = self._dict.get(self.CDOCS, None)
-        if _cdocs:
-            return weakref.proxy(_cdocs)
-        return {}
-
-    def walk(self):
-        """
-        Generator that iterates through all the parts, returning
-        MessagePartDoc. Used for writing to SoledadStore.
-
-        If the wrapper is dirty, the doc_id of the flags-document is
-        first looked up in the memstore. A failure in that lookup is
-        logged and does not stop the iteration.
-
-        :rtype: generator
-        """
-        if self._dirty:
-            try:
-                fdoc_content = self.fdoc.content
-                mbox = fdoc_content[fields.MBOX_KEY]
-                uid = fdoc_content[fields.UID_KEY]
-                docid_dict = self._dict[self.DOCS_ID]
-                docid_dict[self.FDOC] = self.memstore.get_docid_for_fdoc(
-                    mbox, uid)
-            except Exception as exc:
-                # Best effort: the parts are still yielded below.
-                logger.debug("Error while walking message...")
-                logger.exception(exc)
-
-        # The fdoc is built after the lookup above, so that it carries
-        # the doc_id that was just found.
-        fdoc = self.fdoc
-        if not empty(fdoc.content) and 'uid' in fdoc.content:
-            yield fdoc
-
-        hdoc = self.hdoc
-        if not empty(hdoc.content):
-            yield hdoc
-
-        for cdoc in self.cdocs.values():
-            if not empty(cdoc):
-                content_ref = weakref.proxy(cdoc)
-                yield MessagePartDoc(new=self.new, dirty=self.dirty,
-                                     store=self._storetype,
-                                     part=MessagePartType.cdoc,
-                                     content=content_ref,
-                                     doc_id=None)
-
-    # i/o
-
-    def as_dict(self):
-        """
-        Return a dict representation of the parts contained.
-
-        The inner dict itself is returned, not a copy.
-
-        :rtype: dict
-        """
-        return self._dict
-
-    def from_dict(self, msg_dict):
-        """
-        Populate MessageWrapper parts from a dictionary.
-        It expects the same format that we use in a
-        MessageWrapper.
-
-        A part that is missing or empty in `msg_dict` is stored as None.
-
-        :param msg_dict: a dictionary containing the parts to populate
-                         the MessageWrapper from
-        :type msg_dict: dict
-        """
-        for key in (self.FDOC, self.HDOC, self.CDOCS):
-            doc = msg_dict.get(key, None)
-            self._dict[key] = ReferenciableDict(doc) if doc else None
-
-
-class MessagePart(object):
-    """
-    IMessagePart implementor, to be passed to several methods
-    of the IMAP4Server.
-    It takes a subpart message and is able to find
-    the inner parts.
-
-    See the interface documentation.
-    """
-
-    implements(imap4.IMessagePart)
-
-    def __init__(self, soledad, part_map):
-        """
-        Initializes the MessagePart.
-
-        :param soledad: Soledad instance.
-        :type soledad: Soledad
-        :param part_map: a dictionary containing the parts map for this
-                         message
-        :type part_map: dict
-        """
-        # TODO
-        # It would be good to pass the uid/mailbox also
-        # for references while debugging.
-
-        # We have a problem on bulk moves, and is
-        # that when the fetch on the new mailbox is done
-        # the parts maybe are not complete.
-        # So we should be able to fail with empty
-        # docs until we solve that. The ideal would be
-        # to gather the results of the deferred operations
-        # to signal the operation is complete.
-        #leap_assert(part_map, "part map dict cannot be null")
-
-        self._soledad = soledad
-        self._pmap = part_map
-
-    def getSize(self):
-        """
-        Return the total size, in octets, of this message part.
-
-        :return: size of the message, in octets; 0 if the part map is
-                 empty or has no size.
-        :rtype: int
-        """
-        if empty(self._pmap):
-            return 0
-        size = self._pmap.get('size', None)
-        if size is None:
-            logger.error("Message part cannot find size in the partmap")
-            size = 0
-        return size
-
-    def _find_body_phash(self):
-        """
-        Return the payload hash of the body of this part, or None.
-
-        For a multipart part map, the hash of the first subpart
-        (stored under the key '1') is used.
-
-        :rtype: str or unicode or None
-        """
-        if not self._pmap.get('multi'):
-            return self._pmap.get("phash", None)
-
-        # A multipart map lacking 'part_map', or a first part lacking
-        # 'phash', is handled as "no phash" instead of raising.
-        sub_pmap = self._pmap.get('part_map') or {}
-        first_part = sub_pmap.get('1', None)
-        if empty(first_part):
-            return None
-        return first_part.get('phash', None)
-
-    def getBodyFile(self):
-        """
-        Retrieve a file object containing only the body of this message.
-
-        The file is empty if there is no part map, no payload hash, or
-        no payload for that hash.
-
-        :return: file-like object opened for reading
-        :rtype: StringIO
-        """
-        fd = StringIO.StringIO()
-
-        if empty(self._pmap):
-            logger.warning("Message with no part_map!")
-            return fd
-
-        phash = self._find_body_phash()
-        if phash is None:
-            logger.warning("Could not find phash for this subpart!")
-            return fd
-
-        # The memoized call is retried without the cache when it gives
-        # an empty result, in case the document was not yet available
-        # when the result was cached.
-        payload = self._get_payload_from_document_memoized(phash)
-        if empty(payload):
-            payload = self._get_payload_from_document(phash)
-        if not payload:
-            return fd
-
-        content_type = self._get_ctype_from_document(phash)
-        charset = find_charset(content_type)
-        if charset is None:
-            charset = self._get_charset(payload)
-        try:
-            if isinstance(payload, unicode):
-                payload = payload.encode(charset)
-        except UnicodeError as exc:
-            logger.error(
-                "Unicode error, using 'replace'. {0!r}".format(exc))
-            payload = payload.encode(charset, 'replace')
-
-        fd.write(payload)
-        fd.seek(0)
-        return fd
-
-    # TODO should memory-bound this memoize!!!
-    @memoized_method
-    def _get_payload_from_document_memoized(self, phash):
-        """
-        Memoized method call around the regular method, to be able
-        to call the non-memoized method in case we got a None.
-
-        :param phash: the payload hash to retrieve by.
-        :type phash: str or unicode
-        :rtype: str or unicode or None
-        """
-        return self._get_payload_from_document(phash)
-
-    def _get_payload_from_document(self, phash):
-        """
-        Return the message payload from the content document.
-
-        :param phash: the payload hash to retrieve by.
-        :type phash: str or unicode
-        :return: the raw payload, or an empty string if the content
-                 document cannot be found.
-        :rtype: str or unicode
-        """
-        cdocs = self._soledad.get_from_index(
-            fields.TYPE_P_HASH_IDX,
-            fields.TYPE_CONTENT_VAL, str(phash))
-
-        cdoc = first(cdocs)
-        if cdoc is None:
-            logger.warning(
-                "Could not find the content doc "
-                "for phash %s" % (phash,))
-            return ""
-        return cdoc.content.get(fields.RAW_KEY, "")
-
-    # TODO should memory-bound this memoize!!!
-    @memoized_method
-    def _get_ctype_from_document(self, phash):
-        """
-        Return the content-type from the content document.
-
-        :param phash: the payload hash to retrieve by.
-        :type phash: str or unicode
-        :return: the content-type, or an empty string if the content
-                 document cannot be found.
-        :rtype: str or unicode
-        """
-        cdocs = self._soledad.get_from_index(
-            fields.TYPE_P_HASH_IDX,
-            fields.TYPE_CONTENT_VAL, str(phash))
-
-        cdoc = first(cdocs)
-        if not cdoc:
-            logger.warning(
-                "Could not find the content doc "
-                "for phash %s" % (phash,))
-            # There is no document to read the content-type from.
-            return ""
-        return cdoc.content.get('ctype', "")
-
-    @memoized_method
-    def _get_charset(self, stuff):
-        # TODO put in a common class with LeapMessage
-        """
-        Gets (guesses?) the charset of a payload.
-
-        :param stuff: the stuff to guess about.
-        :type stuff: str or unicode
-        :return: charset
-        :rtype: unicode
-        """
-        # XXX existential doubt 2. shouldn't we make the scope
-        # of the decorator somewhat more persistent?
-        # ah! yes! and put memory bounds.
-        return get_email_charset(stuff)
-
-    def getHeaders(self, negate, *names):
-        """
-        Retrieve a group of message headers.
-
-        :param names: The names of the headers to retrieve or omit.
-        :type names: tuple of str
-
-        :param negate: If True, indicates that the headers listed in names
-                       should be omitted from the return value, rather
-                       than included.
-        :type negate: bool
-
-        :return: A mapping of header field names to header field values
-        :rtype: dict
-        """
-        # XXX refactor together with MessagePart method
-        if not self._pmap:
-            logger.warning("No pmap in Subpart!")
-            return {}
-        headers = dict(self._pmap.get("headers", []))
-
-        # The names are matched case-insensitively. A set is built once,
-        # since it is tested against every header below.
-        wanted = set(name.upper() for name in names)
-
-        # default to most likely standard
-        charset = find_charset(headers, "utf-8")
-        selected = {}
-        for key, value in headers.items():
-            # twisted imap server expects *some* headers to be lowercase
-            # We could use a CaseInsensitiveDict here...
-            if key.lower() == "content-type":
-                key = key.lower()
-
-            if not isinstance(key, str):
-                key = key.encode(charset, 'replace')
-            if not isinstance(value, str):
-                value = value.encode(charset, 'replace')
-
-            # filter original dict by negate-condition
-            listed = key.upper() in wanted
-            keep = not listed if negate else listed
-            if keep:
-                selected[key] = value
-        return selected
-
-    def isMultipart(self):
-        """
-        Return True if this message is multipart.
-
-        The value stored under "multi" in the part map is returned as
-        is; False is returned if the part map is empty or has no such
-        key.
-        """
-        if empty(self._pmap):
-            logger.warning("Could not get part map!")
-            return False
-        return self._pmap.get("multi", False)
-
-    def getSubPart(self, part):
-        """
-        Retrieve a MIME submessage
-
-        :type part: C{int}
-        :param part: The number of the part to retrieve, indexed from 0.
-        :raise IndexError: Raised if the specified part does not exist.
-        :raise TypeError: Raised if this message is not multipart.
-        :rtype: Any object implementing C{IMessagePart}.
-        :return: The specified sub-part.
-        """
-        if not self.isMultipart():
-            raise TypeError("This message part is not multipart")
-
-        sub_pmap = self._pmap.get("part_map", {})
-        try:
-            # The keys of the part map are one-indexed strings.
-            part_map = sub_pmap[str(part + 1)]
-        except KeyError:
-            logger.debug("getSubpart for %s: KeyError" % (part,))
-            raise IndexError("No subpart with index %s" % (part,))
-
-        # XXX check for validity
-        return MessagePart(self._soledad, part_map)