From 4c02942472be00486d04d854e987d6eda8c7df12 Mon Sep 17 00:00:00 2001 From: Kali Kaneko <kali@leap.se> Date: Thu, 22 Jan 2015 03:16:32 -0400 Subject: re-add support for basic multipart messages --- src/leap/mail/adaptors/soledad.py | 8 ++- src/leap/mail/imap/mailbox.py | 6 +- src/leap/mail/imap/messages.py | 144 ++++++++++++++++++++------------------ src/leap/mail/mail.py | 102 ++++++++++++++++----------- 4 files changed, 147 insertions(+), 113 deletions(-) (limited to 'src/leap') diff --git a/src/leap/mail/adaptors/soledad.py b/src/leap/mail/adaptors/soledad.py index 9f0bb30..d21638c 100644 --- a/src/leap/mail/adaptors/soledad.py +++ b/src/leap/mail/adaptors/soledad.py @@ -463,8 +463,9 @@ class MessageWrapper(object): cdocs = {} cdocs_keys = cdocs.keys() assert sorted(cdocs_keys) == range(1, len(cdocs_keys) + 1) - self.cdocs = dict([(key, ContentDocWrapper(**doc)) for (key, doc) in - cdocs.items()]) + self.cdocs = dict([ + (key, ContentDocWrapper(**doc.content)) + for (key, doc) in cdocs.items()]) for doc_id, cdoc in zip(self.mdoc.cdocs, self.cdocs.values()): cdoc.set_future_doc_id(doc_id) @@ -560,6 +561,9 @@ class MessageWrapper(object): """ return self.hdoc.part_map[str(index)] + def get_subpart_indexes(self): + return self.hdoc.part_map.keys() + def get_body(self, store): """ :rtype: deferred diff --git a/src/leap/mail/imap/mailbox.py b/src/leap/mail/imap/mailbox.py index 52f4dd5..045636e 100644 --- a/src/leap/mail/imap/mailbox.py +++ b/src/leap/mail/imap/mailbox.py @@ -540,7 +540,11 @@ class IMAPMailbox(object): d_msg = [] for msgid in msg_sequence: - d_msg.append(getmsg(msgid)) + # XXX We want cdocs because we "probably" are asked for the + # body. We should be smarter at do_FETCH and pass a parameter + # to this method in order not to prefetch cdocs if they're not + # going to be used. 
+ d_msg.append(getmsg(msgid, get_cdocs=True)) d = defer.gatherResults(d_msg) d.addCallback(_get_imap_msg) diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 8f4c953..b7bb6ee 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -115,13 +115,6 @@ class IMAPMessage(object): # # IMessagePart # - def __prefetch_body_file(self): - def assign_body_fd(fd): - self.__body_fd = fd - return fd - d = self.getBodyFile() - d.addCallback(assign_body_fd) - return d def getBodyFile(self, store=None): """ @@ -139,25 +132,6 @@ class IMAPMessage(object): store = self.store return self.message.get_body_file(store) - # TODO refactor with getBodyFile in MessagePart - - #body = bdoc_content.get(self.RAW_KEY, "") - #content_type = bdoc_content.get('content-type', "") - #charset = find_charset(content_type) - #if charset is None: - #charset = self._get_charset(body) - #try: - #if isinstance(body, unicode): - #body = body.encode(charset) - #except UnicodeError as exc: - #logger.error( - #"Unicode error, using 'replace'. {0!r}".format(exc)) - #logger.debug("Attempted to encode with: %s" % charset) - #body = body.encode(charset, 'replace') - #finally: - #return write_fd(body) - - def getSize(self): """ Return the total size, in octets, of this message. @@ -182,48 +156,8 @@ class IMAPMessage(object): :return: A mapping of header field names to header field values :rtype: dict """ - # TODO split in smaller methods -- format_headers()? - # XXX refactor together with MessagePart method - headers = self.message.get_headers() - - # XXX keep this in the imap imessage implementation, - # because the server impl. expects content-type to be present. 
- if not headers: - logger.warning("No headers found") - return {str('content-type'): str('')} - - names = map(lambda s: s.upper(), names) - if negate: - cond = lambda key: key.upper() not in names - else: - cond = lambda key: key.upper() in names - - if isinstance(headers, list): - headers = dict(headers) - - # default to most likely standard - charset = find_charset(headers, "utf-8") - headers2 = dict() - for key, value in headers.items(): - # twisted imap server expects *some* headers to be lowercase - # We could use a CaseInsensitiveDict here... - if key.lower() == "content-type": - key = key.lower() - - if not isinstance(key, str): - key = key.encode(charset, 'replace') - if not isinstance(value, str): - value = value.encode(charset, 'replace') - - if value.endswith(";"): - # bastards - value = value[:-1] - - # filter original dict by negate-condition - if cond(key): - headers2[key] = value - return headers2 + return _format_headers(headers, negate, *names) def isMultipart(self): """ @@ -242,7 +176,81 @@ class IMAPMessage(object): :rtype: Any object implementing C{IMessagePart}. :return: The specified sub-part. 
""" - return self.message.get_subpart(part) + subpart = self.message.get_subpart(part) + return IMAPMessagePart(subpart) + + def __prefetch_body_file(self): + def assign_body_fd(fd): + self.__body_fd = fd + return fd + d = self.getBodyFile() + d.addCallback(assign_body_fd) + return d + + +class IMAPMessagePart(object): + + def __init__(self, message_part): + self.message_part = message_part + + def getBodyFile(self, store=None): + return self.message_part.get_body_file() + + def getSize(self): + return self.message_part.get_size() + + def getHeaders(self, negate, *names): + headers = self.message_part.get_headers() + return _format_headers(headers, negate, *names) + + def isMultipart(self): + return self.message_part.is_multipart() + + def getSubPart(self, part): + subpart = self.message_part.get_subpart(part) + return IMAPMessagePart(subpart) + + +def _format_headers(headers, negate, *names): + # current server impl. expects content-type to be present, so if for + # some reason we do not have headers, we have to return at least that + # one + if not headers: + logger.warning("No headers found") + return {str('content-type'): str('')} + + names = map(lambda s: s.upper(), names) + if negate: + cond = lambda key: key.upper() not in names + else: + cond = lambda key: key.upper() in names + + if isinstance(headers, list): + headers = dict(headers) + + # default to most likely standard + charset = find_charset(headers, "utf-8") + + _headers = dict() + for key, value in headers.items(): + # twisted imap server expects *some* headers to be lowercase + # We could use a CaseInsensitiveDict here... 
+ if key.lower() == "content-type": + key = key.lower() + + if not isinstance(key, str): + key = key.encode(charset, 'replace') + if not isinstance(value, str): + value = value.encode(charset, 'replace') + + if value.endswith(";"): + # bastards + value = value[:-1] + + # filter original dict by negate-condition + if cond(key): + _headers[key] = value + return _headers class IMAPMessageCollection(object): diff --git a/src/leap/mail/mail.py b/src/leap/mail/mail.py index 59fd57c..aa499c0 100644 --- a/src/leap/mail/mail.py +++ b/src/leap/mail/mail.py @@ -22,6 +22,7 @@ import logging import StringIO from twisted.internet import defer +from twisted.python import log from leap.common.check import leap_assert_type from leap.common.mail import get_email_charset @@ -30,7 +31,7 @@ from leap.mail.adaptors.soledad import SoledadMailAdaptor from leap.mail.constants import INBOX_NAME from leap.mail.constants import MessageFlags from leap.mail.mailbox_indexer import MailboxIndexer -from leap.mail.utils import empty, find_charset +from leap.mail.utils import empty # find_charset logger = logging.getLogger(name=__name__) @@ -57,61 +58,57 @@ def _write_and_rewind(payload): class MessagePart(object): + # TODO This class should be better abstracted from the data model. + # TODO support arbitrarily nested multiparts (right now we only support + # the trivial case) - def __init__(self, part_map, cdocs={}): + def __init__(self, part_map, index=1, cdocs={}): """ :param part_map: a dictionary mapping the subparts for this MessagePart (1-indexed). :type part_map: dict - :param cdoc: optional, a dict of content documents + + The format for the part_map is as follows: + + {u'1': {u'ctype': u'text/plain', + u'headers': [[u'Content-Type', u'text/plain; charset="utf-8"'], + [u'Content-Transfer-Encoding', u'8bit']], + u'multi': False, + u'parts': 1, + u'phash': u'02D82B29F6BB0C8612D1C', + u'size': 132}} + + :param index: which index in the content-doc is this subpart + representing. 
+ :param cdocs: optional, a reference to the top-level dict of wrappers + for content-docs (1-indexed). """ - # TODO document the expected keys in the part_map dict. - # TODO add abstraction layer between the cdocs and this class. Only - # adaptor should know about the format of the cdocs. + # TODO: Pass only the cdoc wrapper for this part. self._pmap = part_map + self._index = index self._cdocs = cdocs def get_size(self): return self._pmap['size'] def get_body_file(self): + payload = "" pmap = self._pmap multi = pmap.get('multi') if not multi: - phash = pmap.get("phash") + payload = self._get_payload(self._index) else: - pmap_ = pmap.get('part_map') - first_part = pmap_.get('1', None) - if not empty(first_part): - phash = first_part['phash'] - else: - phash = "" - - payload = self._get_payload(phash) - + # XXX uh, multi also... should recurse + raise NotImplementedError if payload: - # FIXME - # content_type = self._get_ctype_from_document(phash) - # charset = find_charset(content_type) - charset = None - if charset is None: - charset = get_email_charset(payload) - try: - if isinstance(payload, unicode): - payload = payload.encode(charset) - except UnicodeError as exc: - logger.error( - "Unicode error, using 'replace'. 
{0!r}".format(exc)) - payload = payload.encode(charset, 'replace') - + payload = self._format_payload(payload) return _write_and_rewind(payload) def get_headers(self): return self._pmap.get("headers", []) def is_multipart(self): - multi = self._pmap.get("multi", False) - return multi + return self._pmap.get("multi", False) def get_subpart(self, part): if not self.is_multipart(): @@ -123,10 +120,30 @@ class MessagePart(object): except KeyError: logger.debug("getSubpart for %s: KeyError" % (part,)) raise IndexError - return MessagePart(self._soledad, part_map) - - def _get_payload(self, phash): - return self._cdocs.get(phash, "") + return MessagePart(part_map, cdocs={1: self._cdocs.get(1, {})}) + + def _get_payload(self, index): + cdoc_wrapper = self._cdocs.get(index, None) + if cdoc_wrapper: + return cdoc_wrapper.raw + return "" + + def _format_payload(self, payload): + # FIXME ----------------------------------------------- + # Test against unicode payloads... + # content_type = self._get_ctype_from_document(phash) + # charset = find_charset(content_type) + charset = None + if charset is None: + charset = get_email_charset(payload) + try: + if isinstance(payload, unicode): + payload = payload.encode(charset) + except UnicodeError as exc: + logger.error( + "Unicode error, using 'replace'. {0!r}".format(exc)) + payload = payload.encode(charset, 'replace') + return payload class Message(object): @@ -224,17 +241,18 @@ class Message(object): raise TypeError part_index = part + 1 try: - subpart_dict = self._wrapper.get_subpart_dict( - part_index) + subpart_dict = self._wrapper.get_subpart_dict(part_index) except KeyError: - raise TypeError - # XXX pass cdocs - return MessagePart(subpart_dict) + raise IndexError + + return MessagePart( + subpart_dict, index=part_index, cdocs=self._wrapper.cdocs) # Custom methods. def get_tags(self): """ + Get the tags for this message. 
""" return tuple(self._wrapper.fdoc.tags) @@ -290,7 +308,7 @@ class MessageCollection(object): self.adaptor = adaptor self.store = store - # XXX I have to think about what to do when there is no mbox passed to + # XXX think about what to do when there is no mbox passed to # the initialization. We could still get the MetaMsg by index, instead # of by doc_id. See get_message_by_content_hash self.mbox_indexer = mbox_indexer -- cgit v1.2.3