From eaa4bcb241d5d55c4fd2458cb05c74fcdc79368c Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 23 Jan 2014 02:32:52 -0400 Subject: split messageparts --- src/leap/mail/imap/messageparts.py | 262 +++++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 src/leap/mail/imap/messageparts.py (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py new file mode 100644 index 0000000..a47ea1d --- /dev/null +++ b/src/leap/mail/imap/messageparts.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- +# messageparts.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +MessagePart implementation. Used from LeapMessage. +""" +import logging +import re +import StringIO + +from enum import Enum +from zope.interface import implements +from twisted.mail import imap4 + +from leap.common.decorators import memoized_method +from leap.common.mail import get_email_charset +from leap.mail.imap.fields import fields +from leap.mail.utils import first + +MessagePartType = Enum("hdoc", "fdoc", "cdoc") + + +logger = logging.getLogger(__name__) + + +CHARSET_PATTERN = r"""charset=([\w-]+)""" +CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) + + +class MessagePart(object): + """ + IMessagePart implementor. + It takes a subpart message and is able to find + the inner parts. + + Excusatio non petita: see the interface documentation. + """ + + implements(imap4.IMessagePart) + + def __init__(self, soledad, part_map): + """ + Initializes the MessagePart. + + :param part_map: a dictionary containing the parts map for this + message + :type part_map: dict + """ + # TODO + # It would be good to pass the uid/mailbox also + # for references while debugging. + + # We have a problem on bulk moves, and is + # that when the fetch on the new mailbox is done + # the parts maybe are not complete. + # So we should be able to fail with empty + # docs until we solve that. The ideal would be + # to gather the results of the deferred operations + # to signal the operation is complete. + #leap_assert(part_map, "part map dict cannot be null") + self._soledad = soledad + self._pmap = part_map + + def getSize(self): + """ + Return the total size, in octets, of this message part. + + :return: size of the message, in octets + :rtype: int + """ + if not self._pmap: + return 0 + size = self._pmap.get('size', None) + if not size: + logger.error("Message part cannot find size in the partmap") + return size + + def getBodyFile(self): + """ + Retrieve a file object containing only the body of this message. + + :return: file-like object opened for reading + :rtype: StringIO + """ + fd = StringIO.StringIO() + if self._pmap: + multi = self._pmap.get('multi') + if not multi: + phash = self._pmap.get("phash", None) + else: + pmap = self._pmap.get('part_map') + first_part = pmap.get('1', None) + if first_part: + phash = first_part['phash'] + + if not phash: + logger.warning("Could not find phash for this subpart!") + payload = str("") + else: + payload = self._get_payload_from_document(phash) + + else: + logger.warning("Message with no part_map!") + payload = str("") + + if payload: + content_type = self._get_ctype_from_document(phash) + charset = first(CHARSET_RE.findall(content_type)) + logger.debug("Got charset from header: %s" % (charset,)) + if not charset: + charset = self._get_charset(payload) + try: + payload = payload.encode(charset) + except UnicodeError as exc: + logger.error("Unicode error {0}".format(exc)) + payload = payload.encode(charset, 'replace') + + fd.write(payload) + fd.seek(0) + return fd + + # TODO cache the phash retrieval + def _get_payload_from_document(self, phash): + """ + Gets the message payload from the content document. + + :param phash: the payload hash to retrieve by. + :type phash: basestring + """ + cdocs = self._soledad.get_from_index( + fields.TYPE_P_HASH_IDX, + fields.TYPE_CONTENT_VAL, str(phash)) + + cdoc = first(cdocs) + if not cdoc: + logger.warning( + "Could not find the content doc " + "for phash %s" % (phash,)) + payload = cdoc.content.get(fields.RAW_KEY, "") + return payload + + # TODO cache the pahash retrieval + def _get_ctype_from_document(self, phash): + """ + Gets the content-type from the content document. + + :param phash: the payload hash to retrieve by. + :type phash: basestring + """ + cdocs = self._soledad.get_from_index( + fields.TYPE_P_HASH_IDX, + fields.TYPE_CONTENT_VAL, str(phash)) + + cdoc = first(cdocs) + if not cdoc: + logger.warning( + "Could not find the content doc " + "for phash %s" % (phash,)) + ctype = cdoc.content.get('ctype', "") + return ctype + + @memoized_method + def _get_charset(self, stuff): + # TODO put in a common class with LeapMessage + """ + Gets (guesses?) the charset of a payload. + + :param stuff: the stuff to guess about. + :type stuff: basestring + :returns: charset + """ + # XXX existential doubt 2. shouldn't we make the scope + # of the decorator somewhat more persistent? + # ah! yes! and put memory bounds. + return get_email_charset(unicode(stuff)) + + def getHeaders(self, negate, *names): + """ + Retrieve a group of message headers. + + :param names: The names of the headers to retrieve or omit. + :type names: tuple of str + + :param negate: If True, indicates that the headers listed in names + should be omitted from the return value, rather + than included. + :type negate: bool + + :return: A mapping of header field names to header field values + :rtype: dict + """ + if not self._pmap: + logger.warning("No pmap in Subpart!") + return {} + headers = dict(self._pmap.get("headers", [])) + + # twisted imap server expects *some* headers to be lowercase + # We could use a CaseInsensitiveDict here... + headers = dict( + (str(key), str(value)) if key.lower() != "content-type" + else (str(key.lower()), str(value)) + for (key, value) in headers.items()) + + names = map(lambda s: s.upper(), names) + if negate: + cond = lambda key: key.upper() not in names + else: + cond = lambda key: key.upper() in names + + # unpack and filter original dict by negate-condition + filter_by_cond = [ + map(str, (key, val)) for + key, val in headers.items() + if cond(key)] + filtered = dict(filter_by_cond) + return filtered + + def isMultipart(self): + """ + Return True if this message is multipart. + """ + if not self._pmap: + logger.warning("Could not get part map!") + return False + multi = self._pmap.get("multi", False) + return multi + + def getSubPart(self, part): + """ + Retrieve a MIME submessage + + :type part: C{int} + :param part: The number of the part to retrieve, indexed from 0. + :raise IndexError: Raised if the specified part does not exist. + :raise TypeError: Raised if this message is not multipart. + :rtype: Any object implementing C{IMessagePart}. + :return: The specified sub-part. + """ + if not self.isMultipart(): + raise TypeError + sub_pmap = self._pmap.get("part_map", {}) + try: + part_map = sub_pmap[str(part + 1)] + except KeyError: + logger.debug("getSubpart for %s: KeyError" % (part,)) + raise IndexError + + # XXX check for validity + return MessagePart(self._soledad, part_map) -- cgit v1.2.3 From e2218eec4fd91e4648160a05e3debc05efa0d0d9 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 23 Jan 2014 02:36:38 -0400 Subject: add soledadstore class move parts-related bits to messageparts pass soledad in initialization for memory messages --- src/leap/mail/imap/messageparts.py | 183 ++++++++++++++++++++++++++++++++++++- 1 file changed, 182 insertions(+), 1 deletion(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index a47ea1d..3f89193 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -20,6 +20,9 @@ MessagePart implementation. Used from LeapMessage. import logging import re import StringIO +import weakref + +from collections import namedtuple from enum import Enum from zope.interface import implements @@ -27,6 +30,7 @@ from twisted.mail import imap4 from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset +from leap.mail.imap import interfaces from leap.mail.imap.fields import fields from leap.mail.utils import first @@ -36,13 +40,188 @@ MessagePartType = Enum("hdoc", "fdoc", "cdoc") logger = logging.getLogger(__name__) +# XXX not needed anymoar ... CHARSET_PATTERN = r"""charset=([\w-]+)""" CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) +""" +A MessagePartDoc is a light wrapper around the dictionary-like +data that we pass along for message parts. It can be used almost everywhere +that you would expect a SoledadDocument, since it has a dict under the +`content` attribute. + +We also keep some metadata on it, relative in part to the message as a whole, +and sometimes to a part in particular only. + +* `new` indicates that the document has just been created. SoledadStore + should just create a new doc for all the related message parts. +* `store` indicates the type of store a given MessagePartDoc lives in. + We currently use this to indicate that the document comes from memeory, + but we should probably get rid of it as soon as we extend the use of the + SoledadStore interface along LeapMessage, MessageCollection and Mailbox. +* `part` is one of the MessagePartType enums. + +* `dirty` indicates that, while we already have the document in Soledad, + we have modified its state in memory, so we need to put_doc instead while + dumping the MemoryStore contents. + `dirty` attribute would only apply to flags-docs and linkage-docs. + + + XXX this is still not implemented! + +""" + +MessagePartDoc = namedtuple( + 'MessagePartDoc', + ['new', 'dirty', 'part', 'store', 'content']) + + +class ReferenciableDict(dict): + """ + A dict that can be weak-referenced. + + Some builtin objects are not weak-referenciable unless + subclassed. So we do. + + Used to return pointers to the items in the MemoryStore. + """ + + +class MessageWrapper(object): + """ + A simple nested dictionary container around the different message subparts. + """ + implements(interfaces.IMessageContainer) + + FDOC = "fdoc" + HDOC = "hdoc" + CDOCS = "cdocs" + + # XXX can use this to limit the memory footprint, + # or is it too premature to optimize? + # Does it work well together with the interfaces.implements? + + #__slots__ = ["_dict", "_new", "_dirty", "memstore"] + + def __init__(self, fdoc=None, hdoc=None, cdocs=None, + from_dict=None, memstore=None, + new=True, dirty=False): + self._dict = {} + self.memstore = memstore + + self._new = new + self._dirty = dirty + self._storetype = "mem" + + if from_dict is not None: + self.from_dict(from_dict) + else: + if fdoc is not None: + self._dict[self.FDOC] = ReferenciableDict(fdoc) + if hdoc is not None: + self._dict[self.HDOC] = ReferenciableDict(hdoc) + if cdocs is not None: + self._dict[self.CDOCS] = ReferenciableDict(cdocs) + + # properties + + @property + def new(self): + return self._new + + def set_new(self, value=True): + self._new = value + + @property + def dirty(self): + return self._dirty + + def set_dirty(self, value=True): + self._dirty = value + + # IMessageContainer + + @property + def fdoc(self): + _fdoc = self._dict.get(self.FDOC, None) + if _fdoc: + content_ref = weakref.proxy(_fdoc) + else: + logger.warning("NO FDOC!!!") + content_ref = {} + return MessagePartDoc(new=self.new, dirty=self.dirty, + store=self._storetype, + part=MessagePartType.fdoc, + content=content_ref) + + @property + def hdoc(self): + _hdoc = self._dict.get(self.HDOC, None) + if _hdoc: + content_ref = weakref.proxy(_hdoc) + else: + logger.warning("NO HDOC!!!!") + content_ref = {} + return MessagePartDoc(new=self.new, dirty=self.dirty, + store=self._storetype, + part=MessagePartType.hdoc, + content=content_ref) + + @property + def cdocs(self): + _cdocs = self._dict.get(self.CDOCS, None) + if _cdocs: + return weakref.proxy(_cdocs) + else: + return {} + + def walk(self): + """ + Generator that iterates through all the parts, returning + MessagePartDoc. + """ + yield self.fdoc + yield self.hdoc + for cdoc in self.cdocs.values(): + # XXX this will break ---- + #content_ref = weakref.proxy(cdoc) + #yield MessagePartDoc(new=self.new, dirty=self.dirty, + #store=self._storetype, + #part=MessagePartType.cdoc, + #content=content_ref) + + # the put is handling this for us, so + # we already have stored a MessagePartDoc + # but we should really do it while adding in the + # constructor or the from_dict method + yield cdoc + + # i/o + + def as_dict(self): + """ + Return a dict representation of the parts contained. + """ + return self._dict + + def from_dict(self, msg_dict): + """ + Populate MessageWrapper parts from a dictionary. + It expects the same format that we use in a + MessageWrapper. + """ + fdoc, hdoc, cdocs = map( + lambda part: msg_dict.get(part, None), + [self.FDOC, self.HDOC, self.CDOCS]) + self._dict[self.FDOC] = fdoc + self._dict[self.HDOC] = hdoc + self._dict[self.CDOCS] = cdocs + class MessagePart(object): """ - IMessagePart implementor. + IMessagePart implementor, to be passed to several methods + of the IMAP4Server. It takes a subpart message and is able to find the inner parts. @@ -117,6 +296,8 @@ class MessagePart(object): payload = str("") if payload: + # XXX use find_charset instead -------------------------- + # bad rebase??? content_type = self._get_ctype_from_document(phash) charset = first(CHARSET_RE.findall(content_type)) logger.debug("Got charset from header: %s" % (charset,)) -- cgit v1.2.3 From ff28e22977db802c87f0b7be99e37c6de29183e9 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 23 Jan 2014 13:32:01 -0400 Subject: Unset new flag after successful write --- src/leap/mail/imap/messageparts.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 3f89193..42eef02 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -125,20 +125,41 @@ class MessageWrapper(object): # properties - @property - def new(self): + def _get_new(self): + """ + Get the value for the `new` flag. + """ return self._new - def set_new(self, value=True): + def _set_new(self, value=True): + """ + Set the value for the `new` flag, and propagate it + to the memory store if any. + """ self._new = value + if self.memstore: + mbox = self.fdoc.content['mbox'] + uid = self.fdoc.content['uid'] + key = mbox, uid + fun = [self.memstore.unset_new, + self.memstore.set_new][int(value)] + fun(key) + else: + logger.warning("Could not find a memstore referenced from this " + "MessageWrapper. The value for new will not be " + "propagated") - @property - def dirty(self): + new = property(_get_new, _set_new, + doc="The `new` flag for this MessageWrapper") + + def _get_dirty(self): return self._dirty - def set_dirty(self, value=True): + def _set_dirty(self, value=True): self._dirty = value + dirty = property(_get_dirty, _set_dirty) + # IMessageContainer @property -- cgit v1.2.3 From e02db78b1b6d8fe021efd4adb250c64a1dd4bac4 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 24 Jan 2014 05:39:13 -0400 Subject: flags use the memstore * add new/dirty deferred dict to notify when written to disk * fix eventual duplication after copy * fix flag flickering on first retrieval. --- src/leap/mail/imap/messageparts.py | 72 +++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 24 deletions(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 42eef02..b43bc37 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -65,15 +65,13 @@ and sometimes to a part in particular only. we have modified its state in memory, so we need to put_doc instead while dumping the MemoryStore contents. `dirty` attribute would only apply to flags-docs and linkage-docs. - - - XXX this is still not implemented! +* `doc_id` is the identifier for the document in the u1db database, if any. """ MessagePartDoc = namedtuple( 'MessagePartDoc', - ['new', 'dirty', 'part', 'store', 'content']) + ['new', 'dirty', 'part', 'store', 'content', 'doc_id']) class ReferenciableDict(dict): @@ -96,6 +94,7 @@ class MessageWrapper(object): FDOC = "fdoc" HDOC = "hdoc" CDOCS = "cdocs" + DOCS_ID = "docs_id" # XXX can use this to limit the memory footprint, # or is it too premature to optimize? @@ -105,12 +104,17 @@ class MessageWrapper(object): def __init__(self, fdoc=None, hdoc=None, cdocs=None, from_dict=None, memstore=None, - new=True, dirty=False): + new=True, dirty=False, docs_id={}): + """ + Initialize a MessageWrapper. + """ + # TODO add optional reference to original message in the incoming self._dict = {} self.memstore = memstore self._new = new self._dirty = dirty + self._storetype = "mem" if from_dict is not None: @@ -122,6 +126,7 @@ class MessageWrapper(object): self._dict[self.HDOC] = ReferenciableDict(hdoc) if cdocs is not None: self._dict[self.CDOCS] = ReferenciableDict(cdocs) + self._dict[self.DOCS_ID] = docs_id # properties @@ -153,10 +158,28 @@ class MessageWrapper(object): doc="The `new` flag for this MessageWrapper") def _get_dirty(self): + """ + Get the value for the `dirty` flag. + """ return self._dirty def _set_dirty(self, value=True): + """ + Set the value for the `dirty` flag, and propagate it + to the memory store if any. + """ self._dirty = value + if self.memstore: + mbox = self.fdoc.content['mbox'] + uid = self.fdoc.content['uid'] + key = mbox, uid + fun = [self.memstore.unset_dirty, + self.memstore.set_dirty][int(value)] + fun(key) + else: + logger.warning("Could not find a memstore referenced from this " + "MessageWrapper. The value for new will not be " + "propagated") dirty = property(_get_dirty, _set_dirty) @@ -173,7 +196,9 @@ class MessageWrapper(object): return MessagePartDoc(new=self.new, dirty=self.dirty, store=self._storetype, part=MessagePartType.fdoc, - content=content_ref) + content=content_ref, + doc_id=self._dict[self.DOCS_ID].get( + self.FDOC, None)) @property def hdoc(self): @@ -186,7 +211,9 @@ class MessageWrapper(object): return MessagePartDoc(new=self.new, dirty=self.dirty, store=self._storetype, part=MessagePartType.hdoc, - content=content_ref) + content=content_ref, + doc_id=self._dict[self.DOCS_ID].get( + self.HDOC, None)) @property def cdocs(self): @@ -201,21 +228,18 @@ class MessageWrapper(object): Generator that iterates through all the parts, returning MessagePartDoc. """ - yield self.fdoc - yield self.hdoc + if self.fdoc is not None: + yield self.fdoc + if self.hdoc is not None: + yield self.hdoc for cdoc in self.cdocs.values(): - # XXX this will break ---- - #content_ref = weakref.proxy(cdoc) - #yield MessagePartDoc(new=self.new, dirty=self.dirty, - #store=self._storetype, - #part=MessagePartType.cdoc, - #content=content_ref) - - # the put is handling this for us, so - # we already have stored a MessagePartDoc - # but we should really do it while adding in the - # constructor or the from_dict method - yield cdoc + if cdoc is not None: + content_ref = weakref.proxy(cdoc) + yield MessagePartDoc(new=self.new, dirty=self.dirty, + store=self._storetype, + part=MessagePartType.cdoc, + content=content_ref, + doc_id=None) # i/o @@ -234,9 +258,9 @@ class MessageWrapper(object): fdoc, hdoc, cdocs = map( lambda part: msg_dict.get(part, None), [self.FDOC, self.HDOC, self.CDOCS]) - self._dict[self.FDOC] = fdoc - self._dict[self.HDOC] = hdoc - self._dict[self.CDOCS] = cdocs + for t, doc in ((self.FDOC, fdoc), (self.HDOC, hdoc), + (self.CDOCS, cdocs)): + self._dict[t] = ReferenciableDict(doc) if doc else None class MessagePart(object): -- cgit v1.2.3 From b6f08b2fb731a4f3d1e6a04839bd3af71e9b2f5c Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 24 Jan 2014 21:09:38 -0400 Subject: use enums for dict keys --- src/leap/mail/imap/messageparts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index b43bc37..055e6a5 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -34,7 +34,7 @@ from leap.mail.imap import interfaces from leap.mail.imap.fields import fields from leap.mail.utils import first -MessagePartType = Enum("hdoc", "fdoc", "cdoc") +MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id") logger = logging.getLogger(__name__) -- cgit v1.2.3 From a5508429b90e2e9b58c5d073610ee5a10274663f Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 24 Jan 2014 23:14:38 -0400 Subject: recent-flags use the memory store --- src/leap/mail/imap/messageparts.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 055e6a5..257d3f0 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -73,6 +73,14 @@ MessagePartDoc = namedtuple( 'MessagePartDoc', ['new', 'dirty', 'part', 'store', 'content', 'doc_id']) +""" +A RecentFlagsDoc is used to send the recent-flags document payload to the +SoledadWriter during dumps. +""" +RecentFlagsDoc = namedtuple( + 'RecentFlagsDoc', + ['content', 'doc_id']) + class ReferenciableDict(dict): """ -- cgit v1.2.3 From f5365ae0c2edb8b3e879f876f2f7e42b25f4616a Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 27 Jan 2014 16:11:53 -0400 Subject: handle last_uid property in memory store --- src/leap/mail/imap/messageparts.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 257d3f0..6d8631a 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -32,7 +32,7 @@ from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset from leap.mail.imap import interfaces from leap.mail.imap.fields import fields -from leap.mail.utils import first +from leap.mail.utils import empty, first MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id") @@ -134,6 +134,13 @@ class MessageWrapper(object): self._dict[self.HDOC] = ReferenciableDict(hdoc) if cdocs is not None: self._dict[self.CDOCS] = ReferenciableDict(cdocs) + + # This will keep references to the doc_ids to be able to put + # messages to soledad. It will be populated during the walk() to avoid + # the overhead of reading from the db. + + # XXX it really *only* make sense for the FDOC, the other parts + # should not be "dirty", just new...!!! self._dict[self.DOCS_ID] = docs_id # properties @@ -201,6 +208,7 @@ class MessageWrapper(object): else: logger.warning("NO FDOC!!!") content_ref = {} + return MessagePartDoc(new=self.new, dirty=self.dirty, store=self._storetype, part=MessagePartType.fdoc, @@ -214,7 +222,6 @@ class MessageWrapper(object): if _hdoc: content_ref = weakref.proxy(_hdoc) else: - logger.warning("NO HDOC!!!!") content_ref = {} return MessagePartDoc(new=self.new, dirty=self.dirty, store=self._storetype, @@ -234,14 +241,21 @@ class MessageWrapper(object): def walk(self): """ Generator that iterates through all the parts, returning - MessagePartDoc. + MessagePartDoc. Used for writing to SoledadStore. """ - if self.fdoc is not None: + if self._dirty: + mbox = self.fdoc.content[fields.MBOX_KEY] + uid = self.fdoc.content[fields.UID_KEY] + docid_dict = self._dict[self.DOCS_ID] + docid_dict[self.FDOC] = self.memstore.get_docid_for_fdoc( + mbox, uid) + + if not empty(self.fdoc.content): yield self.fdoc - if self.hdoc is not None: + if not empty(self.hdoc.content): yield self.hdoc for cdoc in self.cdocs.values(): - if cdoc is not None: + if not empty(cdoc): content_ref = weakref.proxy(cdoc) yield MessagePartDoc(new=self.new, dirty=self.dirty, store=self._storetype, -- cgit v1.2.3 From f096368cfbc49caab52811ae50388aae74272a1a Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 28 Jan 2014 10:24:04 -0400 Subject: fix find_charset rebase --- src/leap/mail/imap/messageparts.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 6d8631a..10672ed 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -32,7 +32,7 @@ from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset from leap.mail.imap import interfaces from leap.mail.imap.fields import fields -from leap.mail.utils import empty, first +from leap.mail.utils import empty, first, find_charset MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id") @@ -40,10 +40,6 @@ MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id") logger = logging.getLogger(__name__) -# XXX not needed anymoar ... -CHARSET_PATTERN = r"""charset=([\w-]+)""" -CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) - """ A MessagePartDoc is a light wrapper around the dictionary-like data that we pass along for message parts. It can be used almost everywhere @@ -363,17 +359,17 @@ class MessagePart(object): payload = str("") if payload: - # XXX use find_charset instead -------------------------- - # bad rebase??? content_type = self._get_ctype_from_document(phash) - charset = first(CHARSET_RE.findall(content_type)) + charset = find_charset(content_type) logger.debug("Got charset from header: %s" % (charset,)) - if not charset: + if charset is None: charset = self._get_charset(payload) + logger.debug("Got charset: %s" % (charset,)) try: payload = payload.encode(charset) except UnicodeError as exc: - logger.error("Unicode error {0}".format(exc)) + logger.error( + "Unicode error, using 'replace'. {0!r}".format(exc)) payload = payload.encode(charset, 'replace') fd.write(payload) -- cgit v1.2.3 From a7e0054b595822325f749b0b1df7d25cab4e6486 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 28 Jan 2014 18:39:59 -0400 Subject: docstring fixes Also some fixes for None comparisons. --- src/leap/mail/imap/messageparts.py | 129 ++++++++++++++++++++++++++----------- 1 file changed, 92 insertions(+), 37 deletions(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 10672ed..5067263 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -18,7 +18,6 @@ MessagePart implementation. Used from LeapMessage. """ import logging -import re import StringIO import weakref @@ -100,11 +99,10 @@ class MessageWrapper(object): CDOCS = "cdocs" DOCS_ID = "docs_id" - # XXX can use this to limit the memory footprint, - # or is it too premature to optimize? - # Does it work well together with the interfaces.implements? + # Using slots to limit some the memory footprint, + # Add your attribute here. - #__slots__ = ["_dict", "_new", "_dirty", "memstore"] + __slots__ = ["_dict", "_new", "_dirty", "_storetype", "memstore"] def __init__(self, fdoc=None, hdoc=None, cdocs=None, from_dict=None, memstore=None, @@ -141,9 +139,13 @@ class MessageWrapper(object): # properties + # TODO Could refactor new and dirty properties together. + def _get_new(self): """ Get the value for the `new` flag. + + :rtype: bool """ return self._new @@ -151,6 +153,9 @@ class MessageWrapper(object): """ Set the value for the `new` flag, and propagate it to the memory store if any. + + :param value: the value to set + :type value: bool """ self._new = value if self.memstore: @@ -171,6 +176,8 @@ class MessageWrapper(object): def _get_dirty(self): """ Get the value for the `dirty` flag. + + :rtype: bool """ return self._dirty @@ -178,6 +185,9 @@ class MessageWrapper(object): """ Set the value for the `dirty` flag, and propagate it to the memory store if any. + + :param value: the value to set + :type value: bool """ self._dirty = value if self.memstore: @@ -198,6 +208,12 @@ class MessageWrapper(object): @property def fdoc(self): + """ + Return a MessagePartDoc wrapping around a weak reference to + the flags-document in this MemoryStore, if any. + + :rtype: MessagePartDoc + """ _fdoc = self._dict.get(self.FDOC, None) if _fdoc: content_ref = weakref.proxy(_fdoc) @@ -214,6 +230,12 @@ class MessageWrapper(object): @property def hdoc(self): + """ + Return a MessagePartDoc wrapping around a weak reference to + the headers-document in this MemoryStore, if any. + + :rtype: MessagePartDoc + """ _hdoc = self._dict.get(self.HDOC, None) if _hdoc: content_ref = weakref.proxy(_hdoc) @@ -228,6 +250,14 @@ class MessageWrapper(object): @property def cdocs(self): + """ + Return a weak reference to a zero-indexed dict containing + the content-documents, or an empty dict if none found. + If you want access to the MessagePartDoc for the individual + parts, use the generator returned by `walk` instead. + + :rtype: dict + """ _cdocs = self._dict.get(self.CDOCS, None) if _cdocs: return weakref.proxy(_cdocs) @@ -238,6 +268,8 @@ class MessageWrapper(object): """ Generator that iterates through all the parts, returning MessagePartDoc. Used for writing to SoledadStore. + + :rtype: generator """ if self._dirty: mbox = self.fdoc.content[fields.MBOX_KEY] @@ -264,6 +296,8 @@ class MessageWrapper(object): def as_dict(self): """ Return a dict representation of the parts contained. + + :rtype: dict """ return self._dict @@ -272,6 +306,11 @@ class MessageWrapper(object): Populate MessageWrapper parts from a dictionary. It expects the same format that we use in a MessageWrapper. + + + :param msg_dict: a dictionary containing the parts to populate + the MessageWrapper from + :type msg_dict: dict """ fdoc, hdoc, cdocs = map( lambda part: msg_dict.get(part, None), @@ -288,7 +327,7 @@ class MessagePart(object): It takes a subpart message and is able to find the inner parts. - Excusatio non petita: see the interface documentation. + See the interface documentation. """ implements(imap4.IMessagePart) @@ -297,6 +336,8 @@ class MessagePart(object): """ Initializes the MessagePart. + :param soledad: Soledad instance. + :type soledad: Soledad :param part_map: a dictionary containing the parts map for this message :type part_map: dict @@ -313,6 +354,7 @@ class MessagePart(object): # to gather the results of the deferred operations # to signal the operation is complete. #leap_assert(part_map, "part map dict cannot be null") + self._soledad = soledad self._pmap = part_map @@ -323,11 +365,12 @@ class MessagePart(object): :return: size of the message, in octets :rtype: int """ - if not self._pmap: + if empty(self._pmap): return 0 size = self._pmap.get('size', None) - if not size: + if size is None: logger.error("Message part cannot find size in the partmap") + size = 0 return size def getBodyFile(self): @@ -338,25 +381,25 @@ class MessagePart(object): :rtype: StringIO """ fd = StringIO.StringIO() - if self._pmap: + if not empty(self._pmap): multi = self._pmap.get('multi') if not multi: phash = self._pmap.get("phash", None) else: pmap = self._pmap.get('part_map') first_part = pmap.get('1', None) - if first_part: + if not empty(first_part): phash = first_part['phash'] if not phash: logger.warning("Could not find phash for this subpart!") - payload = str("") + payload = "" else: payload = self._get_payload_from_document(phash) else: logger.warning("Message with no part_map!") - payload = str("") + payload = "" if payload: content_type = self._get_ctype_from_document(phash) @@ -366,7 +409,8 @@ class MessagePart(object): charset = self._get_charset(payload) logger.debug("Got charset: %s" % (charset,)) try: - payload = payload.encode(charset) + if isinstance(payload, unicode): + payload = payload.encode(charset) except UnicodeError as exc: logger.error( "Unicode error, using 'replace'. {0!r}".format(exc)) @@ -376,13 +420,15 @@ class MessagePart(object): fd.seek(0) return fd - # TODO cache the phash retrieval + # TODO should memory-bound this memoize!!! + @memoized_method def _get_payload_from_document(self, phash): """ - Gets the message payload from the content document. + Return the message payload from the content document. :param phash: the payload hash to retrieve by. - :type phash: basestring + :type phash: str or unicode + :rtype: str or unicode """ cdocs = self._soledad.get_from_index( fields.TYPE_P_HASH_IDX, @@ -396,13 +442,15 @@ class MessagePart(object): payload = cdoc.content.get(fields.RAW_KEY, "") return payload - # TODO cache the pahash retrieval + # TODO should memory-bound this memoize!!! + @memoized_method def _get_ctype_from_document(self, phash): """ - Gets the content-type from the content document. + Reeturn the content-type from the content document. :param phash: the payload hash to retrieve by. - :type phash: basestring + :type phash: str or unicode + :rtype: str or unicode """ cdocs = self._soledad.get_from_index( fields.TYPE_P_HASH_IDX, @@ -423,13 +471,14 @@ class MessagePart(object): Gets (guesses?) the charset of a payload. :param stuff: the stuff to guess about. - :type stuff: basestring - :returns: charset + :type stuff: str or unicode + :return: charset + :rtype: unicode """ # XXX existential doubt 2. shouldn't we make the scope # of the decorator somewhat more persistent? # ah! yes! and put memory bounds. - return get_email_charset(unicode(stuff)) + return get_email_charset(stuff) def getHeaders(self, negate, *names): """ @@ -446,37 +495,42 @@ class MessagePart(object): :return: A mapping of header field names to header field values :rtype: dict """ + # XXX refactor together with MessagePart method if not self._pmap: logger.warning("No pmap in Subpart!") return {} headers = dict(self._pmap.get("headers", [])) - # twisted imap server expects *some* headers to be lowercase - # We could use a CaseInsensitiveDict here... - headers = dict( - (str(key), str(value)) if key.lower() != "content-type" - else (str(key.lower()), str(value)) - for (key, value) in headers.items()) - names = map(lambda s: s.upper(), names) if negate: cond = lambda key: key.upper() not in names else: cond = lambda key: key.upper() in names - # unpack and filter original dict by negate-condition - filter_by_cond = [ - map(str, (key, val)) for - key, val in headers.items() - if cond(key)] - filtered = dict(filter_by_cond) - return filtered + # default to most likely standard + charset = find_charset(headers, "utf-8") + headers2 = dict() + for key, value in headers.items(): + # twisted imap server expects *some* headers to be lowercase + # We could use a CaseInsensitiveDict here... + if key.lower() == "content-type": + key = key.lower() + + if not isinstance(key, str): + key = key.encode(charset, 'replace') + if not isinstance(value, str): + value = value.encode(charset, 'replace') + + # filter original dict by negate-condition + if cond(key): + headers2[key] = value + return headers2 def isMultipart(self): """ Return True if this message is multipart. """ - if not self._pmap: + if empty(self._pmap): logger.warning("Could not get part map!") return False multi = self._pmap.get("multi", False) @@ -495,6 +549,7 @@ class MessagePart(object): """ if not self.isMultipart(): raise TypeError + sub_pmap = self._pmap.get("part_map", {}) try: part_map = sub_pmap[str(part + 1)] -- cgit v1.2.3 From 18fed49c4143eb764ae9e806882d24f8f4e95744 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Sun, 2 Feb 2014 09:26:37 -0400 Subject: fix missing content after in-memory add because THE KEYS WILL BE STRINGS AFTER ADDED TO SOLEDAD Can I remember that? * Fix copy from local folders * Fix copy when we already have a copy of the message in the inbox, marked as deleted. * Fix also bad deferred.succeed in add_msg when it already exist. --- src/leap/mail/imap/messageparts.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 5067263..b07681b 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # messageparts.py # Copyright (C) 2014 LEAP # @@ -315,6 +314,7 @@ class MessageWrapper(object): fdoc, hdoc, cdocs = map( lambda part: msg_dict.get(part, None), [self.FDOC, self.HDOC, self.CDOCS]) + for t, doc in ((self.FDOC, fdoc), (self.HDOC, hdoc), (self.CDOCS, cdocs)): self._dict[t] = ReferenciableDict(doc) if doc else None @@ -390,8 +390,10 @@ class MessagePart(object): first_part = pmap.get('1', None) if not empty(first_part): phash = first_part['phash'] + else: + phash = None - if not phash: + if phash is None: logger.warning("Could not find phash for this subpart!") payload = "" else: @@ -435,11 +437,13 @@ class MessagePart(object): fields.TYPE_CONTENT_VAL, str(phash)) cdoc = first(cdocs) - if not cdoc: + if cdoc is None: logger.warning( "Could not find the content doc " "for phash %s" % (phash,)) - payload = cdoc.content.get(fields.RAW_KEY, "") + payload = "" + else: + payload = cdoc.content.get(fields.RAW_KEY, "") return payload # TODO should memory-bound this memoize!!! -- cgit v1.2.3 From 3f9c3ab22523c553dc677d5273dc8d01394d74f7 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 5 Feb 2014 12:37:50 -0400 Subject: fix memoized call returning always None --- src/leap/mail/imap/messageparts.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index b07681b..2d9b3a2 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -397,7 +397,9 @@ class MessagePart(object): logger.warning("Could not find phash for this subpart!") payload = "" else: - payload = self._get_payload_from_document(phash) + payload = self._get_payload_from_document_memoized(phash) + if payload is None: + payload = self._get_payload_from_document(phash) else: logger.warning("Message with no part_map!") @@ -424,13 +426,24 @@ class MessagePart(object): # TODO should memory-bound this memoize!!! @memoized_method + def _get_payload_from_document_memoized(self, phash): + """ + Memoized method call around the regular method, to be able + to call the non-memoized method in case we got a None. + + :param phash: the payload hash to retrieve by. + :type phash: str or unicode + :rtype: str or unicode or None + """ + return self._get_payload_from_document(phash) + def _get_payload_from_document(self, phash): """ Return the message payload from the content document. :param phash: the payload hash to retrieve by. :type phash: str or unicode - :rtype: str or unicode + :rtype: str or unicode or None """ cdocs = self._soledad.get_from_index( fields.TYPE_P_HASH_IDX, -- cgit v1.2.3 From bf9db4b5381230b4e2a1e1d2d4b2acc31c29ff87 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 5 Feb 2014 16:47:36 -0400 Subject: Fix the fallback for the memoized call for bodies/content. Changed to "empty" to consider empty strings too. --- src/leap/mail/imap/messageparts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 2d9b3a2..b1f333a 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -398,7 +398,7 @@ class MessagePart(object): payload = "" else: payload = self._get_payload_from_document_memoized(phash) - if payload is None: + if empty(payload): payload = self._get_payload_from_document(phash) else: -- cgit v1.2.3 From 4338368aa2ba0efaee742e9000e21b81af34d3db Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 11 Feb 2014 01:43:14 -0400 Subject: separate new and dirty queues --- src/leap/mail/imap/messageparts.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index b1f333a..9b7de86 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -98,7 +98,7 @@ class MessageWrapper(object): CDOCS = "cdocs" DOCS_ID = "docs_id" - # Using slots to limit some the memory footprint, + # Using slots to limit some the memory use, # Add your attribute here. __slots__ = ["_dict", "_new", "_dirty", "_storetype", "memstore"] @@ -148,7 +148,7 @@ class MessageWrapper(object): """ return self._new - def _set_new(self, value=True): + def _set_new(self, value=False): """ Set the value for the `new` flag, and propagate it to the memory store if any. @@ -161,8 +161,8 @@ class MessageWrapper(object): mbox = self.fdoc.content['mbox'] uid = self.fdoc.content['uid'] key = mbox, uid - fun = [self.memstore.unset_new, - self.memstore.set_new][int(value)] + fun = [self.memstore.unset_new_queued, + self.memstore.set_new_queued][int(value)] fun(key) else: logger.warning("Could not find a memstore referenced from this " @@ -193,8 +193,8 @@ class MessageWrapper(object): mbox = self.fdoc.content['mbox'] uid = self.fdoc.content['uid'] key = mbox, uid - fun = [self.memstore.unset_dirty, - self.memstore.set_dirty][int(value)] + fun = [self.memstore.unset_dirty_queued, + self.memstore.set_dirty_queued][int(value)] fun(key) else: logger.warning("Could not find a memstore referenced from this " @@ -271,11 +271,14 @@ class MessageWrapper(object): :rtype: generator """ if self._dirty: - mbox = self.fdoc.content[fields.MBOX_KEY] - uid = self.fdoc.content[fields.UID_KEY] - docid_dict = self._dict[self.DOCS_ID] - docid_dict[self.FDOC] = self.memstore.get_docid_for_fdoc( - mbox, uid) + try: + mbox = self.fdoc.content[fields.MBOX_KEY] + uid = self.fdoc.content[fields.UID_KEY] + docid_dict = self._dict[self.DOCS_ID] + docid_dict[self.FDOC] = self.memstore.get_docid_for_fdoc( + mbox, uid) + except Exception as exc: + logger.exception(exc) if not empty(self.fdoc.content): yield self.fdoc -- cgit v1.2.3 From 484c5fc316c0f95ebccc4a2c2a04c1cda96a34f8 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 11 Feb 2014 03:04:04 -0400 Subject: defend against malformed fdocs during unset dirty/new --- src/leap/mail/imap/messageparts.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 9b7de86..6f1376a 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -158,8 +158,11 @@ class MessageWrapper(object): """ self._new = value if self.memstore: - mbox = self.fdoc.content['mbox'] - uid = self.fdoc.content['uid'] + mbox = self.fdoc.content.get('mbox', None) + uid = self.fdoc.content.get('uid', None) + if not mbox or not uid: + logger.warning("Malformed fdoc") + return key = mbox, uid fun = [self.memstore.unset_new_queued, self.memstore.set_new_queued][int(value)] @@ -190,8 +193,11 @@ class MessageWrapper(object): """ self._dirty = value if self.memstore: - mbox = self.fdoc.content['mbox'] - uid = self.fdoc.content['uid'] + mbox = self.fdoc.content.get('mbox', None) + uid = self.fdoc.content.get('uid', None) + if not mbox or not uid: + logger.warning("Malformed fdoc") + return key = mbox, uid fun = [self.memstore.unset_dirty_queued, self.memstore.set_dirty_queued][int(value)] @@ -278,6 +284,7 @@ class MessageWrapper(object): docid_dict[self.FDOC] = self.memstore.get_docid_for_fdoc( mbox, uid) except Exception as exc: + logger.debug("Error while walking message...") logger.exception(exc) if not empty(self.fdoc.content): -- cgit v1.2.3 From ac4c70f0be36c985e16e3f4ec0a38ef6f8d48166 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 12 Feb 2014 12:42:02 -0400 Subject: remove all refs during removal, and protect from empty docs --- src/leap/mail/imap/messageparts.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/leap/mail/imap/messageparts.py') diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 6f1376a..257721c 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -287,7 +287,7 @@ class MessageWrapper(object): logger.debug("Error while walking message...") logger.exception(exc) - if not empty(self.fdoc.content): + if not empty(self.fdoc.content) and 'uid' in self.fdoc.content: yield self.fdoc if not empty(self.hdoc.content): yield self.hdoc @@ -418,10 +418,8 @@ class MessagePart(object): if payload: content_type = self._get_ctype_from_document(phash) charset = find_charset(content_type) - logger.debug("Got charset from header: %s" % (charset,)) if charset is None: charset = self._get_charset(payload) - logger.debug("Got charset: %s" % (charset,)) try: if isinstance(payload, unicode): payload = payload.encode(charset) -- cgit v1.2.3