Diffstat (limited to 'src/leap/mail/adaptors/soledad.py')
-rw-r--r-- | src/leap/mail/adaptors/soledad.py | 723 |
1 file changed, 723 insertions, 0 deletions
diff --git a/src/leap/mail/adaptors/soledad.py b/src/leap/mail/adaptors/soledad.py
new file mode 100644
index 0000000..2e25f04
--- /dev/null
+++ b/src/leap/mail/adaptors/soledad.py
@@ -0,0 +1,723 @@
+# -*- coding: utf-8 -*-
+# soledad.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Soledad MailAdaptor module.
+"""
+import re
+from collections import defaultdict
+from email import message_from_string
+
+from pycryptopp.hash import sha256
+from twisted.internet import defer
+from zope.interface import implements
+
+from leap.common.check import leap_assert, leap_assert_type
+
+from leap.mail import walk
+from leap.mail.adaptors import soledad_indexes as indexes
+from leap.mail.constants import INBOX_NAME
+from leap.mail.adaptors import models
+from leap.mail.imap.mailbox import normalize_mailbox
+from leap.mail.utils import lowerdict, first
+from leap.mail.utils import stringify_parts_map
+from leap.mail.interfaces import IMailAdaptor, IMessageWrapper
+
+
+# TODO
+# [ ] Convenience function to create mail specifying subject, date, etc?
+
+
+_MSGID_PATTERN = r"""<([\w@.]+)>"""
+_MSGID_RE = re.compile(_MSGID_PATTERN)
+
+
+class DuplicatedDocumentError(Exception):
+    """
+    Raised when a duplicated document is detected.
+    """
+    pass
+
+
+class SoledadDocumentWrapper(models.DocumentWrapper):
+    """
+    A Wrapper object that can be manipulated, passed around, and serialized in
+    a format that the Soledad Store understands.
+
+    It ensures atomicity of the document operations on creation, update and
+    deletion.
+    """
+
+    # TODO we could also use a _dirty flag (in models)
+
+    # We keep a dictionary with DeferredLocks, that will be
+    # unique to every subclass of SoledadDocumentWrapper.
+    _k_locks = defaultdict(defer.DeferredLock)
+
+    @classmethod
+    def _get_klass_lock(cls):
+        """
+        Get a DeferredLock that is unique for this subclass name.
+        Used to lock the access to indexes in the `get_or_create` call
+        for a particular DocumentWrapper.
+        """
+        return cls._k_locks[cls.__name__]
+
+    def __init__(self, **kwargs):
+        doc_id = kwargs.pop('doc_id', None)
+        self._doc_id = doc_id
+        self._lock = defer.DeferredLock()
+        super(SoledadDocumentWrapper, self).__init__(**kwargs)
+
+    @property
+    def doc_id(self):
+        return self._doc_id
+
+    def create(self, store):
+        """
+        Create the documents for this wrapper.
+        Since this method will not check for duplication, the
+        responsibility of avoiding duplicates is left to the caller.
+
+        You might be interested in using the `get_or_create` classmethod
+        instead (that's the preferred way of creating documents from
+        the wrapper object).
+
+        :return: a deferred that will fire when the underlying
+                 Soledad document has been created.
+        :rtype: Deferred
+        """
+        leap_assert(self._doc_id is None,
+                    "This document already has a doc_id!")
+
+        def update_doc_id(doc):
+            self._doc_id = doc.doc_id
+            return doc
+        d = store.create_doc(self.serialize())
+        d.addCallback(update_doc_id)
+        return d
+
+    def update(self, store):
+        """
+        Update the documents for this wrapper.
+
+        :return: a deferred that will fire when the underlying
+                 Soledad document has been updated.
+        :rtype: Deferred
+        """
+        # the deferred lock guards against revision conflicts
+        return self._lock.run(self._update, store)
+
+    def _update(self, store):
+        leap_assert(self._doc_id is not None,
+                    "Need to create doc before updating")
+
+        def update_and_put_doc(doc):
+            doc.content.update(self.serialize())
+            return store.put_doc(doc)
+
+        d = store.get_doc(self._doc_id)
+        d.addCallback(update_and_put_doc)
+        return d
+
+    def delete(self, store):
+        """
+        Delete the documents for this wrapper.
+
+        :return: a deferred that will fire when the underlying
+                 Soledad document has been deleted.
+        :rtype: Deferred
+        """
+        # the deferred lock guards against conflicts while updating
+        return self._lock.run(self._delete, store)
+
+    def _delete(self, store):
+        leap_assert(self._doc_id is not None,
+                    "Need to create doc before deleting")
+        # XXX might want to flag this DocumentWrapper to avoid
+        # updating it by mistake. This could go in models.DocumentWrapper
+
+        def delete_doc(doc):
+            return store.delete_doc(doc)
+
+        d = store.get_doc(self._doc_id)
+        d.addCallback(delete_doc)
+        return d
+
+    @classmethod
+    def get_or_create(cls, store, index, value):
+        """
+        Get a unique DocumentWrapper by index, or create a new one if the
+        matching query does not exist.
+
+        :param index: the primary index for the model.
+        :type index: str
+        :param value: the value to query the primary index.
+        :type value: str
+
+        :return: a deferred that will be fired with the SoledadDocumentWrapper
+                 matching the index query, either existing or just created.
+        :rtype: Deferred
+        """
+        return cls._get_klass_lock().run(
+            cls._get_or_create, store, index, value)
+
+    @classmethod
+    def _get_or_create(cls, store, index, value):
+        assert store is not None
+        assert index is not None
+        assert value is not None
+
+        def get_main_index():
+            try:
+                return cls.model.__meta__.index
+            except AttributeError:
+                raise RuntimeError("The model is badly defined")
+
+        def try_to_get_doc_from_index(indexes):
+            values = []
+            idx_def = dict(indexes)[index]
+            if len(idx_def) == 1:
+                values = [value]
+            else:
+                main_index = get_main_index()
+                fields = cls.model.serialize()
+                for field in idx_def:
+                    if field == main_index:
+                        values.append(value)
+                    else:
+                        values.append(fields[field])
+            d = store.get_from_index(index, *values)
+            return d
+
+        def get_first_doc_if_any(docs):
+            if not docs:
+                return None
+            if len(docs) > 1:
+                raise DuplicatedDocumentError
+            return docs[0]
+
+        def wrap_existing_or_create_new(doc):
+            if doc:
+                return cls(doc_id=doc.doc_id, **doc.content)
+            else:
+                return create_and_wrap_new_doc()
+
+        def create_and_wrap_new_doc():
+            # XXX use closure to store indexes instead of
+            # querying for them again.
+            d = store.list_indexes()
+            d.addCallback(get_wrapper_instance_from_index)
+            d.addCallback(return_wrapper_when_created)
+            return d
+
+        def get_wrapper_instance_from_index(indexes):
+            init_values = {}
+            idx_def = dict(indexes)[index]
+            if len(idx_def) == 1:
+                init_value = {idx_def[0]: value}
+                return cls(**init_value)
+            main_index = get_main_index()
+            fields = cls.model.serialize()
+            for field in idx_def:
+                if field == main_index:
+                    init_values[field] = value
+                else:
+                    init_values[field] = fields[field]
+            return cls(**init_values)
+
+        def return_wrapper_when_created(wrapper):
+            d = wrapper.create(store)
+            d.addCallback(lambda doc: wrapper)
+            return d
+
+        d = store.list_indexes()
+        d.addCallback(try_to_get_doc_from_index)
+        d.addCallback(get_first_doc_if_any)
+        d.addCallback(wrap_existing_or_create_new)
+        return d
+
+    @classmethod
+    def get_all(cls, store):
+        """
+        Get a collection of wrappers around all the documents belonging
+        to this kind.
+
+        For this to work, the model.__meta__ needs to include a tuple with
+        the index to be used for listing purposes and the field to be used
+        to query that index.
+
+        Note that this method only supports indexes of a single field at the
+        moment. It also might be too expensive to return all the documents
+        matching the query, so handle with care.
+
+            class __meta__(object):
+                index = "name"
+                list_index = ("by-type", "type_")
+
+        :return: a deferred that will be fired with an iterable containing
+                 as many SoledadDocumentWrappers as match the index defined
+                 in the model as the `list_index`.
+        :rtype: Deferred
+        """
+        # TODO
+        # [ ] extend support to indexes with n-ples
+        # [ ] benchmark the cost of querying and returning indexes in a big
+        #     database. This might badly need pagination before being put to
+        #     serious use.
+        return cls._get_klass_lock().run(cls._get_all, store)
+
+    @classmethod
+    def _get_all(cls, store):
+        try:
+            list_index, list_attr = cls.model.__meta__.list_index
+        except AttributeError:
+            raise RuntimeError("The model is badly defined: no list_index")
+        try:
+            index_value = getattr(cls.model, list_attr)
+        except AttributeError:
+            raise RuntimeError("The model is badly defined: "
+                               "no attribute matching list_index")
+
+        def wrap_docs(docs):
+            return (cls(doc_id=doc.doc_id, **doc.content) for doc in docs)
+
+        d = store.get_from_index(list_index, index_value)
+        d.addCallback(wrap_docs)
+        return d
+
+    # TODO
+    # [ ] get_count() ???
+
+    def __repr__(self):
+        try:
+            idx = getattr(self, self.model.__meta__.index)
+        except AttributeError:
+            idx = ""
+        return "<%s: %s (%s)>" % (self.__class__.__name__,
+                                  idx, self._doc_id)
+
+
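A minimal sketch of how a concrete wrapper subclass is declared and then fetched through the API above; the TagDocWrapper class and the "by-tag"/"by-type" index names are hypothetical, and a Soledad-like `store` with those indexes already created is assumed:

    class TagDocWrapper(SoledadDocumentWrapper):
        # Hypothetical wrapper, for illustration only.
        class model(models.SerializableModel):
            type_ = "tag"
            tag = ""

            class __meta__(object):
                index = "tag"
                list_index = ("by-type", "type_")

    # Fires with the wrapper for the existing document,
    # or with a freshly created one.
    d = TagDocWrapper.get_or_create(store, "by-tag", "important")
    d.addCallback(lambda wrapper: wrapper.doc_id)

    # Fires with a generator of wrappers, one per document of this kind.
    d = TagDocWrapper.get_all(store)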
+#
+# Message documents
+#
+
+class FlagsDocWrapper(SoledadDocumentWrapper):
+
+    class model(models.SerializableModel):
+        type_ = "flags"
+        chash = ""
+
+        mbox = "inbox"
+        seen = False
+        deleted = False
+        recent = False
+        multi = False
+        flags = []
+        tags = []
+        size = 0
+
+        class __meta__(object):
+            index = "mbox"
+
+
+class HeaderDocWrapper(SoledadDocumentWrapper):
+
+    class model(models.SerializableModel):
+        type_ = "head"
+        chash = ""
+
+        date = ""
+        subject = ""
+        headers = {}
+        part_map = {}
+        body = ""  # link to phash of body
+        msgid = ""
+        multi = False
+
+        class __meta__(object):
+            index = "chash"
+
+
+class ContentDocWrapper(SoledadDocumentWrapper):
+
+    class model(models.SerializableModel):
+        type_ = "cnt"
+        phash = ""
+
+        ctype = ""  # XXX index by ctype too?
+        lkf = []  # XXX not implemented yet!
+        raw = ""
+
+        content_disposition = ""
+        content_transfer_encoding = ""
+        content_type = ""
+
+        class __meta__(object):
+            index = "phash"
+
+
+class MessageWrapper(object):
+
+    # TODO generalize wrapper composition?
+    # This could benefit from a DeferredLock to create/update all the
+    # documents at the same time maybe, and defend against concurrent updates?
+
+    implements(IMessageWrapper)
+
+    def __init__(self, fdoc, hdoc, cdocs=None):
+        """
+        Need at least a flag-document and a header-document to instantiate a
+        MessageWrapper. Content-documents can be retrieved lazily.
+
+        cdocs, if any, should be a dictionary in which the keys are ascending
+        integers, beginning at one, and the values are dictionaries with the
+        content of the content-docs.
+        """
+        self.fdoc = FlagsDocWrapper(**fdoc)
+        self.hdoc = HeaderDocWrapper(**hdoc)
+        if cdocs is None:
+            cdocs = {}
+        cdocs_keys = cdocs.keys()
+        assert sorted(cdocs_keys) == range(1, len(cdocs_keys) + 1)
+        self.cdocs = dict([(key, ContentDocWrapper(**doc)) for (key, doc) in
+                           cdocs.items()])
+
+    def create(self, store):
+        """
+        Create all the parts for this message in the store.
+        """
+        leap_assert(self.cdocs,
+                    "Need non empty cdocs to create the "
+                    "MessageWrapper documents")
+        leap_assert(self.fdoc.doc_id is None,
+                    "Cannot create: fdoc has a doc_id")
+
+        # TODO I think we need to tolerate the no hdoc.doc_id case, for when we
+        # are doing a copy to another mailbox.
+        leap_assert(self.hdoc.doc_id is None,
+                    "Cannot create: hdoc has a doc_id")
+        d = []
+        d.append(self.fdoc.create(store))
+        d.append(self.hdoc.create(store))
+        for cdoc in self.cdocs.values():
+            if cdoc.doc_id is not None:
+                # we could be just linking to an existing
+                # content-doc.
+                continue
+            d.append(cdoc.create(store))
+        return defer.gatherResults(d)
+
+    def update(self, store):
+        """
+        Update the only mutable parts, which are within the flags document.
+        """
+        return self.fdoc.update(store)
+
+    def delete(self, store):
+        # Eventually this would have to do the duplicate search or send it to
+        # the garbage collector. At least the fdoc can be unlinked.
+        raise NotImplementedError()
+
+#
+# Mailboxes
+#
+
+
+class MailboxWrapper(SoledadDocumentWrapper):
+
+    class model(models.SerializableModel):
+        type_ = "mbox"
+        mbox = INBOX_NAME
+        flags = []
+        closed = False
+        subscribed = False
+        rw = True
+
+        class __meta__(object):
+            index = "mbox"
+            list_index = (indexes.TYPE_IDX, 'type_')
+
+
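A short usage sketch for the mailbox wrapper above, assuming a Soledad-like `store` on which the indexes from `leap.mail.adaptors.soledad_indexes` have already been created:

    from twisted.internet import defer

    @defer.inlineCallbacks
    def list_mailboxes(store):
        # Get (or lazily create) the INBOX document, then collect the
        # names of every mailbox currently stored.
        inbox = yield MailboxWrapper.get_or_create(
            store, indexes.TYPE_MBOX_IDX, INBOX_NAME)
        mboxes = yield MailboxWrapper.get_all(store)
        defer.returnValue((inbox.doc_id, [m.mbox for m in mboxes]))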
+#
+# Soledad Adaptor
+#
+
+# TODO make this an interface?
+class SoledadIndexMixin(object):
+    """
+    Classes using this mixin need a class attribute `indexes`: a dictionary
+    containing the index definitions for the u1db store underlying Soledad.
+
+    It needs to be in the following format:
+    {'index-name': ['field1', 'field2']}
+    """
+    # TODO could have a wrapper class for indexes, supporting introspection
+    # and __getattr__
+    indexes = {}
+
+    store_ready = False
+    _index_creation_deferreds = []
+
+    # TODO we might want to move this logic to soledad itself
+    # so that each application can pass a set of indexes for their data model.
+    # TODO check also the decorator used in keymanager for waiting for indexes
+    # to be ready.
+
+    def initialize_store(self, store):
+        """
+        Initialize the indexes in the database.
+
+        :param store: store
+        :returns: a Deferred that will fire when the store is correctly
+                  initialized.
+        :rtype: deferred
+        """
+        # TODO I think we *should* get another deferredLock in here, but
+        # global to the soledad namespace, to protect from several points
+        # initializing soledad indexes at the same time.
+
+        leap_assert(store, "Need a store")
+        leap_assert_type(self.indexes, dict)
+        self._index_creation_deferreds = []
+
+        def _on_indexes_created(ignored):
+            self.store_ready = True
+
+        def _create_index(name, expression):
+            d = store.create_index(name, *expression)
+            self._index_creation_deferreds.append(d)
+
+        def _create_indexes(db_indexes):
+            db_indexes = dict(db_indexes)
+
+            for name, expression in self.indexes.items():
+                if name not in db_indexes:
+                    # The index does not yet exist.
+                    _create_index(name, expression)
+                    continue
+
+                if expression == db_indexes[name]:
+                    # The index exists and is up to date.
+                    continue
+                # The index exists but the definition is not what we expected,
+                # so we delete it and add the proper index expression.
+                d1 = store.delete_index(name)
+                d1.addCallback(lambda _: _create_index(name, expression))
+
+            all_created = defer.gatherResults(self._index_creation_deferreds)
+            all_created.addCallback(_on_indexes_created)
+            return all_created
+
+        # Ask the database for currently existing indexes, and create them
+        # if not found.
+        d = store.list_indexes()
+        d.addCallback(_create_indexes)
+        return d
+
+
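The `indexes` attribute maps each index name to the list of fields it covers. A hypothetical subclass, only to illustrate the shape `initialize_store` expects (index names invented for the example):

    class MailboxIndexMixin(SoledadIndexMixin):
        # {'index-name': ['field1', 'field2']}, as described above.
        indexes = {
            "by-type": ["type"],
            "by-type-and-mbox": ["type", "mbox"],
        }

    # Creates any missing index and recreates outdated ones; the returned
    # deferred fires once the store is ready to be queried.
    d = MailboxIndexMixin().initialize_store(store)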
+class SoledadMailAdaptor(SoledadIndexMixin):
+
+    implements(IMailAdaptor)
+    store = None
+
+    indexes = indexes.MAIL_INDEXES
+
+    # Message handling
+
+    def get_msg_from_string(self, MessageClass, raw_msg):
+        """
+        Get an instance of a MessageClass initialized with a MessageWrapper
+        that contains all the parts obtained from parsing the raw string for
+        the message.
+
+        :param MessageClass: any Message class that can be initialized passing
+                             an instance of an IMessageWrapper implementor.
+        :type MessageClass: type
+        :param raw_msg: a string containing the raw email message.
+        :type raw_msg: str
+        :rtype: MessageClass instance.
+        """
+        assert(MessageClass is not None)
+        fdoc, hdoc, cdocs = _split_into_parts(raw_msg)
+        return self.get_msg_from_docs(
+            MessageClass, fdoc, hdoc, cdocs)
+
+    def get_msg_from_docs(self, MessageClass, fdoc, hdoc, cdocs=None):
+        """
+        Get an instance of a MessageClass initialized with a MessageWrapper
+        that contains the passed part documents.
+
+        This is not the recommended way of obtaining a message, unless you
+        know how to take care of ensuring the internal consistency between
+        the part documents, or unless you are gluing together the part
+        documents that have been previously generated by
+        `get_msg_from_string`.
+
+        :param MessageClass: any Message class that can be initialized passing
+                             an instance of an IMessageWrapper implementor.
+        :type MessageClass: type
+        :param fdoc: a dictionary containing values from which a
+                     FlagsDocWrapper can be initialized
+        :type fdoc: dict
+        :param hdoc: a dictionary containing values from which a
+                     HeaderDocWrapper can be initialized
+        :type hdoc: dict
+        :param cdocs: None, or a dictionary mapping integers (1-indexed) to
+                      dicts from which a ContentDocWrapper can be initialized.
+        :type cdocs: dict, or None
+
+        :rtype: MessageClass instance.
+        """
+        assert(MessageClass is not None)
+        return MessageClass(MessageWrapper(fdoc, hdoc, cdocs))
+
+    def create_msg(self, store, msg):
+        """
+        :param store: an instance of soledad, or anything that behaves alike
+        :type store:
+        :param msg: a Message object.
+
+        :return: a Deferred that is fired when all the underlying documents
+                 have been created.
+        :rtype: defer.Deferred
+        """
+        wrapper = msg.get_wrapper()
+        return wrapper.create(store)
+
+    def update_msg(self, store, msg):
+        """
+        :param store: an instance of soledad, or anything that behaves alike
+        :type store:
+        :param msg: a Message object.
+        :return: a Deferred that is fired when all the underlying documents
+                 have been updated (actually, it's only the fdoc that's
+                 allowed to update).
+        :rtype: defer.Deferred
+        """
+        wrapper = msg.get_wrapper()
+        return wrapper.update(store)
+
+    # Mailbox handling
+
+    def get_or_create_mbox(self, store, name):
+        """
+        Get the mailbox with the given name, or create one if it does not
+        exist.
+
+        :param name: the name of the mailbox
+        :type name: str
+        """
+        index = indexes.TYPE_MBOX_IDX
+        mbox = normalize_mailbox(name)
+        return MailboxWrapper.get_or_create(store, index, mbox)
+
+    def update_mbox(self, store, mbox_wrapper):
+        """
+        Update the documents for a given mailbox.
+
+        :param mbox_wrapper: MailboxWrapper instance
+        :type mbox_wrapper: MailboxWrapper
+        :return: a Deferred that will be fired when the mailbox documents
+                 have been updated.
+        :rtype: defer.Deferred
+        """
+        return mbox_wrapper.update(store)
+
+    def get_all_mboxes(self, store):
+        """
+        Retrieve a list with wrappers for all the mailboxes.
+
+        :return: a deferred that will be fired with a list of all the
+                 MailboxWrappers found.
+        :rtype: defer.Deferred
+        """
+        return MailboxWrapper.get_all(store)
+
+
+def _split_into_parts(raw):
+    # TODO signal that we can delete the original message!-----
+    # when all the processing is done.
+    # TODO add the linked-from info !
+    # TODO add reference to the original message?
+    # TODO populate Default FLAGS/TAGS (unseen?)
+    # TODO seed properly the content_docs with defaults??
+
+    msg, parts, chash, size, multi = _parse_msg(raw)
+    body_phash_fun = [walk.get_body_phash_simple,
+                      walk.get_body_phash_multi][int(multi)]
+    body_phash = body_phash_fun(walk.get_payloads(msg))
+    parts_map = walk.walk_msg_tree(parts, body_phash=body_phash)
+
+    fdoc = _build_flags_doc(chash, size, multi)
+    hdoc = _build_headers_doc(msg, chash, parts_map)
+
+    # The MessageWrapper expects a dict, one-indexed
+    cdocs = dict(enumerate(walk.get_raw_docs(msg, parts), 1))
+
+    # XXX convert each to_dicts...
+    return fdoc, hdoc, cdocs
+
+
+def _parse_msg(raw):
+    msg = message_from_string(raw)
+    parts = walk.get_parts(msg)
+    size = len(raw)
+    chash = sha256.SHA256(raw).hexdigest()
+    multi = msg.is_multipart()
+    return msg, parts, chash, size, multi
+
+
+def _build_flags_doc(chash, size, multi):
+    _fdoc = FlagsDocWrapper(chash=chash, size=size, multi=multi)
+    return _fdoc.serialize()
+
+
+def _build_headers_doc(msg, chash, parts_map):
+    """
+    Assemble a headers document from the original parsed message, the
+    content-hash, and the parts map.
+
+    It takes into account possibly repeated headers.
+    """
+    headers = defaultdict(list)
+    for k, v in msg.items():
+        headers[k].append(v)
+
+    # "fix" for repeated headers.
+    for k, v in headers.items():
+        newline = "\n%s: " % (k,)
+        headers[k] = newline.join(v)
+
+    lower_headers = lowerdict(headers)
+    msgid = first(_MSGID_RE.findall(
+        lower_headers.get('message-id', '')))
+
+    _hdoc = HeaderDocWrapper(
+        chash=chash, headers=lower_headers, msgid=msgid)
+
+    def copy_attr(headers, key, doc):
+        if key in headers:
+            setattr(doc, key, headers[key])
+
+    copy_attr(lower_headers, "subject", _hdoc)
+    copy_attr(lower_headers, "date", _hdoc)
+
+    hdoc = _hdoc.serialize()
+    # add parts map to header doc
+    # (body, multi, part_map)
+    for key in parts_map:
+        hdoc[key] = parts_map[key]
+    return stringify_parts_map(hdoc)
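A rough end-to-end sketch of the adaptor, assuming an initialized Soledad instance `soledad`; SimpleMessage is a stand-in for whatever MessageClass the caller provides:

    from twisted.internet import defer

    class SimpleMessage(object):
        # Minimal MessageClass: built from an IMessageWrapper implementor
        # and able to hand the wrapper back to the adaptor.
        def __init__(self, wrapper):
            self._wrapper = wrapper

        def get_wrapper(self):
            return self._wrapper

    @defer.inlineCallbacks
    def store_raw_message(soledad, raw):
        adaptor = SoledadMailAdaptor()
        # Make sure the mail indexes exist before writing anything.
        yield adaptor.initialize_store(soledad)
        # Split the raw string into flags/headers/content documents
        # and persist them all.
        msg = adaptor.get_msg_from_string(SimpleMessage, raw)
        yield adaptor.create_msg(soledad, msg)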