From 2c6d1e054242ee8be43f5cd03aad04e4ba40243b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 13 Jan 2014 17:58:02 -0400 Subject: Very limited support for SEARCH Commands. Closes: #4209 limited to HEADER Message-ID. This is a quick workaround for avoiding duplicate saves in Drafts Folder. but we'll get there! --- mail/changes/feature_enable-search-by-msg-id | 3 + mail/src/leap/mail/imap/fields.py | 13 ++-- mail/src/leap/mail/imap/mailbox.py | 45 ++++++++++++++ mail/src/leap/mail/imap/messages.py | 93 +++++++++++++++++++++++++--- 4 files changed, 137 insertions(+), 17 deletions(-) create mode 100644 mail/changes/feature_enable-search-by-msg-id diff --git a/mail/changes/feature_enable-search-by-msg-id b/mail/changes/feature_enable-search-by-msg-id new file mode 100644 index 00000000..accc12fc --- /dev/null +++ b/mail/changes/feature_enable-search-by-msg-id @@ -0,0 +1,3 @@ + o Ability to support SEARCH Commands, limited to HEADER Message-ID. + This is a quick workaround for avoiding duplicate saves in Drafts Folder. + Closes: #4209 diff --git a/mail/src/leap/mail/imap/fields.py b/mail/src/leap/mail/imap/fields.py index 70af61f6..3d2ac928 100644 --- a/mail/src/leap/mail/imap/fields.py +++ b/mail/src/leap/mail/imap/fields.py @@ -45,13 +45,12 @@ class WithMsgFields(object): HEADERS_KEY = "headers" DATE_KEY = "date" SUBJECT_KEY = "subject" - # XXX DELETE-ME - #NUM_PARTS_KEY = "numparts" # not needed?! PARTS_MAP_KEY = "part_map" BODY_KEY = "body" # link to phash of body + MSGID_KEY = "msgid" # content - LINKED_FROM_KEY = "lkf" + LINKED_FROM_KEY = "lkf" # XXX not implemented yet! 
RAW_KEY = "raw" CTYPE_KEY = "ctype" @@ -69,10 +68,6 @@ class WithMsgFields(object): TYPE_HEADERS_VAL = "head" TYPE_CONTENT_VAL = "cnt" - # XXX DEPRECATE - #TYPE_MESSAGE_VAL = "msg" - #TYPE_ATTACHMENT_VAL = "attach" - INBOX_VAL = "inbox" # Flags in Mailbox and Message @@ -96,6 +91,7 @@ class WithMsgFields(object): TYPE_MBOX_IDX = 'by-type-and-mbox' TYPE_MBOX_UID_IDX = 'by-type-and-mbox-and-uid' TYPE_SUBS_IDX = 'by-type-and-subscribed' + TYPE_MSGID_IDX = 'by-type-and-message-id' TYPE_MBOX_SEEN_IDX = 'by-type-and-mbox-and-seen' TYPE_MBOX_RECT_IDX = 'by-type-and-mbox-and-recent' TYPE_MBOX_DEL_IDX = 'by-type-and-mbox-and-deleted' @@ -125,6 +121,9 @@ class WithMsgFields(object): # fdocs uniqueness TYPE_MBOX_C_HASH_IDX: [KTYPE, MBOX_VAL, CHASH_VAL], + # headers doc - search by msgid. + TYPE_MSGID_IDX: [KTYPE, MSGID_KEY], + # content, headers doc TYPE_C_HASH_IDX: [KTYPE, CHASH_VAL], diff --git a/mail/src/leap/mail/imap/mailbox.py b/mail/src/leap/mail/imap/mailbox.py index c9e86847..ccbf5c2e 100644 --- a/mail/src/leap/mail/imap/mailbox.py +++ b/mail/src/leap/mail/imap/mailbox.py @@ -39,6 +39,7 @@ from leap.mail.decorators import deferred from leap.mail.imap.fields import WithMsgFields, fields from leap.mail.imap.messages import MessageCollection from leap.mail.imap.parser import MBoxParser +from leap.mail.utils import first logger = logging.getLogger(__name__) @@ -55,6 +56,7 @@ class SoledadMailbox(WithMsgFields, MBoxParser): imap4.IMailbox, imap4.IMailboxInfo, imap4.ICloseableMailbox, + imap4.ISearchableMailbox, imap4.IMessageCopier) # XXX should finish the implementation of IMailboxListener @@ -617,6 +619,49 @@ class SoledadMailbox(WithMsgFields, MBoxParser): self._signal_unread_to_ui() return result + # ISearchableMailbox + + def search(self, query, uid): + """ + Search for messages that meet the given query criteria. + + Warning: this is half-baked, and it might give problems since + it offers the SearchableInterface. + We'll be implementing it asap. 
+ + :param query: The search criteria + :type query: list + + :param uid: If true, the IDs specified in the query are UIDs; + otherwise they are message sequence IDs. + :type uid: bool + + :return: A list of message sequence numbers or message UIDs which + match the search criteria or a C{Deferred} whose callback + will be invoked with such a list. + :rtype: C{list} or C{Deferred} + """ + # TODO see if we can raise w/o interrupting flow + #:raise IllegalQueryError: Raised when query is not valid. + # example query: + # ['UNDELETED', 'HEADER', 'Message-ID', + # '52D44F11.9060107@dev.bitmask.net'] + + # TODO hardcoding for now! -- we'll support generic queries later on + # but doing a quickfix for avoiding duplicate saves in the draft folder. + # See issue #4209 + + if query[1] == 'HEADER' and query[2].lower() == "message-id": + msgid = str(query[3]).strip() + d = self.messages._get_uid_from_msgid(str(msgid)) + d1 = defer.gatherResults([d]) + # we want a list, so return it all the same + return d1 + + # nothing implemented for any other query + logger.warning("Cannot process query: %s" % (query,)) + return [] + # IMessageCopier @deferred diff --git a/mail/src/leap/mail/imap/messages.py b/mail/src/leap/mail/imap/messages.py index 7b49c807..a3d29d63 100644 --- a/mail/src/leap/mail/imap/messages.py +++ b/mail/src/leap/mail/imap/messages.py @@ -20,6 +20,8 @@ LeapMessage and MessageCollection. import copy import logging import re +import threading +import time import StringIO from collections import defaultdict, namedtuple @@ -44,6 +46,7 @@ from leap.mail.messageflow import IMessageConsumer, MessageProducer logger = logging.getLogger(__name__) +read_write_lock = threading.Lock() # TODO ------------------------------------------------------------ @@ -53,6 +56,7 @@ logger = logging.getLogger(__name__) # [ ] Send patch to twisted for bug in imap4.py:5717 (content-type can be # none? lower-case?) + def lowerdict(_dict): """ Return a dict with the keys in lowercase. 
@@ -60,12 +64,17 @@ def lowerdict(_dict): :param _dict: the dict to convert :rtype: dict """ + # TODO should properly implement a CaseInsensitive dict. + # Look into requests code. return dict((key.lower(), value) for key, value in _dict.items()) CHARSET_PATTERN = r"""charset=([\w-]+)""" +MSGID_PATTERN = r"""<([\w@.]+)>""" + CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) +MSGID_RE = re.compile(MSGID_PATTERN) class MessagePart(object): @@ -897,6 +906,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): Implements a filter query over the messages contained in a soledad database. """ + # XXX this should be able to produce a MessageSet methinks # could validate these kinds of objects turning them # into a template for the class. @@ -1044,9 +1054,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): newline = "\n%s: " % (k,) headers[k] = newline.join(v) + lower_headers = lowerdict(headers) + msgid = first(MSGID_RE.findall( + lower_headers.get('message-id', ''))) + hd = self._get_empty_doc(self.HEADERS_DOC) hd[self.CONTENT_HASH_KEY] = chash hd[self.HEADERS_KEY] = headers + hd[self.MSGID_KEY] = msgid if not subject and self.SUBJECT_FIELD in headers: hd[self.SUBJECT_KEY] = first(headers[self.SUBJECT_FIELD]) @@ -1139,16 +1154,17 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): logger.debug('enqueuing message docs for write') ptuple = SoledadWriterPayload - # first, regular docs: flags and headers - for doc in docs: - self.soledad_writer.put(ptuple( - mode=ptuple.CREATE, payload=doc)) + with read_write_lock: + # first, regular docs: flags and headers + for doc in docs: + self.soledad_writer.put(ptuple( + mode=ptuple.CREATE, payload=doc)) - # and last, but not least, try to create - # content docs if not already there. 
- for cd in cdocs: - self.soledad_writer.put(ptuple( - mode=ptuple.CONTENT_CREATE, payload=cd)) + # and last, but not least, try to create + # content docs if not already there. + for cd in cdocs: + self.soledad_writer.put(ptuple( + mode=ptuple.CONTENT_CREATE, payload=cd)) def _remove_cb(self, result): return result @@ -1174,7 +1190,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): d.addCallback(self._remove_cb) return d - # getters + # getters: specific queries def _get_fdoc_from_chash(self, chash): """ @@ -1201,6 +1217,63 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): except Exception as exc: logger.exception("Unhandled error %r" % exc) + def _get_uid_from_msgidCb(self, msgid): + hdoc = None + with read_write_lock: + try: + query = self._soledad.get_from_index( + fields.TYPE_MSGID_IDX, + fields.TYPE_HEADERS_VAL, msgid) + if query: + if len(query) > 1: + logger.warning( + "More than one hdoc found for this msgid, " + "we got a duplicate!!") + # XXX we could take action, like trigger a background + # process to kill dupes. + hdoc = query.pop() + except Exception as exc: + logger.exception("Unhandled error %r" % exc) + + if hdoc is None: + logger.warning("Could not find hdoc for msgid %s" + % (msgid,)) + return None + msg_chash = hdoc.content.get(fields.CONTENT_HASH_KEY) + fdoc = self._get_fdoc_from_chash(msg_chash) + if not fdoc: + logger.warning("Could not find fdoc for msgid %s" + % (msgid,)) + return None + return fdoc.content.get(fields.UID_KEY, None) + + @deferred + def _get_uid_from_msgid(self, msgid): + """ + Return a UID for a given message-id. + + It first gets the headers-doc for that msg-id, and + if found, it queries the flags doc for the current mailbox + for the matching content-hash. + + :return: A UID, or None + """ + # We need to wait a little bit, cause in some of the cases + # the query is received right after we've saved the document, + # and we cannot find it otherwise. 
This seems to be enough. + + # Doing a sleep since we'll be calling this in a secondary thread, + # but we should be able to collect the results after a + # reactor.callLater. + # Maybe we can implement something like NOT_DONE_YET in the web + # framework, and return from the callback? + # See: http://jcalderone.livejournal.com/50226.html + # reactor.callLater(0.3, self._get_uid_from_msgidCb, msgid) + time.sleep(0.3) + return self._get_uid_from_msgidCb(msgid) + + # getters: generic for a mailbox + def get_msg_by_uid(self, uid): """ Retrieves a LeapMessage by UID. -- cgit v1.2.3