From 9f9701d42be385aa9a6d7e72fd10104b0025971b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 16 Jan 2014 22:01:20 -0400 Subject: Separate RECENT Flag to a mailbox document. this way we avoid a bunch of writes. --- src/leap/mail/imap/messages.py | 163 ++++++++++++++++++++++++++++++----------- 1 file changed, 120 insertions(+), 43 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 1b996b6..6556b12 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -21,6 +21,7 @@ import copy import logging import re import time +import threading import StringIO from collections import defaultdict, namedtuple @@ -308,7 +309,7 @@ class LeapMessage(fields, MailParser, MBoxParser): implements(imap4.IMessage) - def __init__(self, soledad, uid, mbox): + def __init__(self, soledad, uid, mbox, collection=None): """ Initializes a LeapMessage. @@ -318,11 +319,14 @@ class LeapMessage(fields, MailParser, MBoxParser): :type uid: int or basestring :param mbox: the mbox this message belongs to :type mbox: basestring + :param collection: a reference to the parent collection object + :type collection: MessageCollection """ MailParser.__init__(self) self._soledad = soledad self._uid = int(uid) self._mbox = self._parse_mailbox_name(mbox) + self._collection = collection self.__chash = None self.__bdoc = None @@ -373,7 +377,7 @@ class LeapMessage(fields, MailParser, MBoxParser): def getUID(self): """ - Retrieve the unique identifier associated with this message + Retrieve the unique identifier associated with this Message. :return: uid for this message :rtype: int @@ -382,18 +386,26 @@ class LeapMessage(fields, MailParser, MBoxParser): def getFlags(self): """ - Retrieve the flags associated with this message + Retrieve the flags associated with this Message. :return: The flags, represented as strings :rtype: tuple """ if self._uid is None: return [] + uid = self._uid flags = [] fdoc = self._fdoc if fdoc: flags = fdoc.content.get(self.FLAGS_KEY, None) + + msgcol = self._collection + + # We treat the recent flag specially: gotten from + # a mailbox-level document. + if msgcol and uid in msgcol.recent_flags: + flags.append(fields.RECENT_FLAG) if flags: flags = map(str, flags) return tuple(flags) @@ -414,7 +426,7 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: SoledadDocument """ leap_assert(isinstance(flags, tuple), "flags need to be a tuple") - log.msg('setting flags: %s' % (self._uid)) + log.msg('setting flags: %s (%s)' % (self._uid, flags)) doc = self._fdoc if not doc: @@ -424,7 +436,6 @@ class LeapMessage(fields, MailParser, MBoxParser): return doc.content[self.FLAGS_KEY] = flags doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags - doc.content[self.RECENT_KEY] = self.RECENT_FLAG in flags doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags self._soledad.put_doc(doc) @@ -927,6 +938,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, FLAGS_DOC = "FLAGS" HEADERS_DOC = "HEADERS" CONTENT_DOC = "CONTENT" + RECENT_DOC = "RECENT" templates = { @@ -937,7 +949,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, fields.CONTENT_HASH_KEY: "", fields.SEEN_KEY: False, - fields.RECENT_KEY: True, fields.DEL_KEY: False, fields.FLAGS_KEY: [], fields.MULTIPART_KEY: False, @@ -970,12 +981,25 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, fields.MULTIPART_KEY: False, }, + RECENT_DOC: { + fields.TYPE_KEY: fields.TYPE_RECENT_VAL, + fields.MBOX_KEY: fields.INBOX_VAL, + fields.RECENTFLAGS_KEY: [], + } } + _rdoc_lock = threading.Lock() + def __init__(self, mbox=None, soledad=None): """ Constructor for MessageCollection. + On initialization, we ensure that we have a document for + storing the recent flags. The nature of this flag make us wanting + to store the set of the UIDs with this flag at the level of the + MessageCollection for each mailbox, instead of treating them + as a property of each message. + :param mbox: the name of the mailbox. It is the name with which we filter the query over the messages database @@ -994,17 +1018,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # okay, all in order, keep going... self.mbox = self._parse_mailbox_name(mbox) self._soledad = soledad + self.__rflags = None self.initialize_db() - # I think of someone like nietzsche when reading this - - # this will be the producer that will enqueue the content - # to be processed serially by the consumer (the writer). We just - # need to `put` the new material on its plate. - - self.soledad_writer = MessageProducer( - SoledadDocWriter(soledad), - period=0.02) + # ensure that we have a recent-flags doc + self._get_or_create_rdoc() def _get_empty_doc(self, _type=FLAGS_DOC): """ @@ -1017,6 +1035,18 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, raise TypeError("Improper type passed to _get_empty_doc") return copy.deepcopy(self.templates[_type]) + def _get_or_create_rdoc(self): + """ + Try to retrieve the recent-flags doc for this MessageCollection, + and create one if not found. + """ + rdoc = self._get_recent_doc() + if not rdoc: + rdoc = self._get_empty_doc(self.RECENT_DOC) + if self.mbox != fields.INBOX_VAL: + rdoc[fields.MBOX_KEY] = self.mbox + self._soledad.create_doc(rdoc) + def _do_parse(self, raw): """ Parse raw message and return it along with @@ -1161,7 +1191,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, hd[key] = parts_map[key] del parts_map - # Saving + # Saving ---------------------------------------- + self.set_recent_flag(uid) # first, regular docs: flags and headers self._soledad.create_doc(fd) @@ -1203,6 +1234,76 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # getters: specific queries + def _get_recent_flags(self): + """ + An accessor for the recent-flags set for this mailbox. + """ + if not self.__rflags: + rdoc = self._get_recent_doc() + self.__rflags = set(rdoc.content.get( + fields.RECENTFLAGS_KEY, [])) + return self.__rflags + + def _set_recent_flags(self, value): + """ + Setter for the recent-flags set for this mailbox. + """ + rdoc = self._get_recent_doc() + newv = set(value) + self.__rflags = newv + + with self._rdoc_lock: + rdoc.content[fields.RECENTFLAGS_KEY] = list(newv) + # XXX should deferLater 0 it? + self._soledad.put_doc(rdoc) + + recent_flags = property( + _get_recent_flags, _set_recent_flags, + doc="Set of UIDs with the recent flag for this mailbox.") + + def unset_recent_flags(self, uids): + """ + Unset Recent flag for a sequence of uids. + """ + self.recent_flags = self.recent_flags.difference( + set(uids)) + + def unset_recent_flag(self, uid): + """ + Unset Recent flag for a given uid. + """ + self.recent_flags = self.recent_flags.difference( + set([uid])) + + def set_recent_flag(self, uid): + """ + Set Recent flag for a given uid. + """ + self.recent_flags = self.recent_flags.union( + set([uid])) + + def _get_recent_doc(self): + """ + Get recent-flags document for this inbox. + """ + # TODO refactor this try-catch structure into a utility + try: + query = self._soledad.get_from_index( + fields.TYPE_MBOX_IDX, + fields.TYPE_RECENT_VAL, self.mbox) + if query: + if len(query) > 1: + logger.warning( + "More than one rdoc found for this mbox, " + "we got a duplicate!!") + # XXX we could take action, like trigger a background + # process to kill dupes. + return query.pop() + else: + return None + except Exception as exc: + logger.exception("Unhandled error %r" % exc) + def _get_fdoc_from_chash(self, chash): """ Return a flags document for this mailbox with a given chash. @@ -1287,6 +1388,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, def get_msg_by_uid(self, uid): """ Retrieves a LeapMessage by UID. + This is used primarity in the Mailbox fetch and store methods. :param uid: the message uid to query by :type uid: int @@ -1295,7 +1397,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, or None if not found. :rtype: LeapMessage """ - msg = LeapMessage(self._soledad, uid, self.mbox) + msg = LeapMessage(self._soledad, uid, self.mbox, collection=self) if not msg.does_exist(): return None return msg @@ -1412,28 +1514,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # recent messages - def recent_iter(self): - """ - Get an iterator for the message UIDs with `recent` flag. - - :return: iterator through recent message docs - :rtype: iterable - """ - return (doc.content[self.UID_KEY] for doc in - self._soledad.get_from_index( - fields.TYPE_MBOX_RECT_IDX, - fields.TYPE_FLAGS_VAL, self.mbox, '1')) - - def get_recent(self): - """ - Get all messages with the `Recent` flag. - - :returns: a list of LeapMessages - :rtype: list - """ - return [LeapMessage(self._soledad, docid, self.mbox) - for docid in self.recent_iter()] - def count_recent(self): """ Count all messages with the `Recent` flag. @@ -1441,10 +1521,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, :returns: count :rtype: int """ - count = self._soledad.get_count_from_index( - fields.TYPE_MBOX_RECT_IDX, - fields.TYPE_FLAGS_VAL, self.mbox, '1') - return count + return len(self.recent_flags) # deleted messages -- cgit v1.2.3 From 8ebd48d923466db51b9ea5698f51d1f12867a7cb Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 16 Jan 2014 23:46:08 -0400 Subject: refactor common pattern to utility function --- src/leap/mail/imap/messages.py | 104 +++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 56 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 6556b12..f968c47 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -25,6 +25,7 @@ import threading import StringIO from collections import defaultdict, namedtuple +from functools import partial from twisted.mail import imap4 from twisted.internet import defer @@ -42,7 +43,7 @@ from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields from leap.mail.imap.parser import MailParser, MBoxParser -from leap.mail.messageflow import IMessageConsumer, MessageProducer +from leap.mail.messageflow import IMessageConsumer logger = logging.getLogger(__name__) @@ -66,6 +67,31 @@ def lowerdict(_dict): for key, value in _dict.items()) +def try_unique_query(curried): + """ + Try to execute a query that is expected to have a + single outcome, and log a warning if more than one document found. + + :param curried: a curried function + :type curried: callable + """ + leap_assert(callable(curried), "A callable is expected") + try: + query = curried() + if query: + if len(query) > 1: + # TODO we could take action, like trigger a background + # process to kill dupes. + name = getattr(curried, 'expected', 'doc') + logger.warning( + "More than one %s found for this mbox, " + "we got a duplicate!!" % (name,)) + return query.pop() + else: + return None + except Exception as exc: + logger.exception("Unhandled error %r" % exc) + CHARSET_PATTERN = r"""charset=([\w-]+)""" MSGID_PATTERN = r"""<([\w@.]+)>""" @@ -1286,23 +1312,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, """ Get recent-flags document for this inbox. """ - # TODO refactor this try-catch structure into a utility - try: - query = self._soledad.get_from_index( - fields.TYPE_MBOX_IDX, - fields.TYPE_RECENT_VAL, self.mbox) - if query: - if len(query) > 1: - logger.warning( - "More than one rdoc found for this mbox, " - "we got a duplicate!!") - # XXX we could take action, like trigger a background - # process to kill dupes. - return query.pop() - else: - return None - except Exception as exc: - logger.exception("Unhandled error %r" % exc) + curried = partial( + self._soledad.get_from_index, + fields.TYPE_MBOX_IDX, + fields.TYPE_RECENT_VAL, self.mbox) + curried.expected = "rdoc" + return try_unique_query(curried) def _get_fdoc_from_chash(self, chash): """ @@ -1312,39 +1327,21 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, the query failed. :rtype: SoledadDocument or None. """ - try: - query = self._soledad.get_from_index( - fields.TYPE_MBOX_C_HASH_IDX, - fields.TYPE_FLAGS_VAL, self.mbox, chash) - if query: - if len(query) > 1: - logger.warning( - "More than one fdoc found for this chash, " - "we got a duplicate!!") - # XXX we could take action, like trigger a background - # process to kill dupes. - return query.pop() - else: - return None - except Exception as exc: - logger.exception("Unhandled error %r" % exc) + curried = partial( + self._soledad.get_from_index, + fields.TYPE_MBOX_C_HASH_IDX, + fields.TYPE_FLAGS_VAL, self.mbox, chash) + curried.expected = "fdoc" + return try_unique_query(curried) def _get_uid_from_msgidCb(self, msgid): hdoc = None - try: - query = self._soledad.get_from_index( - fields.TYPE_MSGID_IDX, - fields.TYPE_HEADERS_VAL, msgid) - if query: - if len(query) > 1: - logger.warning( - "More than one hdoc found for this msgid, " - "we got a duplicate!!") - # XXX we could take action, like trigger a background - # process to kill dupes. - hdoc = query.pop() - except Exception as exc: - logger.exception("Unhandled error %r" % exc) + curried = partial( + self._soledad.get_from_index, + fields.TYPE_MSGID_IDX, + fields.TYPE_HEADERS_VAL, msgid) + curried.expected = "hdoc" + hdoc = try_unique_query(curried) if hdoc is None: logger.warning("Could not find hdoc for msgid %s" @@ -1373,13 +1370,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # the query is received right after we've saved the document, # and we cannot find it otherwise. This seems to be enough. - # Doing a sleep since we'll be calling this in a secondary thread, - # but we'll should be able to collect the results after a - # reactor.callLater. - # Maybe we can implement something like NOT_DONE_YET in the web - # framework, and return from the callback? - # See: http://jcalderone.livejournal.com/50226.html - # reactor.callLater(0.3, self._get_uid_from_msgidCb, msgid) + # XXX do a deferLater instead ?? time.sleep(0.3) return self._get_uid_from_msgidCb(msgid) @@ -1426,6 +1417,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # inneficient, but first let's grok it and then # let's worry about efficiency. # XXX FIXINDEX -- should implement order by in soledad + # FIXME ---------------------------------------------- return sorted(all_docs, key=lambda item: item.content['uid']) def all_uid_iter(self): @@ -1573,4 +1565,4 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, self.mbox, self.count()) # XXX should implement __eq__ also !!! - # --- use the content hash for that, will be used for dedup. + # use chash... -- cgit v1.2.3 From 9ef1cd79397d811575826025b924c615e6ce2aa4 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 17 Jan 2014 02:51:31 -0400 Subject: Add a fetch_headers for mass-header fetch queries --- src/leap/mail/imap/messages.py | 159 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 154 insertions(+), 5 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index f968c47..7a21009 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -964,10 +964,30 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, FLAGS_DOC = "FLAGS" HEADERS_DOC = "HEADERS" CONTENT_DOC = "CONTENT" + """ + RECENT_DOC is a document that stores a list of the UIDs + with the recent flag for this mailbox. It deserves a special treatment + because: + (1) it cannot be set by the user + (2) it's a flag that we set inmediately after a fetch, which is quite + often. + (3) we need to be able to set/unset it in batches without doing a single + write for each element in the sequence. + """ RECENT_DOC = "RECENT" + """ + HDOCS_SET_DOC is a document that stores a set of the Document-IDs + (the u1db index) for all the headers documents for a given mailbox. + We use it to prefetch massively all the headers for a mailbox. + This is the second massive query, after fetching all the FLAGS, that + a MUA will do in a case where we do not have local disk cache. + """ + HDOCS_SET_DOC = "HDOCS_SET" templates = { + # Message Level + FLAGS_DOC: { fields.TYPE_KEY: fields.TYPE_FLAGS_VAL, fields.UID_KEY: 1, # XXX moe to a local table @@ -1007,14 +1027,25 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, fields.MULTIPART_KEY: False, }, + # Mailbox Level + RECENT_DOC: { fields.TYPE_KEY: fields.TYPE_RECENT_VAL, fields.MBOX_KEY: fields.INBOX_VAL, fields.RECENTFLAGS_KEY: [], + }, + + HDOCS_SET_DOC: { + fields.TYPE_KEY: fields.TYPE_HDOCS_SET_VAL, + fields.MBOX_KEY: fields.INBOX_VAL, + fields.HDOCS_SET_KEY: [], } + + } _rdoc_lock = threading.Lock() + _hdocset_lock = threading.Lock() def __init__(self, mbox=None, soledad=None): """ @@ -1045,10 +1076,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, self.mbox = self._parse_mailbox_name(mbox) self._soledad = soledad self.__rflags = None + self.__hdocset = None self.initialize_db() - # ensure that we have a recent-flags doc + # ensure that we have a recent-flags and a hdocs-sec doc self._get_or_create_rdoc() + self._get_or_create_hdocset() def _get_empty_doc(self, _type=FLAGS_DOC): """ @@ -1073,6 +1106,18 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, rdoc[fields.MBOX_KEY] = self.mbox self._soledad.create_doc(rdoc) + def _get_or_create_hdocset(self): + """ + Try to retrieve the hdocs-set doc for this MessageCollection, + and create one if not found. + """ + hdocset = self._get_hdocset_doc() + if not hdocset: + hdocset = self._get_empty_doc(self.HDOCS_SET_DOC) + if self.mbox != fields.INBOX_VAL: + hdocset[fields.MBOX_KEY] = self.mbox + self._soledad.create_doc(hdocset) + def _do_parse(self, raw): """ Parse raw message and return it along with @@ -1222,10 +1267,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # first, regular docs: flags and headers self._soledad.create_doc(fd) - # XXX should check for content duplication on headers too # but with chash. !!! - self._soledad.create_doc(hd) + hdoc = self._soledad.create_doc(hd) + # We add the newly created hdoc to the fast-access set of + # headers documents associated with the mailbox. + self.add_hdocset_docid(hdoc.doc_id) # and last, but not least, try to create # content docs if not already there. @@ -1258,7 +1305,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, d.addCallback(self._remove_cb) return d + # # getters: specific queries + # + + # recent flags def _get_recent_flags(self): """ @@ -1310,14 +1361,85 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, def _get_recent_doc(self): """ - Get recent-flags document for this inbox. + Get recent-flags document for this mailbox. """ curried = partial( self._soledad.get_from_index, fields.TYPE_MBOX_IDX, fields.TYPE_RECENT_VAL, self.mbox) curried.expected = "rdoc" - return try_unique_query(curried) + with self._rdoc_lock: + return try_unique_query(curried) + + # headers-docs-set + + def _get_hdocset(self): + """ + An accessor for the hdocs-set for this mailbox. + """ + if not self.__hdocset: + hdocset_doc = self._get_hdocset_doc() + value = set(hdocset_doc.content.get( + fields.HDOCS_SET_KEY, [])) + self.__hdocset = value + return self.__hdocset + + def _set_hdocset(self, value): + """ + Setter for the hdocs-set for this mailbox. + """ + hdocset_doc = self._get_hdocset_doc() + newv = set(value) + self.__hdocset = newv + + with self._hdocset_lock: + hdocset_doc.content[fields.HDOCS_SET_KEY] = list(newv) + # XXX should deferLater 0 it? + self._soledad.put_doc(hdocset_doc) + + _hdocset = property( + _get_hdocset, _set_hdocset, + doc="Set of Document-IDs for the headers docs associated " + "with this mailbox.") + + def _get_hdocset_doc(self): + """ + Get hdocs-set document for this mailbox. + """ + curried = partial( + self._soledad.get_from_index, + fields.TYPE_MBOX_IDX, + fields.TYPE_HDOCS_SET_VAL, self.mbox) + curried.expected = "hdocset" + with self._hdocset_lock: + hdocset_doc = try_unique_query(curried) + return hdocset_doc + + def remove_hdocset_docids(self, docids): + """ + Remove the given document IDs from the set of + header-documents associated with this mailbox. + """ + self._hdocset = self._hdocset.difference( + set(docids)) + + def remove_hdocset_docid(self, docid): + """ + Remove the given document ID from the set of + header-documents associated with this mailbox. + """ + self._hdocset = self._hdocset.difference( + set([docid])) + + def add_hdocset_docid(self, docid): + """ + Add the given document ID to the set of + header-documents associated with this mailbox. + """ + hdocset = self._hdocset + self._hdocset = hdocset.union(set([docid])) + + # individual doc getters, message layer. def _get_fdoc_from_chash(self, chash): """ @@ -1456,6 +1578,30 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, fields.TYPE_FLAGS_VAL, self.mbox))) return all_flags + def all_flags_chash(self): + """ + Return a dict with the content-hash for all flag documents + for this mailbox. + """ + all_flags_chash = dict((( + doc.content[self.UID_KEY], + doc.content[self.CONTENT_HASH_KEY]) for doc in + self._soledad.get_from_index( + fields.TYPE_MBOX_IDX, + fields.TYPE_FLAGS_VAL, self.mbox))) + return all_flags_chash + + def all_headers(self): + """ + Return a dict with all the headers documents for this + mailbox. + """ + all_headers = dict((( + doc.content[self.CONTENT_HASH_KEY], + doc.content[self.HEADERS_KEY]) for doc in + self._soledad.get_docs(self._hdocset))) + return all_headers + def count(self): """ Return the count of messages for this mailbox. @@ -1509,6 +1655,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, def count_recent(self): """ Count all messages with the `Recent` flag. + It just retrieves the length of the recent_flags set, + which is stored in a specific type of document for + this collection. :returns: count :rtype: int -- cgit v1.2.3