diff options
-rw-r--r-- | mail/src/leap/mail/imap/messages.py | 163 |
1 files changed, 99 insertions, 64 deletions
diff --git a/mail/src/leap/mail/imap/messages.py b/mail/src/leap/mail/imap/messages.py index 7c17dbe..b35b808 100644 --- a/mail/src/leap/mail/imap/messages.py +++ b/mail/src/leap/mail/imap/messages.py @@ -20,7 +20,6 @@ LeapMessage and MessageCollection. import copy import logging import re -import threading import time import StringIO @@ -51,8 +50,6 @@ logger = logging.getLogger(__name__) # [ ] Add linked-from info. # [ ] Delete incoming mail only after successful write! # [ ] Remove UID from syncable db. Store only those indexes locally. -# [ ] Send patch to twisted for bug in imap4.py:5717 (content-type can be -# none? lower-case?) def lowerdict(_dict): @@ -657,10 +654,27 @@ class LeapMessage(fields, MailParser, MBoxParser): Return the document that keeps the flags for this message. """ - flag_docs = self._soledad.get_from_index( - fields.TYPE_MBOX_UID_IDX, - fields.TYPE_FLAGS_VAL, self._mbox, str(self._uid)) - return first(flag_docs) + result = {} + try: + flag_docs = self._soledad.get_from_index( + fields.TYPE_MBOX_UID_IDX, + fields.TYPE_FLAGS_VAL, self._mbox, str(self._uid)) + result = first(flag_docs) + except Exception as exc: + # ugh! Something's broken down there! + logger.warning("FUCKING ERROR ----- getting for UID:", self._uid) + logger.exception(exc) + try: + flag_docs = self._soledad.get_from_index( + fields.TYPE_MBOX_UID_IDX, + fields.TYPE_FLAGS_VAL, self._mbox, str(self._uid)) + result = first(flag_docs) + except Exception as exc: + # ugh! Something's broken down there! + logger.warning("FUCKING ERROR, 2nd time -----") + logger.exception(exc) + finally: + return result def _get_headers_doc(self): """ @@ -770,6 +784,51 @@ class LeapMessage(fields, MailParser, MBoxParser): return self._fdoc is not None +class ContentDedup(object): + """ + Message deduplication. + + We do a query for the content hashes before writing to our beloved + sqlcipher backend of Soledad. This means, by now, that: + + 1. We will not store the same attachment twice, only the hash of it. + 2. We will not store the same message body twice, only the hash of it. + + The first case is useful if you are always receiving the same old memes + from unwary friends that still have not discovered that 4chan is the + generator of the internet. The second will save your day if you have + initiated session with the same account in two different machines. I also + wonder why would you do that, but let's respect each other choices, like + with the religious celebrations, and assume that one day we'll be able + to run Bitmask in completely free phones. Yes, I mean that, the whole GSM + Stack. + """ + + def _content_does_exist(self, doc): + """ + Check whether we already have a content document for a payload + with this hash in our database. + + :param doc: tentative body document + :type doc: dict + :returns: True if that happens, False otherwise. + """ + if not doc: + return False + phash = doc[fields.PAYLOAD_HASH_KEY] + attach_docs = self._soledad.get_from_index( + fields.TYPE_P_HASH_IDX, + fields.TYPE_CONTENT_VAL, str(phash)) + if not attach_docs: + return False + + if len(attach_docs) != 1: + logger.warning("Found more than one copy of phash %s!" + % (phash,)) + logger.debug("Found attachment doc with that hash! Skipping save!") + return True + + SoledadWriterPayload = namedtuple( 'SoledadWriterPayload', ['mode', 'payload']) @@ -781,6 +840,13 @@ SoledadWriterPayload.PUT = 2 SoledadWriterPayload.CONTENT_CREATE = 3 +""" +SoledadDocWriter was used to avoid writing to the db from multiple threads. +Its use here has been deprecated in favor of a local rw_lock in the client. +But we might want to reuse in in the near future to implement priority queues. +""" + + class SoledadDocWriter(object): """ This writer will create docs serially in the local soledad database. @@ -852,51 +918,9 @@ class SoledadDocWriter(object): empty = queue.empty() - """ - Message deduplication. - We do a query for the content hashes before writing to our beloved - sqlcipher backend of Soledad. This means, by now, that: - - 1. We will not store the same attachment twice, only the hash of it. - 2. We will not store the same message body twice, only the hash of it. - - The first case is useful if you are always receiving the same old memes - from unwary friends that still have not discovered that 4chan is the - generator of the internet. The second will save your day if you have - initiated session with the same account in two different machines. I also - wonder why would you do that, but let's respect each other choices, like - with the religious celebrations, and assume that one day we'll be able - to run Bitmask in completely free phones. Yes, I mean that, the whole GSM - Stack. - """ - - def _content_does_exist(self, doc): - """ - Check whether we already have a content document for a payload - with this hash in our database. - - :param doc: tentative body document - :type doc: dict - :returns: True if that happens, False otherwise. - """ - if not doc: - return False - phash = doc[fields.PAYLOAD_HASH_KEY] - attach_docs = self._soledad.get_from_index( - fields.TYPE_P_HASH_IDX, - fields.TYPE_CONTENT_VAL, str(phash)) - if not attach_docs: - return False - - if len(attach_docs) != 1: - logger.warning("Found more than one copy of phash %s!" - % (phash,)) - logger.debug("Found attachment doc with that hash! Skipping save!") - return True - - -class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): +class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, + ContentDedup): """ A collection of messages, surprisingly. @@ -1145,23 +1169,21 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): hd[key] = parts_map[key] del parts_map - docs = [fd, hd] - cdocs = walk.get_raw_docs(msg, parts) - # Saving - logger.debug('enqueuing message docs for write') - ptuple = SoledadWriterPayload # first, regular docs: flags and headers - for doc in docs: - self.soledad_writer.put(ptuple( - mode=ptuple.CREATE, payload=doc)) + self._soledad.create_doc(fd) + + # XXX should check for content duplication on headers too + # but with chash. !!! + self._soledad.create_doc(hd) # and last, but not least, try to create # content docs if not already there. - for cd in cdocs: - self.soledad_writer.put(ptuple( - mode=ptuple.CONTENT_CREATE, payload=cd)) + cdocs = walk.get_raw_docs(msg, parts) + for cdoc in cdocs: + if not self._content_does_exist(cdoc): + self._soledad.create_doc(cdoc) def _remove_cb(self, result): return result @@ -1312,17 +1334,30 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # XXX FIXINDEX -- should implement order by in soledad return sorted(all_docs, key=lambda item: item.content['uid']) - def all_msg_iter(self): + def all_uid_iter(self): """ Return an iterator trhough the UIDs of all messages, sorted in ascending order. """ + # XXX we should get this from the uid table, local-only all_uids = (doc.content[self.UID_KEY] for doc in self._soledad.get_from_index( fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox)) return (u for u in sorted(all_uids)) + def all_flags(self): + """ + Return a dict with all flags documents for this mailbox. + """ + all_flags = dict((( + doc.content[self.UID_KEY], + doc.content[self.FLAGS_KEY]) for doc in + self._soledad.get_from_index( + fields.TYPE_MBOX_IDX, + fields.TYPE_FLAGS_VAL, self.mbox))) + return all_flags + def count(self): """ Return the count of messages for this mailbox. @@ -1447,7 +1482,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :rtype: iterable """ return (LeapMessage(self._soledad, docuid, self.mbox) - for docuid in self.all_msg_iter()) + for docuid in self.all_uid_iter()) def __repr__(self): """ |