From 62b0cd6301b7097dfa2776b677ab3c7d27f60d7b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 26 Dec 2013 14:10:14 -0400 Subject: Split the near-2k loc file into more handy modules. ...aaaand not a single fuck was given that day! --- src/leap/mail/imap/messages.py | 735 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 735 insertions(+) create mode 100644 src/leap/mail/imap/messages.py (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py new file mode 100644 index 0000000..b0d5da2 --- /dev/null +++ b/src/leap/mail/imap/messages.py @@ -0,0 +1,735 @@ +# -*- coding: utf-8 -*- +# messages.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +LeapMessage and MessageCollection. 
+""" +import copy +import logging +import StringIO +from collections import namedtuple + +from twisted.mail import imap4 +from twisted.python import log +from u1db import errors as u1db_errors +from zope.interface import implements +from zope.proxy import sameProxiedObjects + +from leap.common.check import leap_assert, leap_assert_type +from leap.common.mail import get_email_charset +from leap.mail.decorators import deferred +from leap.mail.imap.account import SoledadBackedAccount +from leap.mail.imap.index import IndexedDB +from leap.mail.imap.fields import fields, WithMsgFields +from leap.mail.imap.parser import MailParser, MBoxParser +from leap.mail.messageflow import IMessageConsumer, MessageProducer + +logger = logging.getLogger(__name__) + + +class LeapMessage(fields, MailParser, MBoxParser): + + implements(imap4.IMessage) + + def __init__(self, soledad, uid, mbox): + """ + Initializes a LeapMessage. + + :param soledad: a Soledad instance + :type soledad: Soledad + :param uid: the UID for the message. + :type uid: int or basestring + :param mbox: the mbox this message belongs to + :type mbox: basestring + """ + MailParser.__init__(self) + self._soledad = soledad + self._uid = int(uid) + self._mbox = self._parse_mailbox_name(mbox) + self._chash = None + + self.__cdoc = None + + @property + def _fdoc(self): + """ + An accessor to the flags document. + """ + return self._get_flags_doc() + + @property + def _cdoc(self): + """ + An accessor to the content document. + """ + if not self.__cdoc: + self.__cdoc = self._get_content_doc() + return self.__cdoc + + @property + def _chash(self): + """ + An accessor to the content hash for this message. 
+ """ + if not self._fdoc: + return None + return self._fdoc.content.get(fields.CONTENT_HASH_KEY, None) + + # IMessage implementation + + def getUID(self): + """ + Retrieve the unique identifier associated with this message + + :return: uid for this message + :rtype: int + """ + return self._uid + + def getFlags(self): + """ + Retrieve the flags associated with this message + + :return: The flags, represented as strings + :rtype: tuple + """ + if self._uid is None: + return [] + + flags = [] + flag_doc = self._fdoc + if flag_doc: + flags = flag_doc.content.get(self.FLAGS_KEY, None) + if flags: + flags = map(str, flags) + return tuple(flags) + + # setFlags, addFlags, removeFlags are not in the interface spec + # but we use them with store command. + + def setFlags(self, flags): + """ + Sets the flags for this message + + Returns a SoledadDocument that needs to be updated by the caller. + + :param flags: the flags to update in the message. + :type flags: tuple of str + + :return: a SoledadDocument instance + :rtype: SoledadDocument + """ + leap_assert(isinstance(flags, tuple), "flags need to be a tuple") + log.msg('setting flags: %s' % (self._uid)) + + doc = self._fdoc + doc.content[self.FLAGS_KEY] = flags + doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags + doc.content[self.RECENT_KEY] = self.RECENT_FLAG in flags + self._soledad.put_doc(doc) + + def addFlags(self, flags): + """ + Adds flags to this message. + + Returns a SoledadDocument that needs to be updated by the caller. + + :param flags: the flags to add to the message. + :type flags: tuple of str + + :return: a SoledadDocument instance + :rtype: SoledadDocument + """ + leap_assert(isinstance(flags, tuple), "flags need to be a tuple") + oldflags = self.getFlags() + self.setFlags(tuple(set(flags + oldflags))) + + def removeFlags(self, flags): + """ + Remove flags from this message. + + Returns a SoledadDocument that needs to be updated by the caller. 
+ + :param flags: the flags to be removed from the message. + :type flags: tuple of str + + :return: a SoledadDocument instance + :rtype: SoledadDocument + """ + leap_assert(isinstance(flags, tuple), "flags need to be a tuple") + oldflags = self.getFlags() + self.setFlags(tuple(set(oldflags) - set(flags))) + + def getInternalDate(self): + """ + Retrieve the date internally associated with this message + + :rtype: C{str} + :return: An RFC822-formatted date string. + """ + return str(self._cdoc.content.get(self.DATE_KEY, '')) + + # + # IMessagePart + # + + # XXX we should implement this interface too for the subparts + # so we allow nested parts... + + def getBodyFile(self): + """ + Retrieve a file object containing only the body of this message. + + :return: file-like object opened for reading + :rtype: StringIO + """ + fd = StringIO.StringIO() + + cdoc = self._cdoc + content = cdoc.content.get(self.RAW_KEY, '') + charset = get_email_charset( + unicode(cdoc.content.get(self.RAW_KEY, ''))) + try: + content = content.encode(charset) + except (UnicodeEncodeError, UnicodeDecodeError) as e: + logger.error("Unicode error {0}".format(e)) + content = content.encode(charset, 'replace') + + raw = self._get_raw_msg() + msg = self._get_parsed_msg(raw) + body = msg.get_payload() + fd.write(body) + # XXX SHOULD use a separate BODY FIELD ... + fd.seek(0) + return fd + + def getSize(self): + """ + Return the total size, in octets, of this message. + + :return: size of the message, in octets + :rtype: int + """ + size = self._cdoc.content.get(self.SIZE_KEY, False) + if not size: + # XXX fallback, should remove when all migrated. + size = self.getBodyFile().len + return size + + def _get_headers(self): + """ + Return the headers dict stored in this message document. + """ + # XXX get from the headers doc + return self._cdoc.content.get(self.HEADERS_KEY, {}) + + def getHeaders(self, negate, *names): + """ + Retrieve a group of message headers. 
+ + :param names: The names of the headers to retrieve or omit. + :type names: tuple of str + + :param negate: If True, indicates that the headers listed in names + should be omitted from the return value, rather + than included. + :type negate: bool + + :return: A mapping of header field names to header field values + :rtype: dict + """ + headers = self._get_headers() + names = map(lambda s: s.upper(), names) + if negate: + cond = lambda key: key.upper() not in names + else: + cond = lambda key: key.upper() in names + + # unpack and filter original dict by negate-condition + filter_by_cond = [ + map(str, (key, val)) for + key, val in headers.items() + if cond(key)] + return dict(filter_by_cond) + + def isMultipart(self): + """ + Return True if this message is multipart. + """ + if self._cdoc: + retval = self._fdoc.content.get(self.MULTIPART_KEY, False) + return retval + + def getSubPart(self, part): + """ + Retrieve a MIME submessage + + :type part: C{int} + :param part: The number of the part to retrieve, indexed from 0. + :raise IndexError: Raised if the specified part does not exist. + :raise TypeError: Raised if this message is not multipart. + :rtype: Any object implementing C{IMessagePart}. + :return: The specified sub-part. + """ + if not self.isMultipart(): + raise TypeError + + msg = self._get_parsed_msg() + # XXX should wrap IMessagePart + return msg.get_payload()[part] + + # + # accessors + # + + def _get_flags_doc(self): + """ + Return the document that keeps the flags for this + message. + """ + flag_docs = self._soledad.get_from_index( + SoledadBackedAccount.TYPE_MBOX_UID_IDX, + fields.TYPE_FLAGS_VAL, self._mbox, str(self._uid)) + flag_doc = flag_docs[0] if flag_docs else None + return flag_doc + + def _get_content_doc(self): + """ + Return the document that keeps the flags for this + message. 
+ """ + cont_docs = self._soledad.get_from_index( + SoledadBackedAccount.TYPE_HASH_IDX, + fields.TYPE_MESSAGE_VAL, self._content_hash, str(self._uid)) + cont_doc = cont_docs[0] if cont_docs else None + return cont_doc + + def _get_raw_msg(self): + """ + Return the raw msg. + :rtype: basestring + """ + return self._cdoc.content.get(self.RAW_KEY, '') + + def __getitem__(self, key): + """ + Return the content of the message document. + + :param key: The key + :type key: str + + :return: The content value indexed by C{key} or None + :rtype: str + """ + return self._cdoc.content.get(key, None) + + def does_exist(self): + """ + Return True if there is actually a message for this + UID and mbox. + """ + return bool(self._fdoc) + + +SoledadWriterPayload = namedtuple( + 'SoledadWriterPayload', ['mode', 'payload']) + +SoledadWriterPayload.CREATE = 1 +SoledadWriterPayload.PUT = 2 + + +class SoledadDocWriter(object): + """ + This writer will create docs serially in the local soledad database. + """ + + implements(IMessageConsumer) + + def __init__(self, soledad): + """ + Initialize the writer. + + :param soledad: the soledad instance + :type soledad: Soledad + """ + self._soledad = soledad + + def consume(self, queue): + """ + Creates a new document in soledad db. + + :param queue: queue to get item from, with content of the document + to be inserted. + :type queue: Queue + """ + empty = queue.empty() + while not empty: + item = queue.get() + if item.mode == SoledadWriterPayload.CREATE: + call = self._soledad.create_doc + elif item.mode == SoledadWriterPayload.PUT: + call = self._soledad.put_doc + + # should handle errors + try: + call(item.payload) + except u1db_errors.RevisionConflict as exc: + logger.error("Error: %r" % (exc,)) + raise exc + + empty = queue.empty() + + +class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): + """ + A collection of messages, surprisingly. + + It is tied to a selected mailbox name that is passed to constructor. 
+ Implements a filter query over the messages contained in a soledad + database. + """ + # XXX this should be able to produce a MessageSet methinks + + EMPTY_MSG = { + fields.TYPE_KEY: fields.TYPE_MESSAGE_VAL, + fields.UID_KEY: 1, + fields.MBOX_KEY: fields.INBOX_VAL, + + fields.SUBJECT_KEY: "", + fields.DATE_KEY: "", + fields.RAW_KEY: "", + + # XXX should separate headers into another doc + fields.HEADERS_KEY: {}, + } + + EMPTY_FLAGS = { + fields.TYPE_KEY: fields.TYPE_FLAGS_VAL, + fields.UID_KEY: 1, + fields.MBOX_KEY: fields.INBOX_VAL, + + fields.FLAGS_KEY: [], + fields.SEEN_KEY: False, + fields.RECENT_KEY: True, + fields.MULTIPART_KEY: False, + } + + # get from SoledadBackedAccount the needed index-related constants + INDEXES = SoledadBackedAccount.INDEXES + TYPE_IDX = SoledadBackedAccount.TYPE_IDX + + def __init__(self, mbox=None, soledad=None): + """ + Constructor for MessageCollection. + + :param mbox: the name of the mailbox. It is the name + with which we filter the query over the + messages database + :type mbox: str + + :param soledad: Soledad database + :type soledad: Soledad instance + """ + MailParser.__init__(self) + leap_assert(mbox, "Need a mailbox name to initialize") + leap_assert(mbox.strip() != "", "mbox cannot be blank space") + leap_assert(isinstance(mbox, (str, unicode)), + "mbox needs to be a string") + leap_assert(soledad, "Need a soledad instance to initialize") + + # okay, all in order, keep going... + self.mbox = self._parse_mailbox_name(mbox) + self._soledad = soledad + self.initialize_db() + + # I think of someone like nietzsche when reading this + + # this will be the producer that will enqueue the content + # to be processed serially by the consumer (the writer). We just + # need to `put` the new material on its plate. + + self.soledad_writer = MessageProducer( + SoledadDocWriter(soledad), + period=0.05) + + def _get_empty_msg(self): + """ + Returns an empty message. 
+ + :return: a dict containing a default empty message + :rtype: dict + """ + return copy.deepcopy(self.EMPTY_MSG) + + def _get_empty_flags_doc(self): + """ + Returns an empty doc for storing flags. + + :return: + :rtype: + """ + return copy.deepcopy(self.EMPTY_FLAGS) + + @deferred + def add_msg(self, raw, subject=None, flags=None, date=None, uid=1): + """ + Creates a new message document. + + :param raw: the raw message + :type raw: str + + :param subject: subject of the message. + :type subject: str + + :param flags: flags + :type flags: list + + :param date: the received date for the message + :type date: str + + :param uid: the message uid for this mailbox + :type uid: int + """ + # TODO: split in smaller methods + logger.debug('adding message') + if flags is None: + flags = tuple() + leap_assert_type(flags, tuple) + + content_doc = self._get_empty_msg() + flags_doc = self._get_empty_flags_doc() + + content_doc[self.MBOX_KEY] = self.mbox + flags_doc[self.MBOX_KEY] = self.mbox + # ...should get a sanity check here. + content_doc[self.UID_KEY] = uid + flags_doc[self.UID_KEY] = uid + + if flags: + flags_doc[self.FLAGS_KEY] = map(self._stringify, flags) + flags_doc[self.SEEN_KEY] = self.SEEN_FLAG in flags + + msg = self._get_parsed_msg(raw) + headers = dict(msg) + + logger.debug("adding. is multipart:%s" % msg.is_multipart()) + flags_doc[self.MULTIPART_KEY] = msg.is_multipart() + # XXX get lower case for keys? 
+ # XXX get headers doc + content_doc[self.HEADERS_KEY] = headers + # set subject based on message headers and eventually replace by + # subject given as param + if self.SUBJECT_FIELD in headers: + content_doc[self.SUBJECT_KEY] = headers[self.SUBJECT_FIELD] + if subject is not None: + content_doc[self.SUBJECT_KEY] = subject + + # XXX could separate body into its own doc + # but should also separate multiparts + # that should be wrapped in MessagePart + content_doc[self.RAW_KEY] = self._stringify(raw) + content_doc[self.SIZE_KEY] = len(raw) + + if not date and self.DATE_FIELD in headers: + content_doc[self.DATE_KEY] = headers[self.DATE_FIELD] + else: + content_doc[self.DATE_KEY] = date + + logger.debug('enqueuing message for write') + + ptuple = SoledadWriterPayload + self.soledad_writer.put(ptuple( + mode=ptuple.CREATE, payload=content_doc)) + self.soledad_writer.put(ptuple( + mode=ptuple.CREATE, payload=flags_doc)) + + def remove(self, msg): + """ + Removes a message. + + :param msg: a Leapmessage instance + :type msg: LeapMessage + """ + # XXX remove + #self._soledad.delete_doc(msg) + msg.remove() + + # getters + + def get_msg_by_uid(self, uid): + """ + Retrieves a LeapMessage by UID. + + :param uid: the message uid to query by + :type uid: int + + :return: A LeapMessage instance matching the query, + or None if not found. + :rtype: LeapMessage + """ + msg = LeapMessage(self._soledad, uid, self.mbox) + if not msg.does_exist(): + return None + return msg + + def get_all_docs(self, _type=fields.TYPE_FLAGS_VAL): + """ + Get all documents for the selected mailbox of the + passed type. By default, it returns the flag docs. 
+ + If you want acess to the content, use __iter__ instead + + :return: a list of u1db documents + :rtype: list of SoledadDocument + """ + if _type not in fields.__dict__.values(): + raise TypeError("Wrong type passed to get_all") + + if sameProxiedObjects(self._soledad, None): + logger.warning('Tried to get messages but soledad is None!') + return [] + + all_docs = [doc for doc in self._soledad.get_from_index( + SoledadBackedAccount.TYPE_MBOX_IDX, + _type, self.mbox)] + + # inneficient, but first let's grok it and then + # let's worry about efficiency. + # XXX FIXINDEX -- should implement order by in soledad + return sorted(all_docs, key=lambda item: item.content['uid']) + + def all_msg_iter(self): + """ + Return an iterator trhough the UIDs of all messages, sorted in + ascending order. + """ + all_uids = (doc.content[self.UID_KEY] for doc in + self._soledad.get_from_index( + SoledadBackedAccount.TYPE_MBOX_IDX, + self.TYPE_FLAGS_VAL, self.mbox)) + return (u for u in sorted(all_uids)) + + def count(self): + """ + Return the count of messages for this mailbox. + + :rtype: int + """ + count = self._soledad.get_count_from_index( + SoledadBackedAccount.TYPE_MBOX_IDX, + fields.TYPE_FLAGS_VAL, self.mbox) + return count + + # unseen messages + + def unseen_iter(self): + """ + Get an iterator for the message UIDs with no `seen` flag + for this mailbox. + + :return: iterator through unseen message doc UIDs + :rtype: iterable + """ + return (doc.content[self.UID_KEY] for doc in + self._soledad.get_from_index( + SoledadBackedAccount.TYPE_MBOX_SEEN_IDX, + self.TYPE_FLAGS_VAL, self.mbox, '0')) + + def count_unseen(self): + """ + Count all messages with the `Unseen` flag. 
+ + :returns: count + :rtype: int + """ + count = self._soledad.get_count_from_index( + SoledadBackedAccount.TYPE_MBOX_SEEN_IDX, + self.TYPE_FLAGS_VAL, self.mbox, '0') + return count + + def get_unseen(self): + """ + Get all messages with the `Unseen` flag + + :returns: a list of LeapMessages + :rtype: list + """ + return [LeapMessage(self._soledad, docid, self.mbox) + for docid in self.unseen_iter()] + + # recent messages + + def recent_iter(self): + """ + Get an iterator for the message docs with `recent` flag. + + :return: iterator through recent message docs + :rtype: iterable + """ + return (doc.content[self.UID_KEY] for doc in + self._soledad.get_from_index( + SoledadBackedAccount.TYPE_MBOX_RECT_IDX, + self.TYPE_FLAGS_VAL, self.mbox, '1')) + + def get_recent(self): + """ + Get all messages with the `Recent` flag. + + :returns: a list of LeapMessages + :rtype: list + """ + return [LeapMessage(self._soledad, docid, self.mbox) + for docid in self.recent_iter()] + + def count_recent(self): + """ + Count all messages with the `Recent` flag. + + :returns: count + :rtype: int + """ + count = self._soledad.get_count_from_index( + SoledadBackedAccount.TYPE_MBOX_RECT_IDX, + self.TYPE_FLAGS_VAL, self.mbox, '1') + return count + + def __len__(self): + """ + Returns the number of messages on this mailbox. + + :rtype: int + """ + return self.count() + + def __iter__(self): + """ + Returns an iterator over all messages. + + :returns: iterator of dicts with content for all messages. + :rtype: iterable + """ + return (LeapMessage(self._soledad, docuid, self.mbox) + for docuid in self.all_msg_iter()) + + def __repr__(self): + """ + Representation string for this object. + """ + return u"" % ( + self.mbox, self.count()) + + # XXX should implement __eq__ also !!! --- use a hash + # of content for that, will be used for dedup. 
-- cgit v1.2.3 From 25a0aea875fd0d67238beed1237f7239474673ec Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 27 Dec 2013 02:06:44 -0400 Subject: First stage of the storage schema rewrite. * Separates between flags, docs, body and attachment docs. * Implement IMessageCopier interface: move and have fun! This little change is known to push forward our beloved architect emotional rollercoster. * Message deduplication. * It also fixes a hidden bug that was rendering the multipart mime interface useless (yes, the "True" parameter in the parsestr method). * Does not handle well nested attachs, includes dirty workaround that flattens them. * Includes chiiph's patch for rc2: * return deferred from addMessage * convert StringIO types to string * remove unneeded yields from the chain of deferreds in fetcher --- src/leap/mail/imap/messages.py | 831 +++++++++++++++++++++++++++++++++-------- 1 file changed, 666 insertions(+), 165 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index b0d5da2..c69c023 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -29,9 +29,9 @@ from zope.interface import implements from zope.proxy import sameProxiedObjects from leap.common.check import leap_assert, leap_assert_type +from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset from leap.mail.decorators import deferred -from leap.mail.imap.account import SoledadBackedAccount from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields from leap.mail.imap.parser import MailParser, MBoxParser @@ -40,6 +40,181 @@ from leap.mail.messageflow import IMessageConsumer, MessageProducer logger = logging.getLogger(__name__) +def first(things): + """ + Return the head of a collection. 
+ """ + try: + return things[0] + except (IndexError, TypeError): + return None + + +class MessageBody(object): + """ + IMessagePart implementor for the main + body of a multipart message. + + Excusatio non petita: see the interface documentation. + """ + + implements(imap4.IMessagePart) + + def __init__(self, fdoc, bdoc): + self._fdoc = fdoc + self._bdoc = bdoc + + def getSize(self): + return len(self._bdoc.content[fields.BODY_KEY]) + + def getBodyFile(self): + fd = StringIO.StringIO() + + if self._bdoc: + body = self._bdoc.content[fields.BODY_KEY] + else: + body = "" + charset = self._get_charset(body) + try: + body = body.encode(charset) + except (UnicodeEncodeError, UnicodeDecodeError) as e: + logger.error("Unicode error {0}".format(e)) + body = body.encode(charset, 'replace') + fd.write(body) + fd.seek(0) + return fd + + @memoized_method + def _get_charset(self, stuff): + return get_email_charset(unicode(stuff)) + + def getHeaders(self, negate, *names): + return {} + + def isMultipart(self): + return False + + def getSubPart(self, part): + return None + + +class MessageAttachment(object): + + implements(imap4.IMessagePart) + + def __init__(self, msg): + """ + Initializes the messagepart with a Message instance. + :param msg: a message instance + :type msg: Message + """ + self._msg = msg + + def getSize(self): + """ + Return the total size, in octets, of this message part. + + :return: size of the message, in octets + :rtype: int + """ + if not self._msg: + return 0 + return len(self._msg.as_string()) + + def getBodyFile(self): + """ + Retrieve a file object containing only the body of this message. + + :return: file-like object opened for reading + :rtype: StringIO + """ + fd = StringIO.StringIO() + if self._msg: + body = self._msg.get_payload() + else: + logger.debug("Empty message!") + body = "" + + # XXX should only do the dance if we're sure it's + # content/text-plain!!! 
+ #charset = self._get_charset(body) + #try: + #body = body.encode(charset) + #except (UnicodeEncodeError, UnicodeDecodeError) as e: + #logger.error("Unicode error {0}".format(e)) + #body = body.encode(charset, 'replace') + fd.write(body) + fd.seek(0) + return fd + + @memoized_method + def _get_charset(self, stuff): + # TODO put in a common class with LeapMessage + """ + Gets (guesses?) the charset of a payload. + + :param stuff: the stuff to guess about. + :type stuff: basestring + :returns: charset + """ + # XXX existential doubt 1. wouldn't be smarter to + # peek into the mail headers? + # XXX existential doubt 2. shouldn't we make the scope + # of the decorator somewhat more persistent? + # ah! yes! and put memory bounds. + return get_email_charset(unicode(stuff)) + + def getHeaders(self, negate, *names): + """ + Retrieve a group of message headers. + + :param names: The names of the headers to retrieve or omit. + :type names: tuple of str + + :param negate: If True, indicates that the headers listed in names + should be omitted from the return value, rather + than included. + :type negate: bool + + :return: A mapping of header field names to header field values + :rtype: dict + """ + if not self._msg: + return {} + headers = dict(self._msg.items()) + names = map(lambda s: s.upper(), names) + if negate: + cond = lambda key: key.upper() not in names + else: + cond = lambda key: key.upper() in names + + # unpack and filter original dict by negate-condition + filter_by_cond = [ + map(str, (key, val)) for + key, val in headers.items() + if cond(key)] + return dict(filter_by_cond) + + def isMultipart(self): + """ + Return True if this message is multipart. + """ + return self._msg.is_multipart() + + def getSubPart(self, part): + """ + Retrieve a MIME submessage + + :type part: C{int} + :param part: The number of the part to retrieve, indexed from 0. + :raise IndexError: Raised if the specified part does not exist. 
+ :raise TypeError: Raised if this message is not multipart. + :rtype: Any object implementing C{IMessagePart}. + :return: The specified sub-part. + """ + return self._msg.get_payload() + + class LeapMessage(fields, MailParser, MBoxParser): implements(imap4.IMessage) @@ -59,25 +234,21 @@ class LeapMessage(fields, MailParser, MBoxParser): self._soledad = soledad self._uid = int(uid) self._mbox = self._parse_mailbox_name(mbox) - self._chash = None - self.__cdoc = None + self.__chash = None + self.__bdoc = None @property def _fdoc(self): """ An accessor to the flags document. """ - return self._get_flags_doc() - - @property - def _cdoc(self): - """ - An accessor to the content document. - """ - if not self.__cdoc: - self.__cdoc = self._get_content_doc() - return self.__cdoc + if all(map(bool, (self._uid, self._mbox))): + fdoc = self._get_flags_doc() + if fdoc: + self.__chash = fdoc.content.get( + fields.CONTENT_HASH_KEY, None) + return fdoc @property def _chash(self): @@ -86,7 +257,26 @@ class LeapMessage(fields, MailParser, MBoxParser): """ if not self._fdoc: return None - return self._fdoc.content.get(fields.CONTENT_HASH_KEY, None) + if not self.__chash and self._fdoc: + self.__chash = self._fdoc.content.get( + fields.CONTENT_HASH_KEY, None) + return self.__chash + + @property + def _hdoc(self): + """ + An accessor to the headers document. + """ + return self._get_headers_doc() + + @property + def _bdoc(self): + """ + An accessor to the body document. 
+ """ + if not self.__bdoc: + self.__bdoc = self._get_body_doc() + return self.__bdoc # IMessage implementation @@ -110,9 +300,9 @@ class LeapMessage(fields, MailParser, MBoxParser): return [] flags = [] - flag_doc = self._fdoc - if flag_doc: - flags = flag_doc.content.get(self.FLAGS_KEY, None) + fdoc = self._fdoc + if fdoc: + flags = fdoc.content.get(self.FLAGS_KEY, None) if flags: flags = map(str, flags) return tuple(flags) @@ -180,7 +370,7 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: C{str} :return: An RFC822-formatted date string. """ - return str(self._cdoc.content.get(self.DATE_KEY, '')) + return str(self._hdoc.content.get(self.DATE_KEY, '')) # # IMessagePart @@ -197,25 +387,38 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: StringIO """ fd = StringIO.StringIO() + bdoc = self._bdoc + if bdoc: + body = self._bdoc.content.get(self.BODY_KEY, "") + else: + body = "" - cdoc = self._cdoc - content = cdoc.content.get(self.RAW_KEY, '') - charset = get_email_charset( - unicode(cdoc.content.get(self.RAW_KEY, ''))) + charset = self._get_charset(body) try: - content = content.encode(charset) + body = body.encode(charset) except (UnicodeEncodeError, UnicodeDecodeError) as e: logger.error("Unicode error {0}".format(e)) - content = content.encode(charset, 'replace') - - raw = self._get_raw_msg() - msg = self._get_parsed_msg(raw) - body = msg.get_payload() + body = body.encode(charset, 'replace') fd.write(body) - # XXX SHOULD use a separate BODY FIELD ... fd.seek(0) return fd + @memoized_method + def _get_charset(self, stuff): + """ + Gets (guesses?) the charset of a payload. + + :param stuff: the stuff to guess about. + :type stuff: basestring + :returns: charset + """ + # XXX existential doubt 1. wouldn't be smarter to + # peek into the mail headers? + # XXX existential doubt 2. shouldn't we make the scope + # of the decorator somewhat more persistent? + # ah! yes! and put memory bounds. 
+ return get_email_charset(unicode(stuff)) + def getSize(self): """ Return the total size, in octets, of this message. @@ -223,19 +426,17 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: size of the message, in octets :rtype: int """ - size = self._cdoc.content.get(self.SIZE_KEY, False) + size = None + if self._fdoc: + size = self._fdoc.content.get(self.SIZE_KEY, False) + else: + logger.warning("No FLAGS doc for %s:%s" % (self._mbox, + self._uid)) if not size: # XXX fallback, should remove when all migrated. size = self.getBodyFile().len return size - def _get_headers(self): - """ - Return the headers dict stored in this message document. - """ - # XXX get from the headers doc - return self._cdoc.content.get(self.HEADERS_KEY, {}) - def getHeaders(self, negate, *names): """ Retrieve a group of message headers. @@ -252,26 +453,49 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: dict """ headers = self._get_headers() + if not headers: + return {'content-type': ''} names = map(lambda s: s.upper(), names) if negate: cond = lambda key: key.upper() not in names else: cond = lambda key: key.upper() in names + head = copy.deepcopy(dict(headers.items())) + + # twisted imap server expects headers to be lowercase + head = dict( + map(str, (key, value)) if key.lower() != "content-type" + else map(str, (key.lower(), value)) + for (key, value) in head.items()) + # unpack and filter original dict by negate-condition - filter_by_cond = [ - map(str, (key, val)) for - key, val in headers.items() - if cond(key)] + filter_by_cond = [(key, val) for key, val in head.items() if cond(key)] return dict(filter_by_cond) + def _get_headers(self): + """ + Return the headers dict for this message. + """ + if self._hdoc is not None: + return self._hdoc.content.get(self.HEADERS_KEY, {}) + else: + logger.warning( + "No HEADERS doc for msg %s:%s" % ( + self._mbox, + self._uid)) + def isMultipart(self): """ Return True if this message is multipart. 
""" - if self._cdoc: - retval = self._fdoc.content.get(self.MULTIPART_KEY, False) - return retval + if self._fdoc: + return self._fdoc.content.get(self.MULTIPART_KEY, False) + else: + logger.warning( + "No FLAGS doc for msg %s:%s" % ( + self.mbox, + self.uid)) def getSubPart(self, part): """ @@ -284,12 +508,22 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: Any object implementing C{IMessagePart}. :return: The specified sub-part. """ + logger.debug("Getting subpart: %s" % part) if not self.isMultipart(): raise TypeError - msg = self._get_parsed_msg() - # XXX should wrap IMessagePart - return msg.get_payload()[part] + if part == 0: + # Let's get the first part, which + # is really the body. + return MessageBody(self._fdoc, self._bdoc) + + attach_doc = self._get_attachment_doc(part) + if not attach_doc: + # so long and thanks for all the fish + logger.debug("...not today") + raise IndexError + msg_part = self._get_parsed_msg(attach_doc.content[self.RAW_KEY]) + return MessageAttachment(msg_part) # # accessors @@ -301,32 +535,87 @@ class LeapMessage(fields, MailParser, MBoxParser): message. """ flag_docs = self._soledad.get_from_index( - SoledadBackedAccount.TYPE_MBOX_UID_IDX, + fields.TYPE_MBOX_UID_IDX, fields.TYPE_FLAGS_VAL, self._mbox, str(self._uid)) - flag_doc = flag_docs[0] if flag_docs else None - return flag_doc + return first(flag_docs) - def _get_content_doc(self): + def _get_headers_doc(self): """ - Return the document that keeps the flags for this + Return the document that keeps the headers for this + message. + """ + head_docs = self._soledad.get_from_index( + fields.TYPE_C_HASH_IDX, + fields.TYPE_HEADERS_VAL, str(self._chash)) + return first(head_docs) + + def _get_body_doc(self): + """ + Return the document that keeps the body for this message. 
""" - cont_docs = self._soledad.get_from_index( - SoledadBackedAccount.TYPE_HASH_IDX, - fields.TYPE_MESSAGE_VAL, self._content_hash, str(self._uid)) - cont_doc = cont_docs[0] if cont_docs else None - return cont_doc + body_docs = self._soledad.get_from_index( + fields.TYPE_C_HASH_IDX, + fields.TYPE_MESSAGE_VAL, str(self._chash)) + return first(body_docs) + + def _get_num_parts(self): + """ + Return the number of parts for a multipart message. + """ + if not self.isMultipart(): + raise TypeError( + "Tried to get num parts in a non-multipart message") + if not self._hdoc: + return None + return self._hdoc.content.get(fields.NUM_PARTS_KEY, 2) + + def _get_attachment_doc(self, part): + """ + Return the document that keeps the headers for this + message. + + :param part: the part number for the multipart message. + :type part: int + """ + if not self._hdoc: + return None + try: + phash = self._hdoc.content[self.PARTS_MAP_KEY][str(part)] + except KeyError: + # this is the remnant of a debug session until + # I found that the index is actually a string... + # It should be safe to just raise the KeyError now, + # but leaving it here while the blood is fresh... + logger.warning("We expected a phash in the " + "index %s, but noone found" % (part, )) + logger.debug(self._hdoc.content[self.PARTS_MAP_KEY]) + return None + attach_docs = self._soledad.get_from_index( + fields.TYPE_P_HASH_IDX, + fields.TYPE_ATTACHMENT_VAL, str(phash)) + + # The following is true for the fist owner. + # We could use this relationship to flag the "owner" + # and orphan when we delete it. + + #attach_docs = self._soledad.get_from_index( + #fields.TYPE_C_HASH_PART_IDX, + #fields.TYPE_ATTACHMENT_VAL, str(self._chash), str(part)) + return first(attach_docs) def _get_raw_msg(self): """ Return the raw msg. :rtype: basestring """ - return self._cdoc.content.get(self.RAW_KEY, '') + # TODO deprecate this. 
+ return self._bdoc.content.get(self.RAW_KEY, '') def __getitem__(self, key): """ - Return the content of the message document. + Return an item from the content of the flags document, + for convenience. :param key: The key :type key: str @@ -334,14 +623,73 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: The content value indexed by C{key} or None :rtype: str """ - return self._cdoc.content.get(key, None) + return self._fdoc.content.get(key, None) + + # setters + + # XXX to be used in the messagecopier interface?! + + def set_uid(self, uid): + """ + Set new uid for this message. + + :param uid: the new uid + :type uid: basestring + """ + # XXX dangerous! lock? + self._uid = uid + d = self._fdoc + d.content[self.UID_KEY] = uid + self._soledad.put_doc(d) + + def set_mbox(self, mbox): + """ + Set new mbox for this message. + + :param mbox: the new mbox + :type mbox: basestring + """ + # XXX dangerous! lock? + self._mbox = mbox + d = self._fdoc + d.content[self.MBOX_KEY] = mbox + self._soledad.put_doc(d) + + # destructor + + @deferred + def remove(self): + """ + Remove all docs associated with this message. + """ + # XXX this would ve more efficient if we can just pass + # a sequence of uids. + + # XXX For the moment we are only removing the flags and headers + # docs. The rest we leave there polluting your hard disk, + # until we think about a good way of deorphaning. + # Maybe a crawler of unreferenced docs. + + fd = self._get_flags_doc() + hd = self._get_headers_doc() + #bd = self._get_body_doc() + #docs = [fd, hd, bd] + + docs = [fd, hd] + + #for pn in range(self._get_num_parts()[1:]): + #ad = self._get_attachment_doc(pn) + #docs.append(ad) + + for d in filter(None, docs): + self._soledad.delete_doc(d) def does_exist(self): """ - Return True if there is actually a message for this + Return True if there is actually a flags message for this UID and mbox. 
""" - return bool(self._fdoc) + return self._fdoc is not None SoledadWriterPayload = namedtuple( @@ -349,6 +697,8 @@ SoledadWriterPayload = namedtuple( SoledadWriterPayload.CREATE = 1 SoledadWriterPayload.PUT = 2 +SoledadWriterPayload.BODY_CREATE = 3 +SoledadWriterPayload.ATTACHMENT_CREATE = 4 class SoledadDocWriter(object): @@ -378,20 +728,98 @@ class SoledadDocWriter(object): empty = queue.empty() while not empty: item = queue.get() + call = None + payload = item.payload + if item.mode == SoledadWriterPayload.CREATE: call = self._soledad.create_doc + elif item.mode == SoledadWriterPayload.BODY_CREATE: + if not self._body_does_exist(payload): + call = self._soledad.create_doc + elif item.mode == SoledadWriterPayload.ATTACHMENT_CREATE: + if not self._attachment_does_exist(payload): + call = self._soledad.create_doc elif item.mode == SoledadWriterPayload.PUT: call = self._soledad.put_doc - # should handle errors - try: - call(item.payload) - except u1db_errors.RevisionConflict as exc: - logger.error("Error: %r" % (exc,)) - raise exc + # XXX delete? + + if call: + # should handle errors + try: + call(item.payload) + except u1db_errors.RevisionConflict as exc: + logger.error("Error: %r" % (exc,)) + raise exc empty = queue.empty() + """ + Message deduplication. + + We do a query for the content hashes before writing to our beloved + slcipher backend of Soledad. This means, by now, that: + + 1. We will not store the same attachment twice, only the hash of it. + 2. We will not store the same message body twice, only the hash of it. + + The first case is useful if you are always receiving the same old memes + from unwary friends that still have not discovered that 4chan is the + generator of the internet. The second will save your day if you have + initiated session with the same account in two different machines. 
I also + wonder why would you do that, but let's respect each other choices, like + with the religious celebrations, and assume that one day we'll be able + to run Bitmask in completely free phones. Yes, I mean that, the whole GSM + Stack. + """ + + def _body_does_exist(self, doc): + """ + Check whether we already have a body payload with this hash in our + database. + + :param doc: tentative body document + :type doc: dict + :returns: True if that happens, False otherwise. + """ + if not doc: + return False + chash = doc[fields.CONTENT_HASH_KEY] + body_docs = self._soledad.get_from_index( + fields.TYPE_C_HASH_IDX, + fields.TYPE_MESSAGE_VAL, str(chash)) + if not body_docs: + return False + if len(body_docs) != 1: + logger.warning("Found more than one copy of chash %s!" + % (chash,)) + logger.debug("Found body doc with that hash! Skipping save!") + return True + + def _attachment_does_exist(self, doc): + """ + Check whether we already have an attachment payload with this hash + in our database. + + :param doc: tentative body document + :type doc: dict + :returns: True if that happens, False otherwise. + """ + if not doc: + return False + phash = doc[fields.PAYLOAD_HASH_KEY] + attach_docs = self._soledad.get_from_index( + fields.TYPE_P_HASH_IDX, + fields.TYPE_ATTACHMENT_VAL, str(phash)) + if not attach_docs: + return False + + if len(attach_docs) != 1: + logger.warning("Found more than one copy of phash %s!" + % (phash,)) + logger.debug("Found attachment doc with that hash! Skipping save!") + return True + class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): """ @@ -402,35 +830,62 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): database. 
""" # XXX this should be able to produce a MessageSet methinks - - EMPTY_MSG = { - fields.TYPE_KEY: fields.TYPE_MESSAGE_VAL, - fields.UID_KEY: 1, - fields.MBOX_KEY: fields.INBOX_VAL, - - fields.SUBJECT_KEY: "", - fields.DATE_KEY: "", - fields.RAW_KEY: "", - - # XXX should separate headers into another doc - fields.HEADERS_KEY: {}, + # could validate these kinds of objects turning them + # into a template for the class. + FLAGS_DOC = "FLAGS" + HEADERS_DOC = "HEADERS" + ATTACHMENT_DOC = "ATTACHMENT" + BODY_DOC = "BODY" + + templates = { + + FLAGS_DOC: { + fields.TYPE_KEY: fields.TYPE_FLAGS_VAL, + fields.UID_KEY: 1, + fields.MBOX_KEY: fields.INBOX_VAL, + + fields.SEEN_KEY: False, + fields.RECENT_KEY: True, + fields.FLAGS_KEY: [], + fields.MULTIPART_KEY: False, + fields.SIZE_KEY: 0 + }, + + HEADERS_DOC: { + fields.TYPE_KEY: fields.TYPE_HEADERS_VAL, + fields.CONTENT_HASH_KEY: "", + + fields.HEADERS_KEY: {}, + fields.NUM_PARTS_KEY: 0, + fields.PARTS_MAP_KEY: {}, + fields.DATE_KEY: "", + fields.SUBJECT_KEY: "" + }, + + ATTACHMENT_DOC: { + fields.TYPE_KEY: fields.TYPE_ATTACHMENT_VAL, + fields.PART_NUMBER_KEY: 0, + fields.CONTENT_HASH_KEY: "", + fields.PAYLOAD_HASH_KEY: "", + + fields.RAW_KEY: "" + }, + + BODY_DOC: { + fields.TYPE_KEY: fields.TYPE_MESSAGE_VAL, + fields.CONTENT_HASH_KEY: "", + + fields.BODY_KEY: "", + + # this should not be needed, + # but let's keep the raw msg for some time + # until we are sure we can reconstruct + # the original msg from our disection. + fields.RAW_KEY: "", + + } } - EMPTY_FLAGS = { - fields.TYPE_KEY: fields.TYPE_FLAGS_VAL, - fields.UID_KEY: 1, - fields.MBOX_KEY: fields.INBOX_VAL, - - fields.FLAGS_KEY: [], - fields.SEEN_KEY: False, - fields.RECENT_KEY: True, - fields.MULTIPART_KEY: False, - } - - # get from SoledadBackedAccount the needed index-related constants - INDEXES = SoledadBackedAccount.INDEXES - TYPE_IDX = SoledadBackedAccount.TYPE_IDX - def __init__(self, mbox=None, soledad=None): """ Constructor for MessageCollection. 
@@ -465,23 +920,16 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): SoledadDocWriter(soledad), period=0.05) - def _get_empty_msg(self): + def _get_empty_doc(self, _type=FLAGS_DOC): """ - Returns an empty message. - - :return: a dict containing a default empty message + Returns an empty doc for storing different message parts. + Defaults to returning a template for a flags document. + :return: a dict with the template :rtype: dict """ - return copy.deepcopy(self.EMPTY_MSG) - - def _get_empty_flags_doc(self): - """ - Returns an empty doc for storing flags. - - :return: - :rtype: - """ - return copy.deepcopy(self.EMPTY_FLAGS) + if not _type in self.templates.keys(): + raise TypeError("Improper type passed to _get_empty_doc") + return copy.deepcopy(self.templates[_type]) @deferred def add_msg(self, raw, subject=None, flags=None, date=None, uid=1): @@ -509,52 +957,107 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): flags = tuple() leap_assert_type(flags, tuple) - content_doc = self._get_empty_msg() - flags_doc = self._get_empty_flags_doc() - - content_doc[self.MBOX_KEY] = self.mbox - flags_doc[self.MBOX_KEY] = self.mbox - # ...should get a sanity check here. - content_doc[self.UID_KEY] = uid - flags_doc[self.UID_KEY] = uid - - if flags: - flags_doc[self.FLAGS_KEY] = map(self._stringify, flags) - flags_doc[self.SEEN_KEY] = self.SEEN_FLAG in flags + # docs for flags, headers, and body + fd, hd, bd = map( + lambda t: self._get_empty_doc(t), + (self.FLAGS_DOC, self.HEADERS_DOC, self.BODY_DOC)) msg = self._get_parsed_msg(raw) headers = dict(msg) - - logger.debug("adding. is multipart:%s" % msg.is_multipart()) - flags_doc[self.MULTIPART_KEY] = msg.is_multipart() - # XXX get lower case for keys? 
- # XXX get headers doc - content_doc[self.HEADERS_KEY] = headers - # set subject based on message headers and eventually replace by - # subject given as param - if self.SUBJECT_FIELD in headers: - content_doc[self.SUBJECT_KEY] = headers[self.SUBJECT_FIELD] - if subject is not None: - content_doc[self.SUBJECT_KEY] = subject - - # XXX could separate body into its own doc - # but should also separate multiparts - # that should be wrapped in MessagePart - content_doc[self.RAW_KEY] = self._stringify(raw) - content_doc[self.SIZE_KEY] = len(raw) - + raw_str = msg.as_string() + chash = self._get_hash(msg) + multi = msg.is_multipart() + + attaches = [] + inner_parts = [] + + if multi: + # XXX should walk down recursively + # in a better way. but fixing this quick + # to have an rc. + # XXX should pick the content-type in txt + body = first(msg.get_payload()).get_payload() + if isinstance(body, list): + # allowing one nesting level for now... + body, rest = body[0].get_payload(), body[1:] + for p in rest: + inner_parts.append(p) + else: + body = msg.get_payload() + logger.debug("adding msg (multipart:%s)" % multi) + + # flags doc --------------------------------------- + fd[self.MBOX_KEY] = self.mbox + fd[self.UID_KEY] = uid + fd[self.CONTENT_HASH_KEY] = chash + fd[self.MULTIPART_KEY] = multi + fd[self.SIZE_KEY] = len(raw_str) + if flags: + fd[self.FLAGS_KEY] = map(self._stringify, flags) + fd[self.SEEN_KEY] = self.SEEN_FLAG in flags + fd[self.RECENT_KEY] = self.RECENT_FLAG in flags + + # headers doc ---------------------------------------- + hd[self.CONTENT_HASH_KEY] = chash + hd[self.HEADERS_KEY] = headers + if not subject and self.SUBJECT_FIELD in headers: + hd[self.SUBJECT_KEY] = headers[self.SUBJECT_FIELD] + else: + hd[self.SUBJECT_KEY] = subject if not date and self.DATE_FIELD in headers: - content_doc[self.DATE_KEY] = headers[self.DATE_FIELD] + hd[self.DATE_KEY] = headers[self.DATE_FIELD] else: - content_doc[self.DATE_KEY] = date - - logger.debug('enqueuing message 
for write') - + hd[self.DATE_KEY] = date + if multi: + hd[self.NUM_PARTS_KEY] = len(msg.get_payload()) + + # body doc + bd[self.CONTENT_HASH_KEY] = chash + bd[self.BODY_KEY] = body + # in an ideal world, we would not need to save a copy of the + # raw message. But we'll keep it until we can be sure that + # we can rebuild the original message from the parts. + bd[self.RAW_KEY] = raw_str + + docs = [fd, hd] + + # attachment docs + if multi: + outer_parts = msg.get_payload() + parts = outer_parts + inner_parts + + # skip first part, we already got it in body + to_attach = ((i, m) for i, m in enumerate(parts) if i > 0) + for index, part_msg in to_attach: + att_doc = self._get_empty_doc(self.ATTACHMENT_DOC) + att_doc[self.PART_NUMBER_KEY] = index + att_doc[self.CONTENT_HASH_KEY] = chash + phash = self._get_hash(part_msg) + att_doc[self.PAYLOAD_HASH_KEY] = phash + att_doc[self.RAW_KEY] = part_msg.as_string() + + # keep a pointer to the payload hash in the + # headers doc, under the parts_map + hd[self.PARTS_MAP_KEY][str(index)] = phash + attaches.append(att_doc) + + # Saving ... ------------------------------- + # ok, there we go... + logger.debug('enqueuing message docs for write') ptuple = SoledadWriterPayload + + # first, regular docs: flags and headers + for doc in docs: + self.soledad_writer.put(ptuple( + mode=ptuple.CREATE, payload=doc)) + # second, try to create body doc. self.soledad_writer.put(ptuple( - mode=ptuple.CREATE, payload=content_doc)) - self.soledad_writer.put(ptuple( - mode=ptuple.CREATE, payload=flags_doc)) + mode=ptuple.BODY_CREATE, payload=bd)) + # and last, but not least, try to create + # attachment docs if not already there. 
+ for at in attaches: + self.soledad_writer.put(ptuple( + mode=ptuple.ATTACHMENT_CREATE, payload=at)) def remove(self, msg): """ @@ -563,8 +1066,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :param msg: a Leapmessage instance :type msg: LeapMessage """ - # XXX remove - #self._soledad.delete_doc(msg) msg.remove() # getters @@ -596,14 +1097,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :rtype: list of SoledadDocument """ if _type not in fields.__dict__.values(): - raise TypeError("Wrong type passed to get_all") + raise TypeError("Wrong type passed to get_all_docs") if sameProxiedObjects(self._soledad, None): logger.warning('Tried to get messages but soledad is None!') return [] all_docs = [doc for doc in self._soledad.get_from_index( - SoledadBackedAccount.TYPE_MBOX_IDX, + fields.TYPE_MBOX_IDX, _type, self.mbox)] # inneficient, but first let's grok it and then @@ -618,8 +1119,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): """ all_uids = (doc.content[self.UID_KEY] for doc in self._soledad.get_from_index( - SoledadBackedAccount.TYPE_MBOX_IDX, - self.TYPE_FLAGS_VAL, self.mbox)) + fields.TYPE_MBOX_IDX, + fields.TYPE_FLAGS_VAL, self.mbox)) return (u for u in sorted(all_uids)) def count(self): @@ -629,7 +1130,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :rtype: int """ count = self._soledad.get_count_from_index( - SoledadBackedAccount.TYPE_MBOX_IDX, + fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox) return count @@ -645,8 +1146,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): """ return (doc.content[self.UID_KEY] for doc in self._soledad.get_from_index( - SoledadBackedAccount.TYPE_MBOX_SEEN_IDX, - self.TYPE_FLAGS_VAL, self.mbox, '0')) + fields.TYPE_MBOX_SEEN_IDX, + fields.TYPE_FLAGS_VAL, self.mbox, '0')) def count_unseen(self): """ @@ -656,8 +1157,8 @@ class MessageCollection(WithMsgFields, IndexedDB, 
MailParser, MBoxParser): :rtype: int """ count = self._soledad.get_count_from_index( - SoledadBackedAccount.TYPE_MBOX_SEEN_IDX, - self.TYPE_FLAGS_VAL, self.mbox, '0') + fields.TYPE_MBOX_SEEN_IDX, + fields.TYPE_FLAGS_VAL, self.mbox, '0') return count def get_unseen(self): @@ -681,8 +1182,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): """ return (doc.content[self.UID_KEY] for doc in self._soledad.get_from_index( - SoledadBackedAccount.TYPE_MBOX_RECT_IDX, - self.TYPE_FLAGS_VAL, self.mbox, '1')) + fields.TYPE_MBOX_RECT_IDX, + fields.TYPE_FLAGS_VAL, self.mbox, '1')) def get_recent(self): """ @@ -702,8 +1203,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :rtype: int """ count = self._soledad.get_count_from_index( - SoledadBackedAccount.TYPE_MBOX_RECT_IDX, - self.TYPE_FLAGS_VAL, self.mbox, '1') + fields.TYPE_MBOX_RECT_IDX, + fields.TYPE_FLAGS_VAL, self.mbox, '1') return count def __len__(self): @@ -731,5 +1232,5 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): return u"" % ( self.mbox, self.count()) - # XXX should implement __eq__ also !!! --- use a hash - # of content for that, will be used for dedup. + # XXX should implement __eq__ also !!! + # --- use the content hash for that, will be used for dedup. -- cgit v1.2.3 From a912729c4788d46d648a72126226741b63e0a37c Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 2 Jan 2014 17:14:03 -0400 Subject: add documentation to the decorator, fix errorback. 
* it also fixes the traceback in the errorback, thanks to chiiph, who reads documentation instead of whining :D * other minor documentation corrections --- src/leap/mail/imap/messages.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index c69c023..47c40d5 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -695,6 +695,9 @@ class LeapMessage(fields, MailParser, MBoxParser): SoledadWriterPayload = namedtuple( 'SoledadWriterPayload', ['mode', 'payload']) +# TODO we could consider using enum here: +# https://pypi.python.org/pypi/enum + SoledadWriterPayload.CREATE = 1 SoledadWriterPayload.PUT = 2 SoledadWriterPayload.BODY_CREATE = 3 @@ -758,7 +761,7 @@ class SoledadDocWriter(object): Message deduplication. We do a query for the content hashes before writing to our beloved - slcipher backend of Soledad. This means, by now, that: + sqlcipher backend of Soledad. This means, by now, that: 1. We will not store the same attachment twice, only the hash of it. 2. We will not store the same message body twice, only the hash of it. -- cgit v1.2.3 From 5585ff784940dee267576d097076de66797f9188 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 2 Jan 2014 16:08:09 -0400 Subject: fix tests after rewrite --- src/leap/mail/imap/messages.py | 94 +++++++++++++++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 16 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 47c40d5..80411f9 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -20,9 +20,11 @@ LeapMessage and MessageCollection.
import copy import logging import StringIO -from collections import namedtuple + +from collections import defaultdict, namedtuple from twisted.mail import imap4 +from twisted.internet import defer from twisted.python import log from u1db import errors as u1db_errors from zope.interface import implements @@ -182,6 +184,7 @@ class MessageAttachment(object): if not self._msg: return {} headers = dict(self._msg.items()) + names = map(lambda s: s.upper(), names) if negate: cond = lambda key: key.upper() not in names @@ -329,6 +332,7 @@ class LeapMessage(fields, MailParser, MBoxParser): doc.content[self.FLAGS_KEY] = flags doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags doc.content[self.RECENT_KEY] = self.RECENT_FLAG in flags + doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags self._soledad.put_doc(doc) def addFlags(self, flags): @@ -455,6 +459,7 @@ class LeapMessage(fields, MailParser, MBoxParser): headers = self._get_headers() if not headers: return {'content-type': ''} + names = map(lambda s: s.upper(), names) if negate: cond = lambda key: key.upper() not in names @@ -465,8 +470,8 @@ class LeapMessage(fields, MailParser, MBoxParser): # twisted imap server expects headers to be lowercase head = dict( - map(str, (key, value)) if key.lower() != "content-type" - else map(str, (key.lower(), value)) + (str(key), map(str, value)) if key.lower() != "content-type" + else (str(key.lower(), map(str, value))) for (key, value) in head.items()) # unpack and filter original dict by negate-condition @@ -670,6 +675,9 @@ class LeapMessage(fields, MailParser, MBoxParser): # until we think about a good way of deorphaning. # Maybe a crawler of unreferenced docs. 
+ uid = self._uid + print "removing...", uid + fd = self._get_flags_doc() hd = self._get_headers_doc() #bd = self._get_body_doc() @@ -682,7 +690,11 @@ class LeapMessage(fields, MailParser, MBoxParser): #docs.append(ad) for d in filter(None, docs): - self._soledad.delete_doc(d) + try: + self._soledad.delete_doc(d) + except Exception as exc: + logger.error(exc) + return uid def does_exist(self): """ @@ -849,6 +861,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fields.SEEN_KEY: False, fields.RECENT_KEY: True, + fields.DEL_KEY: False, fields.FLAGS_KEY: [], fields.MULTIPART_KEY: False, fields.SIZE_KEY: 0 @@ -921,7 +934,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self.soledad_writer = MessageProducer( SoledadDocWriter(soledad), - period=0.05) + period=0.02) def _get_empty_doc(self, _type=FLAGS_DOC): """ @@ -966,7 +979,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): (self.FLAGS_DOC, self.HEADERS_DOC, self.BODY_DOC)) msg = self._get_parsed_msg(raw) - headers = dict(msg) + headers = defaultdict(list) + for k, v in msg.items(): + headers[k].append(v) raw_str = msg.as_string() chash = self._get_hash(msg) multi = msg.is_multipart() @@ -987,7 +1002,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): inner_parts.append(p) else: body = msg.get_payload() - logger.debug("adding msg (multipart:%s)" % multi) + logger.debug("adding msg with uid %s (multipart:%s)" % ( + uid, multi)) # flags doc --------------------------------------- fd[self.MBOX_KEY] = self.mbox @@ -998,26 +1014,33 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): if flags: fd[self.FLAGS_KEY] = map(self._stringify, flags) fd[self.SEEN_KEY] = self.SEEN_FLAG in flags - fd[self.RECENT_KEY] = self.RECENT_FLAG in flags + fd[self.DEL_KEY] = self.DELETED_FLAG in flags + fd[self.RECENT_KEY] = True # set always by default # headers doc 
---------------------------------------- hd[self.CONTENT_HASH_KEY] = chash hd[self.HEADERS_KEY] = headers + + print "headers" + import pprint + pprint.pprint(headers) + if not subject and self.SUBJECT_FIELD in headers: - hd[self.SUBJECT_KEY] = headers[self.SUBJECT_FIELD] + hd[self.SUBJECT_KEY] = first(headers[self.SUBJECT_FIELD]) else: hd[self.SUBJECT_KEY] = subject if not date and self.DATE_FIELD in headers: - hd[self.DATE_KEY] = headers[self.DATE_FIELD] + hd[self.DATE_KEY] = first(headers[self.DATE_FIELD]) else: hd[self.DATE_KEY] = date if multi: + # XXX fix for multipart nested case hd[self.NUM_PARTS_KEY] = len(msg.get_payload()) # body doc bd[self.CONTENT_HASH_KEY] = chash bd[self.BODY_KEY] = body - # in an ideal world, we would not need to save a copy of the + # XXX in an ideal world, we would not need to save a copy of the # raw message. But we'll keep it until we can be sure that # we can rebuild the original message from the parts. bd[self.RAW_KEY] = raw_str @@ -1062,14 +1085,29 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self.soledad_writer.put(ptuple( mode=ptuple.ATTACHMENT_CREATE, payload=at)) - def remove(self, msg): + def _remove_cb(self, result): + return result + + def remove_all_deleted(self): + """ + Removes all messages flagged as deleted. """ - Removes a message. + delete_deferl = [] + for msg in self.get_deleted(): + delete_deferl.append(msg.remove()) + d1 = defer.gatherResults(delete_deferl, consumeErrors=True) + d1.addCallback(self._remove_cb) + return d1 - :param msg: a Leapmessage instance + def remove(self, msg): + """ + Remove a given msg. + :param msg: the message to be removed :type msg: LeapMessage """ - msg.remove() + d = msg.remove() + d.addCallback(self._remove_cb) + return d # getters @@ -1178,7 +1216,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): def recent_iter(self): """ - Get an iterator for the message docs with `recent` flag. 
+ Get an iterator for the message UIDs with `recent` flag. :return: iterator through recent message docs :rtype: iterable @@ -1210,6 +1248,30 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fields.TYPE_FLAGS_VAL, self.mbox, '1') return count + # deleted messages + + def deleted_iter(self): + """ + Get an iterator for the message UIDs with `deleted` flag. + + :return: iterator through deleted message docs + :rtype: iterable + """ + return (doc.content[self.UID_KEY] for doc in + self._soledad.get_from_index( + fields.TYPE_MBOX_DEL_IDX, + fields.TYPE_FLAGS_VAL, self.mbox, '1')) + + def get_deleted(self): + """ + Get all messages with the `Deleted` flag. + + :returns: a generator of LeapMessages + :rtype: generator + """ + return (LeapMessage(self._soledad, docid, self.mbox) + for docid in self.deleted_iter()) + def __len__(self): """ Returns the number of messages on this mailbox. -- cgit v1.2.3 From a203337d155a6e7186980ef175642adc91d472fe Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 7 Jan 2014 14:23:25 -0400 Subject: move utility to its own --- src/leap/mail/imap/messages.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 80411f9..bfe913c 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -33,6 +33,7 @@ from zope.proxy import sameProxiedObjects from leap.common.check import leap_assert, leap_assert_type from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset +from leap.mail.utils import first from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields @@ -42,16 +43,6 @@ from leap.mail.messageflow import IMessageConsumer, MessageProducer logger = logging.getLogger(__name__) -def first(things): - """ - Return the head of a 
collection. - """ - try: - return things[0] - except (IndexError, TypeError): - return None - - class MessageBody(object): """ IMessagePart implementor for the main -- cgit v1.2.3 From 4ba5d5b405e3c6a6bc997df2073ffc8ea3fa75a9 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 7 Jan 2014 11:34:08 -0400 Subject: Second stage of the new year's storage rewrite. * documents of only three types: * flags * headers * content * add algorithm for walking the parsed message tree. * treat special cases like a multipart with a single part. * modify add_msg to use the walk routine * modify twisted interfaces to use the new storage schema. * tests for different multipart cases * fix multipart detection typo in the fetch This is a merge proposal for the 0.5.0-rc3. known bugs ---------- Some things are still known not to work well at this point (some cases of multipart messages do not display the bodies). IMAP server also is left in a bad internal state after a logout/login. --- src/leap/mail/imap/messages.py | 722 ++++++++++++++++++++++------------------- 1 file changed, 388 insertions(+), 334 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index bfe913c..37e4311 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -33,6 +33,7 @@ from zope.proxy import sameProxiedObjects from leap.common.check import leap_assert, leap_assert_type from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset +from leap.mail import walk from leap.mail.utils import first from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB @@ -43,65 +44,58 @@ from leap.mail.messageflow import IMessageConsumer, MessageProducer logger = logging.getLogger(__name__) -class MessageBody(object): - """ - IMessagePart implementor for the main - body of a multipart message. - - Excusatio non petita: see the interface documentation.
- """ +# TODO ------------------------------------------------------------ - implements(imap4.IMessagePart) - - def __init__(self, fdoc, bdoc): - self._fdoc = fdoc - self._bdoc = bdoc - - def getSize(self): - return len(self._bdoc.content[fields.BODY_KEY]) +# [ ] Add linked-from info. +# [ ] Delete incoming mail only after successful write! +# [ ] Remove UID from syncable db. Store only those indexes locally. +# [ ] Send patch to twisted for bug in imap4.py:5717 (content-type can be +# none? lower-case?) - def getBodyFile(self): - fd = StringIO.StringIO() - - if self._bdoc: - body = self._bdoc.content[fields.BODY_KEY] - else: - body = "" - charset = self._get_charset(body) - try: - body = body.encode(charset) - except (UnicodeEncodeError, UnicodeDecodeError) as e: - logger.error("Unicode error {0}".format(e)) - body = body.encode(charset, 'replace') - fd.write(body) - fd.seek(0) - return fd - - @memoized_method - def _get_charset(self, stuff): - return get_email_charset(unicode(stuff)) - - def getHeaders(self, negate, *names): - return {} +def lowerdict(_dict): + """ + Return a dict with the keys in lowercase. - def isMultipart(self): - return False + :param _dict: the dict to convert + :rtype: dict + """ + return dict((key.lower(), value) + for key, value in _dict.items()) - def getSubPart(self, part): - return None +class MessagePart(object): + """ + IMessagePart implementor. + It takes a subpart message and is able to find + the inner parts. -class MessageAttachment(object): + Excusatio non petita: see the interface documentation. + """ implements(imap4.IMessagePart) - def __init__(self, msg): + def __init__(self, soledad, part_map): """ - Initializes the messagepart with a Message instance. - :param msg: a message instance - :type msg: Message + Initializes the MessagePart. 
+ + :param part_map: a dictionary containing the parts map for this + message + :type part_map: dict """ - self._msg = msg + # TODO + # It would be good to pass the uid/mailbox also + # for references while debugging. + + # We have a problem on bulk moves, and is + # that when the fetch on the new mailbox is done + # the parts maybe are not complete. + # So we should be able to fail with empty + # docs until we solve that. The ideal would be + # to gather the results of the deferred operations + # to signal the operation is complete. + #leap_assert(part_map, "part map dict cannot be null") + self._soledad = soledad + self._pmap = part_map def getSize(self): """ @@ -110,9 +104,12 @@ class MessageAttachment(object): :return: size of the message, in octets :rtype: int """ - if not self._msg: + if not self._pmap: return 0 - return len(self._msg.as_string()) + size = self._pmap.get('size', None) + if not size: + logger.error("Message part cannot find size in the partmap") + return size def getBodyFile(self): """ @@ -122,24 +119,91 @@ class MessageAttachment(object): :rtype: StringIO """ fd = StringIO.StringIO() - if self._msg: - body = self._msg.get_payload() + if self._pmap: + multi = self._pmap.get('multi') + if not multi: + phash = self._pmap.get("phash", None) + else: + pmap = self._pmap.get('part_map') + first_part = pmap.get('1', None) + if first_part: + phash = first_part['phash'] + + if not phash: + logger.warning("Could not find phash for this subpart!") + payload = str("") + else: + payload = self._get_payload_from_document(phash) + else: - logger.debug("Empty message!") - body = "" - - # XXX should only do the dance if we're sure it's - # content/text-plain!!! 
- #charset = self._get_charset(body) - #try: - #body = body.encode(charset) - #except (UnicodeEncodeError, UnicodeDecodeError) as e: - #logger.error("Unicode error {0}".format(e)) - #body = body.encode(charset, 'replace') - fd.write(body) + logger.warning("Message with no part_map!") + payload = str("") + + if payload: + #headers = self.getHeaders(True) + #headers = lowerdict(headers) + #content_type = headers.get('content-type', "") + content_type = self._get_ctype_from_document(phash) + charset_split = content_type.split('charset=') + # XXX fuck all this, use a regex! + if len(charset_split) > 1: + charset = charset_split[1] + if charset: + charset = charset.strip() + else: + charset = None + if not charset: + charset = self._get_charset(payload) + try: + payload = payload.encode(charset) + except (UnicodeEncodeError, UnicodeDecodeError) as e: + logger.error("Unicode error {0}".format(e)) + payload = payload.encode(charset, 'replace') + + fd.write(payload) fd.seek(0) return fd + # TODO cache the phash retrieval + def _get_payload_from_document(self, phash): + """ + Gets the message payload from the content document. + + :param phash: the payload hash to retrieve by. + :type phash: basestring + """ + cdocs = self._soledad.get_from_index( + fields.TYPE_P_HASH_IDX, + fields.TYPE_CONTENT_VAL, str(phash)) + + cdoc = first(cdocs) + if not cdoc: + logger.warning( + "Could not find the content doc " + "for phash %s" % (phash,)) + payload = cdoc.content.get(fields.RAW_KEY, "") + return payload + + # TODO cache the pahash retrieval + def _get_ctype_from_document(self, phash): + """ + Gets the content-type from the content document. + + :param phash: the payload hash to retrieve by. 
+ :type phash: basestring + """ + cdocs = self._soledad.get_from_index( + fields.TYPE_P_HASH_IDX, + fields.TYPE_CONTENT_VAL, str(phash)) + + cdoc = first(cdocs) + if not cdoc: + logger.warning( + "Could not find the content doc " + "for phash %s" % (phash,)) + ctype = cdoc.content.get('ctype', "") + return ctype + @memoized_method def _get_charset(self, stuff): # TODO put in a common class with LeapMessage @@ -150,8 +214,6 @@ class MessageAttachment(object): :type stuff: basestring :returns: charset """ - # XXX existential doubt 1. wouldn't be smarter to - # peek into the mail headers? # XXX existential doubt 2. shouldn't we make the scope # of the decorator somewhat more persistent? # ah! yes! and put memory bounds. @@ -172,9 +234,17 @@ class MessageAttachment(object): :return: A mapping of header field names to header field values :rtype: dict """ - if not self._msg: + if not self._pmap: + logger.warning("No pmap in Subpart!") return {} - headers = dict(self._msg.items()) + headers = dict(self._pmap.get("headers", [])) + + # twisted imap server expects *some* headers to be lowercase + # We could use a CaseInsensitiveDict here... + headers = dict( + (str(key), str(value)) if key.lower() != "content-type" + else (str(key.lower()), str(value)) + for (key, value) in headers.items()) names = map(lambda s: s.upper(), names) if negate: @@ -187,13 +257,18 @@ class MessageAttachment(object): map(str, (key, val)) for key, val in headers.items() if cond(key)] - return dict(filter_by_cond) + filtered = dict(filter_by_cond) + return filtered def isMultipart(self): """ Return True if this message is multipart. """ - return self._msg.is_multipart() + if not self._pmap: + logger.warning("Could not get part map!") + return False + multi = self._pmap.get("multi", False) + return multi def getSubPart(self, part): """ @@ -206,10 +281,30 @@ class MessageAttachment(object): :rtype: Any object implementing C{IMessagePart}. :return: The specified sub-part. 
""" - return self._msg.get_payload() + if not self.isMultipart(): + raise TypeError + sub_pmap = self._pmap.get("part_map", {}) + try: + part_map = sub_pmap[str(part + 1)] + except KeyError: + logger.debug("getSubpart for %s: KeyError" % (part,)) + raise IndexError + + # XXX check for validity + return MessagePart(self._soledad, part_map) class LeapMessage(fields, MailParser, MBoxParser): + """ + The main representation of a message. + + It indexes the messages in one mailbox by a combination + of uid+mailbox name. + """ + + # TODO this has to change. + # Should index primarily by chash, and keep a local-lonly + # UID table. implements(imap4.IMessage) @@ -268,6 +363,8 @@ class LeapMessage(fields, MailParser, MBoxParser): """ An accessor to the body document. """ + if not self._hdoc: + return None if not self.__bdoc: self.__bdoc = self._get_body_doc() return self.__bdoc @@ -320,6 +417,11 @@ class LeapMessage(fields, MailParser, MBoxParser): log.msg('setting flags: %s' % (self._uid)) doc = self._fdoc + if not doc: + logger.warning( + "Could not find FDOC for %s:%s while setting flags!" % + (self._mbox, self._uid)) + return doc.content[self.FLAGS_KEY] = flags doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags doc.content[self.RECENT_KEY] = self.RECENT_FLAG in flags @@ -384,16 +486,25 @@ class LeapMessage(fields, MailParser, MBoxParser): fd = StringIO.StringIO() bdoc = self._bdoc if bdoc: - body = self._bdoc.content.get(self.BODY_KEY, "") + body = str(self._bdoc.content.get(self.RAW_KEY, "")) else: - body = "" + logger.warning("No BDOC found for message.") + body = str("") + + # XXX not needed, isn't it? ---- ivan? 
+ #if bdoc: + #content_type = bdoc.content.get('content-type', "") + #charset = content_type.split('charset=')[1] + #if charset: + #charset = charset.strip() + #if not charset: + #charset = self._get_charset(body) + #try: + #body = str(body.encode(charset)) + #except (UnicodeEncodeError, UnicodeDecodeError) as e: + #logger.error("Unicode error {0}".format(e)) + #body = str(body.encode(charset, 'replace')) - charset = self._get_charset(body) - try: - body = body.encode(charset) - except (UnicodeEncodeError, UnicodeDecodeError) as e: - logger.error("Unicode error {0}".format(e)) - body = body.encode(charset, 'replace') fd.write(body) fd.seek(0) return fd @@ -407,8 +518,7 @@ class LeapMessage(fields, MailParser, MBoxParser): :type stuff: basestring :returns: charset """ - # XXX existential doubt 1. wouldn't be smarter to - # peek into the mail headers? + # TODO get from subpart headers # XXX existential doubt 2. shouldn't we make the scope # of the decorator somewhat more persistent? # ah! yes! and put memory bounds. 
@@ -447,9 +557,11 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: A mapping of header field names to header field values :rtype: dict """ + # TODO split in smaller methods headers = self._get_headers() if not headers: - return {'content-type': ''} + logger.warning("No headers found") + return {str('content-type'): str('')} names = map(lambda s: s.upper(), names) if negate: @@ -457,16 +569,20 @@ class LeapMessage(fields, MailParser, MBoxParser): else: cond = lambda key: key.upper() in names - head = copy.deepcopy(dict(headers.items())) + if isinstance(headers, list): + headers = dict(headers) - # twisted imap server expects headers to be lowercase - head = dict( - (str(key), map(str, value)) if key.lower() != "content-type" - else (str(key.lower(), map(str, value))) - for (key, value) in head.items()) + # twisted imap server expects *some* headers to be lowercase + # XXX refactor together with MessagePart method + headers = dict( + (str(key), str(value)) if key.lower() != "content-type" + else (str(key.lower()), str(value)) + for (key, value) in headers.items()) # unpack and filter original dict by negate-condition - filter_by_cond = [(key, val) for key, val in head.items() if cond(key)] + filter_by_cond = [(key, val) for key, val + in headers.items() if cond(key)] + return dict(filter_by_cond) def _get_headers(self): @@ -474,7 +590,9 @@ class LeapMessage(fields, MailParser, MBoxParser): Return the headers dict for this message. """ if self._hdoc is not None: - return self._hdoc.content.get(self.HEADERS_KEY, {}) + headers = self._hdoc.content.get(self.HEADERS_KEY, {}) + return headers + else: logger.warning( "No HEADERS doc for msg %s:%s" % ( @@ -486,12 +604,13 @@ class LeapMessage(fields, MailParser, MBoxParser): Return True if this message is multipart. 
""" if self._fdoc: - return self._fdoc.content.get(self.MULTIPART_KEY, False) + is_multipart = self._fdoc.content.get(self.MULTIPART_KEY, False) + return is_multipart else: logger.warning( "No FLAGS doc for msg %s:%s" % ( - self.mbox, - self.uid)) + self._mbox, + self._uid)) def getSubPart(self, part): """ @@ -504,27 +623,33 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: Any object implementing C{IMessagePart}. :return: The specified sub-part. """ - logger.debug("Getting subpart: %s" % part) if not self.isMultipart(): raise TypeError - - if part == 0: - # Let's get the first part, which - # is really the body. - return MessageBody(self._fdoc, self._bdoc) - - attach_doc = self._get_attachment_doc(part) - if not attach_doc: - # so long and thanks for all the fish - logger.debug("...not today") + try: + pmap_dict = self._get_part_from_parts_map(part + 1) + except KeyError: + logger.debug("getSubpart for %s: KeyError" % (part,)) raise IndexError - msg_part = self._get_parsed_msg(attach_doc.content[self.RAW_KEY]) - return MessageAttachment(msg_part) + return MessagePart(self._soledad, pmap_dict) # # accessors # + def _get_part_from_parts_map(self, part): + """ + Get a part map from the headers doc + + :raises: KeyError if key does not exist + :rtype: dict + """ + if not self._hdoc: + logger.warning("Tried to get part but no HDOC found!") + return None + + pmap = self._hdoc.content.get(fields.PARTS_MAP_KEY, {}) + return pmap[str(part)] + def _get_flags_doc(self): """ Return the document that keeps the flags for this @@ -550,63 +675,16 @@ class LeapMessage(fields, MailParser, MBoxParser): Return the document that keeps the body for this message. """ - body_docs = self._soledad.get_from_index( - fields.TYPE_C_HASH_IDX, - fields.TYPE_MESSAGE_VAL, str(self._chash)) - return first(body_docs) - - def _get_num_parts(self): - """ - Return the number of parts for a multipart message. 
- """ - if not self.isMultipart(): - raise TypeError( - "Tried to get num parts in a non-multipart message") - if not self._hdoc: - return None - return self._hdoc.content.get(fields.NUM_PARTS_KEY, 2) - - def _get_attachment_doc(self, part): - """ - Return the document that keeps the headers for this - message. - - :param part: the part number for the multipart message. - :type part: int - """ - if not self._hdoc: - return None - try: - phash = self._hdoc.content[self.PARTS_MAP_KEY][str(part)] - except KeyError: - # this is the remnant of a debug session until - # I found that the index is actually a string... - # It should be safe to just raise the KeyError now, - # but leaving it here while the blood is fresh... - logger.warning("We expected a phash in the " - "index %s, but noone found" % (part, )) - logger.debug(self._hdoc.content[self.PARTS_MAP_KEY]) + body_phash = self._hdoc.content.get( + fields.BODY_KEY, None) + if not body_phash: + logger.warning("No body phash for this document!") return None - attach_docs = self._soledad.get_from_index( + body_docs = self._soledad.get_from_index( fields.TYPE_P_HASH_IDX, - fields.TYPE_ATTACHMENT_VAL, str(phash)) - - # The following is true for the fist owner. - # We could use this relationship to flag the "owner" - # and orphan when we delete it. + fields.TYPE_CONTENT_VAL, str(body_phash)) - #attach_docs = self._soledad.get_from_index( - #fields.TYPE_C_HASH_PART_IDX, - #fields.TYPE_ATTACHMENT_VAL, str(self._chash), str(part)) - return first(attach_docs) - - def _get_raw_msg(self): - """ - Return the raw msg. - :rtype: basestring - """ - # TODO deprecate this. - return self._bdoc.content.get(self.RAW_KEY, '') + return first(body_docs) def __getitem__(self, key): """ @@ -658,27 +736,22 @@ class LeapMessage(fields, MailParser, MBoxParser): """ Remove all docs associated with this message. """ - # XXX this would ve more efficient if we can just pass - # a sequence of uids. 
- # XXX For the moment we are only removing the flags and headers # docs. The rest we leave there polluting your hard disk, # until we think about a good way of deorphaning. # Maybe a crawler of unreferenced docs. + # XXX implement elijah's idea of using a PUT document as a + # token to ensure consistency in the removal. + uid = self._uid - print "removing...", uid fd = self._get_flags_doc() - hd = self._get_headers_doc() + #hd = self._get_headers_doc() #bd = self._get_body_doc() #docs = [fd, hd, bd] - docs = [fd, hd] - - #for pn in range(self._get_num_parts()[1:]): - #ad = self._get_attachment_doc(pn) - #docs.append(ad) + docs = [fd] for d in filter(None, docs): try: @@ -703,8 +776,7 @@ SoledadWriterPayload = namedtuple( SoledadWriterPayload.CREATE = 1 SoledadWriterPayload.PUT = 2 -SoledadWriterPayload.BODY_CREATE = 3 -SoledadWriterPayload.ATTACHMENT_CREATE = 4 +SoledadWriterPayload.CONTENT_CREATE = 3 class SoledadDocWriter(object): @@ -723,6 +795,38 @@ class SoledadDocWriter(object): """ self._soledad = soledad + def _get_call_for_item(self, item): + """ + Return the proper call type for a given item. + + :param item: one of the types defined under the + attributes of SoledadWriterPayload + :type item: int + """ + call = None + payload = item.payload + + if item.mode == SoledadWriterPayload.CREATE: + call = self._soledad.create_doc + elif (item.mode == SoledadWriterPayload.CONTENT_CREATE + and not self._content_does_exist(payload)): + call = self._soledad.create_doc + elif item.mode == SoledadWriterPayload.PUT: + call = self._soledad.put_doc + return call + + def _process(self, queue): + """ + Return the item and the proper call type for the next + item in the queue if any. + + :param queue: the queue from where we'll pick item. + :type queue: Queue + """ + item = queue.get() + call = self._get_call_for_item(item) + return item, call + def consume(self, queue): """ Creates a new document in soledad db. 
@@ -733,24 +837,10 @@ class SoledadDocWriter(object): """ empty = queue.empty() while not empty: - item = queue.get() - call = None - payload = item.payload - - if item.mode == SoledadWriterPayload.CREATE: - call = self._soledad.create_doc - elif item.mode == SoledadWriterPayload.BODY_CREATE: - if not self._body_does_exist(payload): - call = self._soledad.create_doc - elif item.mode == SoledadWriterPayload.ATTACHMENT_CREATE: - if not self._attachment_does_exist(payload): - call = self._soledad.create_doc - elif item.mode == SoledadWriterPayload.PUT: - call = self._soledad.put_doc - - # XXX delete? + item, call = self._process(queue) if call: + # XXX should handle the delete case # should handle errors try: call(item.payload) @@ -779,33 +869,10 @@ class SoledadDocWriter(object): Stack. """ - def _body_does_exist(self, doc): + def _content_does_exist(self, doc): """ - Check whether we already have a body payload with this hash in our - database. - - :param doc: tentative body document - :type doc: dict - :returns: True if that happens, False otherwise. - """ - if not doc: - return False - chash = doc[fields.CONTENT_HASH_KEY] - body_docs = self._soledad.get_from_index( - fields.TYPE_C_HASH_IDX, - fields.TYPE_MESSAGE_VAL, str(chash)) - if not body_docs: - return False - if len(body_docs) != 1: - logger.warning("Found more than one copy of chash %s!" - % (chash,)) - logger.debug("Found body doc with that hash! Skipping save!") - return True - - def _attachment_does_exist(self, doc): - """ - Check whether we already have an attachment payload with this hash - in our database. + Check whether we already have a content document for a payload + with this hash in our database. 
:param doc: tentative body document :type doc: dict @@ -816,7 +883,7 @@ class SoledadDocWriter(object): phash = doc[fields.PAYLOAD_HASH_KEY] attach_docs = self._soledad.get_from_index( fields.TYPE_P_HASH_IDX, - fields.TYPE_ATTACHMENT_VAL, str(phash)) + fields.TYPE_CONTENT_VAL, str(phash)) if not attach_docs: return False @@ -840,15 +907,15 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # into a template for the class. FLAGS_DOC = "FLAGS" HEADERS_DOC = "HEADERS" - ATTACHMENT_DOC = "ATTACHMENT" - BODY_DOC = "BODY" + CONTENT_DOC = "CONTENT" templates = { FLAGS_DOC: { fields.TYPE_KEY: fields.TYPE_FLAGS_VAL, - fields.UID_KEY: 1, + fields.UID_KEY: 1, # XXX moe to a local table fields.MBOX_KEY: fields.INBOX_VAL, + fields.CONTENT_HASH_KEY: "", fields.SEEN_KEY: False, fields.RECENT_KEY: True, @@ -862,35 +929,28 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fields.TYPE_KEY: fields.TYPE_HEADERS_VAL, fields.CONTENT_HASH_KEY: "", + fields.DATE_KEY: "", + fields.SUBJECT_KEY: "", + fields.HEADERS_KEY: {}, - fields.NUM_PARTS_KEY: 0, fields.PARTS_MAP_KEY: {}, - fields.DATE_KEY: "", - fields.SUBJECT_KEY: "" }, - ATTACHMENT_DOC: { - fields.TYPE_KEY: fields.TYPE_ATTACHMENT_VAL, - fields.PART_NUMBER_KEY: 0, - fields.CONTENT_HASH_KEY: "", + CONTENT_DOC: { + fields.TYPE_KEY: fields.TYPE_CONTENT_VAL, fields.PAYLOAD_HASH_KEY: "", + fields.LINKED_FROM_KEY: [], + fields.CTYPE_KEY: "", # should index by this too - fields.RAW_KEY: "" - }, - - BODY_DOC: { - fields.TYPE_KEY: fields.TYPE_MESSAGE_VAL, - fields.CONTENT_HASH_KEY: "", - - fields.BODY_KEY: "", - - # this should not be needed, - # but let's keep the raw msg for some time - # until we are sure we can reconstruct - # the original msg from our disection. 
+ # should only get inmutable headers parts + # (for indexing) + fields.HEADERS_KEY: {}, fields.RAW_KEY: "", + fields.PARTS_MAP_KEY: {}, + fields.HEADERS_KEY: {}, + fields.MULTIPART_KEY: False, + }, - } } def __init__(self, mbox=None, soledad=None): @@ -938,128 +998,124 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): raise TypeError("Improper type passed to _get_empty_doc") return copy.deepcopy(self.templates[_type]) - @deferred - def add_msg(self, raw, subject=None, flags=None, date=None, uid=1): + def _do_parse(self, raw): """ - Creates a new message document. + Parse raw message and return it along with + relevant information about its outer level. :param raw: the raw message - :type raw: str - - :param subject: subject of the message. - :type subject: str - - :param flags: flags - :type flags: list - - :param date: the received date for the message - :type date: str - - :param uid: the message uid for this mailbox - :type uid: int + :type raw: StringIO or basestring + :return: msg, chash, size, multi + :rtype: tuple """ - # TODO: split in smaller methods - logger.debug('adding message') - if flags is None: - flags = tuple() - leap_assert_type(flags, tuple) - - # docs for flags, headers, and body - fd, hd, bd = map( - lambda t: self._get_empty_doc(t), - (self.FLAGS_DOC, self.HEADERS_DOC, self.BODY_DOC)) - msg = self._get_parsed_msg(raw) - headers = defaultdict(list) - for k, v in msg.items(): - headers[k].append(v) - raw_str = msg.as_string() chash = self._get_hash(msg) + size = len(msg.as_string()) multi = msg.is_multipart() + return msg, chash, size, multi - attaches = [] - inner_parts = [] - - if multi: - # XXX should walk down recursively - # in a better way. but fixing this quick - # to have an rc. - # XXX should pick the content-type in txt - body = first(msg.get_payload()).get_payload() - if isinstance(body, list): - # allowing one nesting level for now... 
- body, rest = body[0].get_payload(), body[1:] - for p in rest: - inner_parts.append(p) - else: - body = msg.get_payload() - logger.debug("adding msg with uid %s (multipart:%s)" % ( - uid, multi)) + def _populate_flags(self, flags, uid, chash, size, multi): + """ + Return a flags doc. + + XXX Missing DOC ----------- + """ + fd = self._get_empty_doc(self.FLAGS_DOC) - # flags doc --------------------------------------- fd[self.MBOX_KEY] = self.mbox fd[self.UID_KEY] = uid fd[self.CONTENT_HASH_KEY] = chash + fd[self.SIZE_KEY] = size fd[self.MULTIPART_KEY] = multi - fd[self.SIZE_KEY] = len(raw_str) if flags: fd[self.FLAGS_KEY] = map(self._stringify, flags) fd[self.SEEN_KEY] = self.SEEN_FLAG in flags fd[self.DEL_KEY] = self.DELETED_FLAG in flags fd[self.RECENT_KEY] = True # set always by default + return fd - # headers doc ---------------------------------------- + def _populate_headr(self, msg, chash, subject, date): + """ + Return a headers doc. + + XXX Missing DOC ----------- + """ + headers = defaultdict(list) + for k, v in msg.items(): + headers[k].append(v) + + # "fix" for repeated headers. + for k, v in headers.items(): + newline = "\n%s: " % (k,) + headers[k] = newline.join(v) + + hd = self._get_empty_doc(self.HEADERS_DOC) hd[self.CONTENT_HASH_KEY] = chash hd[self.HEADERS_KEY] = headers - print "headers" - import pprint - pprint.pprint(headers) - if not subject and self.SUBJECT_FIELD in headers: hd[self.SUBJECT_KEY] = first(headers[self.SUBJECT_FIELD]) else: hd[self.SUBJECT_KEY] = subject + if not date and self.DATE_FIELD in headers: hd[self.DATE_KEY] = first(headers[self.DATE_FIELD]) else: hd[self.DATE_KEY] = date - if multi: - # XXX fix for multipart nested case - hd[self.NUM_PARTS_KEY] = len(msg.get_payload()) - - # body doc - bd[self.CONTENT_HASH_KEY] = chash - bd[self.BODY_KEY] = body - # XXX in an ideal world, we would not need to save a copy of the - # raw message. 
But we'll keep it until we can be sure that - # we can rebuild the original message from the parts. - bd[self.RAW_KEY] = raw_str + return hd + + @deferred + def add_msg(self, raw, subject=None, flags=None, date=None, uid=1): + """ + Creates a new message document. + + :param raw: the raw message + :type raw: str + + :param subject: subject of the message. + :type subject: str + + :param flags: flags + :type flags: list + + :param date: the received date for the message + :type date: str + + :param uid: the message uid for this mailbox + :type uid: int + """ + # TODO signal that we can delete the original message!----- + # when all the processing is done. + + # TODO add the linked-from info ! + + logger.debug('adding message') + if flags is None: + flags = tuple() + leap_assert_type(flags, tuple) + + # parse + msg, chash, size, multi = self._do_parse(raw) + + fd = self._populate_flags(flags, uid, chash, size, multi) + hd = self._populate_headr(msg, chash, subject, date) + + parts = walk.get_parts(msg) + body_phash_fun = [walk.get_body_phash_simple, + walk.get_body_phash_multi][int(multi)] + body_phash = body_phash_fun(walk.get_payloads(msg)) + parts_map = walk.walk_msg_tree(parts, body_phash=body_phash) + + # add parts map to header doc + # (body, multi, part_map) + for key in parts_map: + hd[key] = parts_map[key] + del parts_map docs = [fd, hd] + cdocs = walk.get_raw_docs(msg, parts) - # attachment docs - if multi: - outer_parts = msg.get_payload() - parts = outer_parts + inner_parts - - # skip first part, we already got it in body - to_attach = ((i, m) for i, m in enumerate(parts) if i > 0) - for index, part_msg in to_attach: - att_doc = self._get_empty_doc(self.ATTACHMENT_DOC) - att_doc[self.PART_NUMBER_KEY] = index - att_doc[self.CONTENT_HASH_KEY] = chash - phash = self._get_hash(part_msg) - att_doc[self.PAYLOAD_HASH_KEY] = phash - att_doc[self.RAW_KEY] = part_msg.as_string() - - # keep a pointer to the payload hash in the - # headers doc, under the parts_map - 
hd[self.PARTS_MAP_KEY][str(index)] = phash - attaches.append(att_doc) - - # Saving ... ------------------------------- - # ok, there we go... + # Saving logger.debug('enqueuing message docs for write') ptuple = SoledadWriterPayload @@ -1067,14 +1123,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): for doc in docs: self.soledad_writer.put(ptuple( mode=ptuple.CREATE, payload=doc)) - # second, try to create body doc. - self.soledad_writer.put(ptuple( - mode=ptuple.BODY_CREATE, payload=bd)) + # and last, but not least, try to create - # attachment docs if not already there. - for at in attaches: + # content docs if not already there. + for cd in cdocs: self.soledad_writer.put(ptuple( - mode=ptuple.ATTACHMENT_CREATE, payload=at)) + mode=ptuple.CONTENT_CREATE, payload=cd)) def _remove_cb(self, result): return result -- cgit v1.2.3 From 51eaab77deedf0c923fe40cf3d346fa879bf2ae3 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 13 Jan 2014 13:20:00 -0400 Subject: Add check for uniqueness when adding mails. Check by mbox + content-hash --- src/leap/mail/imap/messages.py | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 37e4311..a3fcd87 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -1064,10 +1064,27 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): hd[self.DATE_KEY] = date return hd + def _fdoc_already_exists(self, chash): + """ + Check whether we can find a flags doc for this mailbox with the + given content-hash. It enforces that we can only have the same maessage + listed once for a a given mailbox. + + :param chash: the content-hash to check about. + :type chash: basestring + :return: False, if it does not exist, or UID. 
+ """ + exist = self._get_fdoc_from_chash(chash) + if exist: + return exist.content.get(fields.UID_KEY, "unknown-uid") + else: + return False + @deferred def add_msg(self, raw, subject=None, flags=None, date=None, uid=1): """ Creates a new message document. + Here lives the magic of the leap mail. Well, in soledad, really. :param raw: the raw message :type raw: str @@ -1097,6 +1114,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # parse msg, chash, size, multi = self._do_parse(raw) + # check for uniqueness. + if self._fdoc_already_exists(chash): + logger.warning("We already have that message in this mailbox.") + # note that this operation will leave holes in the UID sequence, + # but we're gonna change that all the same for a local-only table. + # so not touch it by the moment. + return False + fd = self._populate_flags(flags, uid, chash, size, multi) hd = self._populate_headr(msg, chash, subject, date) @@ -1156,6 +1181,31 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # getters + def _get_fdoc_from_chash(self, chash): + """ + Return a flags document for this mailbox with a given chash. + + :return: A SoledadDocument containing the Flags Document, or None if + the query failed. + :rtype: SoledadDocument or None. + """ + try: + query = self._soledad.get_from_index( + fields.TYPE_MBOX_C_HASH_IDX, + fields.TYPE_FLAGS_VAL, self.mbox, chash) + if query: + if len(query) > 1: + logger.warning( + "More than one fdoc found for this chash, " + "we got a duplicate!!") + # XXX we could take action, like trigger a background + # process to kill dupes. + return query.pop() + else: + return None + except Exception as exc: + logger.exception("Unhandled error %r" % exc) + def get_msg_by_uid(self, uid): """ Retrieves a LeapMessage by UID. 
-- cgit v1.2.3 From 5adc6b66839b15c23980355774d8d24aba4918bd Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 13 Jan 2014 14:51:13 -0400 Subject: Restore the encoding of the messages. Fixes: #4956 We still are getting wrong output with unicode chars, but this at least avoids breaking the fetch command. --- src/leap/mail/imap/messages.py | 47 +++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 26 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index a3fcd87..7b49c80 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -19,6 +19,7 @@ LeapMessage and MessageCollection. """ import copy import logging +import re import StringIO from collections import defaultdict, namedtuple @@ -63,6 +64,10 @@ def lowerdict(_dict): for key, value in _dict.items()) +CHARSET_PATTERN = r"""charset=([\w-]+)""" +CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) + + class MessagePart(object): """ IMessagePart implementor. @@ -140,18 +145,9 @@ class MessagePart(object): payload = str("") if payload: - #headers = self.getHeaders(True) - #headers = lowerdict(headers) - #content_type = headers.get('content-type', "") content_type = self._get_ctype_from_document(phash) - charset_split = content_type.split('charset=') - # XXX fuck all this, use a regex! 
- if len(charset_split) > 1: - charset = charset_split[1] - if charset: - charset = charset.strip() - else: - charset = None + charset = first(CHARSET_RE.findall(content_type)) + logger.debug("Got charset from header: %s" % (charset,)) if not charset: charset = self._get_charset(payload) try: @@ -483,28 +479,27 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: file-like object opened for reading :rtype: StringIO """ + # TODO refactor with getBodyFile in MessagePart fd = StringIO.StringIO() bdoc = self._bdoc if bdoc: - body = str(self._bdoc.content.get(self.RAW_KEY, "")) + body = self._bdoc.content.get(self.RAW_KEY, "") + content_type = bdoc.content.get('content-type', "") + charset = first(CHARSET_RE.findall(content_type)) + logger.debug("Got charset from header: %s" % (charset,)) + if not charset: + charset = self._get_charset(body) + try: + body = body.decode(charset).encode(charset) + except (UnicodeEncodeError, UnicodeDecodeError) as e: + logger.error("Unicode error {0}".format(e)) + body = body.encode(charset, 'replace') + + # We are still returning funky characters from here. else: logger.warning("No BDOC found for message.") body = str("") - # XXX not needed, isn't it? ---- ivan? - #if bdoc: - #content_type = bdoc.content.get('content-type', "") - #charset = content_type.split('charset=')[1] - #if charset: - #charset = charset.strip() - #if not charset: - #charset = self._get_charset(body) - #try: - #body = str(body.encode(charset)) - #except (UnicodeEncodeError, UnicodeDecodeError) as e: - #logger.error("Unicode error {0}".format(e)) - #body = str(body.encode(charset, 'replace')) - fd.write(body) fd.seek(0) return fd -- cgit v1.2.3 From 4856f32ec75cda000fc794d0ac93990e0d1e42f6 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 13 Jan 2014 17:58:02 -0400 Subject: Very limited support for SEARCH Commands. Closes: #4209 limited to HEADER Message-ID. This is a quick workaround for avoiding duplicate saves in Drafts Folder. but we'll get there! 
--- src/leap/mail/imap/messages.py | 93 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 10 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 7b49c80..a3d29d6 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -20,6 +20,8 @@ LeapMessage and MessageCollection. import copy import logging import re +import threading +import time import StringIO from collections import defaultdict, namedtuple @@ -44,6 +46,7 @@ from leap.mail.messageflow import IMessageConsumer, MessageProducer logger = logging.getLogger(__name__) +read_write_lock = threading.Lock() # TODO ------------------------------------------------------------ @@ -53,6 +56,7 @@ logger = logging.getLogger(__name__) # [ ] Send patch to twisted for bug in imap4.py:5717 (content-type can be # none? lower-case?) + def lowerdict(_dict): """ Return a dict with the keys in lowercase. @@ -60,12 +64,17 @@ def lowerdict(_dict): :param _dict: the dict to convert :rtype: dict """ + # TODO should properly implement a CaseInsensitive dict. + # Look into requests code. return dict((key.lower(), value) for key, value in _dict.items()) CHARSET_PATTERN = r"""charset=([\w-]+)""" +MSGID_PATTERN = r"""<([\w@.]+)>""" + CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) +MSGID_RE = re.compile(MSGID_PATTERN) class MessagePart(object): @@ -897,6 +906,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): Implements a filter query over the messages contained in a soledad database. """ + # XXX this should be able to produce a MessageSet methinks # could validate these kinds of objects turning them # into a template for the class. 
@@ -1044,9 +1054,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): newline = "\n%s: " % (k,) headers[k] = newline.join(v) + lower_headers = lowerdict(headers) + msgid = first(MSGID_RE.findall( + lower_headers.get('message-id', ''))) + hd = self._get_empty_doc(self.HEADERS_DOC) hd[self.CONTENT_HASH_KEY] = chash hd[self.HEADERS_KEY] = headers + hd[self.MSGID_KEY] = msgid if not subject and self.SUBJECT_FIELD in headers: hd[self.SUBJECT_KEY] = first(headers[self.SUBJECT_FIELD]) @@ -1139,16 +1154,17 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): logger.debug('enqueuing message docs for write') ptuple = SoledadWriterPayload - # first, regular docs: flags and headers - for doc in docs: - self.soledad_writer.put(ptuple( - mode=ptuple.CREATE, payload=doc)) + with read_write_lock: + # first, regular docs: flags and headers + for doc in docs: + self.soledad_writer.put(ptuple( + mode=ptuple.CREATE, payload=doc)) - # and last, but not least, try to create - # content docs if not already there. - for cd in cdocs: - self.soledad_writer.put(ptuple( - mode=ptuple.CONTENT_CREATE, payload=cd)) + # and last, but not least, try to create + # content docs if not already there. 
+ for cd in cdocs: + self.soledad_writer.put(ptuple( + mode=ptuple.CONTENT_CREATE, payload=cd)) def _remove_cb(self, result): return result @@ -1174,7 +1190,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): d.addCallback(self._remove_cb) return d - # getters + # getters: specific queries def _get_fdoc_from_chash(self, chash): """ @@ -1201,6 +1217,63 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): except Exception as exc: logger.exception("Unhandled error %r" % exc) + def _get_uid_from_msgidCb(self, msgid): + hdoc = None + with read_write_lock: + try: + query = self._soledad.get_from_index( + fields.TYPE_MSGID_IDX, + fields.TYPE_HEADERS_VAL, msgid) + if query: + if len(query) > 1: + logger.warning( + "More than one hdoc found for this msgid, " + "we got a duplicate!!") + # XXX we could take action, like trigger a background + # process to kill dupes. + hdoc = query.pop() + except Exception as exc: + logger.exception("Unhandled error %r" % exc) + + if hdoc is None: + logger.warning("Could not find hdoc for msgid %s" + % (msgid,)) + return None + msg_chash = hdoc.content.get(fields.CONTENT_HASH_KEY) + fdoc = self._get_fdoc_from_chash(msg_chash) + if not fdoc: + logger.warning("Could not find fdoc for msgid %s" + % (msgid,)) + return None + return fdoc.content.get(fields.UID_KEY, None) + + @deferred + def _get_uid_from_msgid(self, msgid): + """ + Return a UID for a given message-id. + + It first gets the headers-doc for that msg-id, and + it found it queries the flags doc for the current mailbox + for the matching content-hash. + + :return: A UID, or None + """ + # We need to wait a little bit, cause in some of the cases + # the query is received right after we've saved the document, + # and we cannot find it otherwise. This seems to be enough. + + # Doing a sleep since we'll be calling this in a secondary thread, + # but we'll should be able to collect the results after a + # reactor.callLater. 
+ # Maybe we can implement something like NOT_DONE_YET in the web + # framework, and return from the callback? + # See: http://jcalderone.livejournal.com/50226.html + # reactor.callLater(0.3, self._get_uid_from_msgidCb, msgid) + time.sleep(0.3) + return self._get_uid_from_msgidCb(msgid) + + # getters: generic for a mailbox + def get_msg_by_uid(self, uid): """ Retrieves a LeapMessage by UID. -- cgit v1.2.3 From 2b53238ce5211bc23da8d1e8903335daa12ca02e Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 14 Jan 2014 16:28:07 -0400 Subject: remove locks (moved to soledad client) --- src/leap/mail/imap/messages.py | 50 +++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 27 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index a3d29d6..7c17dbe 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -46,8 +46,6 @@ from leap.mail.messageflow import IMessageConsumer, MessageProducer logger = logging.getLogger(__name__) -read_write_lock = threading.Lock() - # TODO ------------------------------------------------------------ # [ ] Add linked-from info. @@ -1154,17 +1152,16 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): logger.debug('enqueuing message docs for write') ptuple = SoledadWriterPayload - with read_write_lock: - # first, regular docs: flags and headers - for doc in docs: - self.soledad_writer.put(ptuple( - mode=ptuple.CREATE, payload=doc)) + # first, regular docs: flags and headers + for doc in docs: + self.soledad_writer.put(ptuple( + mode=ptuple.CREATE, payload=doc)) - # and last, but not least, try to create - # content docs if not already there. - for cd in cdocs: - self.soledad_writer.put(ptuple( - mode=ptuple.CONTENT_CREATE, payload=cd)) + # and last, but not least, try to create + # content docs if not already there. 
+ for cd in cdocs: + self.soledad_writer.put(ptuple( + mode=ptuple.CONTENT_CREATE, payload=cd)) def _remove_cb(self, result): return result @@ -1219,21 +1216,20 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): def _get_uid_from_msgidCb(self, msgid): hdoc = None - with read_write_lock: - try: - query = self._soledad.get_from_index( - fields.TYPE_MSGID_IDX, - fields.TYPE_HEADERS_VAL, msgid) - if query: - if len(query) > 1: - logger.warning( - "More than one hdoc found for this msgid, " - "we got a duplicate!!") - # XXX we could take action, like trigger a background - # process to kill dupes. - hdoc = query.pop() - except Exception as exc: - logger.exception("Unhandled error %r" % exc) + try: + query = self._soledad.get_from_index( + fields.TYPE_MSGID_IDX, + fields.TYPE_HEADERS_VAL, msgid) + if query: + if len(query) > 1: + logger.warning( + "More than one hdoc found for this msgid, " + "we got a duplicate!!") + # XXX we could take action, like trigger a background + # process to kill dupes. + hdoc = query.pop() + except Exception as exc: + logger.exception("Unhandled error %r" % exc) if hdoc is None: logger.warning("Could not find hdoc for msgid %s" -- cgit v1.2.3 From fc7ef201ea169e76123e15db346ac8d882d93c02 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 15 Jan 2014 16:57:18 -0400 Subject: remove use of soledad_writer Since the soledad client lock gets us covered with writes now, it makes no sense to enqueue using the messageconsumer. The SoledadWriter is left orphaned by now. We might want to reuse it to enqueue low priority tasks that need a strategy of retries in case of revisionconflicts. the MessageConsumer abstraction should also be useful for the case of the smtp queue. 
--- src/leap/mail/imap/messages.py | 163 +++++++++++++++++++++++++---------------- 1 file changed, 99 insertions(+), 64 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 7c17dbe..b35b808 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -20,7 +20,6 @@ LeapMessage and MessageCollection. import copy import logging import re -import threading import time import StringIO @@ -51,8 +50,6 @@ logger = logging.getLogger(__name__) # [ ] Add linked-from info. # [ ] Delete incoming mail only after successful write! # [ ] Remove UID from syncable db. Store only those indexes locally. -# [ ] Send patch to twisted for bug in imap4.py:5717 (content-type can be -# none? lower-case?) def lowerdict(_dict): @@ -657,10 +654,27 @@ class LeapMessage(fields, MailParser, MBoxParser): Return the document that keeps the flags for this message. """ - flag_docs = self._soledad.get_from_index( - fields.TYPE_MBOX_UID_IDX, - fields.TYPE_FLAGS_VAL, self._mbox, str(self._uid)) - return first(flag_docs) + result = {} + try: + flag_docs = self._soledad.get_from_index( + fields.TYPE_MBOX_UID_IDX, + fields.TYPE_FLAGS_VAL, self._mbox, str(self._uid)) + result = first(flag_docs) + except Exception as exc: + # ugh! Something's broken down there! + logger.warning("FUCKING ERROR ----- getting for UID:", self._uid) + logger.exception(exc) + try: + flag_docs = self._soledad.get_from_index( + fields.TYPE_MBOX_UID_IDX, + fields.TYPE_FLAGS_VAL, self._mbox, str(self._uid)) + result = first(flag_docs) + except Exception as exc: + # ugh! Something's broken down there! + logger.warning("FUCKING ERROR, 2nd time -----") + logger.exception(exc) + finally: + return result def _get_headers_doc(self): """ @@ -770,6 +784,51 @@ class LeapMessage(fields, MailParser, MBoxParser): return self._fdoc is not None +class ContentDedup(object): + """ + Message deduplication. 
+ + We do a query for the content hashes before writing to our beloved + sqlcipher backend of Soledad. This means, by now, that: + + 1. We will not store the same attachment twice, only the hash of it. + 2. We will not store the same message body twice, only the hash of it. + + The first case is useful if you are always receiving the same old memes + from unwary friends that still have not discovered that 4chan is the + generator of the internet. The second will save your day if you have + initiated session with the same account in two different machines. I also + wonder why would you do that, but let's respect each other choices, like + with the religious celebrations, and assume that one day we'll be able + to run Bitmask in completely free phones. Yes, I mean that, the whole GSM + Stack. + """ + + def _content_does_exist(self, doc): + """ + Check whether we already have a content document for a payload + with this hash in our database. + + :param doc: tentative body document + :type doc: dict + :returns: True if that happens, False otherwise. + """ + if not doc: + return False + phash = doc[fields.PAYLOAD_HASH_KEY] + attach_docs = self._soledad.get_from_index( + fields.TYPE_P_HASH_IDX, + fields.TYPE_CONTENT_VAL, str(phash)) + if not attach_docs: + return False + + if len(attach_docs) != 1: + logger.warning("Found more than one copy of phash %s!" + % (phash,)) + logger.debug("Found attachment doc with that hash! Skipping save!") + return True + + SoledadWriterPayload = namedtuple( 'SoledadWriterPayload', ['mode', 'payload']) @@ -781,6 +840,13 @@ SoledadWriterPayload.PUT = 2 SoledadWriterPayload.CONTENT_CREATE = 3 +""" +SoledadDocWriter was used to avoid writing to the db from multiple threads. +Its use here has been deprecated in favor of a local rw_lock in the client. +But we might want to reuse in in the near future to implement priority queues. +""" + + class SoledadDocWriter(object): """ This writer will create docs serially in the local soledad database. 
@@ -852,51 +918,9 @@ class SoledadDocWriter(object): empty = queue.empty() - """ - Message deduplication. - We do a query for the content hashes before writing to our beloved - sqlcipher backend of Soledad. This means, by now, that: - - 1. We will not store the same attachment twice, only the hash of it. - 2. We will not store the same message body twice, only the hash of it. - - The first case is useful if you are always receiving the same old memes - from unwary friends that still have not discovered that 4chan is the - generator of the internet. The second will save your day if you have - initiated session with the same account in two different machines. I also - wonder why would you do that, but let's respect each other choices, like - with the religious celebrations, and assume that one day we'll be able - to run Bitmask in completely free phones. Yes, I mean that, the whole GSM - Stack. - """ - - def _content_does_exist(self, doc): - """ - Check whether we already have a content document for a payload - with this hash in our database. - - :param doc: tentative body document - :type doc: dict - :returns: True if that happens, False otherwise. - """ - if not doc: - return False - phash = doc[fields.PAYLOAD_HASH_KEY] - attach_docs = self._soledad.get_from_index( - fields.TYPE_P_HASH_IDX, - fields.TYPE_CONTENT_VAL, str(phash)) - if not attach_docs: - return False - - if len(attach_docs) != 1: - logger.warning("Found more than one copy of phash %s!" - % (phash,)) - logger.debug("Found attachment doc with that hash! Skipping save!") - return True - - -class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): +class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, + ContentDedup): """ A collection of messages, surprisingly. 
@@ -1145,23 +1169,21 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): hd[key] = parts_map[key] del parts_map - docs = [fd, hd] - cdocs = walk.get_raw_docs(msg, parts) - # Saving - logger.debug('enqueuing message docs for write') - ptuple = SoledadWriterPayload # first, regular docs: flags and headers - for doc in docs: - self.soledad_writer.put(ptuple( - mode=ptuple.CREATE, payload=doc)) + self._soledad.create_doc(fd) + + # XXX should check for content duplication on headers too + # but with chash. !!! + self._soledad.create_doc(hd) # and last, but not least, try to create # content docs if not already there. - for cd in cdocs: - self.soledad_writer.put(ptuple( - mode=ptuple.CONTENT_CREATE, payload=cd)) + cdocs = walk.get_raw_docs(msg, parts) + for cdoc in cdocs: + if not self._content_does_exist(cdoc): + self._soledad.create_doc(cdoc) def _remove_cb(self, result): return result @@ -1312,17 +1334,30 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # XXX FIXINDEX -- should implement order by in soledad return sorted(all_docs, key=lambda item: item.content['uid']) - def all_msg_iter(self): + def all_uid_iter(self): """ Return an iterator trhough the UIDs of all messages, sorted in ascending order. """ + # XXX we should get this from the uid table, local-only all_uids = (doc.content[self.UID_KEY] for doc in self._soledad.get_from_index( fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox)) return (u for u in sorted(all_uids)) + def all_flags(self): + """ + Return a dict with all flags documents for this mailbox. + """ + all_flags = dict((( + doc.content[self.UID_KEY], + doc.content[self.FLAGS_KEY]) for doc in + self._soledad.get_from_index( + fields.TYPE_MBOX_IDX, + fields.TYPE_FLAGS_VAL, self.mbox))) + return all_flags + def count(self): """ Return the count of messages for this mailbox. 
@@ -1447,7 +1482,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :rtype: iterable """ return (LeapMessage(self._soledad, docuid, self.mbox) - for docuid in self.all_msg_iter()) + for docuid in self.all_uid_iter()) def __repr__(self): """ -- cgit v1.2.3 From 90f4338da088394ade1663871a23b8fb0a4c0d66 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 15 Jan 2014 17:05:24 -0400 Subject: Performance improvement on FLAGS-only FETCH * Compute the intersection set of the uids on a FETCH, so we avoid iterating through the non-existant UIDs. * Dispatch FLAGS query to our specialized method, that fetches all the flags documents and return objects that only specify one subset of the MessagePart interface, apt to render flags quickly with less queries overhead. * Overwrite the do_FETCH command in the imap Server to use fetch_flags. * Use deferLater for a better dispatch of tasks in the reactor. --- src/leap/mail/imap/messages.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index b35b808..22de356 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -662,17 +662,8 @@ class LeapMessage(fields, MailParser, MBoxParser): result = first(flag_docs) except Exception as exc: # ugh! Something's broken down there! - logger.warning("FUCKING ERROR ----- getting for UID:", self._uid) + logger.warning("ERROR while getting flags for UID: %s" % self._uid) logger.exception(exc) - try: - flag_docs = self._soledad.get_from_index( - fields.TYPE_MBOX_UID_IDX, - fields.TYPE_FLAGS_VAL, self._mbox, str(self._uid)) - result = first(flag_docs) - except Exception as exc: - # ugh! Something's broken down there! 
- logger.warning("FUCKING ERROR, 2nd time -----") - logger.exception(exc) finally: return result -- cgit v1.2.3 From ae56191d2d6f2953bd49f43b9dedb322a7f0db8c Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 16 Jan 2014 17:18:11 -0400 Subject: reset last uid on expunge --- src/leap/mail/imap/messages.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 22de356..02df38e 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -1337,6 +1337,18 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, fields.TYPE_FLAGS_VAL, self.mbox)) return (u for u in sorted(all_uids)) + def reset_last_uid(self, param): + """ + Set the last uid to the highest uid found. + Used while expunging, passed as a callback. + """ + try: + self.last_uid = max(self.all_uid_iter()) + 1 + except ValueError: + # empty sequence + pass + return param + def all_flags(self): """ Return a dict with all flags documents for this mailbox. -- cgit v1.2.3 From 759a3fff83252c6ef67434a860574da49b066df4 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 16 Jan 2014 17:19:31 -0400 Subject: fix internaldate storage --- src/leap/mail/imap/messages.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 02df38e..1b996b6 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -467,7 +467,8 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: C{str} :return: An RFC822-formatted date string. 
""" - return str(self._hdoc.content.get(self.DATE_KEY, '')) + date = self._hdoc.content.get(self.DATE_KEY, '') + return str(date) # # IMessagePart @@ -1077,12 +1078,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, hd[self.MSGID_KEY] = msgid if not subject and self.SUBJECT_FIELD in headers: - hd[self.SUBJECT_KEY] = first(headers[self.SUBJECT_FIELD]) + hd[self.SUBJECT_KEY] = headers[self.SUBJECT_FIELD] else: hd[self.SUBJECT_KEY] = subject if not date and self.DATE_FIELD in headers: - hd[self.DATE_KEY] = first(headers[self.DATE_FIELD]) + hd[self.DATE_KEY] = headers[self.DATE_FIELD] else: hd[self.DATE_KEY] = date return hd -- cgit v1.2.3 From 9f9701d42be385aa9a6d7e72fd10104b0025971b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 16 Jan 2014 22:01:20 -0400 Subject: Separate RECENT Flag to a mailbox document. this way we avoid a bunch of writes. --- src/leap/mail/imap/messages.py | 163 ++++++++++++++++++++++++++++++----------- 1 file changed, 120 insertions(+), 43 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 1b996b6..6556b12 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -21,6 +21,7 @@ import copy import logging import re import time +import threading import StringIO from collections import defaultdict, namedtuple @@ -308,7 +309,7 @@ class LeapMessage(fields, MailParser, MBoxParser): implements(imap4.IMessage) - def __init__(self, soledad, uid, mbox): + def __init__(self, soledad, uid, mbox, collection=None): """ Initializes a LeapMessage. 
@@ -318,11 +319,14 @@ class LeapMessage(fields, MailParser, MBoxParser): :type uid: int or basestring :param mbox: the mbox this message belongs to :type mbox: basestring + :param collection: a reference to the parent collection object + :type collection: MessageCollection """ MailParser.__init__(self) self._soledad = soledad self._uid = int(uid) self._mbox = self._parse_mailbox_name(mbox) + self._collection = collection self.__chash = None self.__bdoc = None @@ -373,7 +377,7 @@ class LeapMessage(fields, MailParser, MBoxParser): def getUID(self): """ - Retrieve the unique identifier associated with this message + Retrieve the unique identifier associated with this Message. :return: uid for this message :rtype: int @@ -382,18 +386,26 @@ class LeapMessage(fields, MailParser, MBoxParser): def getFlags(self): """ - Retrieve the flags associated with this message + Retrieve the flags associated with this Message. :return: The flags, represented as strings :rtype: tuple """ if self._uid is None: return [] + uid = self._uid flags = [] fdoc = self._fdoc if fdoc: flags = fdoc.content.get(self.FLAGS_KEY, None) + + msgcol = self._collection + + # We treat the recent flag specially: gotten from + # a mailbox-level document. 
+ if msgcol and uid in msgcol.recent_flags: + flags.append(fields.RECENT_FLAG) if flags: flags = map(str, flags) return tuple(flags) @@ -414,7 +426,7 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: SoledadDocument """ leap_assert(isinstance(flags, tuple), "flags need to be a tuple") - log.msg('setting flags: %s' % (self._uid)) + log.msg('setting flags: %s (%s)' % (self._uid, flags)) doc = self._fdoc if not doc: @@ -424,7 +436,6 @@ class LeapMessage(fields, MailParser, MBoxParser): return doc.content[self.FLAGS_KEY] = flags doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags - doc.content[self.RECENT_KEY] = self.RECENT_FLAG in flags doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags self._soledad.put_doc(doc) @@ -927,6 +938,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, FLAGS_DOC = "FLAGS" HEADERS_DOC = "HEADERS" CONTENT_DOC = "CONTENT" + RECENT_DOC = "RECENT" templates = { @@ -937,7 +949,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, fields.CONTENT_HASH_KEY: "", fields.SEEN_KEY: False, - fields.RECENT_KEY: True, fields.DEL_KEY: False, fields.FLAGS_KEY: [], fields.MULTIPART_KEY: False, @@ -970,12 +981,25 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, fields.MULTIPART_KEY: False, }, + RECENT_DOC: { + fields.TYPE_KEY: fields.TYPE_RECENT_VAL, + fields.MBOX_KEY: fields.INBOX_VAL, + fields.RECENTFLAGS_KEY: [], + } } + _rdoc_lock = threading.Lock() + def __init__(self, mbox=None, soledad=None): """ Constructor for MessageCollection. + On initialization, we ensure that we have a document for + storing the recent flags. The nature of this flag make us wanting + to store the set of the UIDs with this flag at the level of the + MessageCollection for each mailbox, instead of treating them + as a property of each message. + :param mbox: the name of the mailbox. 
It is the name with which we filter the query over the messages database @@ -994,17 +1018,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # okay, all in order, keep going... self.mbox = self._parse_mailbox_name(mbox) self._soledad = soledad + self.__rflags = None self.initialize_db() - # I think of someone like nietzsche when reading this - - # this will be the producer that will enqueue the content - # to be processed serially by the consumer (the writer). We just - # need to `put` the new material on its plate. - - self.soledad_writer = MessageProducer( - SoledadDocWriter(soledad), - period=0.02) + # ensure that we have a recent-flags doc + self._get_or_create_rdoc() def _get_empty_doc(self, _type=FLAGS_DOC): """ @@ -1017,6 +1035,18 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, raise TypeError("Improper type passed to _get_empty_doc") return copy.deepcopy(self.templates[_type]) + def _get_or_create_rdoc(self): + """ + Try to retrieve the recent-flags doc for this MessageCollection, + and create one if not found. + """ + rdoc = self._get_recent_doc() + if not rdoc: + rdoc = self._get_empty_doc(self.RECENT_DOC) + if self.mbox != fields.INBOX_VAL: + rdoc[fields.MBOX_KEY] = self.mbox + self._soledad.create_doc(rdoc) + def _do_parse(self, raw): """ Parse raw message and return it along with @@ -1161,7 +1191,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, hd[key] = parts_map[key] del parts_map - # Saving + # Saving ---------------------------------------- + self.set_recent_flag(uid) # first, regular docs: flags and headers self._soledad.create_doc(fd) @@ -1203,6 +1234,76 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # getters: specific queries + def _get_recent_flags(self): + """ + An accessor for the recent-flags set for this mailbox. 
+ """ + if not self.__rflags: + rdoc = self._get_recent_doc() + self.__rflags = set(rdoc.content.get( + fields.RECENTFLAGS_KEY, [])) + return self.__rflags + + def _set_recent_flags(self, value): + """ + Setter for the recent-flags set for this mailbox. + """ + rdoc = self._get_recent_doc() + newv = set(value) + self.__rflags = newv + + with self._rdoc_lock: + rdoc.content[fields.RECENTFLAGS_KEY] = list(newv) + # XXX should deferLater 0 it? + self._soledad.put_doc(rdoc) + + recent_flags = property( + _get_recent_flags, _set_recent_flags, + doc="Set of UIDs with the recent flag for this mailbox.") + + def unset_recent_flags(self, uids): + """ + Unset Recent flag for a sequence of uids. + """ + self.recent_flags = self.recent_flags.difference( + set(uids)) + + def unset_recent_flag(self, uid): + """ + Unset Recent flag for a given uid. + """ + self.recent_flags = self.recent_flags.difference( + set([uid])) + + def set_recent_flag(self, uid): + """ + Set Recent flag for a given uid. + """ + self.recent_flags = self.recent_flags.union( + set([uid])) + + def _get_recent_doc(self): + """ + Get recent-flags document for this inbox. + """ + # TODO refactor this try-catch structure into a utility + try: + query = self._soledad.get_from_index( + fields.TYPE_MBOX_IDX, + fields.TYPE_RECENT_VAL, self.mbox) + if query: + if len(query) > 1: + logger.warning( + "More than one rdoc found for this mbox, " + "we got a duplicate!!") + # XXX we could take action, like trigger a background + # process to kill dupes. + return query.pop() + else: + return None + except Exception as exc: + logger.exception("Unhandled error %r" % exc) + def _get_fdoc_from_chash(self, chash): """ Return a flags document for this mailbox with a given chash. @@ -1287,6 +1388,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, def get_msg_by_uid(self, uid): """ Retrieves a LeapMessage by UID. + This is used primarity in the Mailbox fetch and store methods. 
:param uid: the message uid to query by :type uid: int @@ -1295,7 +1397,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, or None if not found. :rtype: LeapMessage """ - msg = LeapMessage(self._soledad, uid, self.mbox) + msg = LeapMessage(self._soledad, uid, self.mbox, collection=self) if not msg.does_exist(): return None return msg @@ -1412,28 +1514,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # recent messages - def recent_iter(self): - """ - Get an iterator for the message UIDs with `recent` flag. - - :return: iterator through recent message docs - :rtype: iterable - """ - return (doc.content[self.UID_KEY] for doc in - self._soledad.get_from_index( - fields.TYPE_MBOX_RECT_IDX, - fields.TYPE_FLAGS_VAL, self.mbox, '1')) - - def get_recent(self): - """ - Get all messages with the `Recent` flag. - - :returns: a list of LeapMessages - :rtype: list - """ - return [LeapMessage(self._soledad, docid, self.mbox) - for docid in self.recent_iter()] - def count_recent(self): """ Count all messages with the `Recent` flag. 
@@ -1441,10 +1521,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, :returns: count :rtype: int """ - count = self._soledad.get_count_from_index( - fields.TYPE_MBOX_RECT_IDX, - fields.TYPE_FLAGS_VAL, self.mbox, '1') - return count + return len(self.recent_flags) # deleted messages -- cgit v1.2.3 From 8ebd48d923466db51b9ea5698f51d1f12867a7cb Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 16 Jan 2014 23:46:08 -0400 Subject: refactor common pattern to utility function --- src/leap/mail/imap/messages.py | 104 +++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 56 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 6556b12..f968c47 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -25,6 +25,7 @@ import threading import StringIO from collections import defaultdict, namedtuple +from functools import partial from twisted.mail import imap4 from twisted.internet import defer @@ -42,7 +43,7 @@ from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields from leap.mail.imap.parser import MailParser, MBoxParser -from leap.mail.messageflow import IMessageConsumer, MessageProducer +from leap.mail.messageflow import IMessageConsumer logger = logging.getLogger(__name__) @@ -66,6 +67,31 @@ def lowerdict(_dict): for key, value in _dict.items()) +def try_unique_query(curried): + """ + Try to execute a query that is expected to have a + single outcome, and log a warning if more than one document found. + + :param curried: a curried function + :type curried: callable + """ + leap_assert(callable(curried), "A callable is expected") + try: + query = curried() + if query: + if len(query) > 1: + # TODO we could take action, like trigger a background + # process to kill dupes. 
+ name = getattr(curried, 'expected', 'doc') + logger.warning( + "More than one %s found for this mbox, " + "we got a duplicate!!" % (name,)) + return query.pop() + else: + return None + except Exception as exc: + logger.exception("Unhandled error %r" % exc) + CHARSET_PATTERN = r"""charset=([\w-]+)""" MSGID_PATTERN = r"""<([\w@.]+)>""" @@ -1286,23 +1312,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, """ Get recent-flags document for this inbox. """ - # TODO refactor this try-catch structure into a utility - try: - query = self._soledad.get_from_index( - fields.TYPE_MBOX_IDX, - fields.TYPE_RECENT_VAL, self.mbox) - if query: - if len(query) > 1: - logger.warning( - "More than one rdoc found for this mbox, " - "we got a duplicate!!") - # XXX we could take action, like trigger a background - # process to kill dupes. - return query.pop() - else: - return None - except Exception as exc: - logger.exception("Unhandled error %r" % exc) + curried = partial( + self._soledad.get_from_index, + fields.TYPE_MBOX_IDX, + fields.TYPE_RECENT_VAL, self.mbox) + curried.expected = "rdoc" + return try_unique_query(curried) def _get_fdoc_from_chash(self, chash): """ @@ -1312,39 +1327,21 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, the query failed. :rtype: SoledadDocument or None. """ - try: - query = self._soledad.get_from_index( - fields.TYPE_MBOX_C_HASH_IDX, - fields.TYPE_FLAGS_VAL, self.mbox, chash) - if query: - if len(query) > 1: - logger.warning( - "More than one fdoc found for this chash, " - "we got a duplicate!!") - # XXX we could take action, like trigger a background - # process to kill dupes. 
- return query.pop() - else: - return None - except Exception as exc: - logger.exception("Unhandled error %r" % exc) + curried = partial( + self._soledad.get_from_index, + fields.TYPE_MBOX_C_HASH_IDX, + fields.TYPE_FLAGS_VAL, self.mbox, chash) + curried.expected = "fdoc" + return try_unique_query(curried) def _get_uid_from_msgidCb(self, msgid): hdoc = None - try: - query = self._soledad.get_from_index( - fields.TYPE_MSGID_IDX, - fields.TYPE_HEADERS_VAL, msgid) - if query: - if len(query) > 1: - logger.warning( - "More than one hdoc found for this msgid, " - "we got a duplicate!!") - # XXX we could take action, like trigger a background - # process to kill dupes. - hdoc = query.pop() - except Exception as exc: - logger.exception("Unhandled error %r" % exc) + curried = partial( + self._soledad.get_from_index, + fields.TYPE_MSGID_IDX, + fields.TYPE_HEADERS_VAL, msgid) + curried.expected = "hdoc" + hdoc = try_unique_query(curried) if hdoc is None: logger.warning("Could not find hdoc for msgid %s" @@ -1373,13 +1370,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # the query is received right after we've saved the document, # and we cannot find it otherwise. This seems to be enough. - # Doing a sleep since we'll be calling this in a secondary thread, - # but we'll should be able to collect the results after a - # reactor.callLater. - # Maybe we can implement something like NOT_DONE_YET in the web - # framework, and return from the callback? - # See: http://jcalderone.livejournal.com/50226.html - # reactor.callLater(0.3, self._get_uid_from_msgidCb, msgid) + # XXX do a deferLater instead ?? time.sleep(0.3) return self._get_uid_from_msgidCb(msgid) @@ -1426,6 +1417,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # inneficient, but first let's grok it and then # let's worry about efficiency. 
# XXX FIXINDEX -- should implement order by in soledad + # FIXME ---------------------------------------------- return sorted(all_docs, key=lambda item: item.content['uid']) def all_uid_iter(self): @@ -1573,4 +1565,4 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, self.mbox, self.count()) # XXX should implement __eq__ also !!! - # --- use the content hash for that, will be used for dedup. + # use chash... -- cgit v1.2.3 From 9ef1cd79397d811575826025b924c615e6ce2aa4 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 17 Jan 2014 02:51:31 -0400 Subject: Add a fetch_headers for mass-header fetch queries --- src/leap/mail/imap/messages.py | 159 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 154 insertions(+), 5 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index f968c47..7a21009 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -964,10 +964,30 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, FLAGS_DOC = "FLAGS" HEADERS_DOC = "HEADERS" CONTENT_DOC = "CONTENT" + """ + RECENT_DOC is a document that stores a list of the UIDs + with the recent flag for this mailbox. It deserves a special treatment + because: + (1) it cannot be set by the user + (2) it's a flag that we set inmediately after a fetch, which is quite + often. + (3) we need to be able to set/unset it in batches without doing a single + write for each element in the sequence. + """ RECENT_DOC = "RECENT" + """ + HDOCS_SET_DOC is a document that stores a set of the Document-IDs + (the u1db index) for all the headers documents for a given mailbox. + We use it to prefetch massively all the headers for a mailbox. + This is the second massive query, after fetching all the FLAGS, that + a MUA will do in a case where we do not have local disk cache. 
+ """ + HDOCS_SET_DOC = "HDOCS_SET" templates = { + # Message Level + FLAGS_DOC: { fields.TYPE_KEY: fields.TYPE_FLAGS_VAL, fields.UID_KEY: 1, # XXX moe to a local table @@ -1007,14 +1027,25 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, fields.MULTIPART_KEY: False, }, + # Mailbox Level + RECENT_DOC: { fields.TYPE_KEY: fields.TYPE_RECENT_VAL, fields.MBOX_KEY: fields.INBOX_VAL, fields.RECENTFLAGS_KEY: [], + }, + + HDOCS_SET_DOC: { + fields.TYPE_KEY: fields.TYPE_HDOCS_SET_VAL, + fields.MBOX_KEY: fields.INBOX_VAL, + fields.HDOCS_SET_KEY: [], } + + } _rdoc_lock = threading.Lock() + _hdocset_lock = threading.Lock() def __init__(self, mbox=None, soledad=None): """ @@ -1045,10 +1076,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, self.mbox = self._parse_mailbox_name(mbox) self._soledad = soledad self.__rflags = None + self.__hdocset = None self.initialize_db() - # ensure that we have a recent-flags doc + # ensure that we have a recent-flags and a hdocs-sec doc self._get_or_create_rdoc() + self._get_or_create_hdocset() def _get_empty_doc(self, _type=FLAGS_DOC): """ @@ -1073,6 +1106,18 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, rdoc[fields.MBOX_KEY] = self.mbox self._soledad.create_doc(rdoc) + def _get_or_create_hdocset(self): + """ + Try to retrieve the hdocs-set doc for this MessageCollection, + and create one if not found. + """ + hdocset = self._get_hdocset_doc() + if not hdocset: + hdocset = self._get_empty_doc(self.HDOCS_SET_DOC) + if self.mbox != fields.INBOX_VAL: + hdocset[fields.MBOX_KEY] = self.mbox + self._soledad.create_doc(hdocset) + def _do_parse(self, raw): """ Parse raw message and return it along with @@ -1222,10 +1267,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # first, regular docs: flags and headers self._soledad.create_doc(fd) - # XXX should check for content duplication on headers too # but with chash. !!! 
- self._soledad.create_doc(hd) + hdoc = self._soledad.create_doc(hd) + # We add the newly created hdoc to the fast-access set of + # headers documents associated with the mailbox. + self.add_hdocset_docid(hdoc.doc_id) # and last, but not least, try to create # content docs if not already there. @@ -1258,7 +1305,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, d.addCallback(self._remove_cb) return d + # # getters: specific queries + # + + # recent flags def _get_recent_flags(self): """ @@ -1310,14 +1361,85 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, def _get_recent_doc(self): """ - Get recent-flags document for this inbox. + Get recent-flags document for this mailbox. """ curried = partial( self._soledad.get_from_index, fields.TYPE_MBOX_IDX, fields.TYPE_RECENT_VAL, self.mbox) curried.expected = "rdoc" - return try_unique_query(curried) + with self._rdoc_lock: + return try_unique_query(curried) + + # headers-docs-set + + def _get_hdocset(self): + """ + An accessor for the hdocs-set for this mailbox. + """ + if not self.__hdocset: + hdocset_doc = self._get_hdocset_doc() + value = set(hdocset_doc.content.get( + fields.HDOCS_SET_KEY, [])) + self.__hdocset = value + return self.__hdocset + + def _set_hdocset(self, value): + """ + Setter for the hdocs-set for this mailbox. + """ + hdocset_doc = self._get_hdocset_doc() + newv = set(value) + self.__hdocset = newv + + with self._hdocset_lock: + hdocset_doc.content[fields.HDOCS_SET_KEY] = list(newv) + # XXX should deferLater 0 it? + self._soledad.put_doc(hdocset_doc) + + _hdocset = property( + _get_hdocset, _set_hdocset, + doc="Set of Document-IDs for the headers docs associated " + "with this mailbox.") + + def _get_hdocset_doc(self): + """ + Get hdocs-set document for this mailbox. 
+ """ + curried = partial( + self._soledad.get_from_index, + fields.TYPE_MBOX_IDX, + fields.TYPE_HDOCS_SET_VAL, self.mbox) + curried.expected = "hdocset" + with self._hdocset_lock: + hdocset_doc = try_unique_query(curried) + return hdocset_doc + + def remove_hdocset_docids(self, docids): + """ + Remove the given document IDs from the set of + header-documents associated with this mailbox. + """ + self._hdocset = self._hdocset.difference( + set(docids)) + + def remove_hdocset_docid(self, docid): + """ + Remove the given document ID from the set of + header-documents associated with this mailbox. + """ + self._hdocset = self._hdocset.difference( + set([docid])) + + def add_hdocset_docid(self, docid): + """ + Add the given document ID to the set of + header-documents associated with this mailbox. + """ + hdocset = self._hdocset + self._hdocset = hdocset.union(set([docid])) + + # individual doc getters, message layer. def _get_fdoc_from_chash(self, chash): """ @@ -1456,6 +1578,30 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, fields.TYPE_FLAGS_VAL, self.mbox))) return all_flags + def all_flags_chash(self): + """ + Return a dict with the content-hash for all flag documents + for this mailbox. + """ + all_flags_chash = dict((( + doc.content[self.UID_KEY], + doc.content[self.CONTENT_HASH_KEY]) for doc in + self._soledad.get_from_index( + fields.TYPE_MBOX_IDX, + fields.TYPE_FLAGS_VAL, self.mbox))) + return all_flags_chash + + def all_headers(self): + """ + Return a dict with all the headers documents for this + mailbox. + """ + all_headers = dict((( + doc.content[self.CONTENT_HASH_KEY], + doc.content[self.HEADERS_KEY]) for doc in + self._soledad.get_docs(self._hdocset))) + return all_headers + def count(self): """ Return the count of messages for this mailbox. @@ -1509,6 +1655,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, def count_recent(self): """ Count all messages with the `Recent` flag. 
+ It just retrieves the length of the recent_flags set, + which is stored in a specific type of document for + this collection. :returns: count :rtype: int -- cgit v1.2.3 From 28694a321a81f4cbe5f4873cdc55e6d3f471dd48 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Fri, 17 Jan 2014 15:07:37 -0300 Subject: Fix encodings usage, use custom json.loads method. Also remove some unused imports. --- src/leap/mail/imap/messages.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 22de356..28bd272 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -494,8 +494,8 @@ class LeapMessage(fields, MailParser, MBoxParser): if not charset: charset = self._get_charset(body) try: - body = body.decode(charset).encode(charset) - except (UnicodeEncodeError, UnicodeDecodeError) as e: + body = body.encode(charset) + except UnicodeError as e: logger.error("Unicode error {0}".format(e)) body = body.encode(charset, 'replace') -- cgit v1.2.3 From 90d3062b764c405398adb22a5fbaf4f89c6e8d26 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 20 Jan 2014 12:56:30 -0400 Subject: make the read/write operations over sets atomic Fixes: #5009 --- src/leap/mail/imap/messages.py | 100 ++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 41 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index d2c0950..378738e 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -1044,8 +1044,15 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, } + # Different locks for wrapping both the u1db document getting/setting + # and the property getting/settting in an atomic operation. 
+ + # TODO we would abstract this to a SoledadProperty class + _rdoc_lock = threading.Lock() + _rdoc_property_lock = threading.Lock() _hdocset_lock = threading.Lock() + _hdocset_property_lock = threading.Lock() def __init__(self, mbox=None, soledad=None): """ @@ -1316,20 +1323,20 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, An accessor for the recent-flags set for this mailbox. """ if not self.__rflags: - rdoc = self._get_recent_doc() - self.__rflags = set(rdoc.content.get( - fields.RECENTFLAGS_KEY, [])) + with self._rdoc_lock: + rdoc = self._get_recent_doc() + self.__rflags = set(rdoc.content.get( + fields.RECENTFLAGS_KEY, [])) return self.__rflags def _set_recent_flags(self, value): """ Setter for the recent-flags set for this mailbox. """ - rdoc = self._get_recent_doc() - newv = set(value) - self.__rflags = newv - with self._rdoc_lock: + rdoc = self._get_recent_doc() + newv = set(value) + self.__rflags = newv rdoc.content[fields.RECENTFLAGS_KEY] = list(newv) # XXX should deferLater 0 it? self._soledad.put_doc(rdoc) @@ -1338,38 +1345,44 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, _get_recent_flags, _set_recent_flags, doc="Set of UIDs with the recent flag for this mailbox.") + def _get_recent_doc(self): + """ + Get recent-flags document for this mailbox. + """ + curried = partial( + self._soledad.get_from_index, + fields.TYPE_MBOX_IDX, + fields.TYPE_RECENT_VAL, self.mbox) + curried.expected = "rdoc" + rdoc = try_unique_query(curried) + return rdoc + + # Property-set modification (protected by a different + # lock to give atomicity to the read/write operation) + def unset_recent_flags(self, uids): """ Unset Recent flag for a sequence of uids. """ - self.recent_flags = self.recent_flags.difference( - set(uids)) + with self._rdoc_property_lock: + self.recent_flags = self.recent_flags.difference( + set(uids)) def unset_recent_flag(self, uid): """ Unset Recent flag for a given uid. 
""" - self.recent_flags = self.recent_flags.difference( - set([uid])) + with self._rdoc_property_lock: + self.recent_flags = self.recent_flags.difference( + set([uid])) def set_recent_flag(self, uid): """ Set Recent flag for a given uid. """ - self.recent_flags = self.recent_flags.union( - set([uid])) - - def _get_recent_doc(self): - """ - Get recent-flags document for this mailbox. - """ - curried = partial( - self._soledad.get_from_index, - fields.TYPE_MBOX_IDX, - fields.TYPE_RECENT_VAL, self.mbox) - curried.expected = "rdoc" - with self._rdoc_lock: - return try_unique_query(curried) + with self._rdoc_property_lock: + self.recent_flags = self.recent_flags.union( + set([uid])) # headers-docs-set @@ -1378,21 +1391,21 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, An accessor for the hdocs-set for this mailbox. """ if not self.__hdocset: - hdocset_doc = self._get_hdocset_doc() - value = set(hdocset_doc.content.get( - fields.HDOCS_SET_KEY, [])) - self.__hdocset = value + with self._hdocset_lock: + hdocset_doc = self._get_hdocset_doc() + value = set(hdocset_doc.content.get( + fields.HDOCS_SET_KEY, [])) + self.__hdocset = value return self.__hdocset def _set_hdocset(self, value): """ Setter for the hdocs-set for this mailbox. """ - hdocset_doc = self._get_hdocset_doc() - newv = set(value) - self.__hdocset = newv - with self._hdocset_lock: + hdocset_doc = self._get_hdocset_doc() + newv = set(value) + self.__hdocset = newv hdocset_doc.content[fields.HDOCS_SET_KEY] = list(newv) # XXX should deferLater 0 it? 
self._soledad.put_doc(hdocset_doc) @@ -1411,33 +1424,38 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, fields.TYPE_MBOX_IDX, fields.TYPE_HDOCS_SET_VAL, self.mbox) curried.expected = "hdocset" - with self._hdocset_lock: - hdocset_doc = try_unique_query(curried) + hdocset_doc = try_unique_query(curried) return hdocset_doc + # Property-set modification (protected by a different + # lock to give atomicity to the read/write operation) + def remove_hdocset_docids(self, docids): """ Remove the given document IDs from the set of header-documents associated with this mailbox. """ - self._hdocset = self._hdocset.difference( - set(docids)) + with self._hdocset_property_lock: + self._hdocset = self._hdocset.difference( + set(docids)) def remove_hdocset_docid(self, docid): """ Remove the given document ID from the set of header-documents associated with this mailbox. """ - self._hdocset = self._hdocset.difference( - set([docid])) + with self._hdocset_property_lock: + self._hdocset = self._hdocset.difference( + set([docid])) def add_hdocset_docid(self, docid): """ Add the given document ID to the set of header-documents associated with this mailbox. """ - hdocset = self._hdocset - self._hdocset = hdocset.union(set([docid])) + with self._hdocset_property_lock: + self._hdocset = self._hdocset.union( + set([docid])) # individual doc getters, message layer. 
-- cgit v1.2.3 From 22c106a7306446a3fa9689f5942a86a53ec884b4 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 21 Jan 2014 01:04:37 -0400 Subject: workaround for recursionlimit due to qtreactor --- src/leap/mail/imap/messages.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 378738e..cd4d85f 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -528,7 +528,6 @@ class LeapMessage(fields, MailParser, MBoxParser): body = self._bdoc.content.get(self.RAW_KEY, "") content_type = bdoc.content.get('content-type', "") charset = first(CHARSET_RE.findall(content_type)) - logger.debug("Got charset from header: %s" % (charset,)) if not charset: charset = self._get_charset(body) try: @@ -665,7 +664,6 @@ class LeapMessage(fields, MailParser, MBoxParser): try: pmap_dict = self._get_part_from_parts_map(part + 1) except KeyError: - logger.debug("getSubpart for %s: KeyError" % (part,)) raise IndexError return MessagePart(self._soledad, pmap_dict) -- cgit v1.2.3 From c2e052a08789057d550a0442caa28b27ebc4b416 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 22 Jan 2014 11:01:05 -0300 Subject: Add find_charset helper and use where is needed. 
--- src/leap/mail/imap/messages.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index cd4d85f..862a9f2 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -38,7 +38,7 @@ from leap.common.check import leap_assert, leap_assert_type from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset from leap.mail import walk -from leap.mail.utils import first +from leap.mail.utils import first, find_charset from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields @@ -92,10 +92,7 @@ def try_unique_query(curried): except Exception as exc: logger.exception("Unhandled error %r" % exc) -CHARSET_PATTERN = r"""charset=([\w-]+)""" MSGID_PATTERN = r"""<([\w@.]+)>""" - -CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) MSGID_RE = re.compile(MSGID_PATTERN) @@ -177,9 +174,9 @@ class MessagePart(object): if payload: content_type = self._get_ctype_from_document(phash) - charset = first(CHARSET_RE.findall(content_type)) + charset = find_charset(content_type) logger.debug("Got charset from header: %s" % (charset,)) - if not charset: + if charset is None: charset = self._get_charset(payload) try: payload = payload.encode(charset) @@ -527,8 +524,8 @@ class LeapMessage(fields, MailParser, MBoxParser): if bdoc: body = self._bdoc.content.get(self.RAW_KEY, "") content_type = bdoc.content.get('content-type', "") - charset = first(CHARSET_RE.findall(content_type)) - if not charset: + charset = find_charset(content_type) + if charset is None: charset = self._get_charset(body) try: body = body.encode(charset) -- cgit v1.2.3 From 61af338b0dee8a56cd0f302502fe7cd9dc8bc5d1 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 22 Jan 2014 11:03:58 -0300 Subject: Handle non-ascii headers. Closes #5021. 
--- src/leap/mail/imap/messages.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 862a9f2..5bb5f1c 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -605,18 +605,26 @@ class LeapMessage(fields, MailParser, MBoxParser): if isinstance(headers, list): headers = dict(headers) + # default to most likely standard + charset = find_charset(headers, "utf-8") + # twisted imap server expects *some* headers to be lowercase # XXX refactor together with MessagePart method - headers = dict( - (str(key), str(value)) if key.lower() != "content-type" - else (str(key.lower()), str(value)) - for (key, value) in headers.items()) + headers2 = dict() + for key, value in headers.items(): + if key.lower() == "content-type": + key = key.lower() - # unpack and filter original dict by negate-condition - filter_by_cond = [(key, val) for key, val - in headers.items() if cond(key)] + if not isinstance(key, str): + key = key.encode(charset, 'replace') + if not isinstance(value, str): + value = value.encode(charset, 'replace') + + # filter original dict by negate-condition + if cond(key): + headers2[key] = value - return dict(filter_by_cond) + return headers2 def _get_headers(self): """ -- cgit v1.2.3 From 17ea6fd404fd606c74776dc05ce769a7df43569a Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Mon, 27 Jan 2014 14:49:16 -0300 Subject: Use repr() on exceptions, inform if using 'replace'. 
--- src/leap/mail/imap/messages.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 5bb5f1c..34304ea 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -178,10 +178,11 @@ class MessagePart(object): logger.debug("Got charset from header: %s" % (charset,)) if charset is None: charset = self._get_charset(payload) + logger.debug("Got charset: %s" % (charset,)) try: payload = payload.encode(charset) except (UnicodeEncodeError, UnicodeDecodeError) as e: - logger.error("Unicode error {0}".format(e)) + logger.error("Unicode error, using 'replace'. {0!r}".format(e)) payload = payload.encode(charset, 'replace') fd.write(payload) @@ -530,7 +531,7 @@ class LeapMessage(fields, MailParser, MBoxParser): try: body = body.encode(charset) except UnicodeError as e: - logger.error("Unicode error {0}".format(e)) + logger.error("Unicode error, using 'replace'. {0!r}".format(e)) body = body.encode(charset, 'replace') # We are still returning funky characters from here. 
-- cgit v1.2.3 From 4ae6ad57a0f80143e3ded867c1fdd2264804a775 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 21 Jan 2014 19:22:09 -0400 Subject: memory store for append/fetch/copy --- src/leap/mail/imap/messages.py | 206 +++++++++++++++++++++++++++++++---------- 1 file changed, 155 insertions(+), 51 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 34304ea..ef0b0a1 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -42,6 +42,7 @@ from leap.mail.utils import first, find_charset from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields +from leap.mail.imap.memorystore import MessageDict from leap.mail.imap.parser import MailParser, MBoxParser from leap.mail.messageflow import IMessageConsumer @@ -49,11 +50,20 @@ logger = logging.getLogger(__name__) # TODO ------------------------------------------------------------ +# [ ] Add ref to incoming message during add_msg # [ ] Add linked-from info. # [ ] Delete incoming mail only after successful write! # [ ] Remove UID from syncable db. Store only those indexes locally. +# XXX no longer needed, since i'm using proxies instead of direct weakrefs +def maybe_call(thing): + """ + Return the same thing, or the result of its invocation if it is a callable. + """ + return thing() if callable(thing) else thing + + def lowerdict(_dict): """ Return a dict with the keys in lowercase. @@ -333,7 +343,7 @@ class LeapMessage(fields, MailParser, MBoxParser): implements(imap4.IMessage) - def __init__(self, soledad, uid, mbox, collection=None): + def __init__(self, soledad, uid, mbox, collection=None, container=None): """ Initializes a LeapMessage. 
@@ -345,12 +355,15 @@ class LeapMessage(fields, MailParser, MBoxParser): :type mbox: basestring :param collection: a reference to the parent collection object :type collection: MessageCollection + :param container: a IMessageContainer implementor instance + :type container: IMessageContainer """ MailParser.__init__(self) self._soledad = soledad self._uid = int(uid) self._mbox = self._parse_mailbox_name(mbox) self._collection = collection + self._container = container self.__chash = None self.__bdoc = None @@ -361,12 +374,28 @@ class LeapMessage(fields, MailParser, MBoxParser): An accessor to the flags document. """ if all(map(bool, (self._uid, self._mbox))): - fdoc = self._get_flags_doc() + fdoc = None + if self._container is not None: + fdoc = self._container.fdoc + if not fdoc: + fdoc = self._get_flags_doc() if fdoc: - self.__chash = fdoc.content.get( + fdoc_content = maybe_call(fdoc.content) + self.__chash = fdoc_content.get( fields.CONTENT_HASH_KEY, None) return fdoc + @property + def _hdoc(self): + """ + An accessor to the headers document. + """ + if self._container is not None: + hdoc = self._container.hdoc + if hdoc: + return hdoc + return self._get_headers_doc() + @property def _chash(self): """ @@ -375,17 +404,10 @@ class LeapMessage(fields, MailParser, MBoxParser): if not self._fdoc: return None if not self.__chash and self._fdoc: - self.__chash = self._fdoc.content.get( + self.__chash = maybe_call(self._fdoc.content).get( fields.CONTENT_HASH_KEY, None) return self.__chash - @property - def _hdoc(self): - """ - An accessor to the headers document. 
- """ - return self._get_headers_doc() - @property def _bdoc(self): """ @@ -422,7 +444,7 @@ class LeapMessage(fields, MailParser, MBoxParser): flags = [] fdoc = self._fdoc if fdoc: - flags = fdoc.content.get(self.FLAGS_KEY, None) + flags = maybe_call(fdoc.content).get(self.FLAGS_KEY, None) msgcol = self._collection @@ -449,6 +471,8 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: a SoledadDocument instance :rtype: SoledadDocument """ + # XXX use memory store ...! + leap_assert(isinstance(flags, tuple), "flags need to be a tuple") log.msg('setting flags: %s (%s)' % (self._uid, flags)) @@ -461,7 +485,9 @@ class LeapMessage(fields, MailParser, MBoxParser): doc.content[self.FLAGS_KEY] = flags doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags - self._soledad.put_doc(doc) + + if getattr(doc, 'store', None) != "mem": + self._soledad.put_doc(doc) def addFlags(self, flags): """ @@ -521,18 +547,26 @@ class LeapMessage(fields, MailParser, MBoxParser): """ # TODO refactor with getBodyFile in MessagePart fd = StringIO.StringIO() - bdoc = self._bdoc - if bdoc: - body = self._bdoc.content.get(self.RAW_KEY, "") - content_type = bdoc.content.get('content-type', "") + if self._bdoc is not None: + bdoc_content = self._bdoc.content + body = bdoc_content.get(self.RAW_KEY, "") + content_type = bdoc_content.get('content-type', "") charset = find_charset(content_type) + logger.debug('got charset from content-type: %s' % charset) if charset is None: charset = self._get_charset(body) try: body = body.encode(charset) except UnicodeError as e: - logger.error("Unicode error, using 'replace'. 
{0!r}".format(e)) - body = body.encode(charset, 'replace') + logger.error("Unicode error {0}".format(e)) + logger.debug("Attempted to encode with: %s" % charset) + try: + body = body.encode(charset, 'replace') + except UnicodeError as e: + try: + body = body.encode('utf-8', 'replace') + except: + pass # We are still returning funky characters from here. else: @@ -567,7 +601,8 @@ class LeapMessage(fields, MailParser, MBoxParser): """ size = None if self._fdoc: - size = self._fdoc.content.get(self.SIZE_KEY, False) + fdoc_content = maybe_call(self._fdoc.content) + size = fdoc_content.get(self.SIZE_KEY, False) else: logger.warning("No FLAGS doc for %s:%s" % (self._mbox, self._uid)) @@ -632,7 +667,8 @@ class LeapMessage(fields, MailParser, MBoxParser): Return the headers dict for this message. """ if self._hdoc is not None: - headers = self._hdoc.content.get(self.HEADERS_KEY, {}) + hdoc_content = maybe_call(self._hdoc.content) + headers = hdoc_content.get(self.HEADERS_KEY, {}) return headers else: @@ -646,7 +682,8 @@ class LeapMessage(fields, MailParser, MBoxParser): Return True if this message is multipart. """ if self._fdoc: - is_multipart = self._fdoc.content.get(self.MULTIPART_KEY, False) + fdoc_content = maybe_call(self._fdoc.content) + is_multipart = fdoc_content.get(self.MULTIPART_KEY, False) return is_multipart else: logger.warning( @@ -688,7 +725,8 @@ class LeapMessage(fields, MailParser, MBoxParser): logger.warning("Tried to get part but no HDOC found!") return None - pmap = self._hdoc.content.get(fields.PARTS_MAP_KEY, {}) + hdoc_content = maybe_call(self._hdoc.content) + pmap = hdoc_content.get(fields.PARTS_MAP_KEY, {}) return pmap[str(part)] def _get_flags_doc(self): @@ -724,16 +762,33 @@ class LeapMessage(fields, MailParser, MBoxParser): Return the document that keeps the body for this message. 
""" - body_phash = self._hdoc.content.get( + hdoc_content = maybe_call(self._hdoc.content) + body_phash = hdoc_content.get( fields.BODY_KEY, None) if not body_phash: logger.warning("No body phash for this document!") return None - body_docs = self._soledad.get_from_index( - fields.TYPE_P_HASH_IDX, - fields.TYPE_CONTENT_VAL, str(body_phash)) - return first(body_docs) + # XXX get from memstore too... + # if memstore: memstore.get_phrash + # memstore should keep a dict with weakrefs to the + # phash doc... + + if self._container is not None: + bdoc = self._container.memstore.get_by_phash(body_phash) + if bdoc: + return bdoc + else: + print "no doc for that phash found!" + + # no memstore or no doc found there + if self._soledad: + body_docs = self._soledad.get_from_index( + fields.TYPE_P_HASH_IDX, + fields.TYPE_CONTENT_VAL, str(body_phash)) + return first(body_docs) + else: + logger.error("No phash in container, and no soledad found!") def __getitem__(self, key): """ @@ -746,7 +801,7 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: The content value indexed by C{key} or None :rtype: str """ - return self._fdoc.content.get(key, None) + return maybe_call(self._fdoc.content).get(key, None) # setters @@ -790,6 +845,8 @@ class LeapMessage(fields, MailParser, MBoxParser): # until we think about a good way of deorphaning. # Maybe a crawler of unreferenced docs. + # XXX remove from memory store!!! + # XXX implement elijah's idea of using a PUT document as a # token to ensure consistency in the removal. @@ -957,7 +1014,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, """ A collection of messages, surprisingly. - It is tied to a selected mailbox name that is passed to constructor. + It is tied to a selected mailbox name that is passed to its constructor. Implements a filter query over the messages contained in a soledad database. 
""" @@ -1058,7 +1115,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, _hdocset_lock = threading.Lock() _hdocset_property_lock = threading.Lock() - def __init__(self, mbox=None, soledad=None): + def __init__(self, mbox=None, soledad=None, memstore=None): """ Constructor for MessageCollection. @@ -1068,13 +1125,18 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, MessageCollection for each mailbox, instead of treating them as a property of each message. + We are passed an instance of MemoryStore, the same for the + SoledadBackedAccount, that we use as a read cache and a buffer + for writes. + :param mbox: the name of the mailbox. It is the name with which we filter the query over the - messages database + messages database. :type mbox: str - :param soledad: Soledad database :type soledad: Soledad instance + :param memstore: a MemoryStore instance + :type memstore: MemoryStore """ MailParser.__init__(self) leap_assert(mbox, "Need a mailbox name to initialize") @@ -1086,6 +1148,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # okay, all in order, keep going... self.mbox = self._parse_mailbox_name(mbox) self._soledad = soledad + self._memstore = memstore + self.__rflags = None self.__hdocset = None self.initialize_db() @@ -1241,6 +1305,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # when all the processing is done. # TODO add the linked-from info ! 
+ # TODO add reference to the original message logger.debug('adding message') if flags is None: @@ -1273,24 +1338,29 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, hd[key] = parts_map[key] del parts_map - # Saving ---------------------------------------- - self.set_recent_flag(uid) + # The MessageContainer expects a dict, zero-indexed + # XXX review-me + cdocs = dict((index, doc) for index, doc in + enumerate(walk.get_raw_docs(msg, parts))) + print "cdocs is", cdocs - # first, regular docs: flags and headers - self._soledad.create_doc(fd) + # Saving ---------------------------------------- # XXX should check for content duplication on headers too # but with chash. !!! - hdoc = self._soledad.create_doc(hd) + + # XXX adapt hdocset to use memstore + #hdoc = self._soledad.create_doc(hd) # We add the newly created hdoc to the fast-access set of # headers documents associated with the mailbox. - self.add_hdocset_docid(hdoc.doc_id) + #self.add_hdocset_docid(hdoc.doc_id) - # and last, but not least, try to create - # content docs if not already there. - cdocs = walk.get_raw_docs(msg, parts) - for cdoc in cdocs: - if not self._content_does_exist(cdoc): - self._soledad.create_doc(cdoc) + # XXX move to memory store too + # self.set_recent_flag(uid) + + # TODO ---- add reference to original doc, to be deleted + # after writes are done. + msg_container = MessageDict(fd, hd, cdocs) + self._memstore.put(self.mbox, uid, msg_container) def _remove_cb(self, result): return result @@ -1321,6 +1391,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # # recent flags + # XXX FIXME ------------------------------------- + # This should be rewritten to use memory store. def _get_recent_flags(self): """ @@ -1390,6 +1462,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # headers-docs-set + # XXX FIXME ------------------------------------- + # This should be rewritten to use memory store. 
+ def _get_hdocset(self): """ An accessor for the hdocs-set for this mailbox. @@ -1532,7 +1607,16 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, or None if not found. :rtype: LeapMessage """ - msg = LeapMessage(self._soledad, uid, self.mbox, collection=self) + print "getting msg by id!" + msg_container = self._memstore.get(self.mbox, uid) + print "msg container", msg_container + if msg_container is not None: + print "getting LeapMessage (from memstore)" + msg = LeapMessage(None, uid, self.mbox, collection=self, + container=msg_container) + print "got msg:", msg + else: + msg = LeapMessage(self._soledad, uid, self.mbox, collection=self) if not msg.does_exist(): return None return msg @@ -1570,11 +1654,19 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, ascending order. """ # XXX we should get this from the uid table, local-only - all_uids = (doc.content[self.UID_KEY] for doc in - self._soledad.get_from_index( - fields.TYPE_MBOX_IDX, - fields.TYPE_FLAGS_VAL, self.mbox)) - return (u for u in sorted(all_uids)) + # XXX FIXME ------------- + # This should be cached in the memstoretoo + db_uids = set([doc.content[self.UID_KEY] for doc in + self._soledad.get_from_index( + fields.TYPE_MBOX_IDX, + fields.TYPE_FLAGS_VAL, self.mbox)]) + if self._memstore is not None: + mem_uids = self._memstore.get_uids(self.mbox) + uids = db_uids.union(set(mem_uids)) + else: + uids = db_uids + + return (u for u in sorted(uids)) def reset_last_uid(self, param): """ @@ -1592,12 +1684,21 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, """ Return a dict with all flags documents for this mailbox. 
""" + # XXX get all from memstore and cahce it there all_flags = dict((( doc.content[self.UID_KEY], doc.content[self.FLAGS_KEY]) for doc in self._soledad.get_from_index( fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox))) + if self._memstore is not None: + # XXX + uids = self._memstore.get_uids(self.mbox) + fdocs = [(uid, self._memstore.get(self.mbox, uid).fdoc) + for uid in uids] + for uid, doc in fdocs: + all_flags[uid] = doc.content[self.FLAGS_KEY] + return all_flags def all_flags_chash(self): @@ -1630,9 +1731,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, :rtype: int """ + # XXX We could cache this in memstore too until next write... count = self._soledad.get_count_from_index( fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox) + if self._memstore is not None: + count += self._memstore.count_new() return count # unseen messages -- cgit v1.2.3 From eaa4bcb241d5d55c4fd2458cb05c74fcdc79368c Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 23 Jan 2014 02:32:52 -0400 Subject: split messageparts --- src/leap/mail/imap/messages.py | 423 +++-------------------------------------- 1 file changed, 24 insertions(+), 399 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index ef0b0a1..67e5a41 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -24,13 +24,12 @@ import time import threading import StringIO -from collections import defaultdict, namedtuple +from collections import defaultdict from functools import partial from twisted.mail import imap4 from twisted.internet import defer from twisted.python import log -from u1db import errors as u1db_errors from zope.interface import implements from zope.proxy import sameProxiedObjects @@ -38,13 +37,12 @@ from leap.common.check import leap_assert, leap_assert_type from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset from 
leap.mail import walk -from leap.mail.utils import first, find_charset +from leap.mail.utils import first, find_charset, lowerdict from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields from leap.mail.imap.memorystore import MessageDict from leap.mail.imap.parser import MailParser, MBoxParser -from leap.mail.messageflow import IMessageConsumer logger = logging.getLogger(__name__) @@ -52,29 +50,18 @@ logger = logging.getLogger(__name__) # [ ] Add ref to incoming message during add_msg # [ ] Add linked-from info. +# * Need a new type of documents: linkage info. +# * HDOCS are linked from FDOCs (ref to chash) +# * CDOCS are linked from HDOCS (ref to chash) + # [ ] Delete incoming mail only after successful write! # [ ] Remove UID from syncable db. Store only those indexes locally. +CHARSET_PATTERN = r"""charset=([\w-]+)""" +MSGID_PATTERN = r"""<([\w@.]+)>""" -# XXX no longer needed, since i'm using proxies instead of direct weakrefs -def maybe_call(thing): - """ - Return the same thing, or the result of its invocation if it is a callable. - """ - return thing() if callable(thing) else thing - - -def lowerdict(_dict): - """ - Return a dict with the keys in lowercase. - - :param _dict: the dict to convert - :rtype: dict - """ - # TODO should properly implement a CaseInsensitive dict. - # Look into requests code. - return dict((key.lower(), value) - for key, value in _dict.items()) +CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) +MSGID_RE = re.compile(MSGID_PATTERN) def try_unique_query(curried): @@ -102,232 +89,6 @@ def try_unique_query(curried): except Exception as exc: logger.exception("Unhandled error %r" % exc) -MSGID_PATTERN = r"""<([\w@.]+)>""" -MSGID_RE = re.compile(MSGID_PATTERN) - - -class MessagePart(object): - """ - IMessagePart implementor. - It takes a subpart message and is able to find - the inner parts. - - Excusatio non petita: see the interface documentation. 
- """ - - implements(imap4.IMessagePart) - - def __init__(self, soledad, part_map): - """ - Initializes the MessagePart. - - :param part_map: a dictionary containing the parts map for this - message - :type part_map: dict - """ - # TODO - # It would be good to pass the uid/mailbox also - # for references while debugging. - - # We have a problem on bulk moves, and is - # that when the fetch on the new mailbox is done - # the parts maybe are not complete. - # So we should be able to fail with empty - # docs until we solve that. The ideal would be - # to gather the results of the deferred operations - # to signal the operation is complete. - #leap_assert(part_map, "part map dict cannot be null") - self._soledad = soledad - self._pmap = part_map - - def getSize(self): - """ - Return the total size, in octets, of this message part. - - :return: size of the message, in octets - :rtype: int - """ - if not self._pmap: - return 0 - size = self._pmap.get('size', None) - if not size: - logger.error("Message part cannot find size in the partmap") - return size - - def getBodyFile(self): - """ - Retrieve a file object containing only the body of this message. 
- - :return: file-like object opened for reading - :rtype: StringIO - """ - fd = StringIO.StringIO() - if self._pmap: - multi = self._pmap.get('multi') - if not multi: - phash = self._pmap.get("phash", None) - else: - pmap = self._pmap.get('part_map') - first_part = pmap.get('1', None) - if first_part: - phash = first_part['phash'] - - if not phash: - logger.warning("Could not find phash for this subpart!") - payload = str("") - else: - payload = self._get_payload_from_document(phash) - - else: - logger.warning("Message with no part_map!") - payload = str("") - - if payload: - content_type = self._get_ctype_from_document(phash) - charset = find_charset(content_type) - logger.debug("Got charset from header: %s" % (charset,)) - if charset is None: - charset = self._get_charset(payload) - logger.debug("Got charset: %s" % (charset,)) - try: - payload = payload.encode(charset) - except (UnicodeEncodeError, UnicodeDecodeError) as e: - logger.error("Unicode error, using 'replace'. {0!r}".format(e)) - payload = payload.encode(charset, 'replace') - - fd.write(payload) - fd.seek(0) - return fd - - # TODO cache the phash retrieval - def _get_payload_from_document(self, phash): - """ - Gets the message payload from the content document. - - :param phash: the payload hash to retrieve by. - :type phash: basestring - """ - cdocs = self._soledad.get_from_index( - fields.TYPE_P_HASH_IDX, - fields.TYPE_CONTENT_VAL, str(phash)) - - cdoc = first(cdocs) - if not cdoc: - logger.warning( - "Could not find the content doc " - "for phash %s" % (phash,)) - payload = cdoc.content.get(fields.RAW_KEY, "") - return payload - - # TODO cache the pahash retrieval - def _get_ctype_from_document(self, phash): - """ - Gets the content-type from the content document. - - :param phash: the payload hash to retrieve by. 
- :type phash: basestring - """ - cdocs = self._soledad.get_from_index( - fields.TYPE_P_HASH_IDX, - fields.TYPE_CONTENT_VAL, str(phash)) - - cdoc = first(cdocs) - if not cdoc: - logger.warning( - "Could not find the content doc " - "for phash %s" % (phash,)) - ctype = cdoc.content.get('ctype', "") - return ctype - - @memoized_method - def _get_charset(self, stuff): - # TODO put in a common class with LeapMessage - """ - Gets (guesses?) the charset of a payload. - - :param stuff: the stuff to guess about. - :type stuff: basestring - :returns: charset - """ - # XXX existential doubt 2. shouldn't we make the scope - # of the decorator somewhat more persistent? - # ah! yes! and put memory bounds. - return get_email_charset(unicode(stuff)) - - def getHeaders(self, negate, *names): - """ - Retrieve a group of message headers. - - :param names: The names of the headers to retrieve or omit. - :type names: tuple of str - - :param negate: If True, indicates that the headers listed in names - should be omitted from the return value, rather - than included. - :type negate: bool - - :return: A mapping of header field names to header field values - :rtype: dict - """ - if not self._pmap: - logger.warning("No pmap in Subpart!") - return {} - headers = dict(self._pmap.get("headers", [])) - - # twisted imap server expects *some* headers to be lowercase - # We could use a CaseInsensitiveDict here... - headers = dict( - (str(key), str(value)) if key.lower() != "content-type" - else (str(key.lower()), str(value)) - for (key, value) in headers.items()) - - names = map(lambda s: s.upper(), names) - if negate: - cond = lambda key: key.upper() not in names - else: - cond = lambda key: key.upper() in names - - # unpack and filter original dict by negate-condition - filter_by_cond = [ - map(str, (key, val)) for - key, val in headers.items() - if cond(key)] - filtered = dict(filter_by_cond) - return filtered - - def isMultipart(self): - """ - Return True if this message is multipart. 
- """ - if not self._pmap: - logger.warning("Could not get part map!") - return False - multi = self._pmap.get("multi", False) - return multi - - def getSubPart(self, part): - """ - Retrieve a MIME submessage - - :type part: C{int} - :param part: The number of the part to retrieve, indexed from 0. - :raise IndexError: Raised if the specified part does not exist. - :raise TypeError: Raised if this message is not multipart. - :rtype: Any object implementing C{IMessagePart}. - :return: The specified sub-part. - """ - if not self.isMultipart(): - raise TypeError - sub_pmap = self._pmap.get("part_map", {}) - try: - part_map = sub_pmap[str(part + 1)] - except KeyError: - logger.debug("getSubpart for %s: KeyError" % (part,)) - raise IndexError - - # XXX check for validity - return MessagePart(self._soledad, part_map) - class LeapMessage(fields, MailParser, MBoxParser): """ @@ -380,7 +141,7 @@ class LeapMessage(fields, MailParser, MBoxParser): if not fdoc: fdoc = self._get_flags_doc() if fdoc: - fdoc_content = maybe_call(fdoc.content) + fdoc_content = fdoc.content self.__chash = fdoc_content.get( fields.CONTENT_HASH_KEY, None) return fdoc @@ -404,7 +165,7 @@ class LeapMessage(fields, MailParser, MBoxParser): if not self._fdoc: return None if not self.__chash and self._fdoc: - self.__chash = maybe_call(self._fdoc.content).get( + self.__chash = self._fdoc.content.get( fields.CONTENT_HASH_KEY, None) return self.__chash @@ -444,7 +205,7 @@ class LeapMessage(fields, MailParser, MBoxParser): flags = [] fdoc = self._fdoc if fdoc: - flags = maybe_call(fdoc.content).get(self.FLAGS_KEY, None) + flags = fdoc.content.get(self.FLAGS_KEY, None) msgcol = self._collection @@ -557,12 +318,12 @@ class LeapMessage(fields, MailParser, MBoxParser): charset = self._get_charset(body) try: body = body.encode(charset) - except UnicodeError as e: - logger.error("Unicode error {0}".format(e)) + except UnicodeError as exc: + logger.error("Unicode error {0}".format(exc)) logger.debug("Attempted to 
encode with: %s" % charset) try: body = body.encode(charset, 'replace') - except UnicodeError as e: + except UnicodeError as exc: try: body = body.encode('utf-8', 'replace') except: @@ -601,7 +362,7 @@ class LeapMessage(fields, MailParser, MBoxParser): """ size = None if self._fdoc: - fdoc_content = maybe_call(self._fdoc.content) + fdoc_content = self._fdoc.content size = fdoc_content.get(self.SIZE_KEY, False) else: logger.warning("No FLAGS doc for %s:%s" % (self._mbox, @@ -667,7 +428,7 @@ class LeapMessage(fields, MailParser, MBoxParser): Return the headers dict for this message. """ if self._hdoc is not None: - hdoc_content = maybe_call(self._hdoc.content) + hdoc_content = self._hdoc.content headers = hdoc_content.get(self.HEADERS_KEY, {}) return headers @@ -682,7 +443,7 @@ class LeapMessage(fields, MailParser, MBoxParser): Return True if this message is multipart. """ if self._fdoc: - fdoc_content = maybe_call(self._fdoc.content) + fdoc_content = self._fdoc.content is_multipart = fdoc_content.get(self.MULTIPART_KEY, False) return is_multipart else: @@ -725,7 +486,7 @@ class LeapMessage(fields, MailParser, MBoxParser): logger.warning("Tried to get part but no HDOC found!") return None - hdoc_content = maybe_call(self._hdoc.content) + hdoc_content = self._hdoc.content pmap = hdoc_content.get(fields.PARTS_MAP_KEY, {}) return pmap[str(part)] @@ -762,7 +523,7 @@ class LeapMessage(fields, MailParser, MBoxParser): Return the document that keeps the body for this message. 
""" - hdoc_content = maybe_call(self._hdoc.content) + hdoc_content = self._hdoc.content body_phash = hdoc_content.get( fields.BODY_KEY, None) if not body_phash: @@ -801,7 +562,7 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: The content value indexed by C{key} or None :rtype: str """ - return maybe_call(self._fdoc.content).get(key, None) + return self._fdoc.content.get(key, None) # setters @@ -874,143 +635,7 @@ class LeapMessage(fields, MailParser, MBoxParser): return self._fdoc is not None -class ContentDedup(object): - """ - Message deduplication. - - We do a query for the content hashes before writing to our beloved - sqlcipher backend of Soledad. This means, by now, that: - - 1. We will not store the same attachment twice, only the hash of it. - 2. We will not store the same message body twice, only the hash of it. - - The first case is useful if you are always receiving the same old memes - from unwary friends that still have not discovered that 4chan is the - generator of the internet. The second will save your day if you have - initiated session with the same account in two different machines. I also - wonder why would you do that, but let's respect each other choices, like - with the religious celebrations, and assume that one day we'll be able - to run Bitmask in completely free phones. Yes, I mean that, the whole GSM - Stack. - """ - - def _content_does_exist(self, doc): - """ - Check whether we already have a content document for a payload - with this hash in our database. - - :param doc: tentative body document - :type doc: dict - :returns: True if that happens, False otherwise. - """ - if not doc: - return False - phash = doc[fields.PAYLOAD_HASH_KEY] - attach_docs = self._soledad.get_from_index( - fields.TYPE_P_HASH_IDX, - fields.TYPE_CONTENT_VAL, str(phash)) - if not attach_docs: - return False - - if len(attach_docs) != 1: - logger.warning("Found more than one copy of phash %s!" 
- % (phash,)) - logger.debug("Found attachment doc with that hash! Skipping save!") - return True - - -SoledadWriterPayload = namedtuple( - 'SoledadWriterPayload', ['mode', 'payload']) - -# TODO we could consider using enum here: -# https://pypi.python.org/pypi/enum - -SoledadWriterPayload.CREATE = 1 -SoledadWriterPayload.PUT = 2 -SoledadWriterPayload.CONTENT_CREATE = 3 - - -""" -SoledadDocWriter was used to avoid writing to the db from multiple threads. -Its use here has been deprecated in favor of a local rw_lock in the client. -But we might want to reuse in in the near future to implement priority queues. -""" - - -class SoledadDocWriter(object): - """ - This writer will create docs serially in the local soledad database. - """ - - implements(IMessageConsumer) - - def __init__(self, soledad): - """ - Initialize the writer. - - :param soledad: the soledad instance - :type soledad: Soledad - """ - self._soledad = soledad - - def _get_call_for_item(self, item): - """ - Return the proper call type for a given item. - - :param item: one of the types defined under the - attributes of SoledadWriterPayload - :type item: int - """ - call = None - payload = item.payload - - if item.mode == SoledadWriterPayload.CREATE: - call = self._soledad.create_doc - elif (item.mode == SoledadWriterPayload.CONTENT_CREATE - and not self._content_does_exist(payload)): - call = self._soledad.create_doc - elif item.mode == SoledadWriterPayload.PUT: - call = self._soledad.put_doc - return call - - def _process(self, queue): - """ - Return the item and the proper call type for the next - item in the queue if any. - - :param queue: the queue from where we'll pick item. - :type queue: Queue - """ - item = queue.get() - call = self._get_call_for_item(item) - return item, call - - def consume(self, queue): - """ - Creates a new document in soledad db. - - :param queue: queue to get item from, with content of the document - to be inserted. 
- :type queue: Queue - """ - empty = queue.empty() - while not empty: - item, call = self._process(queue) - - if call: - # XXX should handle the delete case - # should handle errors - try: - call(item.payload) - except u1db_errors.RevisionConflict as exc: - logger.error("Error: %r" % (exc,)) - raise exc - - empty = queue.empty() - - -class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, - ContentDedup): +class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): """ A collection of messages, surprisingly. @@ -1360,7 +985,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser, # TODO ---- add reference to original doc, to be deleted # after writes are done. msg_container = MessageDict(fd, hd, cdocs) - self._memstore.put(self.mbox, uid, msg_container) + self._memstore.create_message(self.mbox, uid, msg_container) def _remove_cb(self, result): return result -- cgit v1.2.3 From e2218eec4fd91e4648160a05e3debc05efa0d0d9 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 23 Jan 2014 02:36:38 -0400 Subject: add soledadstore class move parts-related bits to messageparts pass soledad in initialization for memory messages --- src/leap/mail/imap/messages.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 67e5a41..46c9dc9 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -41,7 +41,7 @@ from leap.mail.utils import first, find_charset, lowerdict from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields -from leap.mail.imap.memorystore import MessageDict +from leap.mail.imap.memorystore import MessageWrapper from leap.mail.imap.parser import MailParser, MBoxParser logger = logging.getLogger(__name__) @@ -984,7 +984,7 @@ class 
MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # TODO ---- add reference to original doc, to be deleted # after writes are done. - msg_container = MessageDict(fd, hd, cdocs) + msg_container = MessageWrapper(fd, hd, cdocs) self._memstore.create_message(self.mbox, uid, msg_container) def _remove_cb(self, result): @@ -1215,6 +1215,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # and we cannot find it otherwise. This seems to be enough. # XXX do a deferLater instead ?? + # FIXME this won't be needed after the CHECK command is implemented. time.sleep(0.3) return self._get_uid_from_msgidCb(msgid) @@ -1233,11 +1234,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :rtype: LeapMessage """ print "getting msg by id!" - msg_container = self._memstore.get(self.mbox, uid) + msg_container = self._memstore.get_message(self.mbox, uid) print "msg container", msg_container if msg_container is not None: print "getting LeapMessage (from memstore)" - msg = LeapMessage(None, uid, self.mbox, collection=self, + # We pass a reference to soledad just to be able to retrieve + # missing parts that cannot be found in the container, like + # the content docs after a copy. + msg = LeapMessage(self._soledad, uid, self.mbox, collection=self, container=msg_container) print "got msg:", msg else: @@ -1309,7 +1313,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): """ Return a dict with all flags documents for this mailbox. 
""" - # XXX get all from memstore and cahce it there + # XXX get all from memstore and cache it there all_flags = dict((( doc.content[self.UID_KEY], doc.content[self.FLAGS_KEY]) for doc in @@ -1319,7 +1323,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): if self._memstore is not None: # XXX uids = self._memstore.get_uids(self.mbox) - fdocs = [(uid, self._memstore.get(self.mbox, uid).fdoc) + fdocs = [(uid, self._memstore.get_message(self.mbox, uid).fdoc) for uid in uids] for uid, doc in fdocs: all_flags[uid] = doc.content[self.FLAGS_KEY] -- cgit v1.2.3 From 0754dac293730b02942716991d5edc513c36ff7c Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 23 Jan 2014 04:35:10 -0400 Subject: debug info --- src/leap/mail/imap/messages.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 46c9dc9..3c30aa8 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -524,8 +524,10 @@ class LeapMessage(fields, MailParser, MBoxParser): message. """ hdoc_content = self._hdoc.content + print "hdoc: ", hdoc_content body_phash = hdoc_content.get( fields.BODY_KEY, None) + print "body phash: ", body_phash if not body_phash: logger.warning("No body phash for this document!") return None @@ -537,16 +539,19 @@ class LeapMessage(fields, MailParser, MBoxParser): if self._container is not None: bdoc = self._container.memstore.get_by_phash(body_phash) + print "bdoc from container -->", bdoc if bdoc: return bdoc else: print "no doc for that phash found!" + print "nuthing. soledad?" 
# no memstore or no doc found there if self._soledad: body_docs = self._soledad.get_from_index( fields.TYPE_P_HASH_IDX, fields.TYPE_CONTENT_VAL, str(body_phash)) + print "returning body docs,,,", body_docs return first(body_docs) else: logger.error("No phash in container, and no soledad found!") -- cgit v1.2.3 From ff28e22977db802c87f0b7be99e37c6de29183e9 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 23 Jan 2014 13:32:01 -0400 Subject: Unset new flag after successful write --- src/leap/mail/imap/messages.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 3c30aa8..94bd714 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -42,6 +42,7 @@ from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields from leap.mail.imap.memorystore import MessageWrapper +from leap.mail.imap.messageparts import MessagePart from leap.mail.imap.parser import MailParser, MBoxParser logger = logging.getLogger(__name__) @@ -306,15 +307,25 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: file-like object opened for reading :rtype: StringIO """ + def write_fd(body): + fd.write(body) + fd.seek(0) + return fd + # TODO refactor with getBodyFile in MessagePart fd = StringIO.StringIO() if self._bdoc is not None: bdoc_content = self._bdoc.content + if bdoc_content is None: + logger.warning("No BODC content found for message!!!") + return write_fd(str("")) + body = bdoc_content.get(self.RAW_KEY, "") content_type = bdoc_content.get('content-type', "") charset = find_charset(content_type) logger.debug('got charset from content-type: %s' % charset) if charset is None: + # XXX change for find_charset utility charset = self._get_charset(body) try: body = body.encode(charset) @@ -328,15 +339,13 @@ class 
LeapMessage(fields, MailParser, MBoxParser): body = body.encode('utf-8', 'replace') except: pass + finally: + return write_fd(body) # We are still returning funky characters from here. else: logger.warning("No BDOC found for message.") - body = str("") - - fd.write(body) - fd.seek(0) - return fd + return write_fd(str("")) @memoized_method def _get_charset(self, stuff): @@ -524,7 +533,7 @@ class LeapMessage(fields, MailParser, MBoxParser): message. """ hdoc_content = self._hdoc.content - print "hdoc: ", hdoc_content + #print "hdoc: ", hdoc_content body_phash = hdoc_content.get( fields.BODY_KEY, None) print "body phash: ", body_phash @@ -540,10 +549,10 @@ class LeapMessage(fields, MailParser, MBoxParser): if self._container is not None: bdoc = self._container.memstore.get_by_phash(body_phash) print "bdoc from container -->", bdoc - if bdoc: + if bdoc and bdoc.content is not None: return bdoc else: - print "no doc for that phash found!" + print "no doc or not bdoc content for that phash found!" print "nuthing. soledad?" # no memstore or no doc found there @@ -551,7 +560,7 @@ class LeapMessage(fields, MailParser, MBoxParser): body_docs = self._soledad.get_from_index( fields.TYPE_P_HASH_IDX, fields.TYPE_CONTENT_VAL, str(body_phash)) - print "returning body docs,,,", body_docs + print "returning body docs...", body_docs return first(body_docs) else: logger.error("No phash in container, and no soledad found!") -- cgit v1.2.3 From e02db78b1b6d8fe021efd4adb250c64a1dd4bac4 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 24 Jan 2014 05:39:13 -0400 Subject: flags use the memstore * add new/dirty deferred dict to notify when written to disk * fix eventual duplication after copy * fix flag flickering on first retrieval. 
--- src/leap/mail/imap/messages.py | 162 +++++++++++++++++++++++++++-------------- 1 file changed, 106 insertions(+), 56 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 94bd714..c212472 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -37,7 +37,7 @@ from leap.common.check import leap_assert, leap_assert_type from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset from leap.mail import walk -from leap.mail.utils import first, find_charset, lowerdict +from leap.mail.utils import first, find_charset, lowerdict, empty from leap.mail.decorators import deferred from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields @@ -130,6 +130,8 @@ class LeapMessage(fields, MailParser, MBoxParser): self.__chash = None self.__bdoc = None + # XXX make these properties public + @property def _fdoc(self): """ @@ -154,8 +156,9 @@ class LeapMessage(fields, MailParser, MBoxParser): """ if self._container is not None: hdoc = self._container.hdoc - if hdoc: + if hdoc and not empty(hdoc.content): return hdoc + # XXX cache this into the memory store !!! return self._get_headers_doc() @property @@ -248,7 +251,13 @@ class LeapMessage(fields, MailParser, MBoxParser): doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags - if getattr(doc, 'store', None) != "mem": + if self._collection.memstore is not None: + self._collection.memstore.put_message( + self._mbox, self._uid, + MessageWrapper(fdoc=doc.content, new=False, dirty=True, + docs_id={'fdoc': doc.doc_id})) + else: + # fallback for non-memstore initializations. self._soledad.put_doc(doc) def addFlags(self, flags): @@ -547,20 +556,18 @@ class LeapMessage(fields, MailParser, MBoxParser): # phash doc... 
if self._container is not None: - bdoc = self._container.memstore.get_by_phash(body_phash) + bdoc = self._container.memstore.get_cdoc_from_phash(body_phash) print "bdoc from container -->", bdoc if bdoc and bdoc.content is not None: return bdoc else: print "no doc or not bdoc content for that phash found!" - print "nuthing. soledad?" # no memstore or no doc found there if self._soledad: body_docs = self._soledad.get_from_index( fields.TYPE_P_HASH_IDX, fields.TYPE_CONTENT_VAL, str(body_phash)) - print "returning body docs...", body_docs return first(body_docs) else: logger.error("No phash in container, and no soledad found!") @@ -581,32 +588,32 @@ class LeapMessage(fields, MailParser, MBoxParser): # setters # XXX to be used in the messagecopier interface?! - - def set_uid(self, uid): - """ - Set new uid for this message. - - :param uid: the new uid - :type uid: basestring - """ +# + #def set_uid(self, uid): + #""" + #Set new uid for this message. +# + #:param uid: the new uid + #:type uid: basestring + #""" # XXX dangerous! lock? - self._uid = uid - d = self._fdoc - d.content[self.UID_KEY] = uid - self._soledad.put_doc(d) - - def set_mbox(self, mbox): - """ - Set new mbox for this message. - - :param mbox: the new mbox - :type mbox: basestring - """ + #self._uid = uid + #d = self._fdoc + #d.content[self.UID_KEY] = uid + #self._soledad.put_doc(d) +# + #def set_mbox(self, mbox): + #""" + #Set new mbox for this message. +# + #:param mbox: the new mbox + #:type mbox: basestring + #""" # XXX dangerous! lock? - self._mbox = mbox - d = self._fdoc - d.content[self.MBOX_KEY] = mbox - self._soledad.put_doc(d) + #self._mbox = mbox + #d = self._fdoc + #d.content[self.MBOX_KEY] = mbox + #self._soledad.put_doc(d) # destructor @@ -614,14 +621,13 @@ class LeapMessage(fields, MailParser, MBoxParser): def remove(self): """ Remove all docs associated with this message. + Currently it removes only the flags doc. 
""" # XXX For the moment we are only removing the flags and headers # docs. The rest we leave there polluting your hard disk, # until we think about a good way of deorphaning. # Maybe a crawler of unreferenced docs. - # XXX remove from memory store!!! - # XXX implement elijah's idea of using a PUT document as a # token to ensure consistency in the removal. @@ -632,13 +638,35 @@ class LeapMessage(fields, MailParser, MBoxParser): #bd = self._get_body_doc() #docs = [fd, hd, bd] - docs = [fd] + try: + memstore = self._collection.memstore + except AttributeError: + memstore = False + + if memstore and hasattr(fd, "store", None) == "mem": + key = self._mbox, self._uid + if fd.new: + # it's a new document, so we can remove it and it will not + # be writen. Watch out! We need to be sure it has not been + # just queued to write! + memstore.remove_message(*key) + + if fd.dirty: + doc_id = fd.doc_id + doc = self._soledad.get_doc(doc_id) + try: + self._soledad.delete_doc(doc) + except Exception as exc: + logger.exception(exc) - for d in filter(None, docs): + else: + # we just got a soledad_doc try: - self._soledad.delete_doc(d) + doc_id = fd.doc_id + latest_doc = self._soledad.get_doc(doc_id) + self._soledad.delete_doc(latest_doc) except Exception as exc: - logger.error(exc) + logger.exception(exc) return uid def does_exist(self): @@ -786,8 +814,10 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # okay, all in order, keep going... self.mbox = self._parse_mailbox_name(mbox) + + # XXX get a SoledadStore passed instead self._soledad = soledad - self._memstore = memstore + self.memstore = memstore self.__rflags = None self.__hdocset = None @@ -913,13 +943,21 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :type chash: basestring :return: False, if it does not exist, or UID. 
""" - exist = self._get_fdoc_from_chash(chash) + exist = False + if self.memstore is not None: + exist = self.memstore.get_fdoc_from_chash(chash, self.mbox) + + if not exist: + exist = self._get_fdoc_from_chash(chash) + + print "FDOC EXIST?", exist if exist: return exist.content.get(fields.UID_KEY, "unknown-uid") else: return False - @deferred + # not deferring to thread cause this now uses deferred asa retval + #@deferred def add_msg(self, raw, subject=None, flags=None, date=None, uid=1): """ Creates a new message document. @@ -945,6 +983,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # TODO add the linked-from info ! # TODO add reference to the original message + print "ADDING MESSAGE..." logger.debug('adding message') if flags is None: @@ -956,11 +995,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # check for uniqueness. if self._fdoc_already_exists(chash): + print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" + print + print logger.warning("We already have that message in this mailbox.") # note that this operation will leave holes in the UID sequence, # but we're gonna change that all the same for a local-only table. # so not touch it by the moment. - return False + return defer.succeed('already_exists') fd = self._populate_flags(flags, uid, chash, size, multi) hd = self._populate_headr(msg, chash, subject, date) @@ -999,7 +1041,16 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # TODO ---- add reference to original doc, to be deleted # after writes are done. msg_container = MessageWrapper(fd, hd, cdocs) - self._memstore.create_message(self.mbox, uid, msg_container) + + # XXX Should allow also to dump to disk directly, + # for no-memstore cases. 
+ + # we return a deferred that, by default, will be triggered when + # saved to disk + d = self.memstore.create_message(self.mbox, uid, msg_container) + print "defered-add", d + print "adding message", d + return d def _remove_cb(self, result): return result @@ -1247,17 +1298,13 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): or None if not found. :rtype: LeapMessage """ - print "getting msg by id!" - msg_container = self._memstore.get_message(self.mbox, uid) - print "msg container", msg_container + msg_container = self.memstore.get_message(self.mbox, uid) if msg_container is not None: - print "getting LeapMessage (from memstore)" # We pass a reference to soledad just to be able to retrieve # missing parts that cannot be found in the container, like # the content docs after a copy. msg = LeapMessage(self._soledad, uid, self.mbox, collection=self, container=msg_container) - print "got msg:", msg else: msg = LeapMessage(self._soledad, uid, self.mbox, collection=self) if not msg.does_exist(): @@ -1303,8 +1350,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self._soledad.get_from_index( fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox)]) - if self._memstore is not None: - mem_uids = self._memstore.get_uids(self.mbox) + if self.memstore is not None: + mem_uids = self.memstore.get_uids(self.mbox) uids = db_uids.union(set(mem_uids)) else: uids = db_uids @@ -1328,19 +1375,22 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): Return a dict with all flags documents for this mailbox. """ # XXX get all from memstore and cache it there + # FIXME should get all uids, get them fro memstore, + # and get only the missing ones from disk. 
+ all_flags = dict((( doc.content[self.UID_KEY], doc.content[self.FLAGS_KEY]) for doc in self._soledad.get_from_index( fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox))) - if self._memstore is not None: + if self.memstore is not None: # XXX - uids = self._memstore.get_uids(self.mbox) - fdocs = [(uid, self._memstore.get_message(self.mbox, uid).fdoc) - for uid in uids] - for uid, doc in fdocs: - all_flags[uid] = doc.content[self.FLAGS_KEY] + uids = self.memstore.get_uids(self.mbox) + docs = ((uid, self.memstore.get_message(self.mbox, uid)) + for uid in uids) + for uid, doc in docs: + all_flags[uid] = doc.fdoc.content[self.FLAGS_KEY] return all_flags @@ -1378,8 +1428,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): count = self._soledad.get_count_from_index( fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox) - if self._memstore is not None: - count += self._memstore.count_new() + if self.memstore is not None: + count += self.memstore.count_new() return count # unseen messages -- cgit v1.2.3 From a5508429b90e2e9b58c5d073610ee5a10274663f Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 24 Jan 2014 23:14:38 -0400 Subject: recent-flags use the memory store --- src/leap/mail/imap/messages.py | 60 ++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 20 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index c212472..5de638b 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -813,6 +813,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): leap_assert(soledad, "Need a soledad instance to initialize") # okay, all in order, keep going... + self.mbox = self._parse_mailbox_name(mbox) # XXX get a SoledadStore passed instead @@ -996,8 +997,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # check for uniqueness. 
if self._fdoc_already_exists(chash): print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - print - print logger.warning("We already have that message in this mailbox.") # note that this operation will leave holes in the UID sequence, # but we're gonna change that all the same for a local-only table. @@ -1023,21 +1022,16 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # XXX review-me cdocs = dict((index, doc) for index, doc in enumerate(walk.get_raw_docs(msg, parts))) - print "cdocs is", cdocs - # Saving ---------------------------------------- - # XXX should check for content duplication on headers too - # but with chash. !!! + self.set_recent_flag(uid) + # Saving ---------------------------------------- # XXX adapt hdocset to use memstore #hdoc = self._soledad.create_doc(hd) # We add the newly created hdoc to the fast-access set of # headers documents associated with the mailbox. #self.add_hdocset_docid(hdoc.doc_id) - # XXX move to memory store too - # self.set_recent_flag(uid) - # TODO ---- add reference to original doc, to be deleted # after writes are done. msg_container = MessageWrapper(fd, hd, cdocs) @@ -1088,24 +1082,48 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): """ An accessor for the recent-flags set for this mailbox. """ - if not self.__rflags: + if self.__rflags is not None: + return self.__rflags + + if self.memstore: + with self._rdoc_lock: + rflags = self.memstore.get_recent_flags(self.mbox) + if not rflags: + # not loaded in the memory store yet. + # let's fetch them from soledad... + rdoc = self._get_recent_doc() + rflags = set(rdoc.content.get( + fields.RECENTFLAGS_KEY, [])) + # ...and cache them now. 
+ self.memstore.load_recent_flags( + self.mbox, + {'doc_id': rdoc.doc_id, 'set': rflags}) + return rflags + + else: + # fallback for cases without memory store with self._rdoc_lock: rdoc = self._get_recent_doc() self.__rflags = set(rdoc.content.get( fields.RECENTFLAGS_KEY, [])) - return self.__rflags + return self.__rflags def _set_recent_flags(self, value): """ Setter for the recent-flags set for this mailbox. """ - with self._rdoc_lock: - rdoc = self._get_recent_doc() - newv = set(value) - self.__rflags = newv - rdoc.content[fields.RECENTFLAGS_KEY] = list(newv) - # XXX should deferLater 0 it? - self._soledad.put_doc(rdoc) + if self.memstore: + self.memstore.set_recent_flags(self.mbox, value) + + else: + # fallback for cases without memory store + with self._rdoc_lock: + rdoc = self._get_recent_doc() + newv = set(value) + self.__rflags = newv + rdoc.content[fields.RECENTFLAGS_KEY] = list(newv) + # XXX should deferLater 0 it? + self._soledad.put_doc(rdoc) recent_flags = property( _get_recent_flags, _set_recent_flags, @@ -1131,15 +1149,17 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): Unset Recent flag for a sequence of uids. """ with self._rdoc_property_lock: - self.recent_flags = self.recent_flags.difference( + self.recent_flags.difference_update( set(uids)) + # Individual flags operations + def unset_recent_flag(self, uid): """ Unset Recent flag for a given uid. 
""" with self._rdoc_property_lock: - self.recent_flags = self.recent_flags.difference( + self.recent_flags.difference_update( set([uid])) def set_recent_flag(self, uid): -- cgit v1.2.3 From f5365ae0c2edb8b3e879f876f2f7e42b25f4616a Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 27 Jan 2014 16:11:53 -0400 Subject: handle last_uid property in memory store --- src/leap/mail/imap/messages.py | 336 ++++++++++++++++++----------------------- 1 file changed, 148 insertions(+), 188 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 5de638b..35c07f5 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -202,21 +202,21 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: The flags, represented as strings :rtype: tuple """ - if self._uid is None: - return [] + #if self._uid is None: + #return [] uid = self._uid - flags = [] + flags = set([]) fdoc = self._fdoc if fdoc: - flags = fdoc.content.get(self.FLAGS_KEY, None) + flags = set(fdoc.content.get(self.FLAGS_KEY, None)) msgcol = self._collection # We treat the recent flag specially: gotten from # a mailbox-level document. if msgcol and uid in msgcol.recent_flags: - flags.append(fields.RECENT_FLAG) + flags.add(fields.RECENT_FLAG) if flags: flags = map(str, flags) return tuple(flags) @@ -236,7 +236,7 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: a SoledadDocument instance :rtype: SoledadDocument """ - # XXX use memory store ...! + # XXX Move logic to memory store ... 
leap_assert(isinstance(flags, tuple), "flags need to be a tuple") log.msg('setting flags: %s (%s)' % (self._uid, flags)) @@ -252,6 +252,7 @@ class LeapMessage(fields, MailParser, MBoxParser): doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags if self._collection.memstore is not None: + print "putting message in collection" self._collection.memstore.put_message( self._mbox, self._uid, MessageWrapper(fdoc=doc.content, new=False, dirty=True, @@ -508,6 +509,8 @@ class LeapMessage(fields, MailParser, MBoxParser): pmap = hdoc_content.get(fields.PARTS_MAP_KEY, {}) return pmap[str(part)] + # XXX moved to memory store + # move the rest too. ------------------------------------------ def _get_flags_doc(self): """ Return the document that keeps the flags for this @@ -617,57 +620,38 @@ class LeapMessage(fields, MailParser, MBoxParser): # destructor - @deferred - def remove(self): - """ - Remove all docs associated with this message. - Currently it removes only the flags doc. - """ - # XXX For the moment we are only removing the flags and headers - # docs. The rest we leave there polluting your hard disk, - # until we think about a good way of deorphaning. - # Maybe a crawler of unreferenced docs. - - # XXX implement elijah's idea of using a PUT document as a - # token to ensure consistency in the removal. - - uid = self._uid - - fd = self._get_flags_doc() - #hd = self._get_headers_doc() - #bd = self._get_body_doc() - #docs = [fd, hd, bd] - - try: - memstore = self._collection.memstore - except AttributeError: - memstore = False - - if memstore and hasattr(fd, "store", None) == "mem": - key = self._mbox, self._uid - if fd.new: - # it's a new document, so we can remove it and it will not - # be writen. Watch out! We need to be sure it has not been - # just queued to write! 
- memstore.remove_message(*key) - - if fd.dirty: - doc_id = fd.doc_id - doc = self._soledad.get_doc(doc_id) - try: - self._soledad.delete_doc(doc) - except Exception as exc: - logger.exception(exc) - - else: + # XXX this logic moved to remove_message in memory store... + #@deferred + #def remove(self): + #""" + #Remove all docs associated with this message. + #Currently it removes only the flags doc. + #""" + #fd = self._get_flags_doc() +# + #if fd.new: + # it's a new document, so we can remove it and it will not + # be writen. Watch out! We need to be sure it has not been + # just queued to write! + #memstore.remove_message(*key) +# + #if fd.dirty: + #doc_id = fd.doc_id + #doc = self._soledad.get_doc(doc_id) + #try: + #self._soledad.delete_doc(doc) + #except Exception as exc: + #logger.exception(exc) +# + #else: # we just got a soledad_doc - try: - doc_id = fd.doc_id - latest_doc = self._soledad.get_doc(doc_id) - self._soledad.delete_doc(latest_doc) - except Exception as exc: - logger.exception(exc) - return uid + #try: + #doc_id = fd.doc_id + #latest_doc = self._soledad.get_doc(doc_id) + #self._soledad.delete_doc(latest_doc) + #except Exception as exc: + #logger.exception(exc) + #return uid def does_exist(self): """ @@ -826,7 +810,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # ensure that we have a recent-flags and a hdocs-sec doc self._get_or_create_rdoc() - self._get_or_create_hdocset() + + # Not for now... + #self._get_or_create_hdocset() def _get_empty_doc(self, _type=FLAGS_DOC): """ @@ -959,7 +945,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # not deferring to thread cause this now uses deferred asa retval #@deferred - def add_msg(self, raw, subject=None, flags=None, date=None, uid=1): + #@profile + def add_msg(self, raw, subject=None, flags=None, date=None, uid=None, + notify_on_disk=False): """ Creates a new message document. Here lives the magic of the leap mail. 
Well, in soledad, really. @@ -994,7 +982,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # parse msg, chash, size, multi = self._do_parse(raw) - # check for uniqueness. + # check for uniqueness -------------------------------- + # XXX profiler says that this test is costly. + # So we probably should just do an in-memory check and + # move the complete check to the soledad writer? + # Watch out! We're reserving a UID right after this! if self._fdoc_already_exists(chash): print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" logger.warning("We already have that message in this mailbox.") @@ -1003,6 +995,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # so not touch it by the moment. return defer.succeed('already_exists') + uid = self.memstore.increment_last_soledad_uid(self.mbox) + print "ADDING MSG WITH UID: %s" % uid + fd = self._populate_flags(flags, uid, chash, size, multi) hd = self._populate_headr(msg, chash, subject, date) @@ -1039,36 +1034,22 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # XXX Should allow also to dump to disk directly, # for no-memstore cases. - # we return a deferred that, by default, will be triggered when - # saved to disk - d = self.memstore.create_message(self.mbox, uid, msg_container) - print "defered-add", d + # we return a deferred that by default will be triggered + # inmediately. + d = self.memstore.create_message(self.mbox, uid, msg_container, + notify_on_disk=notify_on_disk) print "adding message", d return d - def _remove_cb(self, result): - return result - - def remove_all_deleted(self): - """ - Removes all messages flagged as deleted. - """ - delete_deferl = [] - for msg in self.get_deleted(): - delete_deferl.append(msg.remove()) - d1 = defer.gatherResults(delete_deferl, consumeErrors=True) - d1.addCallback(self._remove_cb) - return d1 - - def remove(self, msg): - """ - Remove a given msg. 
- :param msg: the message to be removed - :type msg: LeapMessage - """ - d = msg.remove() - d.addCallback(self._remove_cb) - return d + #def remove(self, msg): + #""" + #Remove a given msg. + #:param msg: the message to be removed + #:type msg: LeapMessage + #""" + #d = msg.remove() + #d.addCallback(self._remove_cb) + #return d # # getters: specific queries @@ -1175,76 +1156,76 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # XXX FIXME ------------------------------------- # This should be rewritten to use memory store. - def _get_hdocset(self): - """ - An accessor for the hdocs-set for this mailbox. - """ - if not self.__hdocset: - with self._hdocset_lock: - hdocset_doc = self._get_hdocset_doc() - value = set(hdocset_doc.content.get( - fields.HDOCS_SET_KEY, [])) - self.__hdocset = value - return self.__hdocset - - def _set_hdocset(self, value): - """ - Setter for the hdocs-set for this mailbox. - """ - with self._hdocset_lock: - hdocset_doc = self._get_hdocset_doc() - newv = set(value) - self.__hdocset = newv - hdocset_doc.content[fields.HDOCS_SET_KEY] = list(newv) + #def _get_hdocset(self): + #""" + #An accessor for the hdocs-set for this mailbox. + #""" + #if not self.__hdocset: + #with self._hdocset_lock: + #hdocset_doc = self._get_hdocset_doc() + #value = set(hdocset_doc.content.get( + #fields.HDOCS_SET_KEY, [])) + #self.__hdocset = value + #return self.__hdocset +# + #def _set_hdocset(self, value): + #""" + #Setter for the hdocs-set for this mailbox. + #""" + #with self._hdocset_lock: + #hdocset_doc = self._get_hdocset_doc() + #newv = set(value) + #self.__hdocset = newv + #hdocset_doc.content[fields.HDOCS_SET_KEY] = list(newv) # XXX should deferLater 0 it? - self._soledad.put_doc(hdocset_doc) - - _hdocset = property( - _get_hdocset, _set_hdocset, - doc="Set of Document-IDs for the headers docs associated " - "with this mailbox.") - - def _get_hdocset_doc(self): - """ - Get hdocs-set document for this mailbox. 
- """ - curried = partial( - self._soledad.get_from_index, - fields.TYPE_MBOX_IDX, - fields.TYPE_HDOCS_SET_VAL, self.mbox) - curried.expected = "hdocset" - hdocset_doc = try_unique_query(curried) - return hdocset_doc - + #self._soledad.put_doc(hdocset_doc) +# + #_hdocset = property( + #_get_hdocset, _set_hdocset, + #doc="Set of Document-IDs for the headers docs associated " + #"with this mailbox.") +# + #def _get_hdocset_doc(self): + #""" + #Get hdocs-set document for this mailbox. + #""" + #curried = partial( + #self._soledad.get_from_index, + #fields.TYPE_MBOX_IDX, + #fields.TYPE_HDOCS_SET_VAL, self.mbox) + #curried.expected = "hdocset" + #hdocset_doc = try_unique_query(curried) + #return hdocset_doc +# # Property-set modification (protected by a different # lock to give atomicity to the read/write operation) - - def remove_hdocset_docids(self, docids): - """ - Remove the given document IDs from the set of - header-documents associated with this mailbox. - """ - with self._hdocset_property_lock: - self._hdocset = self._hdocset.difference( - set(docids)) - - def remove_hdocset_docid(self, docid): - """ - Remove the given document ID from the set of - header-documents associated with this mailbox. - """ - with self._hdocset_property_lock: - self._hdocset = self._hdocset.difference( - set([docid])) - - def add_hdocset_docid(self, docid): - """ - Add the given document ID to the set of - header-documents associated with this mailbox. - """ - with self._hdocset_property_lock: - self._hdocset = self._hdocset.union( - set([docid])) +# + #def remove_hdocset_docids(self, docids): + #""" + #Remove the given document IDs from the set of + #header-documents associated with this mailbox. + #""" + #with self._hdocset_property_lock: + #self._hdocset = self._hdocset.difference( + #set(docids)) +# + #def remove_hdocset_docid(self, docid): + #""" + #Remove the given document ID from the set of + #header-documents associated with this mailbox. 
+ #""" + #with self._hdocset_property_lock: + #self._hdocset = self._hdocset.difference( + #set([docid])) +# + #def add_hdocset_docid(self, docid): + #""" + #Add the given document ID to the set of + #header-documents associated with this mailbox. + #""" + #with self._hdocset_property_lock: + #self._hdocset = self._hdocset.union( + #set([docid])) # individual doc getters, message layer. @@ -1378,18 +1359,20 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): return (u for u in sorted(uids)) - def reset_last_uid(self, param): - """ - Set the last uid to the highest uid found. - Used while expunging, passed as a callback. - """ - try: - self.last_uid = max(self.all_uid_iter()) + 1 - except ValueError: + # XXX Should be moved to memstore + #def reset_last_uid(self, param): + #""" + #Set the last uid to the highest uid found. + #Used while expunging, passed as a callback. + #""" + #try: + #self.last_uid = max(self.all_uid_iter()) + 1 + #except ValueError: # empty sequence - pass - return param + #pass + #return param + # XXX MOVE to memstore def all_flags(self): """ Return a dict with all flags documents for this mailbox. @@ -1444,7 +1427,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :rtype: int """ - # XXX We could cache this in memstore too until next write... + # XXX We should cache this in memstore too until next write... count = self._soledad.get_count_from_index( fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox) @@ -1491,6 +1474,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # recent messages + # XXX take it from memstore def count_recent(self): """ Count all messages with the `Recent` flag. @@ -1503,30 +1487,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): """ return len(self.recent_flags) - # deleted messages - - def deleted_iter(self): - """ - Get an iterator for the message UIDs with `deleted` flag. 
- - :return: iterator through deleted message docs - :rtype: iterable - """ - return (doc.content[self.UID_KEY] for doc in - self._soledad.get_from_index( - fields.TYPE_MBOX_DEL_IDX, - fields.TYPE_FLAGS_VAL, self.mbox, '1')) - - def get_deleted(self): - """ - Get all messages with the `Deleted` flag. - - :returns: a generator of LeapMessages - :rtype: generator - """ - return (LeapMessage(self._soledad, docid, self.mbox) - for docid in self.deleted_iter()) - def __len__(self): """ Returns the number of messages on this mailbox. -- cgit v1.2.3 From f096368cfbc49caab52811ae50388aae74272a1a Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 28 Jan 2014 10:24:04 -0400 Subject: fix find_charset rebase --- src/leap/mail/imap/messages.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 35c07f5..7617fb8 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -335,16 +335,18 @@ class LeapMessage(fields, MailParser, MBoxParser): charset = find_charset(content_type) logger.debug('got charset from content-type: %s' % charset) if charset is None: - # XXX change for find_charset utility charset = self._get_charset(body) try: body = body.encode(charset) except UnicodeError as exc: - logger.error("Unicode error {0}".format(exc)) + logger.error( + "Unicode error, using 'replace'. {0!r}".format(exc)) logger.debug("Attempted to encode with: %s" % charset) try: body = body.encode(charset, 'replace') - except UnicodeError as exc: + + # XXX desperate attempt. I've seen things you wouldn't believe + except UnicodeError: try: body = body.encode('utf-8', 'replace') except: -- cgit v1.2.3 From a7e0054b595822325f749b0b1df7d25cab4e6486 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 28 Jan 2014 18:39:59 -0400 Subject: docstring fixes Also some fixes for None comparisons. 
--- src/leap/mail/imap/messages.py | 240 ++++------------------------------------- 1 file changed, 21 insertions(+), 219 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 7617fb8..315cdda 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -58,10 +58,7 @@ logger = logging.getLogger(__name__) # [ ] Delete incoming mail only after successful write! # [ ] Remove UID from syncable db. Store only those indexes locally. -CHARSET_PATTERN = r"""charset=([\w-]+)""" MSGID_PATTERN = r"""<([\w@.]+)>""" - -CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) MSGID_RE = re.compile(MSGID_PATTERN) @@ -202,8 +199,6 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: The flags, represented as strings :rtype: tuple """ - #if self._uid is None: - #return [] uid = self._uid flags = set([]) @@ -252,7 +247,7 @@ class LeapMessage(fields, MailParser, MBoxParser): doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags if self._collection.memstore is not None: - print "putting message in collection" + log.msg("putting message in collection") self._collection.memstore.put_message( self._mbox, self._uid, MessageWrapper(fdoc=doc.content, new=False, dirty=True, @@ -327,8 +322,8 @@ class LeapMessage(fields, MailParser, MBoxParser): if self._bdoc is not None: bdoc_content = self._bdoc.content if bdoc_content is None: - logger.warning("No BODC content found for message!!!") - return write_fd(str("")) + logger.warning("No BDOC content found for message!!!") + return write_fd("") body = bdoc_content.get(self.RAW_KEY, "") content_type = bdoc_content.get('content-type', "") @@ -337,20 +332,13 @@ class LeapMessage(fields, MailParser, MBoxParser): if charset is None: charset = self._get_charset(body) try: - body = body.encode(charset) + if isinstance(body, unicode): + body = body.encode(charset) except UnicodeError as exc: logger.error( "Unicode error, using 'replace'. 
{0!r}".format(exc)) logger.debug("Attempted to encode with: %s" % charset) - try: - body = body.encode(charset, 'replace') - - # XXX desperate attempt. I've seen things you wouldn't believe - except UnicodeError: - try: - body = body.encode('utf-8', 'replace') - except: - pass + body = body.encode(charset, 'replace') finally: return write_fd(body) @@ -409,6 +397,8 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: dict """ # TODO split in smaller methods + # XXX refactor together with MessagePart method + headers = self._get_headers() if not headers: logger.warning("No headers found") @@ -425,11 +415,10 @@ class LeapMessage(fields, MailParser, MBoxParser): # default to most likely standard charset = find_charset(headers, "utf-8") - - # twisted imap server expects *some* headers to be lowercase - # XXX refactor together with MessagePart method headers2 = dict() for key, value in headers.items(): + # twisted imap server expects *some* headers to be lowercase + # We could use a CaseInsensitiveDict here... if key.lower() == "content-type": key = key.lower() @@ -441,7 +430,6 @@ class LeapMessage(fields, MailParser, MBoxParser): # filter original dict by negate-condition if cond(key): headers2[key] = value - return headers2 def _get_headers(self): @@ -547,10 +535,8 @@ class LeapMessage(fields, MailParser, MBoxParser): message. """ hdoc_content = self._hdoc.content - #print "hdoc: ", hdoc_content body_phash = hdoc_content.get( fields.BODY_KEY, None) - print "body phash: ", body_phash if not body_phash: logger.warning("No body phash for this document!") return None @@ -562,11 +548,8 @@ class LeapMessage(fields, MailParser, MBoxParser): if self._container is not None: bdoc = self._container.memstore.get_cdoc_from_phash(body_phash) - print "bdoc from container -->", bdoc if bdoc and bdoc.content is not None: return bdoc - else: - print "no doc or not bdoc content for that phash found!" 
# no memstore or no doc found there if self._soledad: @@ -590,77 +573,12 @@ class LeapMessage(fields, MailParser, MBoxParser): """ return self._fdoc.content.get(key, None) - # setters - - # XXX to be used in the messagecopier interface?! -# - #def set_uid(self, uid): - #""" - #Set new uid for this message. -# - #:param uid: the new uid - #:type uid: basestring - #""" - # XXX dangerous! lock? - #self._uid = uid - #d = self._fdoc - #d.content[self.UID_KEY] = uid - #self._soledad.put_doc(d) -# - #def set_mbox(self, mbox): - #""" - #Set new mbox for this message. -# - #:param mbox: the new mbox - #:type mbox: basestring - #""" - # XXX dangerous! lock? - #self._mbox = mbox - #d = self._fdoc - #d.content[self.MBOX_KEY] = mbox - #self._soledad.put_doc(d) - - # destructor - - # XXX this logic moved to remove_message in memory store... - #@deferred - #def remove(self): - #""" - #Remove all docs associated with this message. - #Currently it removes only the flags doc. - #""" - #fd = self._get_flags_doc() -# - #if fd.new: - # it's a new document, so we can remove it and it will not - # be writen. Watch out! We need to be sure it has not been - # just queued to write! - #memstore.remove_message(*key) -# - #if fd.dirty: - #doc_id = fd.doc_id - #doc = self._soledad.get_doc(doc_id) - #try: - #self._soledad.delete_doc(doc) - #except Exception as exc: - #logger.exception(exc) -# - #else: - # we just got a soledad_doc - #try: - #doc_id = fd.doc_id - #latest_doc = self._soledad.get_doc(doc_id) - #self._soledad.delete_doc(latest_doc) - #except Exception as exc: - #logger.exception(exc) - #return uid - def does_exist(self): """ - Return True if there is actually a flags message for this + Return True if there is actually a flags document for this UID and mbox. 
""" - return self._fdoc is not None + return not empty(self._fdoc) class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): @@ -938,8 +856,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): if not exist: exist = self._get_fdoc_from_chash(chash) - - print "FDOC EXIST?", exist if exist: return exist.content.get(fields.UID_KEY, "unknown-uid") else: @@ -974,7 +890,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # TODO add the linked-from info ! # TODO add reference to the original message - print "ADDING MESSAGE..." logger.debug('adding message') if flags is None: @@ -990,15 +905,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # move the complete check to the soledad writer? # Watch out! We're reserving a UID right after this! if self._fdoc_already_exists(chash): - print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" logger.warning("We already have that message in this mailbox.") - # note that this operation will leave holes in the UID sequence, - # but we're gonna change that all the same for a local-only table. - # so not touch it by the moment. 
return defer.succeed('already_exists') uid = self.memstore.increment_last_soledad_uid(self.mbox) - print "ADDING MSG WITH UID: %s" % uid + logger.info("ADDING MSG WITH UID: %s" % uid) fd = self._populate_flags(flags, uid, chash, size, multi) hd = self._populate_headr(msg, chash, subject, date) @@ -1017,58 +928,36 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # The MessageContainer expects a dict, zero-indexed # XXX review-me - cdocs = dict((index, doc) for index, doc in - enumerate(walk.get_raw_docs(msg, parts))) + cdocs = dict(enumerate(walk.get_raw_docs(msg, parts))) self.set_recent_flag(uid) # Saving ---------------------------------------- - # XXX adapt hdocset to use memstore - #hdoc = self._soledad.create_doc(hd) - # We add the newly created hdoc to the fast-access set of - # headers documents associated with the mailbox. - #self.add_hdocset_docid(hdoc.doc_id) - # TODO ---- add reference to original doc, to be deleted # after writes are done. msg_container = MessageWrapper(fd, hd, cdocs) - # XXX Should allow also to dump to disk directly, - # for no-memstore cases. - # we return a deferred that by default will be triggered # inmediately. d = self.memstore.create_message(self.mbox, uid, msg_container, notify_on_disk=notify_on_disk) - print "adding message", d return d - #def remove(self, msg): - #""" - #Remove a given msg. - #:param msg: the message to be removed - #:type msg: LeapMessage - #""" - #d = msg.remove() - #d.addCallback(self._remove_cb) - #return d - # # getters: specific queries # # recent flags - # XXX FIXME ------------------------------------- - # This should be rewritten to use memory store. def _get_recent_flags(self): """ An accessor for the recent-flags set for this mailbox. 
""" + # XXX check if we should remove this if self.__rflags is not None: return self.__rflags - if self.memstore: + if self.memstore is not None: with self._rdoc_lock: rflags = self.memstore.get_recent_flags(self.mbox) if not rflags: @@ -1091,11 +980,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fields.RECENTFLAGS_KEY, [])) return self.__rflags + @profile def _set_recent_flags(self, value): """ Setter for the recent-flags set for this mailbox. """ - if self.memstore: + if self.memstore is not None: self.memstore.set_recent_flags(self.mbox, value) else: @@ -1112,9 +1002,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): _get_recent_flags, _set_recent_flags, doc="Set of UIDs with the recent flag for this mailbox.") + # XXX change naming, indicate soledad query. def _get_recent_doc(self): """ - Get recent-flags document for this mailbox. + Get recent-flags document from Soledad for this mailbox. + :rtype: SoledadDocument or None """ curried = partial( self._soledad.get_from_index, @@ -1153,82 +1045,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self.recent_flags = self.recent_flags.union( set([uid])) - # headers-docs-set - - # XXX FIXME ------------------------------------- - # This should be rewritten to use memory store. - - #def _get_hdocset(self): - #""" - #An accessor for the hdocs-set for this mailbox. - #""" - #if not self.__hdocset: - #with self._hdocset_lock: - #hdocset_doc = self._get_hdocset_doc() - #value = set(hdocset_doc.content.get( - #fields.HDOCS_SET_KEY, [])) - #self.__hdocset = value - #return self.__hdocset -# - #def _set_hdocset(self, value): - #""" - #Setter for the hdocs-set for this mailbox. - #""" - #with self._hdocset_lock: - #hdocset_doc = self._get_hdocset_doc() - #newv = set(value) - #self.__hdocset = newv - #hdocset_doc.content[fields.HDOCS_SET_KEY] = list(newv) - # XXX should deferLater 0 it? 
- #self._soledad.put_doc(hdocset_doc) -# - #_hdocset = property( - #_get_hdocset, _set_hdocset, - #doc="Set of Document-IDs for the headers docs associated " - #"with this mailbox.") -# - #def _get_hdocset_doc(self): - #""" - #Get hdocs-set document for this mailbox. - #""" - #curried = partial( - #self._soledad.get_from_index, - #fields.TYPE_MBOX_IDX, - #fields.TYPE_HDOCS_SET_VAL, self.mbox) - #curried.expected = "hdocset" - #hdocset_doc = try_unique_query(curried) - #return hdocset_doc -# - # Property-set modification (protected by a different - # lock to give atomicity to the read/write operation) -# - #def remove_hdocset_docids(self, docids): - #""" - #Remove the given document IDs from the set of - #header-documents associated with this mailbox. - #""" - #with self._hdocset_property_lock: - #self._hdocset = self._hdocset.difference( - #set(docids)) -# - #def remove_hdocset_docid(self, docid): - #""" - #Remove the given document ID from the set of - #header-documents associated with this mailbox. - #""" - #with self._hdocset_property_lock: - #self._hdocset = self._hdocset.difference( - #set([docid])) -# - #def add_hdocset_docid(self, docid): - #""" - #Add the given document ID to the set of - #header-documents associated with this mailbox. - #""" - #with self._hdocset_property_lock: - #self._hdocset = self._hdocset.union( - #set([docid])) - # individual doc getters, message layer. def _get_fdoc_from_chash(self, chash): @@ -1361,19 +1177,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): return (u for u in sorted(uids)) - # XXX Should be moved to memstore - #def reset_last_uid(self, param): - #""" - #Set the last uid to the highest uid found. - #Used while expunging, passed as a callback. 
- #""" - #try: - #self.last_uid = max(self.all_uid_iter()) + 1 - #except ValueError: - # empty sequence - #pass - #return param - # XXX MOVE to memstore def all_flags(self): """ @@ -1390,7 +1193,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox))) if self.memstore is not None: - # XXX uids = self.memstore.get_uids(self.mbox) docs = ((uid, self.memstore.get_message(self.mbox, uid)) for uid in uids) -- cgit v1.2.3 From 1b71ba510a2e6680f1ecc84eacfc492b0bbe24fc Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 29 Jan 2014 00:54:20 -0400 Subject: Fix copy and deletion problems * reorganize and simplify STORE command processing * add the notification after the processing of the whole sequence --- src/leap/mail/imap/messages.py | 156 +++++++++++++++++++++++------------------ 1 file changed, 89 insertions(+), 67 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 315cdda..5770868 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -20,7 +20,6 @@ LeapMessage and MessageCollection. import copy import logging import re -import time import threading import StringIO @@ -97,11 +96,13 @@ class LeapMessage(fields, MailParser, MBoxParser): """ # TODO this has to change. - # Should index primarily by chash, and keep a local-lonly + # Should index primarily by chash, and keep a local-only # UID table. implements(imap4.IMessage) + flags_lock = threading.Lock() + def __init__(self, soledad, uid, mbox, collection=None, container=None): """ Initializes a LeapMessage. @@ -111,7 +112,7 @@ class LeapMessage(fields, MailParser, MBoxParser): :param uid: the UID for the message. 
:type uid: int or basestring :param mbox: the mbox this message belongs to - :type mbox: basestring + :type mbox: str or unicode :param collection: a reference to the parent collection object :type collection: MessageCollection :param container: a IMessageContainer implementor instance @@ -216,23 +217,17 @@ class LeapMessage(fields, MailParser, MBoxParser): flags = map(str, flags) return tuple(flags) - # setFlags, addFlags, removeFlags are not in the interface spec - # but we use them with store command. + # setFlags not in the interface spec but we use it with store command. - def setFlags(self, flags): + def setFlags(self, flags, mode): """ Sets the flags for this message - Returns a SoledadDocument that needs to be updated by the caller. - :param flags: the flags to update in the message. :type flags: tuple of str - - :return: a SoledadDocument instance - :rtype: SoledadDocument + :param mode: the mode for setting. 1 is append, -1 is remove, 0 set. + :type mode: int """ - # XXX Move logic to memory store ... - leap_assert(isinstance(flags, tuple), "flags need to be a tuple") log.msg('setting flags: %s (%s)' % (self._uid, flags)) @@ -242,51 +237,36 @@ class LeapMessage(fields, MailParser, MBoxParser): "Could not find FDOC for %s:%s while setting flags!" % (self._mbox, self._uid)) return - doc.content[self.FLAGS_KEY] = flags - doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags - doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags - - if self._collection.memstore is not None: - log.msg("putting message in collection") - self._collection.memstore.put_message( - self._mbox, self._uid, - MessageWrapper(fdoc=doc.content, new=False, dirty=True, - docs_id={'fdoc': doc.doc_id})) - else: - # fallback for non-memstore initializations. - self._soledad.put_doc(doc) - - def addFlags(self, flags): - """ - Adds flags to this message. - - Returns a SoledadDocument that needs to be updated by the caller. - - :param flags: the flags to add to the message. 
- :type flags: tuple of str - - :return: a SoledadDocument instance - :rtype: SoledadDocument - """ - leap_assert(isinstance(flags, tuple), "flags need to be a tuple") - oldflags = self.getFlags() - self.setFlags(tuple(set(flags + oldflags))) - - def removeFlags(self, flags): - """ - Remove flags from this message. - - Returns a SoledadDocument that needs to be updated by the caller. - :param flags: the flags to be removed from the message. - :type flags: tuple of str - - :return: a SoledadDocument instance - :rtype: SoledadDocument - """ - leap_assert(isinstance(flags, tuple), "flags need to be a tuple") - oldflags = self.getFlags() - self.setFlags(tuple(set(oldflags) - set(flags))) + APPEND = 1 + REMOVE = -1 + SET = 0 + + with self.flags_lock: + current = doc.content[self.FLAGS_KEY] + if mode == APPEND: + newflags = tuple(set(tuple(current) + flags)) + elif mode == REMOVE: + newflags = tuple(set(current).difference(set(flags))) + elif mode == SET: + newflags = flags + + # We could defer this, but I think it's better + # to put it under the lock... + doc.content[self.FLAGS_KEY] = newflags + doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags + doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags + + if self._collection.memstore is not None: + log.msg("putting message in collection") + self._collection.memstore.put_message( + self._mbox, self._uid, + MessageWrapper(fdoc=doc.content, new=False, dirty=True, + docs_id={'fdoc': doc.doc_id})) + else: + # fallback for non-memstore initializations. + self._soledad.put_doc(doc) + return map(str, newflags) def getInternalDate(self): """ @@ -1022,6 +1002,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): def unset_recent_flags(self, uids): """ Unset Recent flag for a sequence of uids. 
+ + :param uids: the uids to unset + :type uid: sequence """ with self._rdoc_property_lock: self.recent_flags.difference_update( @@ -1032,6 +1015,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): def unset_recent_flag(self, uid): """ Unset Recent flag for a given uid. + + :param uid: the uid to unset + :type uid: int """ with self._rdoc_property_lock: self.recent_flags.difference_update( @@ -1040,6 +1026,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): def set_recent_flag(self, uid): """ Set Recent flag for a given uid. + + :param uid: the uid to set + :type uid: int """ with self._rdoc_property_lock: self.recent_flags = self.recent_flags.union( @@ -1099,31 +1088,64 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # and we cannot find it otherwise. This seems to be enough. # XXX do a deferLater instead ?? - # FIXME this won't be needed after the CHECK command is implemented. - time.sleep(0.3) + # XXX is this working? return self._get_uid_from_msgidCb(msgid) + def set_flags(self, mbox, messages, flags, mode): + """ + Set flags for a sequence of messages. + + :param mbox: the mbox this message belongs to + :type mbox: str or unicode + :param messages: the messages to iterate through + :type messages: sequence + :flags: the flags to be set + :type flags: tuple + :param mode: the mode for setting. 1 is append, -1 is remove, 0 set. + :type mode: int + """ + result = {} + for msg_id in messages: + log.msg("MSG ID = %s" % msg_id) + msg = self.get_msg_by_uid(msg_id, mem_only=True, flags_only=True) + if not msg: + continue + result[msg_id] = msg.setFlags(flags, mode) + + return result + # getters: generic for a mailbox - def get_msg_by_uid(self, uid): + def get_msg_by_uid(self, uid, mem_only=False, flags_only=False): """ Retrieves a LeapMessage by UID. This is used primarity in the Mailbox fetch and store methods. 
:param uid: the message uid to query by :type uid: int + :param mem_only: a flag that indicates whether this Message should + pass a reference to soledad to retrieve missing pieces + or not. + :type mem_only: bool + :param flags_only: whether the message should carry only a reference + to the flags document. + :type flags_only: bool :return: A LeapMessage instance matching the query, or None if not found. :rtype: LeapMessage """ - msg_container = self.memstore.get_message(self.mbox, uid) + msg_container = self.memstore.get_message(self.mbox, uid, flags_only) if msg_container is not None: - # We pass a reference to soledad just to be able to retrieve - # missing parts that cannot be found in the container, like - # the content docs after a copy. - msg = LeapMessage(self._soledad, uid, self.mbox, collection=self, - container=msg_container) + if mem_only: + msg = LeapMessage(None, uid, self.mbox, collection=self, + container=msg_container) + else: + # We pass a reference to soledad just to be able to retrieve + # missing parts that cannot be found in the container, like + # the content docs after a copy. + msg = LeapMessage(self._soledad, uid, self.mbox, + collection=self, container=msg_container) else: msg = LeapMessage(self._soledad, uid, self.mbox, collection=self) if not msg.does_exist(): @@ -1159,7 +1181,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): def all_uid_iter(self): """ - Return an iterator trhough the UIDs of all messages, sorted in + Return an iterator through the UIDs of all messages, sorted in ascending order. 
""" # XXX we should get this from the uid table, local-only -- cgit v1.2.3 From 5818a2e6826d84cd82cc578fbce95aa549d70e25 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 29 Jan 2014 16:43:15 -0400 Subject: Fix indexing error that was rendering attachments unusable Also, check for empty body-doc --- src/leap/mail/imap/messages.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 5770868..2ace103 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -301,7 +301,7 @@ class LeapMessage(fields, MailParser, MBoxParser): fd = StringIO.StringIO() if self._bdoc is not None: bdoc_content = self._bdoc.content - if bdoc_content is None: + if empty(bdoc_content): logger.warning("No BDOC content found for message!!!") return write_fd("") @@ -906,9 +906,10 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): hd[key] = parts_map[key] del parts_map - # The MessageContainer expects a dict, zero-indexed + # The MessageContainer expects a dict, one-indexed # XXX review-me - cdocs = dict(enumerate(walk.get_raw_docs(msg, parts))) + cdocs = dict(((key + 1, doc) for key, doc in + enumerate(walk.get_raw_docs(msg, parts)))) self.set_recent_flag(uid) @@ -960,7 +961,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fields.RECENTFLAGS_KEY, [])) return self.__rflags - @profile def _set_recent_flags(self, value): """ Setter for the recent-flags set for this mailbox. 
-- cgit v1.2.3 From 5cc82b3e8937c0e4488f79db79891c90a2ce3d47 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 30 Jan 2014 17:23:19 -0400 Subject: fix badly terminated headers --- src/leap/mail/imap/messages.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 2ace103..356145f 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -407,6 +407,10 @@ class LeapMessage(fields, MailParser, MBoxParser): if not isinstance(value, str): value = value.encode(charset, 'replace') + if value.endswith(";"): + # bastards + value = value[:-1] + # filter original dict by negate-condition if cond(key): headers2[key] = value -- cgit v1.2.3 From ff7de0c9bc760e097c0286d2d62a19095be3f35e Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 30 Jan 2014 18:35:03 -0400 Subject: prime-uids We pre-fetch the uids from soledad on mailbox initialization --- src/leap/mail/imap/messages.py | 53 ++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 22 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 356145f..0e5c74a 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -219,6 +219,7 @@ class LeapMessage(fields, MailParser, MBoxParser): # setFlags not in the interface spec but we use it with store command. + #@profile def setFlags(self, flags, mode): """ Sets the flags for this message @@ -934,6 +935,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # recent flags + #@profile def _get_recent_flags(self): """ An accessor for the recent-flags set for this mailbox. 
@@ -957,13 +959,13 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): {'doc_id': rdoc.doc_id, 'set': rflags}) return rflags - else: + #else: # fallback for cases without memory store - with self._rdoc_lock: - rdoc = self._get_recent_doc() - self.__rflags = set(rdoc.content.get( - fields.RECENTFLAGS_KEY, [])) - return self.__rflags + #with self._rdoc_lock: + #rdoc = self._get_recent_doc() + #self.__rflags = set(rdoc.content.get( + #fields.RECENTFLAGS_KEY, [])) + #return self.__rflags def _set_recent_flags(self, value): """ @@ -972,21 +974,22 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): if self.memstore is not None: self.memstore.set_recent_flags(self.mbox, value) - else: + #else: # fallback for cases without memory store - with self._rdoc_lock: - rdoc = self._get_recent_doc() - newv = set(value) - self.__rflags = newv - rdoc.content[fields.RECENTFLAGS_KEY] = list(newv) + #with self._rdoc_lock: + #rdoc = self._get_recent_doc() + #newv = set(value) + #self.__rflags = newv + #rdoc.content[fields.RECENTFLAGS_KEY] = list(newv) # XXX should deferLater 0 it? - self._soledad.put_doc(rdoc) + #self._soledad.put_doc(rdoc) recent_flags = property( _get_recent_flags, _set_recent_flags, doc="Set of UIDs with the recent flag for this mailbox.") # XXX change naming, indicate soledad query. + #@profile def _get_recent_doc(self): """ Get recent-flags document from Soledad for this mailbox. @@ -1027,6 +1030,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self.recent_flags.difference_update( set([uid])) + @deferred def set_recent_flag(self, uid): """ Set Recent flag for a given uid. @@ -1095,6 +1099,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # XXX is this working? return self._get_uid_from_msgidCb(msgid) + #@profile def set_flags(self, mbox, messages, flags, mode): """ Set flags for a sequence of messages. 
@@ -1183,25 +1188,29 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # FIXME ---------------------------------------------- return sorted(all_docs, key=lambda item: item.content['uid']) - def all_uid_iter(self): + #@profile + def all_soledad_uid_iter(self): """ Return an iterator through the UIDs of all messages, sorted in ascending order. """ - # XXX we should get this from the uid table, local-only - # XXX FIXME ------------- - # This should be cached in the memstoretoo db_uids = set([doc.content[self.UID_KEY] for doc in self._soledad.get_from_index( fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox)]) + return db_uids + + #@profile + def all_uid_iter(self): + """ + Return an iterator through the UIDs of all messages, from memory. + """ if self.memstore is not None: mem_uids = self.memstore.get_uids(self.mbox) - uids = db_uids.union(set(mem_uids)) - else: - uids = db_uids - - return (u for u in sorted(uids)) + soledad_known_uids = self.memstore.get_soledad_known_uids( + self.mbox) + combined = tuple(set(mem_uids).union(soledad_known_uids)) + return combined # XXX MOVE to memstore def all_flags(self): -- cgit v1.2.3 From 0f6a8e1c83995cffec51e81f626d4bb29d4f7345 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 31 Jan 2014 03:34:03 -0400 Subject: properly implement deferreds in several commands Passing along a deferred as an observer whose callback will be called with the proper result. Returning to thread in the appropiate points. just let's remember that twisted APIs are not thread safe! SoledadStore process_item also properly returned to thread. Changed @deferred to @deferred_to_thread so it results less confusing to read. "know the territory". aha! 
--- src/leap/mail/imap/messages.py | 133 +++++++++++++++++++++++++---------------- 1 file changed, 82 insertions(+), 51 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 0e5c74a..03dde29 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -37,7 +37,7 @@ from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset from leap.mail import walk from leap.mail.utils import first, find_charset, lowerdict, empty -from leap.mail.decorators import deferred +from leap.mail.decorators import deferred_to_thread from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields from leap.mail.imap.memorystore import MessageWrapper @@ -243,30 +243,30 @@ class LeapMessage(fields, MailParser, MBoxParser): REMOVE = -1 SET = 0 - with self.flags_lock: - current = doc.content[self.FLAGS_KEY] - if mode == APPEND: - newflags = tuple(set(tuple(current) + flags)) - elif mode == REMOVE: - newflags = tuple(set(current).difference(set(flags))) - elif mode == SET: - newflags = flags - - # We could defer this, but I think it's better - # to put it under the lock... - doc.content[self.FLAGS_KEY] = newflags - doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags - doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags - - if self._collection.memstore is not None: - log.msg("putting message in collection") - self._collection.memstore.put_message( - self._mbox, self._uid, - MessageWrapper(fdoc=doc.content, new=False, dirty=True, - docs_id={'fdoc': doc.doc_id})) - else: - # fallback for non-memstore initializations. 
- self._soledad.put_doc(doc) + #with self.flags_lock: + current = doc.content[self.FLAGS_KEY] + if mode == APPEND: + newflags = tuple(set(tuple(current) + flags)) + elif mode == REMOVE: + newflags = tuple(set(current).difference(set(flags))) + elif mode == SET: + newflags = flags + + # We could defer this, but I think it's better + # to put it under the lock... + doc.content[self.FLAGS_KEY] = newflags + doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags + doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags + + if self._collection.memstore is not None: + log.msg("putting message in collection") + self._collection.memstore.put_message( + self._mbox, self._uid, + MessageWrapper(fdoc=doc.content, new=False, dirty=True, + docs_id={'fdoc': doc.doc_id})) + else: + # fallback for non-memstore initializations. + self._soledad.put_doc(doc) return map(str, newflags) def getInternalDate(self): @@ -457,8 +457,8 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: Any object implementing C{IMessagePart}. :return: The specified sub-part. """ - if not self.isMultipart(): - raise TypeError + #if not self.isMultipart(): + #raise TypeError try: pmap_dict = self._get_part_from_parts_map(part + 1) except KeyError: @@ -846,14 +846,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): else: return False - # not deferring to thread cause this now uses deferred asa retval - #@deferred #@profile def add_msg(self, raw, subject=None, flags=None, date=None, uid=None, notify_on_disk=False): """ Creates a new message document. - Here lives the magic of the leap mail. Well, in soledad, really. :param raw: the raw message :type raw: str @@ -869,6 +866,31 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :param uid: the message uid for this mailbox :type uid: int + + :return: a deferred that will be fired with the message + uid when the adding succeed. 
+ :rtype: deferred + """ + logger.debug('adding message') + if flags is None: + flags = tuple() + leap_assert_type(flags, tuple) + + d = defer.Deferred() + self._do_add_msg(raw, flags, subject, date, notify_on_disk, d) + return d + + @deferred_to_thread + def _do_add_msg(self, raw, flags, subject, date, notify_on_disk, observer): + """ + Helper that creates a new message document. + Here lives the magic of the leap mail. Well, in soledad, really. + + See `add_msg` docstring for parameter info. + + :param observer: a deferred that will be fired with the message + uid when the adding succeed. + :type observer: deferred """ # TODO signal that we can delete the original message!----- # when all the processing is done. @@ -876,11 +898,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # TODO add the linked-from info ! # TODO add reference to the original message - logger.debug('adding message') - if flags is None: - flags = tuple() - leap_assert_type(flags, tuple) - # parse msg, chash, size, multi = self._do_parse(raw) @@ -918,16 +935,13 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self.set_recent_flag(uid) - # Saving ---------------------------------------- # TODO ---- add reference to original doc, to be deleted # after writes are done. msg_container = MessageWrapper(fd, hd, cdocs) - # we return a deferred that by default will be triggered - # inmediately. - d = self.memstore.create_message(self.mbox, uid, msg_container, - notify_on_disk=notify_on_disk) - return d + self.memstore.create_message(self.mbox, uid, msg_container, + observer=observer, + notify_on_disk=notify_on_disk) # # getters: specific queries @@ -1030,7 +1044,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self.recent_flags.difference_update( set([uid])) - @deferred + @deferred_to_thread def set_recent_flag(self, uid): """ Set Recent flag for a given uid. 
@@ -1080,7 +1094,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): return None return fdoc.content.get(fields.UID_KEY, None) - @deferred + @deferred_to_thread def _get_uid_from_msgid(self, msgid): """ Return a UID for a given message-id. @@ -1100,7 +1114,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): return self._get_uid_from_msgidCb(msgid) #@profile - def set_flags(self, mbox, messages, flags, mode): + def set_flags(self, mbox, messages, flags, mode, observer): """ Set flags for a sequence of messages. @@ -1112,16 +1126,33 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :type flags: tuple :param mode: the mode for setting. 1 is append, -1 is remove, 0 set. :type mode: int + :param observer: a deferred that will be called with the dictionary + mapping UIDs to flags after the operation has been + done. + :type observer: deferred """ - result = {} + # XXX we could defer *this* to thread pool, and gather results... + # XXX use deferredList + + deferreds = [] for msg_id in messages: - log.msg("MSG ID = %s" % msg_id) - msg = self.get_msg_by_uid(msg_id, mem_only=True, flags_only=True) - if not msg: - continue - result[msg_id] = msg.setFlags(flags, mode) + deferreds.append( + self._set_flag_for_uid(msg_id, flags, mode)) - return result + def notify(result): + observer.callback(dict(result)) + d1 = defer.gatherResults(deferreds, consumeErrors=True) + d1.addCallback(notify) + + @deferred_to_thread + def _set_flag_for_uid(self, msg_id, flags, mode): + """ + Run the set_flag operation in the thread pool. 
+ """ + log.msg("MSG ID = %s" % msg_id) + msg = self.get_msg_by_uid(msg_id, mem_only=True, flags_only=True) + if msg is not None: + return msg_id, msg.setFlags(flags, mode) # getters: generic for a mailbox -- cgit v1.2.3 From d9b37a7a1115b76ebc72413cf1ffe9a613b58d52 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 31 Jan 2014 17:32:27 -0400 Subject: remove wrong unicode conversion --- src/leap/mail/imap/messages.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 03dde29..6ff3967 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -337,11 +337,10 @@ class LeapMessage(fields, MailParser, MBoxParser): :type stuff: basestring :returns: charset """ - # TODO get from subpart headers - # XXX existential doubt 2. shouldn't we make the scope + # XXX shouldn't we make the scope # of the decorator somewhat more persistent? # ah! yes! and put memory bounds. - return get_email_charset(unicode(stuff)) + return get_email_charset(stuff) def getSize(self): """ -- cgit v1.2.3 From bed4a7b6abffe9d8cb9178b9dc89d13d9d87c1e8 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 31 Jan 2014 20:27:28 -0400 Subject: Restore expected TypeError. I must have removed this to get rid of an error with some test sample during the testing of the branch, but it's absolutely needed so that mime attachments get shown properly. If the TypeError raises inappropriately due to some malformed part_map, then we will have to catch it using a workaround somewhere else. 
--- src/leap/mail/imap/messages.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 6ff3967..4a07ef7 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -299,7 +299,9 @@ class LeapMessage(fields, MailParser, MBoxParser): return fd # TODO refactor with getBodyFile in MessagePart + fd = StringIO.StringIO() + if self._bdoc is not None: bdoc_content = self._bdoc.content if empty(bdoc_content): @@ -456,8 +458,8 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: Any object implementing C{IMessagePart}. :return: The specified sub-part. """ - #if not self.isMultipart(): - #raise TypeError + if not self.isMultipart(): + raise TypeError try: pmap_dict = self._get_part_from_parts_map(part + 1) except KeyError: -- cgit v1.2.3 From 18fed49c4143eb764ae9e806882d24f8f4e95744 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Sun, 2 Feb 2014 09:26:37 -0400 Subject: fix missing content after in-memory add because THE KEYS WILL BE STRINGS AFTER ADDED TO SOLEDAD Can I remember that? * Fix copy from local folders * Fix copy when we already have a copy of the message in the inbox, marked as deleted. * Fix also bad deferred.succeed in add_msg when it already exists. 
--- src/leap/mail/imap/messages.py | 88 ++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 41 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 4a07ef7..6f822db 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -37,6 +37,7 @@ from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset from leap.mail import walk from leap.mail.utils import first, find_charset, lowerdict, empty +from leap.mail.utils import stringify_parts_map from leap.mail.decorators import deferred_to_thread from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields @@ -219,7 +220,6 @@ class LeapMessage(fields, MailParser, MBoxParser): # setFlags not in the interface spec but we use it with store command. - #@profile def setFlags(self, flags, mode): """ Sets the flags for this message @@ -243,30 +243,30 @@ class LeapMessage(fields, MailParser, MBoxParser): REMOVE = -1 SET = 0 - #with self.flags_lock: - current = doc.content[self.FLAGS_KEY] - if mode == APPEND: - newflags = tuple(set(tuple(current) + flags)) - elif mode == REMOVE: - newflags = tuple(set(current).difference(set(flags))) - elif mode == SET: - newflags = flags - - # We could defer this, but I think it's better - # to put it under the lock... - doc.content[self.FLAGS_KEY] = newflags - doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags - doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags - - if self._collection.memstore is not None: - log.msg("putting message in collection") - self._collection.memstore.put_message( - self._mbox, self._uid, - MessageWrapper(fdoc=doc.content, new=False, dirty=True, - docs_id={'fdoc': doc.doc_id})) - else: - # fallback for non-memstore initializations. 
- self._soledad.put_doc(doc) + with self.flags_lock: + current = doc.content[self.FLAGS_KEY] + if mode == APPEND: + newflags = tuple(set(tuple(current) + flags)) + elif mode == REMOVE: + newflags = tuple(set(current).difference(set(flags))) + elif mode == SET: + newflags = flags + + # We could defer this, but I think it's better + # to put it under the lock... + doc.content[self.FLAGS_KEY] = newflags + doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags + doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags + + if self._collection.memstore is not None: + log.msg("putting message in collection") + self._collection.memstore.put_message( + self._mbox, self._uid, + MessageWrapper(fdoc=doc.content, new=False, dirty=True, + docs_id={'fdoc': doc.doc_id})) + else: + # fallback for non-memstore initializations. + self._soledad.put_doc(doc) return map(str, newflags) def getInternalDate(self): @@ -483,6 +483,9 @@ class LeapMessage(fields, MailParser, MBoxParser): hdoc_content = self._hdoc.content pmap = hdoc_content.get(fields.PARTS_MAP_KEY, {}) + + # remember, lads, soledad is using strings in its keys, + # not integers! 
return pmap[str(part)] # XXX moved to memory store @@ -534,10 +537,10 @@ class LeapMessage(fields, MailParser, MBoxParser): if self._container is not None: bdoc = self._container.memstore.get_cdoc_from_phash(body_phash) - if bdoc and bdoc.content is not None: + if not empty(bdoc) and not empty(bdoc.content): return bdoc - # no memstore or no doc found there + # no memstore, or no body doc found there if self._soledad: body_docs = self._soledad.get_from_index( fields.TYPE_P_HASH_IDX, @@ -847,7 +850,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): else: return False - #@profile def add_msg(self, raw, subject=None, flags=None, date=None, uid=None, notify_on_disk=False): """ @@ -881,7 +883,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self._do_add_msg(raw, flags, subject, date, notify_on_disk, d) return d - @deferred_to_thread + # We SHOULD defer this (or the heavy load here) to the thread pool, + # but it gives troubles with the QSocketNotifier used by Qt... def _do_add_msg(self, raw, flags, subject, date, notify_on_disk, observer): """ Helper that creates a new message document. @@ -907,9 +910,19 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # So we probably should just do an in-memory check and # move the complete check to the soledad writer? # Watch out! We're reserving a UID right after this! - if self._fdoc_already_exists(chash): - logger.warning("We already have that message in this mailbox.") - return defer.succeed('already_exists') + existing_uid = self._fdoc_already_exists(chash) + if existing_uid: + logger.warning("We already have that message in this " + "mailbox, unflagging as deleted") + uid = existing_uid + msg = self.get_msg_by_uid(uid) + msg.setFlags((fields.DELETED_FLAG,), -1) + + # XXX if this is deferred to thread again we should not use + # the callback in the deferred thread, but return and + # call the callback from the caller fun... 
+ observer.callback(uid) + return uid = self.memstore.increment_last_soledad_uid(self.mbox) logger.info("ADDING MSG WITH UID: %s" % uid) @@ -929,17 +942,15 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): hd[key] = parts_map[key] del parts_map + hd = stringify_parts_map(hd) + # The MessageContainer expects a dict, one-indexed # XXX review-me cdocs = dict(((key + 1, doc) for key, doc in enumerate(walk.get_raw_docs(msg, parts)))) self.set_recent_flag(uid) - - # TODO ---- add reference to original doc, to be deleted - # after writes are done. msg_container = MessageWrapper(fd, hd, cdocs) - self.memstore.create_message(self.mbox, uid, msg_container, observer=observer, notify_on_disk=notify_on_disk) @@ -950,7 +961,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # recent flags - #@profile def _get_recent_flags(self): """ An accessor for the recent-flags set for this mailbox. @@ -1004,7 +1014,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): doc="Set of UIDs with the recent flag for this mailbox.") # XXX change naming, indicate soledad query. - #@profile def _get_recent_doc(self): """ Get recent-flags document from Soledad for this mailbox. @@ -1114,7 +1123,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # XXX is this working? return self._get_uid_from_msgidCb(msgid) - #@profile def set_flags(self, mbox, messages, flags, mode, observer): """ Set flags for a sequence of messages. 
@@ -1220,7 +1228,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # FIXME ---------------------------------------------- return sorted(all_docs, key=lambda item: item.content['uid']) - #@profile def all_soledad_uid_iter(self): """ Return an iterator through the UIDs of all messages, sorted in @@ -1232,7 +1239,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fields.TYPE_FLAGS_VAL, self.mbox)]) return db_uids - #@profile def all_uid_iter(self): """ Return an iterator through the UIDs of all messages, from memory. -- cgit v1.2.3 From 23e28bae2c3cb74e00e29ee8add0b73adeb65c2b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 4 Feb 2014 10:57:49 -0400 Subject: fixes after review * Some more docstring completion/fixes. * Removed unneeded str coercion. * Handle mailbox name in logs. * Separate manhole boilerplate into its own file. --- src/leap/mail/imap/messages.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 6f822db..25fc55f 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -328,7 +328,7 @@ class LeapMessage(fields, MailParser, MBoxParser): # We are still returning funky characters from here. 
else: logger.warning("No BDOC found for message.") - return write_fd(str("")) + return write_fd("") @memoized_method def _get_charset(self, stuff): @@ -945,9 +945,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): hd = stringify_parts_map(hd) # The MessageContainer expects a dict, one-indexed - # XXX review-me - cdocs = dict(((key + 1, doc) for key, doc in - enumerate(walk.get_raw_docs(msg, parts)))) + cdocs = dict(enumerate(walk.get_raw_docs(msg, parts), 1)) self.set_recent_flag(uid) msg_container = MessageWrapper(fd, hd, cdocs) -- cgit v1.2.3 From 06556ec6dc56a4859736fc2782779ee2eb9c1f55 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 5 Feb 2014 23:44:23 -0400 Subject: defer parse to thread --- src/leap/mail/imap/messages.py | 72 +++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 46 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 25fc55f..89beaaa 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -78,7 +78,7 @@ def try_unique_query(curried): # TODO we could take action, like trigger a background # process to kill dupes. name = getattr(curried, 'expected', 'doc') - logger.warning( + logger.debug( "More than one %s found for this mbox, " "we got a duplicate!!" % (name,)) return query.pop() @@ -720,9 +720,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # ensure that we have a recent-flags and a hdocs-sec doc self._get_or_create_rdoc() - # Not for now... - #self._get_or_create_hdocset() - def _get_empty_doc(self, _type=FLAGS_DOC): """ Returns an empty doc for storing different message parts. 
@@ -758,21 +755,26 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): hdocset[fields.MBOX_KEY] = self.mbox self._soledad.create_doc(hdocset) + @deferred_to_thread def _do_parse(self, raw): """ Parse raw message and return it along with relevant information about its outer level. + This is done in a separate thread, and the callback is passed + to `_do_add_msg` method. + :param raw: the raw message :type raw: StringIO or basestring - :return: msg, chash, size, multi + :return: msg, parts, chash, size, multi :rtype: tuple """ msg = self._get_parsed_msg(raw) chash = self._get_hash(msg) size = len(msg.as_string()) multi = msg.is_multipart() - return msg, chash, size, multi + parts = walk.get_parts(msg) + return msg, parts, chash, size, multi def _populate_flags(self, flags, uid, chash, size, multi): """ @@ -879,19 +881,25 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): flags = tuple() leap_assert_type(flags, tuple) - d = defer.Deferred() - self._do_add_msg(raw, flags, subject, date, notify_on_disk, d) - return d + observer = defer.Deferred() + + d = self._do_parse(raw) + d.addCallback(self._do_add_msg, flags, subject, date, + notify_on_disk, observer) + return observer - # We SHOULD defer this (or the heavy load here) to the thread pool, + # We SHOULD defer the heavy load here) to the thread pool, # but it gives troubles with the QSocketNotifier used by Qt... - def _do_add_msg(self, raw, flags, subject, date, notify_on_disk, observer): + def _do_add_msg(self, parse_result, flags, subject, + date, notify_on_disk, observer): """ Helper that creates a new message document. Here lives the magic of the leap mail. Well, in soledad, really. See `add_msg` docstring for parameter info. + :param parse_result: a tuple with the results of `self._do_parse` + :type parse_result: tuple :param observer: a deferred that will be fired with the message uid when the adding succeed. 
:type observer: deferred @@ -902,26 +910,17 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # TODO add the linked-from info ! # TODO add reference to the original message - # parse - msg, chash, size, multi = self._do_parse(raw) + from twisted.internet import reactor + msg, parts, chash, size, multi = parse_result # check for uniqueness -------------------------------- - # XXX profiler says that this test is costly. - # So we probably should just do an in-memory check and - # move the complete check to the soledad writer? # Watch out! We're reserving a UID right after this! existing_uid = self._fdoc_already_exists(chash) if existing_uid: - logger.warning("We already have that message in this " - "mailbox, unflagging as deleted") uid = existing_uid msg = self.get_msg_by_uid(uid) - msg.setFlags((fields.DELETED_FLAG,), -1) - - # XXX if this is deferred to thread again we should not use - # the callback in the deferred thread, but return and - # call the callback from the caller fun... 
- observer.callback(uid) + reactor.callLater(0, msg.setFlags, (fields.DELETED_FLAG,), -1) + reactor.callLater(0, observer.callback, uid) return uid = self.memstore.increment_last_soledad_uid(self.mbox) @@ -930,7 +929,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fd = self._populate_flags(flags, uid, chash, size, multi) hd = self._populate_headr(msg, chash, subject, date) - parts = walk.get_parts(msg) body_phash_fun = [walk.get_body_phash_simple, walk.get_body_phash_multi][int(multi)] body_phash = body_phash_fun(walk.get_payloads(msg)) @@ -949,9 +947,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self.set_recent_flag(uid) msg_container = MessageWrapper(fd, hd, cdocs) - self.memstore.create_message(self.mbox, uid, msg_container, - observer=observer, - notify_on_disk=notify_on_disk) + self.memstore.create_message( + self.mbox, uid, msg_container, + observer=observer, notify_on_disk=notify_on_disk) # # getters: specific queries @@ -982,14 +980,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): {'doc_id': rdoc.doc_id, 'set': rflags}) return rflags - #else: - # fallback for cases without memory store - #with self._rdoc_lock: - #rdoc = self._get_recent_doc() - #self.__rflags = set(rdoc.content.get( - #fields.RECENTFLAGS_KEY, [])) - #return self.__rflags - def _set_recent_flags(self, value): """ Setter for the recent-flags set for this mailbox. @@ -997,16 +987,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): if self.memstore is not None: self.memstore.set_recent_flags(self.mbox, value) - #else: - # fallback for cases without memory store - #with self._rdoc_lock: - #rdoc = self._get_recent_doc() - #newv = set(value) - #self.__rflags = newv - #rdoc.content[fields.RECENTFLAGS_KEY] = list(newv) - # XXX should deferLater 0 it? 
- #self._soledad.put_doc(rdoc) - recent_flags = property( _get_recent_flags, _set_recent_flags, doc="Set of UIDs with the recent flag for this mailbox.") -- cgit v1.2.3 From bd83f834920709db3350c58dedd3cd2181c1b2cc Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 6 Feb 2014 02:28:54 -0400 Subject: prefetch flag docs --- src/leap/mail/imap/messages.py | 68 +++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 89beaaa..3ba9d1b 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -919,7 +919,10 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): if existing_uid: uid = existing_uid msg = self.get_msg_by_uid(uid) - reactor.callLater(0, msg.setFlags, (fields.DELETED_FLAG,), -1) + + # TODO this cannot be deferred, this has to block. + #reactor.callLater(0, msg.setFlags, (fields.DELETED_FLAG,), -1) + msg.setFlags((fields.DELETED_FLAG,), -1) reactor.callLater(0, observer.callback, uid) return @@ -1221,49 +1224,46 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): """ Return an iterator through the UIDs of all messages, from memory. """ - if self.memstore is not None: - mem_uids = self.memstore.get_uids(self.mbox) - soledad_known_uids = self.memstore.get_soledad_known_uids( - self.mbox) - combined = tuple(set(mem_uids).union(soledad_known_uids)) - return combined + mem_uids = self.memstore.get_uids(self.mbox) + soledad_known_uids = self.memstore.get_soledad_known_uids( + self.mbox) + combined = tuple(set(mem_uids).union(soledad_known_uids)) + return combined - # XXX MOVE to memstore - def all_flags(self): + def get_all_soledad_flag_docs(self): """ - Return a dict with all flags documents for this mailbox. 
- """ - # XXX get all from memstore and cache it there - # FIXME should get all uids, get them fro memstore, - # and get only the missing ones from disk. + Return a dict with the content of all the flag documents + in soledad store for the given mbox. + :param mbox: the mailbox + :type mbox: str or unicode + :rtype: dict + """ + # XXX we really could return a reduced version with + # just {'uid': (flags-tuple,) since the prefetch is + # only oriented to get the flag tuples. all_flags = dict((( doc.content[self.UID_KEY], - doc.content[self.FLAGS_KEY]) for doc in + dict(doc.content)) for doc in self._soledad.get_from_index( fields.TYPE_MBOX_IDX, fields.TYPE_FLAGS_VAL, self.mbox))) - if self.memstore is not None: - uids = self.memstore.get_uids(self.mbox) - docs = ((uid, self.memstore.get_message(self.mbox, uid)) - for uid in uids) - for uid, doc in docs: - all_flags[uid] = doc.fdoc.content[self.FLAGS_KEY] - return all_flags - def all_flags_chash(self): - """ - Return a dict with the content-hash for all flag documents - for this mailbox. - """ - all_flags_chash = dict((( - doc.content[self.UID_KEY], - doc.content[self.CONTENT_HASH_KEY]) for doc in - self._soledad.get_from_index( - fields.TYPE_MBOX_IDX, - fields.TYPE_FLAGS_VAL, self.mbox))) - return all_flags_chash + # XXX Move to memstore too. But we don't need it really, since + # we can cache the headers docs too. + #def all_flags_chash(self): + #""" + #Return a dict with the content-hash for all flag documents + #for this mailbox. 
+ #""" + #all_flags_chash = dict((( + #doc.content[self.UID_KEY], + #doc.content[self.CONTENT_HASH_KEY]) for doc in + #self._soledad.get_from_index( + #fields.TYPE_MBOX_IDX, + #fields.TYPE_FLAGS_VAL, self.mbox))) + #return all_flags_chash def all_headers(self): """ -- cgit v1.2.3 From ff3a6a640fdb345449a5f9cd3379bbaefa36111e Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 6 Feb 2014 15:46:17 -0400 Subject: take recent count from memstore --- src/leap/mail/imap/messages.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 3ba9d1b..cfad1dc 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -1265,6 +1265,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): #fields.TYPE_FLAGS_VAL, self.mbox))) #return all_flags_chash + # XXX get from memstore def all_headers(self): """ Return a dict with all the headers documents for this @@ -1282,13 +1283,10 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :rtype: int """ - # XXX We should cache this in memstore too until next write... 
- count = self._soledad.get_count_from_index( - fields.TYPE_MBOX_IDX, - fields.TYPE_FLAGS_VAL, self.mbox) - if self.memstore is not None: - count += self.memstore.count_new() - return count + # XXX get this from a public method in memstore + store = self.memstore._msg_store + return len([uid for (mbox, uid) in store.keys() + if mbox == self.mbox]) # unseen messages @@ -1300,10 +1298,10 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :return: iterator through unseen message doc UIDs :rtype: iterable """ - return (doc.content[self.UID_KEY] for doc in - self._soledad.get_from_index( - fields.TYPE_MBOX_SEEN_IDX, - fields.TYPE_FLAGS_VAL, self.mbox, '0')) + # XXX get this from a public method in memstore + store = self.memstore._msg_store + return (uid for (mbox, uid), d in store.items() + if mbox == self.mbox and "\\Seen" not in d["fdoc"]["flags"]) def count_unseen(self): """ @@ -1312,10 +1310,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :returns: count :rtype: int """ - count = self._soledad.get_count_from_index( - fields.TYPE_MBOX_SEEN_IDX, - fields.TYPE_FLAGS_VAL, self.mbox, '0') - return count + return len(list(self.unseen_iter())) def get_unseen(self): """ -- cgit v1.2.3 From ee0786c57d72aa8b8da76533f33c3dd65253a878 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 6 Feb 2014 18:11:20 -0400 Subject: long-due update to unittests! So we're safe under the green lights before further rewriting. 
:) --- src/leap/mail/imap/messages.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index cfad1dc..3fbe2ad 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -273,11 +273,19 @@ class LeapMessage(fields, MailParser, MBoxParser): """ Retrieve the date internally associated with this message - :rtype: C{str} + According to the spec, this is NOT the date and time in the + RFC-822 header, but rather a date and time that reflects when the + message was received. + + * In SMTP, date and time of final delivery. + * In COPY, internal date/time of the source message. + * In APPEND, date/time specified. + :return: An RFC822-formatted date string. + :rtype: str """ - date = self._hdoc.content.get(self.DATE_KEY, '') - return str(date) + date = self._hdoc.content.get(fields.DATE_KEY, '') + return date # # IMessagePart @@ -882,7 +890,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): leap_assert_type(flags, tuple) observer = defer.Deferred() - d = self._do_parse(raw) d.addCallback(self._do_add_msg, flags, subject, date, notify_on_disk, observer) -- cgit v1.2.3 From 6586c21a12bfa0d9026068629a9d34203ad577c7 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 7 Feb 2014 02:27:58 -0400 Subject: change internal storage and keying scheme in memstore --- src/leap/mail/imap/messages.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 3fbe2ad..3d25598 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -1290,10 +1290,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :rtype: int """ - # XXX get this from a public method in memstore - store = self.memstore._msg_store - return 
len([uid for (mbox, uid) in store.keys() - if mbox == self.mbox]) + return self.memstore.count(self.mbox) # unseen messages @@ -1305,10 +1302,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :return: iterator through unseen message doc UIDs :rtype: iterable """ - # XXX get this from a public method in memstore - store = self.memstore._msg_store - return (uid for (mbox, uid), d in store.items() - if mbox == self.mbox and "\\Seen" not in d["fdoc"]["flags"]) + return self.memstore.unseen_iter(self.mbox) def count_unseen(self): """ -- cgit v1.2.3 From 3149bbe64346d558ef300a3d760732cf499a28d3 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 7 Feb 2014 02:38:00 -0400 Subject: make fdoc, hdoc, chash 'public' properties --- src/leap/mail/imap/messages.py | 87 +++++++++++++----------------------------- 1 file changed, 26 insertions(+), 61 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 3d25598..fbae05f 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -132,7 +132,7 @@ class LeapMessage(fields, MailParser, MBoxParser): # XXX make these properties public @property - def _fdoc(self): + def fdoc(self): """ An accessor to the flags document. """ @@ -149,7 +149,7 @@ class LeapMessage(fields, MailParser, MBoxParser): return fdoc @property - def _hdoc(self): + def hdoc(self): """ An accessor to the headers document. """ @@ -161,23 +161,23 @@ class LeapMessage(fields, MailParser, MBoxParser): return self._get_headers_doc() @property - def _chash(self): + def chash(self): """ An accessor to the content hash for this message. 
""" - if not self._fdoc: + if not self.fdoc: return None - if not self.__chash and self._fdoc: - self.__chash = self._fdoc.content.get( + if not self.__chash and self.fdoc: + self.__chash = self.fdoc.content.get( fields.CONTENT_HASH_KEY, None) return self.__chash @property - def _bdoc(self): + def bdoc(self): """ An accessor to the body document. """ - if not self._hdoc: + if not self.hdoc: return None if not self.__bdoc: self.__bdoc = self._get_body_doc() @@ -204,7 +204,7 @@ class LeapMessage(fields, MailParser, MBoxParser): uid = self._uid flags = set([]) - fdoc = self._fdoc + fdoc = self.fdoc if fdoc: flags = set(fdoc.content.get(self.FLAGS_KEY, None)) @@ -232,7 +232,7 @@ class LeapMessage(fields, MailParser, MBoxParser): leap_assert(isinstance(flags, tuple), "flags need to be a tuple") log.msg('setting flags: %s (%s)' % (self._uid, flags)) - doc = self._fdoc + doc = self.fdoc if not doc: logger.warning( "Could not find FDOC for %s:%s while setting flags!" % @@ -284,7 +284,7 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: An RFC822-formatted date string. :rtype: str """ - date = self._hdoc.content.get(fields.DATE_KEY, '') + date = self.hdoc.content.get(fields.DATE_KEY, '') return date # @@ -310,8 +310,8 @@ class LeapMessage(fields, MailParser, MBoxParser): fd = StringIO.StringIO() - if self._bdoc is not None: - bdoc_content = self._bdoc.content + if self.bdoc is not None: + bdoc_content = self.bdoc.content if empty(bdoc_content): logger.warning("No BDOC content found for message!!!") return write_fd("") @@ -360,8 +360,8 @@ class LeapMessage(fields, MailParser, MBoxParser): :rtype: int """ size = None - if self._fdoc: - fdoc_content = self._fdoc.content + if self.fdoc is not None: + fdoc_content = self.fdoc.content size = fdoc_content.get(self.SIZE_KEY, False) else: logger.warning("No FLAGS doc for %s:%s" % (self._mbox, @@ -430,8 +430,8 @@ class LeapMessage(fields, MailParser, MBoxParser): """ Return the headers dict for this message. 
""" - if self._hdoc is not None: - hdoc_content = self._hdoc.content + if self.hdoc is not None: + hdoc_content = self.hdoc.content headers = hdoc_content.get(self.HEADERS_KEY, {}) return headers @@ -445,8 +445,8 @@ class LeapMessage(fields, MailParser, MBoxParser): """ Return True if this message is multipart. """ - if self._fdoc: - fdoc_content = self._fdoc.content + if self.fdoc: + fdoc_content = self.fdoc.content is_multipart = fdoc_content.get(self.MULTIPART_KEY, False) return is_multipart else: @@ -485,11 +485,11 @@ class LeapMessage(fields, MailParser, MBoxParser): :raises: KeyError if key does not exist :rtype: dict """ - if not self._hdoc: + if not self.hdoc: logger.warning("Tried to get part but no HDOC found!") return None - hdoc_content = self._hdoc.content + hdoc_content = self.hdoc.content pmap = hdoc_content.get(fields.PARTS_MAP_KEY, {}) # remember, lads, soledad is using strings in its keys, @@ -523,7 +523,7 @@ class LeapMessage(fields, MailParser, MBoxParser): """ head_docs = self._soledad.get_from_index( fields.TYPE_C_HASH_IDX, - fields.TYPE_HEADERS_VAL, str(self._chash)) + fields.TYPE_HEADERS_VAL, str(self.chash)) return first(head_docs) def _get_body_doc(self): @@ -531,7 +531,7 @@ class LeapMessage(fields, MailParser, MBoxParser): Return the document that keeps the body for this message. """ - hdoc_content = self._hdoc.content + hdoc_content = self.hdoc.content body_phash = hdoc_content.get( fields.BODY_KEY, None) if not body_phash: @@ -568,14 +568,14 @@ class LeapMessage(fields, MailParser, MBoxParser): :return: The content value indexed by C{key} or None :rtype: str """ - return self._fdoc.content.get(key, None) + return self.fdoc.content.get(key, None) def does_exist(self): """ Return True if there is actually a flags document for this UID and mbox. 
""" - return not empty(self._fdoc) + return not empty(self.fdoc) class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): @@ -680,8 +680,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): _rdoc_lock = threading.Lock() _rdoc_property_lock = threading.Lock() - _hdocset_lock = threading.Lock() - _hdocset_property_lock = threading.Lock() def __init__(self, mbox=None, soledad=None, memstore=None): """ @@ -722,7 +720,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self.memstore = memstore self.__rflags = None - self.__hdocset = None self.initialize_db() # ensure that we have a recent-flags and a hdocs-sec doc @@ -751,18 +748,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): rdoc[fields.MBOX_KEY] = self.mbox self._soledad.create_doc(rdoc) - def _get_or_create_hdocset(self): - """ - Try to retrieve the hdocs-set doc for this MessageCollection, - and create one if not found. - """ - hdocset = self._get_hdocset_doc() - if not hdocset: - hdocset = self._get_empty_doc(self.HDOCS_SET_DOC) - if self.mbox != fields.INBOX_VAL: - hdocset[fields.MBOX_KEY] = self.mbox - self._soledad.create_doc(hdocset) - @deferred_to_thread def _do_parse(self, raw): """ @@ -1257,32 +1242,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fields.TYPE_FLAGS_VAL, self.mbox))) return all_flags - # XXX Move to memstore too. But we don't need it really, since - # we can cache the headers docs too. - #def all_flags_chash(self): - #""" - #Return a dict with the content-hash for all flag documents - #for this mailbox. 
- #""" - #all_flags_chash = dict((( - #doc.content[self.UID_KEY], - #doc.content[self.CONTENT_HASH_KEY]) for doc in - #self._soledad.get_from_index( - #fields.TYPE_MBOX_IDX, - #fields.TYPE_FLAGS_VAL, self.mbox))) - #return all_flags_chash - - # XXX get from memstore + # TODO get from memstore def all_headers(self): """ Return a dict with all the headers documents for this mailbox. """ - all_headers = dict((( - doc.content[self.CONTENT_HASH_KEY], - doc.content[self.HEADERS_KEY]) for doc in - self._soledad.get_docs(self._hdocset))) - return all_headers def count(self): """ -- cgit v1.2.3 From 813db4a356141592337f39f9c801203367c63193 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 7 Feb 2014 02:54:52 -0400 Subject: remove hdoc copy since it's in its own structure now --- src/leap/mail/imap/messages.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index fbae05f..4b95689 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -153,12 +153,20 @@ class LeapMessage(fields, MailParser, MBoxParser): """ An accessor to the headers document. """ - if self._container is not None: + container = self._container + if container is not None: hdoc = self._container.hdoc if hdoc and not empty(hdoc.content): return hdoc - # XXX cache this into the memory store !!! 
- return self._get_headers_doc() + hdoc = self._get_headers_doc() + + if container and not empty(hdoc.content): + # mem-cache it + hdoc_content = hdoc.content + chash = hdoc_content.get(fields.CONTENT_HASH_KEY) + hdocs = {chash: hdoc_content} + container.memstore.load_header_docs(hdocs) + return hdoc @property def chash(self): -- cgit v1.2.3 From b92e63c316c1cf9f8b6481dbfa70737acfb3eee9 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 7 Feb 2014 05:50:55 -0400 Subject: separate better dirty/new flags; add cdocs --- src/leap/mail/imap/messages.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 4b95689..8b6d3f3 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -264,17 +264,15 @@ class LeapMessage(fields, MailParser, MBoxParser): # to put it under the lock... doc.content[self.FLAGS_KEY] = newflags doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags + + # XXX check if this is working ok. doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags - if self._collection.memstore is not None: - log.msg("putting message in collection") - self._collection.memstore.put_message( - self._mbox, self._uid, - MessageWrapper(fdoc=doc.content, new=False, dirty=True, - docs_id={'fdoc': doc.doc_id})) - else: - # fallback for non-memstore initializations. 
- self._soledad.put_doc(doc) + log.msg("putting message in collection") + self._collection.memstore.put_message( + self._mbox, self._uid, + MessageWrapper(fdoc=doc.content, new=False, dirty=True, + docs_id={'fdoc': doc.doc_id})) return map(str, newflags) def getInternalDate(self): @@ -524,6 +522,7 @@ class LeapMessage(fields, MailParser, MBoxParser): finally: return result + # TODO move to soledadstore instead of accessing soledad directly def _get_headers_doc(self): """ Return the document that keeps the headers for this @@ -534,6 +533,7 @@ class LeapMessage(fields, MailParser, MBoxParser): fields.TYPE_HEADERS_VAL, str(self.chash)) return first(head_docs) + # TODO move to soledadstore instead of accessing soledad directly def _get_body_doc(self): """ Return the document that keeps the body for this @@ -1165,7 +1165,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): or None if not found. :rtype: LeapMessage """ - msg_container = self.memstore.get_message(self.mbox, uid, flags_only) + msg_container = self.memstore.get_message( + self.mbox, uid, flags_only=flags_only) if msg_container is not None: if mem_only: msg = LeapMessage(None, uid, self.mbox, collection=self, -- cgit v1.2.3 From f869b7eecab67d07a23dfb8b2931b3844f7523e3 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 11 Feb 2014 01:45:20 -0400 Subject: fine grained locks for puts --- src/leap/mail/imap/messages.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 8b6d3f3..de5dd1f 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -88,6 +88,13 @@ def try_unique_query(curried): logger.exception("Unhandled error %r" % exc) +""" +A dictionary that keeps one lock per mbox and uid. +""" +# XXX too much overhead? 
+fdoc_locks = defaultdict(lambda: defaultdict(lambda: threading.Lock())) + + class LeapMessage(fields, MailParser, MBoxParser): """ The main representation of a message. @@ -102,8 +109,6 @@ class LeapMessage(fields, MailParser, MBoxParser): implements(imap4.IMessage) - flags_lock = threading.Lock() - def __init__(self, soledad, uid, mbox, collection=None, container=None): """ Initializes a LeapMessage. @@ -129,6 +134,9 @@ class LeapMessage(fields, MailParser, MBoxParser): self.__chash = None self.__bdoc = None + from twisted.internet import reactor + self.reactor = reactor + # XXX make these properties public @property @@ -238,20 +246,21 @@ class LeapMessage(fields, MailParser, MBoxParser): :type mode: int """ leap_assert(isinstance(flags, tuple), "flags need to be a tuple") - log.msg('setting flags: %s (%s)' % (self._uid, flags)) + #log.msg('setting flags: %s (%s)' % (self._uid, flags)) - doc = self.fdoc - if not doc: - logger.warning( - "Could not find FDOC for %s:%s while setting flags!" % - (self._mbox, self._uid)) - return + mbox, uid = self._mbox, self._uid APPEND = 1 REMOVE = -1 SET = 0 - with self.flags_lock: + with fdoc_locks[mbox][uid]: + doc = self.fdoc + if not doc: + logger.warning( + "Could not find FDOC for %r:%s while setting flags!" % + (mbox, uid)) + return current = doc.content[self.FLAGS_KEY] if mode == APPEND: newflags = tuple(set(tuple(current) + flags)) @@ -733,6 +742,9 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # ensure that we have a recent-flags and a hdocs-sec doc self._get_or_create_rdoc() + from twisted.internet import reactor + self.reactor = reactor + def _get_empty_doc(self, _type=FLAGS_DOC): """ Returns an empty doc for storing different message parts. @@ -877,7 +889,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): uid when the adding succeed. 
:rtype: deferred """ - logger.debug('adding message') + logger.debug('Adding message') if flags is None: flags = tuple() leap_assert_type(flags, tuple) @@ -921,7 +933,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): msg = self.get_msg_by_uid(uid) # TODO this cannot be deferred, this has to block. - #reactor.callLater(0, msg.setFlags, (fields.DELETED_FLAG,), -1) msg.setFlags((fields.DELETED_FLAG,), -1) reactor.callLater(0, observer.callback, uid) return -- cgit v1.2.3 From df9cd2e0d59600840acb6aa00f36b7eb43e48297 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 11 Feb 2014 01:49:01 -0400 Subject: fix several bugs in copy/store --- src/leap/mail/imap/messages.py | 64 +++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 39 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index de5dd1f..bbc9deb 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -268,20 +268,12 @@ class LeapMessage(fields, MailParser, MBoxParser): newflags = tuple(set(current).difference(set(flags))) elif mode == SET: newflags = flags + new_fdoc = { + self.FLAGS_KEY: newflags, + self.SEEN_KEY: self.SEEN_FLAG in newflags, + self.DEL_KEY: self.DELETED_FLAG in newflags} + self._collection.memstore.update_flags(mbox, uid, new_fdoc) - # We could defer this, but I think it's better - # to put it under the lock... - doc.content[self.FLAGS_KEY] = newflags - doc.content[self.SEEN_KEY] = self.SEEN_FLAG in flags - - # XXX check if this is working ok. 
- doc.content[self.DEL_KEY] = self.DELETED_FLAG in flags - - log.msg("putting message in collection") - self._collection.memstore.put_message( - self._mbox, self._uid, - MessageWrapper(fdoc=doc.content, new=False, dirty=True, - docs_id={'fdoc': doc.doc_id})) return map(str, newflags) def getInternalDate(self): @@ -334,7 +326,7 @@ class LeapMessage(fields, MailParser, MBoxParser): body = bdoc_content.get(self.RAW_KEY, "") content_type = bdoc_content.get('content-type', "") charset = find_charset(content_type) - logger.debug('got charset from content-type: %s' % charset) + #logger.debug('got charset from content-type: %s' % charset) if charset is None: charset = self._get_charset(body) try: @@ -855,8 +847,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :return: False, if it does not exist, or UID. """ exist = False - if self.memstore is not None: - exist = self.memstore.get_fdoc_from_chash(chash, self.mbox) + exist = self.memstore.get_fdoc_from_chash(chash, self.mbox) if not exist: exist = self._get_fdoc_from_chash(chash) @@ -1115,6 +1106,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # XXX is this working? return self._get_uid_from_msgidCb(msgid) + @deferred_to_thread def set_flags(self, mbox, messages, flags, mode, observer): """ Set flags for a sequence of messages. @@ -1132,28 +1124,18 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): done. :type observer: deferred """ - # XXX we could defer *this* to thread pool, and gather results... 
- # XXX use deferredList + reactor = self.reactor + getmsg = self.get_msg_by_uid - deferreds = [] - for msg_id in messages: - deferreds.append( - self._set_flag_for_uid(msg_id, flags, mode)) + def set_flags(uid, flags, mode): + msg = getmsg(uid, mem_only=True, flags_only=True) + if msg is not None: + return uid, msg.setFlags(flags, mode) - def notify(result): - observer.callback(dict(result)) - d1 = defer.gatherResults(deferreds, consumeErrors=True) - d1.addCallback(notify) + result = dict( + set_flags(uid, tuple(flags), mode) for uid in messages) - @deferred_to_thread - def _set_flag_for_uid(self, msg_id, flags, mode): - """ - Run the set_flag operation in the thread pool. - """ - log.msg("MSG ID = %s" % msg_id) - msg = self.get_msg_by_uid(msg_id, mem_only=True, flags_only=True) - if msg is not None: - return msg_id, msg.setFlags(flags, mode) + reactor.callFromThread(observer.callback, result) # getters: generic for a mailbox @@ -1229,7 +1211,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): db_uids = set([doc.content[self.UID_KEY] for doc in self._soledad.get_from_index( fields.TYPE_MBOX_IDX, - fields.TYPE_FLAGS_VAL, self.mbox)]) + fields.TYPE_FLAGS_VAL, self.mbox) + if not empty(doc)]) return db_uids def all_uid_iter(self): @@ -1254,12 +1237,15 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # XXX we really could return a reduced version with # just {'uid': (flags-tuple,) since the prefetch is # only oriented to get the flag tuples. 
- all_flags = dict((( + all_docs = [( doc.content[self.UID_KEY], - dict(doc.content)) for doc in + dict(doc.content)) + for doc in self._soledad.get_from_index( fields.TYPE_MBOX_IDX, - fields.TYPE_FLAGS_VAL, self.mbox))) + fields.TYPE_FLAGS_VAL, self.mbox) + if not empty(doc.content)] + all_flags = dict(all_docs) return all_flags # TODO get from memstore -- cgit v1.2.3 From fd9c8c2e3c88476b90805b689f6914fe5eac16df Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 11 Feb 2014 02:53:28 -0400 Subject: defer fetch to thread also, dispatch query for all headers to its own method. --- src/leap/mail/imap/messages.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index bbc9deb..7884fb0 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -28,7 +28,6 @@ from functools import partial from twisted.mail import imap4 from twisted.internet import defer -from twisted.python import log from zope.interface import implements from zope.proxy import sameProxiedObjects @@ -1248,12 +1247,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): all_flags = dict(all_docs) return all_flags - # TODO get from memstore def all_headers(self): """ - Return a dict with all the headers documents for this + Return a dict with all the header documents for this mailbox. + + :rtype: dict """ + return self.memstore.all_headers(self.mbox) def count(self): """ -- cgit v1.2.3 From f6566fe83c93625b918664526e8858f7be667354 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 11 Feb 2014 16:20:26 -0400 Subject: defer appends too and cut some more time by firing the callback as soon as we've got an UID. 
--- src/leap/mail/imap/messages.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 7884fb0..c133a6d 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -879,19 +879,18 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): uid when the adding succeed. :rtype: deferred """ - logger.debug('Adding message') if flags is None: flags = tuple() leap_assert_type(flags, tuple) observer = defer.Deferred() d = self._do_parse(raw) - d.addCallback(self._do_add_msg, flags, subject, date, - notify_on_disk, observer) + d.addCallback(lambda result: self.reactor.callInThread( + self._do_add_msg, result, flags, subject, date, + notify_on_disk, observer)) return observer - # We SHOULD defer the heavy load here) to the thread pool, - # but it gives troubles with the QSocketNotifier used by Qt... + # Called in thread def _do_add_msg(self, parse_result, flags, subject, date, notify_on_disk, observer): """ @@ -912,7 +911,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # TODO add the linked-from info ! # TODO add reference to the original message - from twisted.internet import reactor msg, parts, chash, size, multi = parse_result # check for uniqueness -------------------------------- @@ -922,13 +920,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): uid = existing_uid msg = self.get_msg_by_uid(uid) - # TODO this cannot be deferred, this has to block. 
+ # We can say the observer that we're done + self.reactor.callFromThread(observer.callback, uid) msg.setFlags((fields.DELETED_FLAG,), -1) - reactor.callLater(0, observer.callback, uid) return uid = self.memstore.increment_last_soledad_uid(self.mbox) - logger.info("ADDING MSG WITH UID: %s" % uid) + # We can say the observer that we're done + self.reactor.callFromThread(observer.callback, uid) fd = self._populate_flags(flags, uid, chash, size, multi) hd = self._populate_headr(msg, chash, subject, date) @@ -953,7 +952,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): msg_container = MessageWrapper(fd, hd, cdocs) self.memstore.create_message( self.mbox, uid, msg_container, - observer=observer, notify_on_disk=notify_on_disk) + observer=None, notify_on_disk=notify_on_disk) # # getters: specific queries -- cgit v1.2.3 From ac4c70f0be36c985e16e3f4ec0a38ef6f8d48166 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 12 Feb 2014 12:42:02 -0400 Subject: remove all refs during removal, and protect from empty docs --- src/leap/mail/imap/messages.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index c133a6d..0aa40f1 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -850,7 +850,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): if not exist: exist = self._get_fdoc_from_chash(chash) - if exist: + if exist and exist.content is not None: return exist.content.get(fields.UID_KEY, "unknown-uid") else: return False @@ -926,8 +926,13 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): return uid = self.memstore.increment_last_soledad_uid(self.mbox) - # We can say the observer that we're done + # We can say the observer that we're done at this point. 
+ # Make sure it has no serious consequences if we're issued + # a fetch command right after... self.reactor.callFromThread(observer.callback, uid) + # if we did the notify, we need to invalidate the deferred + # so not to try to fire it twice. + observer = None fd = self._populate_flags(flags, uid, chash, size, multi) hd = self._populate_headr(msg, chash, subject, date) @@ -952,7 +957,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): msg_container = MessageWrapper(fd, hd, cdocs) self.memstore.create_message( self.mbox, uid, msg_container, - observer=None, notify_on_disk=notify_on_disk) + observer=observer, notify_on_disk=notify_on_disk) # # getters: specific queries @@ -1130,8 +1135,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): if msg is not None: return uid, msg.setFlags(flags, mode) - result = dict( - set_flags(uid, tuple(flags), mode) for uid in messages) + setted_flags = [set_flags(uid, flags, mode) for uid in messages] + result = dict(filter(None, setted_flags)) reactor.callFromThread(observer.callback, result) @@ -1158,6 +1163,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): """ msg_container = self.memstore.get_message( self.mbox, uid, flags_only=flags_only) + if msg_container is not None: if mem_only: msg = LeapMessage(None, uid, self.mbox, collection=self, @@ -1170,6 +1176,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): collection=self, container=msg_container) else: msg = LeapMessage(self._soledad, uid, self.mbox, collection=self) + if not msg.does_exist(): return None return msg -- cgit v1.2.3 From 1ff1663fb2968adff87208134c374c4084670ace Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 12 Feb 2014 13:05:08 -0400 Subject: docstring fixes --- src/leap/mail/imap/messages.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py 
b/src/leap/mail/imap/messages.py index 0aa40f1..a49ea90 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -245,8 +245,6 @@ class LeapMessage(fields, MailParser, MBoxParser): :type mode: int """ leap_assert(isinstance(flags, tuple), "flags need to be a tuple") - #log.msg('setting flags: %s (%s)' % (self._uid, flags)) - mbox, uid = self._mbox, self._uid APPEND = 1 @@ -325,7 +323,6 @@ class LeapMessage(fields, MailParser, MBoxParser): body = bdoc_content.get(self.RAW_KEY, "") content_type = bdoc_content.get('content-type', "") charset = find_charset(content_type) - #logger.debug('got charset from content-type: %s' % charset) if charset is None: charset = self._get_charset(body) try: -- cgit v1.2.3 From 1baafbaa8e3dd7d62580ba4ad3a829ceaf16f583 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 12 Feb 2014 13:13:36 -0400 Subject: remove early notification on append for now this can be done to save some msec, but additional measures have to be taken to avoid inconsistencies with reads right after this is done. we could make those wait until a second deferred is done, for example. --- src/leap/mail/imap/messages.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index a49ea90..fc1ec55 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -923,13 +923,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): return uid = self.memstore.increment_last_soledad_uid(self.mbox) - # We can say the observer that we're done at this point. - # Make sure it has no serious consequences if we're issued - # a fetch command right after... 
- self.reactor.callFromThread(observer.callback, uid) + + # We can say the observer that we're done at this point, but + # before that we should make sure it has no serious consequences + # if we're issued, for instance, a fetch command right after... + #self.reactor.callFromThread(observer.callback, uid) # if we did the notify, we need to invalidate the deferred # so not to try to fire it twice. - observer = None + #observer = None fd = self._populate_flags(flags, uid, chash, size, multi) hd = self._populate_headr(msg, chash, subject, date) -- cgit v1.2.3 From 99ec94f08fb2d062eb2c350b64971ea9ad8d87dd Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 17 Feb 2014 13:59:06 -0400 Subject: avoid unneeded db index updates and rdoc creation --- src/leap/mail/imap/messages.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index fc1ec55..9bd64fc 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -686,6 +686,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): _rdoc_lock = threading.Lock() _rdoc_property_lock = threading.Lock() + _initialized = {} + def __init__(self, mbox=None, soledad=None, memstore=None): """ Constructor for MessageCollection. 
@@ -725,10 +727,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self.memstore = memstore self.__rflags = None - self.initialize_db() - # ensure that we have a recent-flags and a hdocs-sec doc - self._get_or_create_rdoc() + if not self._initialized.get(mbox, False): + self.initialize_db() + # ensure that we have a recent-flags and a hdocs-sec doc + self._get_or_create_rdoc() + self._initialized[mbox] = True from twisted.internet import reactor self.reactor = reactor -- cgit v1.2.3 From 4bcb32639bff9a5aab076dba2bdc7667cea60c7f Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 20 Feb 2014 01:11:26 -0400 Subject: fix rdoc duplication --- src/leap/mail/imap/messages.py | 51 ++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 22 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 9bd64fc..8c777f5 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -77,7 +77,7 @@ def try_unique_query(curried): # TODO we could take action, like trigger a background # process to kill dupes. name = getattr(curried, 'expected', 'doc') - logger.debug( + logger.warning( "More than one %s found for this mbox, " "we got a duplicate!!" 
% (name,)) return query.pop() @@ -683,8 +683,10 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # TODO we would abstract this to a SoledadProperty class - _rdoc_lock = threading.Lock() - _rdoc_property_lock = threading.Lock() + _rdoc_lock = defaultdict(lambda: threading.Lock()) + _rdoc_write_lock = defaultdict(lambda: threading.Lock()) + _rdoc_read_lock = defaultdict(lambda: threading.Lock()) + _rdoc_property_lock = defaultdict(lambda: threading.Lock()) _initialized = {} @@ -729,10 +731,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): self.__rflags = None if not self._initialized.get(mbox, False): - self.initialize_db() - # ensure that we have a recent-flags and a hdocs-sec doc - self._get_or_create_rdoc() - self._initialized[mbox] = True + try: + self.initialize_db() + # ensure that we have a recent-flags doc + self._get_or_create_rdoc() + except Exception: + logger.debug("Error initializing %r" % (mbox,)) + else: + self._initialized[mbox] = True from twisted.internet import reactor self.reactor = reactor @@ -753,12 +759,14 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): Try to retrieve the recent-flags doc for this MessageCollection, and create one if not found. 
""" - rdoc = self._get_recent_doc() - if not rdoc: - rdoc = self._get_empty_doc(self.RECENT_DOC) - if self.mbox != fields.INBOX_VAL: - rdoc[fields.MBOX_KEY] = self.mbox - self._soledad.create_doc(rdoc) + # XXX should move this to memstore too + with self._rdoc_write_lock[self.mbox]: + rdoc = self._get_recent_doc_from_soledad() + if rdoc is None: + rdoc = self._get_empty_doc(self.RECENT_DOC) + if self.mbox != fields.INBOX_VAL: + rdoc[fields.MBOX_KEY] = self.mbox + self._soledad.create_doc(rdoc) @deferred_to_thread def _do_parse(self, raw): @@ -976,12 +984,12 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): return self.__rflags if self.memstore is not None: - with self._rdoc_lock: + with self._rdoc_lock[self.mbox]: rflags = self.memstore.get_recent_flags(self.mbox) if not rflags: # not loaded in the memory store yet. # let's fetch them from soledad... - rdoc = self._get_recent_doc() + rdoc = self._get_recent_doc_from_soledad() rflags = set(rdoc.content.get( fields.RECENTFLAGS_KEY, [])) # ...and cache them now. @@ -1001,8 +1009,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): _get_recent_flags, _set_recent_flags, doc="Set of UIDs with the recent flag for this mailbox.") - # XXX change naming, indicate soledad query. - def _get_recent_doc(self): + def _get_recent_doc_from_soledad(self): """ Get recent-flags document from Soledad for this mailbox. 
:rtype: SoledadDocument or None @@ -1012,8 +1019,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fields.TYPE_MBOX_IDX, fields.TYPE_RECENT_VAL, self.mbox) curried.expected = "rdoc" - rdoc = try_unique_query(curried) - return rdoc + with self._rdoc_read_lock[self.mbox]: + return try_unique_query(curried) # Property-set modification (protected by a different # lock to give atomicity to the read/write operation) @@ -1025,7 +1032,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :param uids: the uids to unset :type uid: sequence """ - with self._rdoc_property_lock: + with self._rdoc_property_lock[self.mbox]: self.recent_flags.difference_update( set(uids)) @@ -1038,7 +1045,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :param uid: the uid to unset :type uid: int """ - with self._rdoc_property_lock: + with self._rdoc_property_lock[self.mbox]: self.recent_flags.difference_update( set([uid])) @@ -1050,7 +1057,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :param uid: the uid to set :type uid: int """ - with self._rdoc_property_lock: + with self._rdoc_property_lock[self.mbox]: self.recent_flags = self.recent_flags.union( set([uid])) -- cgit v1.2.3 From c9ada6da8f3c94efd0739abd8be46c6356854a49 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 20 Feb 2014 01:22:01 -0400 Subject: catch empty rdoc --- src/leap/mail/imap/messages.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 8c777f5..9f7f6e2 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -990,6 +990,8 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # not loaded in the memory store yet. # let's fetch them from soledad... 
rdoc = self._get_recent_doc_from_soledad() + if rdoc is None: + return set([]) rflags = set(rdoc.content.get( fields.RECENTFLAGS_KEY, [])) # ...and cache them now. -- cgit v1.2.3 From b2d97c9faef6037a065e2903afe5b0ab2624917e Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 20 Feb 2014 02:52:17 -0400 Subject: mail parsing performance improvements Although the do_parse function is deferred to threads, we were actually waiting till its return to fire the callback of the deferred, and hence the "append ok" was being delayed. During massive appends, this was a tight loop contributing as much as 35 msec, of a total of 100 msec average. Several inefficiencies are addressed here: * use pycryptopp hash functions. * avoiding function calling overhead. * avoid duplicate call to message.as_string * make use of the string size caching capabilities. * avoiding the mail Parser initialization/method call completely, in favor of the module helper to get the object from string. Overall, these changes cut parsing to 50% of the initial timing by my measurements with line_profiler, YMMV. 
--- src/leap/mail/imap/messages.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 9f7f6e2..9a001b3 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -24,8 +24,10 @@ import threading import StringIO from collections import defaultdict +from email import message_from_string from functools import partial +from pycryptopp.hash import sha256 from twisted.mail import imap4 from twisted.internet import defer from zope.interface import implements @@ -42,7 +44,7 @@ from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields from leap.mail.imap.memorystore import MessageWrapper from leap.mail.imap.messageparts import MessagePart -from leap.mail.imap.parser import MailParser, MBoxParser +from leap.mail.imap.parser import MBoxParser logger = logging.getLogger(__name__) @@ -94,7 +96,7 @@ A dictionary that keeps one lock per mbox and uid. fdoc_locks = defaultdict(lambda: defaultdict(lambda: threading.Lock())) -class LeapMessage(fields, MailParser, MBoxParser): +class LeapMessage(fields, MBoxParser): """ The main representation of a message. @@ -123,7 +125,6 @@ class LeapMessage(fields, MailParser, MBoxParser): :param container: a IMessageContainer implementor instance :type container: IMessageContainer """ - MailParser.__init__(self) self._soledad = soledad self._uid = int(uid) self._mbox = self._parse_mailbox_name(mbox) @@ -583,7 +584,7 @@ class LeapMessage(fields, MailParser, MBoxParser): return not empty(self.fdoc) -class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): +class MessageCollection(WithMsgFields, IndexedDB, MBoxParser): """ A collection of messages, surprisingly. 
@@ -713,7 +714,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :param memstore: a MemoryStore instance :type memstore: MemoryStore """ - MailParser.__init__(self) leap_assert(mbox, "Need a mailbox name to initialize") leap_assert(mbox.strip() != "", "mbox cannot be blank space") leap_assert(isinstance(mbox, (str, unicode)), @@ -782,11 +782,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :return: msg, parts, chash, size, multi :rtype: tuple """ - msg = self._get_parsed_msg(raw) - chash = self._get_hash(msg) - size = len(msg.as_string()) - multi = msg.is_multipart() + msg = message_from_string(raw) parts = walk.get_parts(msg) + size = len(raw) + chash = sha256.SHA256(raw).hexdigest() + multi = msg.is_multipart() return msg, parts, chash, size, multi def _populate_flags(self, flags, uid, chash, size, multi): @@ -803,7 +803,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fd[self.SIZE_KEY] = size fd[self.MULTIPART_KEY] = multi if flags: - fd[self.FLAGS_KEY] = map(self._stringify, flags) + fd[self.FLAGS_KEY] = flags fd[self.SEEN_KEY] = self.SEEN_FLAG in flags fd[self.DEL_KEY] = self.DELETED_FLAG in flags fd[self.RECENT_KEY] = True # set always by default @@ -926,11 +926,10 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # Watch out! We're reserving a UID right after this! existing_uid = self._fdoc_already_exists(chash) if existing_uid: - uid = existing_uid - msg = self.get_msg_by_uid(uid) + msg = self.get_msg_by_uid(existing_uid) # We can say the observer that we're done - self.reactor.callFromThread(observer.callback, uid) + self.reactor.callFromThread(observer.callback, existing_uid) msg.setFlags((fields.DELETED_FLAG,), -1) return -- cgit v1.2.3 From bd476d7ba97a479db14a9b72b8b52ef5997d98f6 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 20 Feb 2014 17:07:58 -0400 Subject: Fix regression on "duplicate drafts" issue. 
Not a permanent solution, but it looks for fdoc matching a given msgid to avoid duplication of drafts in thunderbird folders. --- src/leap/mail/imap/messages.py | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) (limited to 'src/leap/mail/imap/messages.py') diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 9a001b3..b0b2f95 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -43,7 +43,7 @@ from leap.mail.decorators import deferred_to_thread from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields from leap.mail.imap.memorystore import MessageWrapper -from leap.mail.imap.messageparts import MessagePart +from leap.mail.imap.messageparts import MessagePart, MessagePartDoc from leap.mail.imap.parser import MBoxParser logger = logging.getLogger(__name__) @@ -126,7 +126,7 @@ class LeapMessage(fields, MBoxParser): :type container: IMessageContainer """ self._soledad = soledad - self._uid = int(uid) + self._uid = int(uid) if uid is not None else None self._mbox = self._parse_mailbox_name(mbox) self._collection = collection self._container = container @@ -1077,7 +1077,21 @@ class MessageCollection(WithMsgFields, IndexedDB, MBoxParser): fields.TYPE_MBOX_C_HASH_IDX, fields.TYPE_FLAGS_VAL, self.mbox, chash) curried.expected = "fdoc" - return try_unique_query(curried) + fdoc = try_unique_query(curried) + if fdoc is not None: + return fdoc + else: + # probably this should be the other way round, + # ie, try fist on memstore... + cf = self.memstore._chash_fdoc_store + fdoc = cf[chash][self.mbox] + # hey, I just needed to wrap fdoc thing into + # a "content" attribute, look a better way... 
+ if not empty(fdoc): + return MessagePartDoc( + new=None, dirty=None, part=None, + store=None, doc_id=None, + content=fdoc) def _get_uid_from_msgidCb(self, msgid): hdoc = None @@ -1088,11 +1102,26 @@ class MessageCollection(WithMsgFields, IndexedDB, MBoxParser): curried.expected = "hdoc" hdoc = try_unique_query(curried) - if hdoc is None: + # XXX this is only a quick hack to avoid regression + # on the "multiple copies of the draft" issue, but + # this is currently broken since it's not efficient to + # look for this. Should lookup better. + # FIXME! + + if hdoc is not None: + hdoc_dict = hdoc.content + + else: + hdocstore = self.memstore._hdoc_store + match = [x for _, x in hdocstore.items() if x['msgid'] == msgid] + hdoc_dict = first(match) + + if hdoc_dict is None: logger.warning("Could not find hdoc for msgid %s" % (msgid,)) return None - msg_chash = hdoc.content.get(fields.CONTENT_HASH_KEY) + msg_chash = hdoc_dict.get(fields.CONTENT_HASH_KEY) + fdoc = self._get_fdoc_from_chash(msg_chash) if not fdoc: logger.warning("Could not find fdoc for msgid %s" -- cgit v1.2.3