diff options
author | Kali Kaneko <kali@leap.se> | 2014-02-20 02:52:17 -0400 |
---|---|---|
committer | Kali Kaneko <kali@leap.se> | 2014-02-20 16:17:25 -0400 |
commit | b2d97c9faef6037a065e2903afe5b0ab2624917e (patch) | |
tree | 268a86e5a668e604bfa8907a8d0f4a7025819238 /src/leap/mail/imap/messages.py | |
parent | 66c5602b77547ec24674f5e40c1d244f28ff5a49 (diff) |
mail parsing performance improvements
Although the do_parse function is deferred to threads, we were actually
waiting until it returned to fire the callback of the deferred, and hence
the "append ok" was being delayed. During massive appends, this was a
tight loop contributing as much as 35 msec out of an average total of
100 msec.
Several inefficiencies are addressed here:
* use pycryptopp hash functions.
* avoid function call overhead.
* avoid the duplicate call to message.as_string.
* make use of the string size caching capabilities (len(raw) instead of
  len(msg.as_string())).
* avoid the mail Parser initialization/method call completely,
  in favor of the module helper (email.message_from_string) to get the
  object from the string.
Overall, these changes cut parsing to 50% of the initial timing by my
measurements with line_profiler, YMMV.
Diffstat (limited to 'src/leap/mail/imap/messages.py')
-rw-r--r-- | src/leap/mail/imap/messages.py | 25 |
1 files changed, 12 insertions, 13 deletions
diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 9f7f6e2..9a001b3 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -24,8 +24,10 @@ import threading import StringIO from collections import defaultdict +from email import message_from_string from functools import partial +from pycryptopp.hash import sha256 from twisted.mail import imap4 from twisted.internet import defer from zope.interface import implements @@ -42,7 +44,7 @@ from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields from leap.mail.imap.memorystore import MessageWrapper from leap.mail.imap.messageparts import MessagePart -from leap.mail.imap.parser import MailParser, MBoxParser +from leap.mail.imap.parser import MBoxParser logger = logging.getLogger(__name__) @@ -94,7 +96,7 @@ A dictionary that keeps one lock per mbox and uid. fdoc_locks = defaultdict(lambda: defaultdict(lambda: threading.Lock())) -class LeapMessage(fields, MailParser, MBoxParser): +class LeapMessage(fields, MBoxParser): """ The main representation of a message. @@ -123,7 +125,6 @@ class LeapMessage(fields, MailParser, MBoxParser): :param container: a IMessageContainer implementor instance :type container: IMessageContainer """ - MailParser.__init__(self) self._soledad = soledad self._uid = int(uid) self._mbox = self._parse_mailbox_name(mbox) @@ -583,7 +584,7 @@ class LeapMessage(fields, MailParser, MBoxParser): return not empty(self.fdoc) -class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): +class MessageCollection(WithMsgFields, IndexedDB, MBoxParser): """ A collection of messages, surprisingly. 
@@ -713,7 +714,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :param memstore: a MemoryStore instance :type memstore: MemoryStore """ - MailParser.__init__(self) leap_assert(mbox, "Need a mailbox name to initialize") leap_assert(mbox.strip() != "", "mbox cannot be blank space") leap_assert(isinstance(mbox, (str, unicode)), @@ -782,11 +782,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :return: msg, parts, chash, size, multi :rtype: tuple """ - msg = self._get_parsed_msg(raw) - chash = self._get_hash(msg) - size = len(msg.as_string()) - multi = msg.is_multipart() + msg = message_from_string(raw) parts = walk.get_parts(msg) + size = len(raw) + chash = sha256.SHA256(raw).hexdigest() + multi = msg.is_multipart() return msg, parts, chash, size, multi def _populate_flags(self, flags, uid, chash, size, multi): @@ -803,7 +803,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fd[self.SIZE_KEY] = size fd[self.MULTIPART_KEY] = multi if flags: - fd[self.FLAGS_KEY] = map(self._stringify, flags) + fd[self.FLAGS_KEY] = flags fd[self.SEEN_KEY] = self.SEEN_FLAG in flags fd[self.DEL_KEY] = self.DELETED_FLAG in flags fd[self.RECENT_KEY] = True # set always by default @@ -926,11 +926,10 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # Watch out! We're reserving a UID right after this! existing_uid = self._fdoc_already_exists(chash) if existing_uid: - uid = existing_uid - msg = self.get_msg_by_uid(uid) + msg = self.get_msg_by_uid(existing_uid) # We can say the observer that we're done - self.reactor.callFromThread(observer.callback, uid) + self.reactor.callFromThread(observer.callback, existing_uid) msg.setFlags((fields.DELETED_FLAG,), -1) return |