diff options
author | Tomás Touceda <chiiph@leap.se> | 2014-02-20 17:19:53 -0300 |
---|---|---|
committer | Tomás Touceda <chiiph@leap.se> | 2014-02-20 17:19:53 -0300 |
commit | 6e7d4b77e2800678e7c5d83f24b8e30261d70e9b (patch) | |
tree | 7be6d990a7dd4b46b84b9c64f2b3430d7816e184 /mail/src/leap | |
parent | 9b20cd5042a3f0d351cadefb220b56b0bd6a9205 (diff) | |
parent | 4648cb5a5cc6084d1949de7622def2c74c1de6e9 (diff) |
Merge remote-tracking branch 'refs/remotes/kali/feature/parse-perf' into develop
Diffstat (limited to 'mail/src/leap')
-rw-r--r-- | mail/src/leap/mail/imap/messages.py | 25 | ||||
-rw-r--r-- | mail/src/leap/mail/imap/parser.py | 75 | ||||
-rw-r--r-- | mail/src/leap/mail/imap/soledadstore.py | 4 | ||||
-rw-r--r-- | mail/src/leap/mail/imap/tests/walktree.py | 4 | ||||
-rw-r--r-- | mail/src/leap/mail/walk.py | 7 |
5 files changed, 21 insertions, 94 deletions
diff --git a/mail/src/leap/mail/imap/messages.py b/mail/src/leap/mail/imap/messages.py index 9f7f6e2..9a001b3 100644 --- a/mail/src/leap/mail/imap/messages.py +++ b/mail/src/leap/mail/imap/messages.py @@ -24,8 +24,10 @@ import threading import StringIO from collections import defaultdict +from email import message_from_string from functools import partial +from pycryptopp.hash import sha256 from twisted.mail import imap4 from twisted.internet import defer from zope.interface import implements @@ -42,7 +44,7 @@ from leap.mail.imap.index import IndexedDB from leap.mail.imap.fields import fields, WithMsgFields from leap.mail.imap.memorystore import MessageWrapper from leap.mail.imap.messageparts import MessagePart -from leap.mail.imap.parser import MailParser, MBoxParser +from leap.mail.imap.parser import MBoxParser logger = logging.getLogger(__name__) @@ -94,7 +96,7 @@ A dictionary that keeps one lock per mbox and uid. fdoc_locks = defaultdict(lambda: defaultdict(lambda: threading.Lock())) -class LeapMessage(fields, MailParser, MBoxParser): +class LeapMessage(fields, MBoxParser): """ The main representation of a message. @@ -123,7 +125,6 @@ class LeapMessage(fields, MailParser, MBoxParser): :param container: a IMessageContainer implementor instance :type container: IMessageContainer """ - MailParser.__init__(self) self._soledad = soledad self._uid = int(uid) self._mbox = self._parse_mailbox_name(mbox) @@ -583,7 +584,7 @@ class LeapMessage(fields, MailParser, MBoxParser): return not empty(self.fdoc) -class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): +class MessageCollection(WithMsgFields, IndexedDB, MBoxParser): """ A collection of messages, surprisingly. @@ -713,7 +714,6 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :param memstore: a MemoryStore instance :type memstore: MemoryStore """ - MailParser.__init__(self) leap_assert(mbox, "Need a mailbox name to initialize") leap_assert(mbox.strip() != "", "mbox cannot be blank space") leap_assert(isinstance(mbox, (str, unicode)), @@ -782,11 +782,11 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): :return: msg, parts, chash, size, multi :rtype: tuple """ - msg = self._get_parsed_msg(raw) - chash = self._get_hash(msg) - size = len(msg.as_string()) - multi = msg.is_multipart() + msg = message_from_string(raw) parts = walk.get_parts(msg) + size = len(raw) + chash = sha256.SHA256(raw).hexdigest() + multi = msg.is_multipart() return msg, parts, chash, size, multi def _populate_flags(self, flags, uid, chash, size, multi): @@ -803,7 +803,7 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): fd[self.SIZE_KEY] = size fd[self.MULTIPART_KEY] = multi if flags: - fd[self.FLAGS_KEY] = map(self._stringify, flags) + fd[self.FLAGS_KEY] = flags fd[self.SEEN_KEY] = self.SEEN_FLAG in flags fd[self.DEL_KEY] = self.DELETED_FLAG in flags fd[self.RECENT_KEY] = True # set always by default @@ -926,11 +926,10 @@ class MessageCollection(WithMsgFields, IndexedDB, MailParser, MBoxParser): # Watch out! We're reserving a UID right after this! existing_uid = self._fdoc_already_exists(chash) if existing_uid: - uid = existing_uid - msg = self.get_msg_by_uid(uid) + msg = self.get_msg_by_uid(existing_uid) # We can say the observer that we're done - self.reactor.callFromThread(observer.callback, uid) + self.reactor.callFromThread(observer.callback, existing_uid) msg.setFlags((fields.DELETED_FLAG,), -1) return diff --git a/mail/src/leap/mail/imap/parser.py b/mail/src/leap/mail/imap/parser.py index 6a9ace9..4a801b0 100644 --- a/mail/src/leap/mail/imap/parser.py +++ b/mail/src/leap/mail/imap/parser.py @@ -15,83 +15,10 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. """ -Mail parser mixins. +Mail parser mixin. """ -import cStringIO -import StringIO -import hashlib import re -from email.message import Message -from email.parser import Parser - -from leap.common.check import leap_assert_type - - -class MailParser(object): - """ - Mixin with utility methods to parse raw messages. - """ - def __init__(self): - """ - Initializes the mail parser. - """ - self._parser = Parser() - - def _get_parsed_msg(self, raw, headersonly=False): - """ - Return a parsed Message. - - :param raw: the raw string to parse - :type raw: basestring, or StringIO object - - :param headersonly: True for parsing only the headers. - :type headersonly: bool - """ - msg = self._get_parser_fun(raw)(raw, headersonly=headersonly) - return msg - - def _get_hash(self, msg): - """ - Returns a hash of the string representation of the raw message, - suitable for indexing the inmutable pieces. - - :param msg: a Message object - :type msg: Message - """ - leap_assert_type(msg, Message) - return hashlib.sha256(msg.as_string()).hexdigest() - - def _get_parser_fun(self, o): - """ - Retunn the proper parser function for an object. - - :param o: object - :type o: object - :param parser: an instance of email.parser.Parser - :type parser: email.parser.Parser - """ - if isinstance(o, (cStringIO.OutputType, StringIO.StringIO)): - return self._parser.parse - if isinstance(o, basestring): - return self._parser.parsestr - # fallback - return self._parser.parsestr - - def _stringify(self, o): - """ - Return a string object. - - :param o: object - :type o: object - """ - # XXX Maybe we don't need no more, we're using - # msg.as_string() - if isinstance(o, (cStringIO.OutputType, StringIO.StringIO)): - return o.getvalue() - else: - return o - class MBoxParser(object): """ diff --git a/mail/src/leap/mail/imap/soledadstore.py b/mail/src/leap/mail/imap/soledadstore.py index 25f00bb..f3de8eb 100644 --- a/mail/src/leap/mail/imap/soledadstore.py +++ b/mail/src/leap/mail/imap/soledadstore.py @@ -314,8 +314,8 @@ class SoledadStore(ContentDedup): except Exception as exc: logger.debug("ITEM WAS: %s" % repr(item)) if hasattr(item, 'content'): - logger.debug("ITEM CONTENT WAS: %s" % - repr(item.content)) + logger.debug("ITEM CONTENT WAS: %s" % + repr(item.content)) logger.exception(exc) failed = True continue diff --git a/mail/src/leap/mail/imap/tests/walktree.py b/mail/src/leap/mail/imap/tests/walktree.py index f3cbcb0..695f487 100644 --- a/mail/src/leap/mail/imap/tests/walktree.py +++ b/mail/src/leap/mail/imap/tests/walktree.py @@ -36,11 +36,11 @@ p = parser.Parser() if len(sys.argv) > 1: FILENAME = sys.argv[1] else: - FILENAME = "rfc822.multi-minimal.message" + FILENAME = "rfc822.multi-signed.message" """ -FILENAME = "rfc822.multi-signed.message" FILENAME = "rfc822.plain.message" +FILENAME = "rfc822.multi-minimal.message" """ msg = p.parse(open(FILENAME)) diff --git a/mail/src/leap/mail/walk.py b/mail/src/leap/mail/walk.py index 49f2c22..f747377 100644 --- a/mail/src/leap/mail/walk.py +++ b/mail/src/leap/mail/walk.py @@ -17,17 +17,18 @@ """ Utilities for walking along a message tree. """ -import hashlib import os +from pycryptopp.hash import sha256 + from leap.mail.utils import first DEBUG = os.environ.get("BITMASK_MAIL_DEBUG") if DEBUG: - get_hash = lambda s: hashlib.sha256(s).hexdigest()[:10] + get_hash = lambda s: sha256.SHA256(s).hexdigest()[:10] else: - get_hash = lambda s: hashlib.sha256(s).hexdigest() + get_hash = lambda s: sha256.SHA256(s).hexdigest() """ |