From b2d97c9faef6037a065e2903afe5b0ab2624917e Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 20 Feb 2014 02:52:17 -0400 Subject: mail parsing performance improvements Although the do_parse function is deferred to threads, we were actually waiting till its return to fire the callback of the deferred, and hence the "append ok" was being delayed. During massive appends, this was a tight loop contributing as much as 35 msec, of a total of 100 msec average. Several inefficiencies are addressed here: * use pycryptopp hash functions. * avoiding function calling overhead. * avoid duplicate call to message.as_string. * make use of the string size caching capabilities. * avoiding the mail Parser initialization/method call completely, in favor of the module helper to get the object from string. Overall, these changes cut parsing to 50% of the initial timing by my measurements with line_profiler, YMMV. --- src/leap/mail/walk.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/leap/mail/walk.py') diff --git a/src/leap/mail/walk.py b/src/leap/mail/walk.py index 49f2c22..f747377 100644 --- a/src/leap/mail/walk.py +++ b/src/leap/mail/walk.py @@ -17,17 +17,18 @@ """ Utilities for walking along a message tree. """ -import hashlib import os +from pycryptopp.hash import sha256 + from leap.mail.utils import first DEBUG = os.environ.get("BITMASK_MAIL_DEBUG") if DEBUG: - get_hash = lambda s: hashlib.sha256(s).hexdigest()[:10] + get_hash = lambda s: sha256.SHA256(s).hexdigest()[:10] else: - get_hash = lambda s: hashlib.sha256(s).hexdigest() + get_hash = lambda s: sha256.SHA256(s).hexdigest() """ -- cgit v1.2.3