diff options
Diffstat (limited to 'mail')
-rw-r--r-- | mail/src/leap/mail/adaptors/soledad.py | 19 | ||||
-rw-r--r-- | mail/src/leap/mail/mail.py | 66 | ||||
-rw-r--r-- | mail/src/leap/mail/tests/rfc822.bounce.message | 152 | ||||
-rw-r--r-- | mail/src/leap/mail/tests/test_walk.py | 81 | ||||
-rw-r--r-- | mail/src/leap/mail/walk.py | 211 |
5 files changed, 298 insertions, 231 deletions
diff --git a/mail/src/leap/mail/adaptors/soledad.py b/mail/src/leap/mail/adaptors/soledad.py index 298d0172..46c5a2c5 100644 --- a/mail/src/leap/mail/adaptors/soledad.py +++ b/mail/src/leap/mail/adaptors/soledad.py @@ -1185,14 +1185,13 @@ def _split_into_parts(raw): # TODO populate Default FLAGS/TAGS (unseen?) # TODO seed propely the content_docs with defaults?? - msg, parts, chash, multi = _parse_msg(raw) + msg, chash, multi = _parse_msg(raw) size = len(msg.as_string()) - body_phash = walk.get_body_phash(msg) - - parts_map = walk.walk_msg_tree(parts, body_phash=body_phash) - cdocs_list = list(walk.get_raw_docs(msg, parts)) + parts_map = walk.get_tree(msg) + cdocs_list = list(walk.get_raw_docs(msg)) cdocs_phashes = [c['phash'] for c in cdocs_list] + body_phash = walk.get_body_phash(msg) mdoc = _build_meta_doc(chash, cdocs_phashes) fdoc = _build_flags_doc(chash, size, multi) @@ -1206,10 +1205,9 @@ def _split_into_parts(raw): def _parse_msg(raw): msg = message_from_string(raw) - parts = walk.get_parts(msg) chash = walk.get_hash(raw) multi = msg.is_multipart() - return msg, parts, chash, multi + return msg, chash, multi def _build_meta_doc(chash, cdocs_phashes): @@ -1220,6 +1218,7 @@ def _build_meta_doc(chash, cdocs_phashes): _mdoc.fdoc = constants.FDOCID.format(mbox_uuid=INBOX_NAME, chash=chash) _mdoc.hdoc = constants.HDOCID.format(chash=chash) _mdoc.cdocs = [constants.CDOCID.format(phash=p) for p in cdocs_phashes] + return _mdoc.serialize() @@ -1259,8 +1258,8 @@ def _build_headers_doc(msg, chash, body_phash, parts_map): copy_attr(lower_headers, "date", _hdoc) hdoc = _hdoc.serialize() - # add parts map to header doc - # (body, multi, part_map) + # add some of the attr from the parts map to header doc for key in parts_map: - hdoc[key] = parts_map[key] + if key in ('body', 'multi', 'part_map'): + hdoc[key] = parts_map[key] return stringify_parts_map(hdoc) diff --git a/mail/src/leap/mail/mail.py b/mail/src/leap/mail/mail.py index d3659de8..2fde3a1b 100644 --- a/mail/src/leap/mail/mail.py +++ b/mail/src/leap/mail/mail.py @@ -36,7 +36,6 @@ from twisted.python import log from leap.common.check import leap_assert_type from leap.common.events import emit_async, catalog -from leap.common.mail import get_email_charset from leap.mail.adaptors.soledad import SoledadMailAdaptor from leap.mail.constants import INBOX_NAME @@ -124,33 +123,6 @@ def _unpack_headers(headers_dict): return headers_l -def _get_index_for_cdoc(part_map, cdocs_dict): - """ - Get, if possible, the index for a given content-document matching the phash - of the passed part_map. - - This is used when we are initializing a MessagePart, because we just pass a - reference to the parent message cdocs container and we need to iterate - through the cdocs to figure out which content-doc matches the phash of the - part we're currently rendering. - - It is also used when recursing through a nested multipart message, because - in the initialization of the child MessagePart we pass a dictionary only - for the referenced cdoc. - - :param part_map: a dict describing the mapping of the parts for the current - message-part. - :param cdocs: a dict of content-documents, 0-indexed. - :rtype: int - """ - phash = part_map.get('phash', None) - if phash: - for i, cdoc_wrapper in cdocs_dict.items(): - if cdoc_wrapper.phash == phash: - return i - return None - - class MessagePart(object): # TODO This class should be better abstracted from the data model. # TODO support arbitrarily nested multiparts (right now we only support @@ -159,7 +131,7 @@ class MessagePart(object): Represents a part of a multipart MIME Message. """ - def __init__(self, part_map, cdocs={}, nested=False): + def __init__(self, part_map, cdocs=None, nested=False): """ :param part_map: a dictionary mapping the subparts for this MessagePart (1-indexed). @@ -178,13 +150,12 @@ class MessagePart(object): :param cdocs: optional, a reference to the top-level dict of wrappers for content-docs (1-indexed). """ + if cdocs is None: + cdocs = {} self._pmap = part_map self._cdocs = cdocs self._nested = nested - index = _get_index_for_cdoc(part_map, self._cdocs) or 1 - self._index = index - def get_size(self): """ Size of the body, in octets. @@ -199,13 +170,10 @@ class MessagePart(object): def get_body_file(self): payload = "" pmap = self._pmap + multi = pmap.get('multi') if not multi: - payload = self._get_payload(self._index) - else: - # XXX uh, multi also... should recurse. - # This needs to be implemented in a more general and elegant way. - raise NotImplementedError + payload = self._get_payload(pmap.get('phash')) if payload: payload = _encode_payload(payload) @@ -220,33 +188,19 @@ class MessagePart(object): def get_subpart(self, part): if not self.is_multipart(): raise TypeError - sub_pmap = self._pmap.get("part_map", {}) - # XXX BUG --- workaround. Subparts with more than 1 subparts - # need to get the requested index for the subpart decremented. - # Off-by-one error, should investigate which is the real reason and - # fix it, this is only a quick workaround. - num_parts = self._pmap.get("parts", 0) - if num_parts > 1: - part = part - 1 - # ------------------------------------------------------------- - try: part_map = sub_pmap[str(part)] except KeyError: log.msg("getSubpart for %s: KeyError" % (part,)) raise IndexError + return MessagePart(part_map, cdocs=self._cdocs, nested=True) - cdoc_index = _get_index_for_cdoc(part_map, self._cdocs) - cdoc = self._cdocs.get(cdoc_index, {}) - - return MessagePart(part_map, cdocs={1: cdoc}, nested=True) - - def _get_payload(self, index): - cdoc_wrapper = self._cdocs.get(index, None) - if cdoc_wrapper: - return cdoc_wrapper.raw + def _get_payload(self, phash): + for cdocw in self._cdocs.values(): + if cdocw.phash == phash: + return cdocw.raw return "" diff --git a/mail/src/leap/mail/tests/rfc822.bounce.message b/mail/src/leap/mail/tests/rfc822.bounce.message new file mode 100644 index 00000000..7a51ac04 --- /dev/null +++ b/mail/src/leap/mail/tests/rfc822.bounce.message @@ -0,0 +1,152 @@ +Return-Path: <> +X-Original-To: yoyo@dev.pixelated-project.org +Delivered-To: a6973ec1af0a6d1e2a1e4db4ff85f6c2@deliver.local +Received: by dev1.dev.pixelated-project.org (Postfix) + id 92CEA83164; Thu, 16 Jun 2016 14:53:34 +0200 (CEST) +Date: Thu, 16 Jun 2016 14:53:34 +0200 (CEST) +From: MAILER-DAEMON@dev1.dev.pixelated-project.org (Mail Delivery System) +Subject: Undelivered Mail Returned to Sender +To: yoyo@dev.pixelated-project.org +Auto-Submitted: auto-replied +MIME-Version: 1.0 +Content-Type: multipart/report; report-type=delivery-status; + boundary="8F60183010.1466081614/dev1.dev.pixelated-project.org" +Message-Id: <20160616125334.92CEA83164@dev1.dev.pixelated-project.org> + +This is a MIME-encapsulated message. + +--8F60183010.1466081614/dev1.dev.pixelated-project.org +Content-Description: Notification +Content-Type: text/plain; charset=us-ascii + +This is the mail system at host dev1.dev.pixelated-project.org. + +I'm sorry to have to inform you that your message could not +be delivered to one or more recipients. It's attached below. + +For further assistance, please send mail to postmaster. + +If you do so, please include this problem report. You can +delete your own text from the attached returned message. + + The mail system + +<nobody@leap.se>: host caribou.leap.se[176.53.69.122] said: 550 5.1.1 + <nobody@leap.se>: Recipient address rejected: User unknown in virtual alias + table (in reply to RCPT TO command) + +--8F60183010.1466081614/dev1.dev.pixelated-project.org +Content-Description: Delivery report +Content-Type: message/delivery-status + +Reporting-MTA: dns; dev1.dev.pixelated-project.org +X-Postfix-Queue-ID: 8F60183010 +X-Postfix-Sender: rfc822; yoyo@dev.pixelated-project.org +Arrival-Date: Thu, 16 Jun 2016 14:53:33 +0200 (CEST) + +Final-Recipient: rfc822; nobody@leap.se +Original-Recipient: rfc822;nobody@leap.se +Action: failed +Status: 5.1.1 +Remote-MTA: dns; caribou.leap.se +Diagnostic-Code: smtp; 550 5.1.1 <nobody@leap.se>: Recipient address rejected: + User unknown in virtual alias table + +--8F60183010.1466081614/dev1.dev.pixelated-project.org +Content-Description: Undelivered Message +Content-Type: message/rfc822 + +Return-Path: <yoyo@dev.pixelated-project.org> +Received: from leap.mail-0.4.0rc1+111.g736ea86 (localhost [127.0.0.1]) + (using TLSv1 with cipher ECDHE-RSA-AES128-SHA (128/128 bits)) + (Client CN "yoyo@dev.pixelated-project.org", Issuer "Pixelated Project Root CA (client certificates only!)" (verified OK)) + by dev1.dev.pixelated-project.org (Postfix) with ESMTPS id 8F60183010 + for <nobody@leap.se>; Thu, 16 Jun 2016 14:53:33 +0200 (CEST) +MIME-Version: 1.0 +Content-Type: multipart/signed; protocol="application/pgp-signature"; + micalg="pgp-sha512"; boundary="===============7598747164910592838==" +To: nobody@leap.se +Subject: vrgg +From: yoyo@dev.pixelated-project.org +Date: Thu, 16 Jun 2016 13:53:32 -0000 +Message-Id: <20160616125332.16961.677041909.5@dev1.dev.pixelated-project.org> +OpenPGP: id=CB546109E857BC34DFF2BCB3288870B39C400C24; + url="https://dev.pixelated-project.org/key/yoyo"; preference="signencrypt" + +--===============7598747164910592838== +Content-Type: multipart/mixed; boundary="===============3737055506052708210==" +MIME-Version: 1.0 +To: nobody@leap.se +Subject: vrgg +From: yoyo@dev.pixelated-project.org +Date: Thu, 16 Jun 2016 13:53:32 -0000 + +--===============3737055506052708210== +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: base64 + + +--===============3737055506052708210== +Content-Type: application/pgp-keys +MIME-Version: 1.0 +content-disposition: attachment; filename="yoyo@dev.pixelated-project.org-email-key.asc" +Content-Transfer-Encoding: base64 + +LS0tLS1CRUdJTiBQR1AgUFVCTElDIEtFWSBCTE9DSy0tLS0tCgptUUlOQkZkZ01BZ0JFQURIWWpU +T20wcTdOT0lYVUpoTmlHVXg2S05OZ1M0Q0I2VlMvbGtab2UvYjZuRjdCSENmCkFnRVkxeFlxMkIv +MzA3YzBtNTZWMEZvOWt2ZmZCUWhQckU5WG9rckI5blRlN1RsSDZUNTdiV09LSWMyMHhNSy8KSlVU +djZ3UEpybjdLN0VyNEdxbzdrUmpWcFVBcWlBbGFxMkhVYllGd2NEMnBIb0VENmU2L01CZDBVUTFX +b2s4QQpPNURDc2ZmeWhBZ0NFU1poK2w2VHlsVEJXYTJDTmJvUTl0SWtPZ0ZWTk9kTW9uWkxoTk1N +Y0tIeU54dmF5bUdCCjhjQlRISVE2UWhGRThvR2JDRTdvczdZWWhyTmNmcUsyMzJJQllzTHNXN3Vk +QmdwRTA0YkpwQWlvbW1zTHBCYmwKV0pCSjdqeEhwWmhJR3JGL1ltejNsSXpkbm9Mb3BSSWJyS0pC +MmxaVDhIUHBlTVVJdVE2eHErd3RhQXFJVzlPTgo5U29uZWYyVU5BL3VseW1LeDRkOFhxbEwxY3hE +aDFQU1E5YVlPcVg0RDlrMklmOXZmR2hET0xVMzR2Y2VFOC8vCnM1WGdTY2ZFbHg2SWlEVWZHdGx2 +aE5zQUM4TmhhUU1sOHJjUXVoRDA2RFdvSUowMVhkeFJVM2JSVVZkc0I1NWMKcXRWSHJMbVBVb256 +NU13MGFURzlTZzZudUlQcU1QOVNKRlBzbVpzR3ZYVnZWbCtSNzl1SFBlc25yWkoyTjZqOQpNaUth +S045NFBhL1dJUnRoYWdzVnpHeHNtd2orTVZCRkZKRmh0TUtnNlFzYUsvbzRLNGJFR1ZLdWNXQk1i +MnNxCldmd0o0SndTcHcrOHgyS3p6aXhWTllTZXhRdm9oMkc3RDRmRXdISDJzazNST3k3dTlldjhs +bEVqUFFBUkFRQUIKdEQ5NWIzbHZRR1JsZGk1d2FYaGxiR0YwWldRdGNISnZhbVZqZEM1dmNtY2dQ +SGx2ZVc5QVpHVjJMbkJwZUdWcwpZWFJsWkMxd2NtOXFaV04wTG05eVp6NkpBajRFRXdFQ0FDZ0ZB +bGRnTUFnQ0d5OEZDUUhnTUZnR0N3a0lCd01DCkJoVUlBZ2tLQ3dRV0FnTUJBaDRCQWhlQUFBb0pF +Q2lJY0xPY1FBd2s4djBQL2o2MmNyNjRUMlZPMVNKdHp1RlEKWjVpeVJsVFVHSGN2NW5hQjlUSDdI +VVB3cTVwekZiTkg5SnhNRjVFRWtvZjdvV0hWeldWVTFBM1NDdzVNZ2FFbwppWTk5ZFBGNzdHazJ4 +ZEczNXZlWmIwWkg2WkVLdks1S042VXBucG5IeStxaVZVc1FLcE9DdUZKNkF0UlVEOTRJClJ2YnUv +S1hsMHdORDlzVXFlYkJZN1BBSlRNY1RjLzVEdWpIT1Erd3VlSkFtaFZZbEozVnpZK1lBS2t5U05B +QVoKZ3VVenNyUm5xQWU5SmU5TGgrcERpcVpHT2tEK1Z3b2kvRlVPQXJwbWFnNzZONTVjR3hiK2VG +QUlzRHYrM1NNOQpjUDFyQkFON2lEaGgvdkdJeHgzMFlrYUlpMmpmcXg3VXUydnNwSXh6K0NsWWdi +dm1wZm1CWmFqVzYzR0FsK3YvCngrby92eFZmVTMraTZ3alFjRS8vRTBTR2pvY3lQdUw0ZTZLNERy +S3k2SHQycjBQckdHVFZ0dUZPaWU2dnVzbVcKL09sdVB1dGszU3o1S1BmRDFpRXBobmpPQ0pNRkZx +Z2xRM1pPa3MweG00WGdwWW1ycnpQcXc1WWlzK1NEVjhobwp6anlrSzRWUlcrcC9IcUVzU29GQm5a +MG5XSmg2Q1pZOExIeVNiMVJwaFlMRFpWd21JRXd1OW12Vm1ISVIyWUZVCllNZEx4UExiOFZNei9t +QWpMb2Q0OGNSSzdSTzBSZ1RoMTUyK0VieXRGR3k5Y2tiS3VzRmJzVTFCQjN2MFJyUlUKenozTTcx +T3hjcFhVQ0tpWlI0MEVYZnErSnVtZVFudm1wSWdZdUNaQkh5MzJwQUJuOHNDdUlrMStyQnp4bXdt +bgp0WGh0K0RvNlExYXYyVjZYR00xV2xoKzEKPU8zaHEKLS0tLS1FTkQgUEdQIFBVQkxJQyBLRVkg +QkxPQ0stLS0tLQo= +--===============3737055506052708210==-- + +--===============7598747164910592838== +Content-Type: application/pgp-signature; name="signature.asc" +MIME-Version: 1.0 +Content-Description: OpenPGP Digital Signature + +-----BEGIN PGP SIGNATURE----- + +iQIcBAABCgAGBQJXYqFNAAoJECiIcLOcQAwkDEIQAL67/XJXDv+lusoy18jr7Ony +WQEP0pIRLp4GywGpH3dAITFAkuamO4VX3QEdVGjOHNoaT8VkSVWf9mnsYLl+Mh2v +1OIwMv0u8WyVtrcxyXijIznnJv8X1RgyCzpUJcmOh04VZcDyxKbnFHWSDMfJ4Jtq +qnXDONcfEeT8pwrGjP5qzTgcF/irG3w5svyQjEtj6kycddYtqUc9Hx3cMaRIzsHg +kuUzznSzU/6P0Z345q/kXyYvU9rlcsP9vogrsqL2ueLwYSipxUJQUrRWG82FYoCo +PAKNdGIt0xl2gEW+xWZkJqFarPiUFCx//+bVBelKrqj6rjwbj+E7mHJW318JYVHQ +en3Smv7pEWlT4hZHXnoe8ng6TAvKzQjf7/bUxq2JpKSycp2hDO3Qz3Tv+kc+jC/r +5UDWe/flR+syq8lAQTRSn6057g3BgDG2RtAwsjedg1aTFSrljSxbKlK4vsj5Muek +Olq9+MUdMFSE3Jj/JC2COcS3rlt/Qt+JLDYXKahU3CodaSgF2dobikDe1bW0/QNS +7O4Ng2PK0pA416RCFRUgPXerUnMGiWAiq7BoRHeym9y7fkHYhIYGpPVKXJ6t67y5 +JjvuzwfwG8SZTp4Wy2pg1Mr6znm6uVBxUDxTHyP3BjciI1zpEigOIg9UwJ9nCDxL +uUGz4VqipNKbkpRkjLLW +=3IaF +-----END PGP SIGNATURE----- + +--===============7598747164910592838==-- + +--8F60183010.1466081614/dev1.dev.pixelated-project.org-- diff --git a/mail/src/leap/mail/tests/test_walk.py b/mail/src/leap/mail/tests/test_walk.py new file mode 100644 index 00000000..826ec10c --- /dev/null +++ b/mail/src/leap/mail/tests/test_walk.py @@ -0,0 +1,81 @@ +""" +Tests for leap.mail.walk module +""" +import os.path +from email.parser import Parser + +from leap.mail import walk + +CORPUS = { + 'simple': 'rfc822.message', + 'multimin': 'rfc822.multi-minimal.message', + 'multisigned': 'rfc822.multi-signed.message', + 'bounced': 'rfc822.bounce.message', +} + +_here = os.path.dirname(__file__) +_parser = Parser() + + +# tests + + +def test_simple_mail(): + msg = _parse('simple') + tree = walk.get_tree(msg) + assert len(tree['part_map']) == 0 + assert tree['ctype'] == 'text/plain' + assert tree['multi'] is False + + +def test_multipart_minimal(): + msg = _parse('multimin') + tree = walk.get_tree(msg) + + assert tree['multi'] is True + assert len(tree['part_map']) == 1 + first = tree['part_map'][1] + assert first['multi'] is False + assert first['ctype'] == 'text/plain' + + +def test_multi_signed(): + msg = _parse('multisigned') + tree = walk.get_tree(msg) + assert tree['multi'] is True + assert len(tree['part_map']) == 2 + + _first = tree['part_map'][1] + _second = tree['part_map'][2] + assert len(_first['part_map']) == 3 + assert(_second['multi'] is False) + + +def test_bounce_mime(): + msg = _parse('bounced') + tree = walk.get_tree(msg) + + ctypes = [tree['part_map'][index]['ctype'] + for index in sorted(tree['part_map'].keys())] + third = tree['part_map'][3] + three_one_ctype = third['part_map'][1]['ctype'] + assert three_one_ctype == 'multipart/signed' + + assert ctypes == [ + 'text/plain', + 'message/delivery-status', + 'message/rfc822'] + + +# utils + +def _parse(name): + _str = _get_string_for_message(name) + return _parser.parsestr(_str) + + +def _get_string_for_message(name): + filename = os.path.join(_here, CORPUS[name]) + with open(filename) as f: + msgstr = f.read() + return msgstr diff --git a/mail/src/leap/mail/walk.py b/mail/src/leap/mail/walk.py index c1166014..d143d61e 100644 --- a/mail/src/leap/mail/walk.py +++ b/mail/src/leap/mail/walk.py @@ -15,8 +15,11 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. """ -Utilities for walking along a message tree. +Walk a message tree and generate documents that can be inserted in the backend +store. """ +from email.parser import Parser + from cryptography.hazmat.backends.multibackend import MultiBackend from cryptography.hazmat.backends.openssl.backend import ( Backend as OpenSSLBackend) @@ -26,49 +29,32 @@ from leap.mail.utils import first crypto_backend = MultiBackend([OpenSSLBackend()]) +_parser = Parser() -def get_hash(s): - digest = hashes.Hash(hashes.SHA256(), crypto_backend) - digest.update(s) - return digest.finalize().encode("hex").upper() - - -""" -Get interesting message parts -""" - - -def get_parts(msg): - return [ - { - 'multi': part.is_multipart(), - 'ctype': part.get_content_type(), - 'size': len(part.as_string()), - 'parts': - len(part.get_payload()) - if isinstance(part.get_payload(), list) - else 1, - 'headers': part.items(), - 'phash': - get_hash(part.get_payload()) - if not part.is_multipart() - else None - } for part in msg.walk()] - -""" -Utility lambda functions for getting the parts vector and the -payloads from the original message. -""" +def get_tree(msg): + p = {} + p['ctype'] = msg.get_content_type() + p['headers'] = msg.items() -def get_parts_vector(parts): - return (x.get('parts', 1) for x in parts) + payload = msg.get_payload() + is_multi = msg.is_multipart() + if is_multi: + p['part_map'] = dict( + [(idx, get_tree(part)) for idx, part in enumerate(payload, 1)]) + p['parts'] = len(payload) + p['phash'] = None + else: + p['parts'] = 0 + p['size'] = len(payload) + p['phash'] = get_hash(payload) + p['part_map'] = {} + p['multi'] = is_multi + return p -def get_payloads(msg): - return ((x.get_payload(), - dict(((str.lower(k), v) for k, v in (x.items())))) - for x in msg.walk()) +def get_tree_from_string(messagestr): + return get_tree(_parser.parsestr(messagestr)) def get_body_phash(msg): @@ -81,27 +67,29 @@ def get_body_phash(msg): # XXX avoid hashing again return get_hash(part.get_payload()) -""" -On getting the raw docs, we get also some of the headers to be able to -index the content. Here we remove any mutable part, as the the filename -in the content disposition. -""" - -def get_raw_docs(msg, parts): +def get_raw_docs(msg): + """ + We get also some of the headers to be able to + index the content. Here we remove any mutable part, as the the filename + in the content disposition. + """ return ( - { - "type": "cnt", # type content they'll be - "raw": payload, - "phash": get_hash(payload), - "content-disposition": first(headers.get( - 'content-disposition', '').split(';')), - "content-type": headers.get( - 'content-type', ''), - "content-transfer-encoding": headers.get( - 'content-transfer-encoding', '') - } for payload, headers in get_payloads(msg) - if not isinstance(payload, list)) + {'type': 'cnt', + 'raw': part.get_payload(), + 'phash': get_hash(part.get_payload()), + 'content-type': part.get_content_type(), + 'content-disposition': first(part.get( + 'content-disposition', '').split(';')), + 'content-transfer-encoding': part.get( + 'content-transfer-encoding', '') + } for part in msg.walk() if not isinstance(part.get_payload(), list)) + + +def get_hash(s): + digest = hashes.Hash(hashes.SHA256(), crypto_backend) + digest.update(s) + return digest.finalize().encode("hex").upper() """ @@ -116,111 +104,4 @@ Groucho Marx: What's the matter with it? Chico Marx: I don't know, let's hear it again. Groucho Marx: So the party of the first part shall be known in this contract as the party of the first part. - -Chico Marx: Well it sounds a little better this time. -Groucho Marx: Well, it grows on you. Would you like to hear it once more? - -Chico Marx: Just the first part. -Groucho Marx: All right. It says the first part of the party of the first part - shall be known in this contract as the first part of the party of - the first part, shall be known in this contract - look, why - should we quarrel about a thing like this, we'll take it right - out, eh? - -Chico Marx: Yes, it's too long anyhow. Now what have we got left? -Groucho Marx: Well I've got about a foot and a half. Now what's the matter? - -Chico Marx: I don't like the second party either. """ - - -def walk_msg_tree(parts, body_phash=None): - """ - Take a list of interesting items of a message subparts structure, - and return a dict of dicts almost ready to be written to the content - documents that will be stored in Soledad. - - It walks down the subparts in the parsed message tree, and collapses - the leaf documents into a wrapper document until no multipart submessages - are left. To achieve this, it iteratively calculates a wrapper vector of - all documents in the sequence that have more than one part and have unitary - documents to their right. To collapse a multipart, take as many - unitary documents as parts the submessage contains, and replace the object - in the sequence with the new wrapper document. - - :param parts: A list of dicts containing the interesting properties for - the message structure. Normally this has been generated by - doing a message walk. - :type parts: list of dicts. - :param body_phash: the payload hash of the body part, to be included - in the outer content doc for convenience. - :type body_phash: basestring or None - """ - PART_MAP = "part_map" - MULTI = "multi" - HEADERS = "headers" - PHASH = "phash" - BODY = "body" - - # parts vector - pv = list(get_parts_vector(parts)) - - inner_headers = parts[1].get(HEADERS, None) if ( - len(parts) == 2) else None - - # wrappers vector - def getwv(pv): - return [ - True if pv[i] != 1 and pv[i + 1] == 1 - else False - for i in range(len(pv) - 1) - ] - wv = getwv(pv) - - # do until no wrapper document is left - while any(wv): - wind = wv.index(True) # wrapper index - nsub = pv[wind] # number of subparts to pick - slic = parts[wind + 1:wind + 1 + nsub] # slice with subparts - - cwra = { - MULTI: True, - PART_MAP: dict((index + 1, part) # content wrapper - for index, part in enumerate(slic)), - HEADERS: dict(parts[wind][HEADERS]) - } - - # remove subparts and substitute wrapper - map(lambda i: parts.remove(i), slic) - parts[wind] = cwra - - # refresh vectors for this iteration - pv = list(get_parts_vector(parts)) - wv = getwv(pv) - - if all(x == 1 for x in pv): - # special case in the rightmost element - main_pmap = parts[0].get(PART_MAP, None) - if main_pmap is not None: - last_part = max(main_pmap.keys()) - main_pmap[last_part][PART_MAP] = {} - for partind in range(len(pv) - 1): - main_pmap[last_part][PART_MAP][partind] = parts[partind + 1] - - outer = parts[0] - outer.pop(HEADERS) - if PART_MAP not in outer: - # we have a multipart with 1 part only, so kind of fix it - # although it would be prettier if I take this special case at - # the beginning of the walk. - pdoc = {MULTI: True, - PART_MAP: {1: outer}} - pdoc[PART_MAP][1][MULTI] = False - if not pdoc[PART_MAP][1].get(PHASH, None): - pdoc[PART_MAP][1][PHASH] = body_phash - if inner_headers: - pdoc[PART_MAP][1][HEADERS] = inner_headers - else: - pdoc = outer - pdoc[BODY] = body_phash - return pdoc |