From f97104e25fe504993615f194825a757d4c381a24 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 23 Mar 2015 12:59:12 -0400 Subject: [bug] re-add fix for multiple headers This fix stores repeated headers as multi-line values; they were being discarded when stored in a regular dict. The fix had been removed during the last refactor. I also store headers now as a case-insensitive dict, which solves other problems with the implementation of the twisted imap. Releases: 0.4.0 --- src/leap/mail/adaptors/soledad.py | 18 ++++++++---------- src/leap/mail/imap/messages.py | 25 +++++++++++++++++++------ src/leap/mail/mail.py | 20 +++++++++++++++++++- src/leap/mail/tests/test_mail.py | 8 +++++--- 4 files changed, 51 insertions(+), 20 deletions(-) diff --git a/src/leap/mail/adaptors/soledad.py b/src/leap/mail/adaptors/soledad.py index 490e014..7a1a92d 100644 --- a/src/leap/mail/adaptors/soledad.py +++ b/src/leap/mail/adaptors/soledad.py @@ -1114,6 +1114,7 @@ def _split_into_parts(raw): msg, parts, chash, multi = _parse_msg(raw) size = len(msg.as_string()) + body_phash = walk.get_body_phash(msg) parts_map = walk.walk_msg_tree(parts, body_phash=body_phash) @@ -1161,16 +1162,13 @@ def _build_headers_doc(msg, chash, body_phash, parts_map): It takes into account possibly repeated headers. """ - headers = msg.items() - - # TODO move this manipulation to IMAP - #headers = defaultdict(list) - #for k, v in msg.items(): - #headers[k].append(v) - ## "fix" for repeated headers. 
- #for k, v in headers.items(): - #newline = "\n%s: " % (k,) - #headers[k] = newline.join(v) + headers = defaultdict(list) + for k, v in msg.items(): + headers[k].append(v) + # "fix" for repeated headers (as in "Received:") + for k, v in headers.items(): + newline = "\n%s: " % (k.lower(),) + headers[k] = newline.join(v) lower_headers = lowerdict(dict(headers)) msgid = first(_MSGID_RE.findall( diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 02aac2e..13943b1 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -208,6 +208,18 @@ class IMAPMessagePart(object): return IMAPMessagePart(subpart) +class CaseInsensitiveDict(dict): + """ + A dictionary subclass that will allow case-insensitive key lookups. + """ + + def __setitem__(self, key, value): + super(CaseInsensitiveDict, self).__setitem__(key.lower(), value) + + def __getitem__(self, key): + return super(CaseInsensitiveDict, self).__getitem__(key.lower()) + + def _format_headers(headers, negate, *names): # current server impl. expects content-type to be present, so if for # some reason we do not have headers, we have to return at least that @@ -228,13 +240,13 @@ def _format_headers(headers, negate, *names): # default to most likely standard charset = find_charset(headers, "utf-8") - _headers = dict() - for key, value in headers.items(): - # twisted imap server expects *some* headers to be lowercase - # We could use a CaseInsensitiveDict here... - if key.lower() == "content-type": - key = key.lower() + # We will return a copy of the headers dictionary that + # will allow case-insensitive lookups. In some parts of the twisted imap + # server code the keys are expected to be in lower case, and in this way + # we avoid having to convert them. 
+ _headers = CaseInsensitiveDict() + for key, value in headers.items(): if not isinstance(key, str): key = key.encode(charset, 'replace') if not isinstance(value, str): @@ -247,4 +259,5 @@ def _format_headers(headers, negate, *names): # filter original dict by negate-condition if cond(key): _headers[key] = value + return _headers diff --git a/src/leap/mail/mail.py b/src/leap/mail/mail.py index fd2f39a..99c3873 100644 --- a/src/leap/mail/mail.py +++ b/src/leap/mail/mail.py @@ -17,6 +17,7 @@ """ Generic Access to Mail objects: Public LEAP Mail API. """ +import itertools import uuid import logging import StringIO @@ -98,6 +99,23 @@ def _encode_payload(payload, ctype=""): return payload +def _unpack_headers(headers_dict): + """ + Take a "packed" dict containing headers (with repeated keys represented as + line breaks inside each value, preceded by the header key) and return a + list of tuples in which each repeated key has a different tuple. + """ + headers_l = headers_dict.items() + for i, (k, v) in enumerate(headers_l): + splitted = v.split(k.lower() + ": ") + if len(splitted) != 1: + inner = zip( + itertools.cycle([k]), + map(lambda l: l.rstrip('\n'), splitted)) + headers_l = headers_l[:i] + inner + headers_l[i+1:] + return headers_l + + class MessagePart(object): # TODO This class should be better abstracted from the data model. # TODO support arbitrarily nested multiparts (right now we only support @@ -242,7 +260,7 @@ class Message(object): """ Get the raw headers document. 
""" - return [tuple(item) for item in self._wrapper.hdoc.headers] + return self._wrapper.hdoc.headers def get_body_file(self, store): """ diff --git a/src/leap/mail/tests/test_mail.py b/src/leap/mail/tests/test_mail.py index d326ca8..2c03933 100644 --- a/src/leap/mail/tests/test_mail.py +++ b/src/leap/mail/tests/test_mail.py @@ -26,7 +26,7 @@ from email.parser import Parser from email.Utils import formatdate from leap.mail.adaptors.soledad import SoledadMailAdaptor -from leap.mail.mail import MessageCollection, Account +from leap.mail.mail import MessageCollection, Account, _unpack_headers from leap.mail.mailbox_indexer import MailboxIndexer from leap.mail.tests.common import SoledadTestMixin @@ -144,8 +144,10 @@ class MessageTestCase(SoledadTestMixin, CollectionMixin): def _test_get_headers_cb(self, msg): self.assertTrue(msg is not None) - expected = _get_parsed_msg().items() - self.assertEqual(msg.get_headers(), expected) + expected = [ + (str(key.lower()), str(value)) + for (key, value) in _get_parsed_msg().items()] + self.assertItemsEqual(_unpack_headers(msg.get_headers()), expected) def test_get_body_file(self): d = self.get_inserted_msg(multi=True) -- cgit v1.2.3