From 5c5500f91a520ee363c8e553718a5a0e763257e5 Mon Sep 17 00:00:00 2001 From: Folker Bernitt Date: Tue, 1 Sep 2015 10:21:42 +0200 Subject: Decode encoded mail headers to unicode - Issue #446 --- service/pixelated/adapter/mailstore/leap_mailstore.py | 12 +++++++++++- service/pixelated/adapter/search/__init__.py | 10 +++++----- service/test/unit/adapter/mailstore/test_leap_mail.py | 14 ++++++++++++++ .../test/unit/adapter/mailstore/test_leap_mailstore.py | 18 ++++++++++++++++-- 4 files changed, 46 insertions(+), 8 deletions(-) (limited to 'service') diff --git a/service/pixelated/adapter/mailstore/leap_mailstore.py b/service/pixelated/adapter/mailstore/leap_mailstore.py index c18748b6..73ce543f 100644 --- a/service/pixelated/adapter/mailstore/leap_mailstore.py +++ b/service/pixelated/adapter/mailstore/leap_mailstore.py @@ -88,9 +88,19 @@ class LeapMail(Mail): return result + def _decoded_header_utf_8(self, header_value): + if isinstance(header_value, list): + return [self._decoded_header_utf_8(v) for v in header_value] + else: + content, encoding = decode_header(header_value)[0] + if encoding: + return unicode(content, encoding=encoding) + else: + return unicode(content, encoding='ascii') + def as_dict(self): return { - 'header': {k.lower(): v for k, v in self.headers.items()}, + 'header': {k.lower(): self._decoded_header_utf_8(v) for k, v in self.headers.items()}, 'ident': self._mail_id, 'tags': self.tags, 'status': list(self.status), diff --git a/service/pixelated/adapter/search/__init__.py b/service/pixelated/adapter/search/__init__.py index 46691926..84e69a9a 100644 --- a/service/pixelated/adapter/search/__init__.py +++ b/service/pixelated/adapter/search/__init__.py @@ -126,8 +126,8 @@ class SearchEngine(object): bounced = mail.bounced if mail.bounced else [''] index_data = { - 'sender': self._unicode_header_field(header.get('from', '')), - 'subject': self._unicode_header_field(header.get('subject', '')), + 'sender': self._empty_string_to_none(header.get('from', 
'')), + 'subject': self._empty_string_to_none(header.get('subject', '')), 'date': milliseconds(header.get('date', '')), 'to': self._format_recipient(header, 'to'), 'cc': self._format_recipient(header, 'cc'), @@ -146,11 +146,11 @@ class SearchEngine(object): list = headers.get(name, ['']) return u','.join(list) if list else u'' - def _unicode_header_field(self, field_value): + def _empty_string_to_none(self, field_value): if not field_value: return None - - return unicode(field_value.decode('utf-8')) + else: + return field_value def index_mails(self, mails, callback=None): try: diff --git a/service/test/unit/adapter/mailstore/test_leap_mail.py b/service/test/unit/adapter/mailstore/test_leap_mail.py index dd749aa3..9dab4e28 100644 --- a/service/test/unit/adapter/mailstore/test_leap_mail.py +++ b/service/test/unit/adapter/mailstore/test_leap_mail.py @@ -84,6 +84,20 @@ class TestLeapMail(TestCase): self.assertEqual([{'ident': 'id', 'name': 'name', 'encoding': 'encoding'}], mail.as_dict()['attachments']) + def test_as_dict_headers_with_special_chars(self): + expected_address = u'"\xc4lbert \xdcbr\xf6" <\xe4\xfc\xf6@example.mail>' + expected_subject = u'H\xe4ll\xf6 W\xf6rld' + mail = LeapMail('', 'INBOX', + {'From': '=?iso-8859-1?q?=22=C4lbert_=DCbr=F6=22_=3C=E4=FC=F6=40example=2Email=3E?=', + 'To': '=?iso-8859-1?q?=22=C4lbert_=DCbr=F6=22_=3C=E4=FC=F6=40example=2Email=3E?=', + 'Cc': '=?iso-8859-1?q?=22=C4lbert_=DCbr=F6=22_=3C=E4=FC=F6=40example=2Email=3E?=', + 'Subject': '=?iso-8859-1?q?H=E4ll=F6_W=F6rld?='}) + + self.assertEqual(expected_address, mail.as_dict()['header']['from']) + self.assertEqual([expected_address], mail.as_dict()['header']['to']) + self.assertEqual([expected_address], mail.as_dict()['header']['cc']) + self.assertEqual(expected_subject, mail.as_dict()['header']['subject']) + def test_raw_constructed_by_headers_and_body(self): body = 'some body content' mail = LeapMail('doc id', 'INBOX', {'From': 'test@example.test', 'Subject': 'A test Mail', 'To': 
'receiver@example.test'}, ('foo', 'bar'), body=body) diff --git a/service/test/unit/adapter/mailstore/test_leap_mailstore.py b/service/test/unit/adapter/mailstore/test_leap_mailstore.py index c173d87c..09c92980 100644 --- a/service/test/unit/adapter/mailstore/test_leap_mailstore.py +++ b/service/test/unit/adapter/mailstore/test_leap_mailstore.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # # Copyright (c) 2015 ThoughtWorks, Inc. # @@ -13,6 +14,7 @@ # # You should have received a copy of the GNU Affero General Public License # along with Pixelated. If not, see <http://www.gnu.org/licenses/>. +from email.header import Header from email.mime.application import MIMEApplication from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText @@ -241,6 +243,18 @@ class TestLeapMailStore(TestCase): expected = [{'ident': self._cdoc_phash_from_message(mocked_message, 2), 'name': 'filename.txt', 'encoding': 'base64'}] self.assertEqual(expected, message.as_dict()['attachments']) + @defer.inlineCallbacks + def test_add_mail_with_special_chars(self): + input_mail = MIMEText(u'a utf8 message', _charset='utf-8') + input_mail['From'] = Header(u'"Älbert Übrö" <äüö@example.mail>', 'iso-8859-1') + input_mail['Subject'] = Header(u'Hällö Wörld', 'iso-8859-1') + self._add_create_mail_mocks_to_soledad(input_mail) + store = LeapMailStore(self.soledad) + + message = yield store.add_mail('INBOX', input_mail.as_string()) + + self.assertEqual(u'"\xc4lbert \xdcbr\xf6" <\xe4\xfc\xf6@example.mail>', message.as_dict()['header']['from']) + def _cdoc_phash_from_message(self, mocked_message, attachment_nr): return mocked_message.get_wrapper().cdocs[attachment_nr].future_doc_id[2:] @@ -350,8 +364,8 @@ class TestLeapMailStore(TestCase): mail = self._load_mail_from_file(mail_file) return self._add_create_mail_mocks_to_soledad(mail) - def _add_create_mail_mocks_to_soledad(self, mail): - mail = self._convert_mail_to_leap_message(mail) + def _add_create_mail_mocks_to_soledad(self, example_mail): + mail =
self._convert_mail_to_leap_message(example_mail) wrapper = mail.get_wrapper() mdoc_id = wrapper.mdoc.future_doc_id -- cgit v1.2.3