diff options
author | NavaL <ayoyo@thoughtworks.com> | 2016-10-21 21:11:22 +0200 |
---|---|---|
committer | NavaL <ayoyo@thoughtworks.com> | 2016-10-21 21:11:22 +0200 |
commit | aaed875ecce770bde03a2066ade40944cd1ab31d (patch) | |
tree | 8b1808dd803a1b45e1d3be394b97ac9010d9be96 | |
parent | 2dcdbc47e6232309329b4ec53f5daaab201d3d5b (diff) |
[#797] reinstating use of the provided email body charset encoding
4 files changed, 22 insertions, 3 deletions
diff --git a/service/pixelated/adapter/mailstore/body_parser.py b/service/pixelated/adapter/mailstore/body_parser.py index 5408bd9d..8cac75cf 100644 --- a/service/pixelated/adapter/mailstore/body_parser.py +++ b/service/pixelated/adapter/mailstore/body_parser.py @@ -31,13 +31,14 @@ def _parse_charset_header(content_type_and_charset_header, default_charset='us-a class BodyParser(object): - def __init__(self, content, content_type='text/plain; charset="us-ascii"', content_transfer_encoding=None): + def __init__(self, content, content_type='text/plain; charset="us-ascii"', content_transfer_encoding=None, charset=None): self._content = content self._content_type = content_type self._content_transfer_encoding = content_transfer_encoding + self._charset = charset def parsed_content(self): - charset = _parse_charset_header(self._content_type) + charset = self._charset or _parse_charset_header(self._content_type) text = self._serialize_for_parser(charset) decoded_body = self._parse_and_decode(text) diff --git a/service/pixelated/adapter/mailstore/leap_mailstore.py b/service/pixelated/adapter/mailstore/leap_mailstore.py index 900a7cbb..288223dd 100644 --- a/service/pixelated/adapter/mailstore/leap_mailstore.py +++ b/service/pixelated/adapter/mailstore/leap_mailstore.py @@ -330,7 +330,8 @@ class LeapMailStore(MailStore): parser = BodyParser('', content_type='text/plain', content_transfer_encoding='UTF-8') # It fix the problem when leap doesn'r found body_phash and returns empty string if not isinstance(content_doc, str): - parser = BodyParser(content_doc.raw, content_type=content_doc.content_type, content_transfer_encoding=content_doc.content_transfer_encoding) + parser = BodyParser(content_doc.raw, content_type=content_doc.content_type, + content_transfer_encoding=content_doc.content_transfer_encoding, charset=content_doc.charset) defer.returnValue(parser.parsed_content()) diff --git a/service/test/integration/test_search.py b/service/test/integration/test_search.py index 
afad12b7..c36e29aa 100644 --- a/service/test/integration/test_search.py +++ b/service/test/integration/test_search.py @@ -134,3 +134,14 @@ class SearchTest(SoledadTestBase): results = yield self.app_test_client.get_mails_by_tag('inbox') self.assertEqual(results[0].ident, mail2.ident) self.assertEqual(results[1].ident, mail1.ident) + + @defer.inlineCallbacks + def test_search_base64_body(self): + body = u'bl\xe1' + input_mail = MailBuilder().with_body(body.encode('utf-8')).build_input_mail() + + mail = yield self.app_test_client.add_mail_to_inbox(input_mail) + results = yield self.app_test_client.search(body) + + self.assertGreater(len(results), 0, 'No results returned from search') + self.assertEquals(results[0].ident, mail.ident) diff --git a/service/test/unit/adapter/mailstore/test_body_parser.py b/service/test/unit/adapter/mailstore/test_body_parser.py index 9d58637c..155b326c 100644 --- a/service/test/unit/adapter/mailstore/test_body_parser.py +++ b/service/test/unit/adapter/mailstore/test_body_parser.py @@ -55,3 +55,9 @@ class BodyParserTest(unittest.TestCase): self.assertEqual(u'unkown char: ', parser.parsed_content()) logger_mock.warn.assert_called_with(u'Failed to encode content for charset iso-8859-1. Ignoring invalid chars: \'latin-1\' codec can\'t encode character u\'\\ufffd\' in position 13: ordinal not in range(256)') + + def test_charset_can_be_explicitely_set_and_take_precedence_over_content_type_if_set(self): + parser = BodyParser('YmzDoQ==\n', content_type='text/plain; us-ascii', content_transfer_encoding='base64', + charset="utf-8") + + self.assertEqual(u'bl\xe1', parser.parsed_content()) |