From aaed875ecce770bde03a2066ade40944cd1ab31d Mon Sep 17 00:00:00 2001 From: NavaL Date: Fri, 21 Oct 2016 21:11:22 +0200 Subject: [#797] reinstating use of the provided email body charset encoding --- service/pixelated/adapter/mailstore/body_parser.py | 5 +++-- service/pixelated/adapter/mailstore/leap_mailstore.py | 3 ++- service/test/integration/test_search.py | 11 +++++++++++ service/test/unit/adapter/mailstore/test_body_parser.py | 6 ++++++ 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/service/pixelated/adapter/mailstore/body_parser.py b/service/pixelated/adapter/mailstore/body_parser.py index 5408bd9d..8cac75cf 100644 --- a/service/pixelated/adapter/mailstore/body_parser.py +++ b/service/pixelated/adapter/mailstore/body_parser.py @@ -31,13 +31,14 @@ def _parse_charset_header(content_type_and_charset_header, default_charset='us-a class BodyParser(object): - def __init__(self, content, content_type='text/plain; charset="us-ascii"', content_transfer_encoding=None): + def __init__(self, content, content_type='text/plain; charset="us-ascii"', content_transfer_encoding=None, charset=None): self._content = content self._content_type = content_type self._content_transfer_encoding = content_transfer_encoding + self._charset = charset def parsed_content(self): - charset = _parse_charset_header(self._content_type) + charset = self._charset or _parse_charset_header(self._content_type) text = self._serialize_for_parser(charset) decoded_body = self._parse_and_decode(text) diff --git a/service/pixelated/adapter/mailstore/leap_mailstore.py b/service/pixelated/adapter/mailstore/leap_mailstore.py index 900a7cbb..288223dd 100644 --- a/service/pixelated/adapter/mailstore/leap_mailstore.py +++ b/service/pixelated/adapter/mailstore/leap_mailstore.py @@ -330,7 +330,8 @@ class LeapMailStore(MailStore): parser = BodyParser('', content_type='text/plain', content_transfer_encoding='UTF-8') # It fix the problem when leap doesn'r found body_phash and returns empty string 
if not isinstance(content_doc, str): - parser = BodyParser(content_doc.raw, content_type=content_doc.content_type, content_transfer_encoding=content_doc.content_transfer_encoding) + parser = BodyParser(content_doc.raw, content_type=content_doc.content_type, + content_transfer_encoding=content_doc.content_transfer_encoding, charset=content_doc.charset) defer.returnValue(parser.parsed_content()) diff --git a/service/test/integration/test_search.py b/service/test/integration/test_search.py index afad12b7..c36e29aa 100644 --- a/service/test/integration/test_search.py +++ b/service/test/integration/test_search.py @@ -134,3 +134,14 @@ class SearchTest(SoledadTestBase): results = yield self.app_test_client.get_mails_by_tag('inbox') self.assertEqual(results[0].ident, mail2.ident) self.assertEqual(results[1].ident, mail1.ident) + + @defer.inlineCallbacks + def test_search_base64_body(self): + body = u'bl\xe1' + input_mail = MailBuilder().with_body(body.encode('utf-8')).build_input_mail() + + mail = yield self.app_test_client.add_mail_to_inbox(input_mail) + results = yield self.app_test_client.search(body) + + self.assertGreater(len(results), 0, 'No results returned from search') + self.assertEquals(results[0].ident, mail.ident) diff --git a/service/test/unit/adapter/mailstore/test_body_parser.py b/service/test/unit/adapter/mailstore/test_body_parser.py index 9d58637c..155b326c 100644 --- a/service/test/unit/adapter/mailstore/test_body_parser.py +++ b/service/test/unit/adapter/mailstore/test_body_parser.py @@ -55,3 +55,9 @@ class BodyParserTest(unittest.TestCase): self.assertEqual(u'unkown char: ', parser.parsed_content()) logger_mock.warn.assert_called_with(u'Failed to encode content for charset iso-8859-1. 
Ignoring invalid chars: \'latin-1\' codec can\'t encode character u\'\\ufffd\' in position 13: ordinal not in range(256)') + + def test_charset_can_be_explicitely_set_and_take_precedence_over_content_type_if_set(self): + parser = BodyParser('YmzDoQ==\n', content_type='text/plain; us-ascii', content_transfer_encoding='base64', + charset="utf-8") + + self.assertEqual(u'bl\xe1', parser.parsed_content()) -- cgit v1.2.3