summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNavaL <ayoyo@thoughtworks.com>2016-10-21 21:11:22 +0200
committerNavaL <ayoyo@thoughtworks.com>2016-10-21 21:11:22 +0200
commitaaed875ecce770bde03a2066ade40944cd1ab31d (patch)
tree8b1808dd803a1b45e1d3be394b97ac9010d9be96
parent2dcdbc47e6232309329b4ec53f5daaab201d3d5b (diff)
[#797] reinstating use of the provided email body charset encoding
-rw-r--r--service/pixelated/adapter/mailstore/body_parser.py5
-rw-r--r--service/pixelated/adapter/mailstore/leap_mailstore.py3
-rw-r--r--service/test/integration/test_search.py11
-rw-r--r--service/test/unit/adapter/mailstore/test_body_parser.py6
4 files changed, 22 insertions, 3 deletions
diff --git a/service/pixelated/adapter/mailstore/body_parser.py b/service/pixelated/adapter/mailstore/body_parser.py
index 5408bd9d..8cac75cf 100644
--- a/service/pixelated/adapter/mailstore/body_parser.py
+++ b/service/pixelated/adapter/mailstore/body_parser.py
@@ -31,13 +31,14 @@ def _parse_charset_header(content_type_and_charset_header, default_charset='us-a
class BodyParser(object):
- def __init__(self, content, content_type='text/plain; charset="us-ascii"', content_transfer_encoding=None):
+ def __init__(self, content, content_type='text/plain; charset="us-ascii"', content_transfer_encoding=None, charset=None):
self._content = content
self._content_type = content_type
self._content_transfer_encoding = content_transfer_encoding
+ self._charset = charset
def parsed_content(self):
- charset = _parse_charset_header(self._content_type)
+ charset = self._charset or _parse_charset_header(self._content_type)
text = self._serialize_for_parser(charset)
decoded_body = self._parse_and_decode(text)
diff --git a/service/pixelated/adapter/mailstore/leap_mailstore.py b/service/pixelated/adapter/mailstore/leap_mailstore.py
index 900a7cbb..288223dd 100644
--- a/service/pixelated/adapter/mailstore/leap_mailstore.py
+++ b/service/pixelated/adapter/mailstore/leap_mailstore.py
@@ -330,7 +330,8 @@ class LeapMailStore(MailStore):
parser = BodyParser('', content_type='text/plain', content_transfer_encoding='UTF-8')
# It fixes the problem when leap doesn't find body_phash and returns an empty string
if not isinstance(content_doc, str):
- parser = BodyParser(content_doc.raw, content_type=content_doc.content_type, content_transfer_encoding=content_doc.content_transfer_encoding)
+ parser = BodyParser(content_doc.raw, content_type=content_doc.content_type,
+ content_transfer_encoding=content_doc.content_transfer_encoding, charset=content_doc.charset)
defer.returnValue(parser.parsed_content())
diff --git a/service/test/integration/test_search.py b/service/test/integration/test_search.py
index afad12b7..c36e29aa 100644
--- a/service/test/integration/test_search.py
+++ b/service/test/integration/test_search.py
@@ -134,3 +134,14 @@ class SearchTest(SoledadTestBase):
results = yield self.app_test_client.get_mails_by_tag('inbox')
self.assertEqual(results[0].ident, mail2.ident)
self.assertEqual(results[1].ident, mail1.ident)
+
+ @defer.inlineCallbacks
+ def test_search_base64_body(self):
+ body = u'bl\xe1'
+ input_mail = MailBuilder().with_body(body.encode('utf-8')).build_input_mail()
+
+ mail = yield self.app_test_client.add_mail_to_inbox(input_mail)
+ results = yield self.app_test_client.search(body)
+
+ self.assertGreater(len(results), 0, 'No results returned from search')
+ self.assertEquals(results[0].ident, mail.ident)
diff --git a/service/test/unit/adapter/mailstore/test_body_parser.py b/service/test/unit/adapter/mailstore/test_body_parser.py
index 9d58637c..155b326c 100644
--- a/service/test/unit/adapter/mailstore/test_body_parser.py
+++ b/service/test/unit/adapter/mailstore/test_body_parser.py
@@ -55,3 +55,9 @@ class BodyParserTest(unittest.TestCase):
self.assertEqual(u'unkown char: ', parser.parsed_content())
logger_mock.warn.assert_called_with(u'Failed to encode content for charset iso-8859-1. Ignoring invalid chars: \'latin-1\' codec can\'t encode character u\'\\ufffd\' in position 13: ordinal not in range(256)')
+
+ def test_charset_can_be_explicitely_set_and_take_precedence_over_content_type_if_set(self):
+ parser = BodyParser('YmzDoQ==\n', content_type='text/plain; us-ascii', content_transfer_encoding='base64',
+ charset="utf-8")
+
+ self.assertEqual(u'bl\xe1', parser.parsed_content())