From fa1299aca064d2ac347a3e14d4c2169335a8f40c Mon Sep 17 00:00:00 2001 From: mnandri Date: Mon, 7 Dec 2015 16:53:39 +0100 Subject: #454 added utf-8 encoding on message body, and corresponding query search term --- .../pixelated/adapter/mailstore/leap_mailstore.py | 4 +- service/pixelated/adapter/search/__init__.py | 4 +- service/pixelated/resources/mails_resource.py | 12 ++--- service/pixelated/utils.py | 22 +++++++++ service/test/unit/adapter/search/test_search.py | 52 +++++++++++++++++++++- service/test/unit/resources/test_mails_resource.py | 51 +++++++++++++++++++++ service/test/unit/test_utils.py | 36 +++++++++++++++ 7 files changed, 172 insertions(+), 9 deletions(-) create mode 100755 service/pixelated/utils.py create mode 100644 service/test/unit/resources/test_mails_resource.py create mode 100755 service/test/unit/test_utils.py diff --git a/service/pixelated/adapter/mailstore/leap_mailstore.py b/service/pixelated/adapter/mailstore/leap_mailstore.py index 90035d20..14b0e417 100644 --- a/service/pixelated/adapter/mailstore/leap_mailstore.py +++ b/service/pixelated/adapter/mailstore/leap_mailstore.py @@ -27,6 +27,8 @@ from pixelated.adapter.mailstore.mailstore import MailStore, underscore_uuid from leap.mail.mail import Message from pixelated.adapter.model.mail import Mail, InputMail +from pixelated.utils import to_unicode + class AttachmentInfo(object): def __init__(self, ident, name, encoding): @@ -41,7 +43,7 @@ class LeapMail(Mail): self._mail_id = mail_id self._mailbox_name = mailbox_name self._headers = headers if headers is not None else {} - self._body = body + self._body = to_unicode(body) self.tags = set(tags) # TODO test that asserts copy self._flags = set(flags) # TODO test that asserts copy self._attachments = attachments diff --git a/service/pixelated/adapter/search/__init__.py b/service/pixelated/adapter/search/__init__.py index 065dd5e5..8d0b1f4e 100644 --- a/service/pixelated/adapter/search/__init__.py +++ 
b/service/pixelated/adapter/search/__init__.py @@ -31,6 +31,8 @@ from whoosh import sorting from pixelated.support.functional import unique import traceback +from pixelated.utils import to_unicode + class SearchEngine(object): DEFAULT_INDEX_HOME = os.path.join(os.environ['HOME'], '.leap') @@ -132,7 +134,7 @@ class SearchEngine(object): 'cc': self._format_recipient(header, 'cc'), 'bcc': self._format_recipient(header, 'bcc'), 'tag': u','.join(unique(tags)), - 'body': unicode(mdict['textPlainBody'] if 'textPlainBody' in mdict else mdict['body']), + 'body': to_unicode(mdict.get('textPlainBody', mdict.get('body', ''))), 'ident': unicode(mdict['ident']), 'flags': unicode(','.join(unique(mail.flags))), 'raw': unicode(mail.raw) diff --git a/service/pixelated/resources/mails_resource.py b/service/pixelated/resources/mails_resource.py index 34dd042a..613c5e6f 100644 --- a/service/pixelated/resources/mails_resource.py +++ b/service/pixelated/resources/mails_resource.py @@ -7,10 +7,9 @@ from twisted.web.resource import Resource from twisted.web import server from twisted.internet import defer from twisted.python.log import err -from leap.common.events import ( - register, - catalog as events -) +from leap.common import events + +from pixelated.utils import to_unicode class MailsUnreadResource(Resource): @@ -120,7 +119,7 @@ class MailsResource(Resource): delivery_error_mail = InputMail.delivery_error_template(delivery_address=event.content) self._mail_service.mailboxes.inbox.add(delivery_error_mail) - register(events.SMTP_SEND_MESSAGE_ERROR, callback=on_error) + events.register(events.catalog.SMTP_SEND_MESSAGE_ERROR, callback=on_error) def __init__(self, mail_service, draft_service): Resource.__init__(self) @@ -136,7 +135,8 @@ class MailsResource(Resource): def render_GET(self, request): query, window_size, page = request.args.get('q')[0], request.args.get('w')[0], request.args.get('p')[0] - d = self._mail_service.mails(query, window_size, page) + unicode_query = 
to_unicode(query) + d = self._mail_service.mails(unicode_query, window_size, page) d.addCallback(lambda (mails, total): { "stats": { diff --git a/service/pixelated/utils.py b/service/pixelated/utils.py new file mode 100755 index 00000000..2e0f1f88 --- /dev/null +++ b/service/pixelated/utils.py @@ -0,0 +1,22 @@ +# +# Copyright (c) 2015 ThoughtWorks, Inc. +# +# Pixelated is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Pixelated is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Pixelated. If not, see <http://www.gnu.org/licenses/>. + + +def to_unicode(text): + if text and not isinstance(text, unicode): + encoding = 'utf-8' + return unicode(text, encoding=encoding) + return text diff --git a/service/test/unit/adapter/search/test_search.py b/service/test/unit/adapter/search/test_search.py index 76e704b6..2e7badff 100644 --- a/service/test/unit/adapter/search/test_search.py +++ b/service/test/unit/adapter/search/test_search.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # # Copyright (c) 2014 ThoughtWorks, Inc. 
# @@ -21,6 +22,8 @@ from pixelated.adapter.search import SearchEngine from tempdir import TempDir from test.support import test_helper +from pixelated.utils import to_unicode + INDEX_KEY = '\xde3?\x87\xff\xd9\xd3\x14\xf0\xa7>\x1f%C{\x16.\\\xae\x8c\x13\xa7\xfb\x04\xd4]+\x8d_\xed\xd1\x8d\x0bI' \ '\x8a\x0e\xa4tm\xab\xbf\xb4\xa5\x99\x00d\xd5w\x9f\x18\xbc\x1d\xd4_W\xd2\xb6\xe8H\x83\x1b\xd8\x9d\xad' @@ -45,7 +48,7 @@ class SearchEngineTest(unittest.TestCase): def tearDown(self): self.tempdir.dissolve() - def test_encoding(self): + def test_headers_encoding(self): # given se = SearchEngine(INDEX_KEY, self.agent_home) @@ -62,3 +65,50 @@ class SearchEngineTest(unittest.TestCase): result = se.search('folker') self.assertEqual((['mailid'], 1), result) + + def test_contents_encoding_accents(self): + # given + se = SearchEngine(INDEX_KEY, self.agent_home) + + headers = { + 'From': 'foo@bar.tld', + 'To': '=?utf-8?b?IsOEw7zDtiDDlsO8w6QiIDxmb2xrZXJAcGl4ZWxhdGVkLXByb2plY3Qub3Jn?=\n =?utf-8?b?PiwgRsO2bGtlciA8Zm9sa2VyQHBpeGVsYXRlZC1wcm9qZWN0Lm9yZz4=?=', + 'Cc': '=?utf-8?b?IsOEw7zDtiDDlsO8w6QiIDxmb2xrZXJAcGl4ZWxhdGVkLXByb2plY3Qub3Jn?=\n =?utf-8?b?PiwgRsO2bGtlciA8Zm9sa2VyQHBpeGVsYXRlZC1wcm9qZWN0Lm9yZz4=?=', + 'Subject': 'Some test mail', + } + + body = "When doing the search, it's not possible to find words with graphical accents, e.g.: 'coração', 'é', 'Fièvre', La Pluie d'été, 'não'." 
+ + # when + se.index_mail(LeapMail('mailid', 'INBOX', headers=headers, body=body)) # test_helper.pixelated_mail(extra_headers=headers, chash='mailid')) + + result = se.search(u"'coração', 'é',") + self.assertEqual((['mailid'], 1), result) + + result = se.search(u"Fièvre") + self.assertEqual((['mailid'], 1), result) + + result = se.search(u"été") + self.assertEqual((['mailid'], 1), result) + + def test_contents_encoding_special_characters(self): + # given + se = SearchEngine(INDEX_KEY, self.agent_home) + + headers = { + 'From': 'foo@bar.tld', + 'To': '=?utf-8?b?IsOEw7zDtiDDlsO8w6QiIDxmb2xrZXJAcGl4ZWxhdGVkLXByb2plY3Qub3Jn?=\n =?utf-8?b?PiwgRsO2bGtlciA8Zm9sa2VyQHBpeGVsYXRlZC1wcm9qZWN0Lm9yZz4=?=', + 'Cc': '=?utf-8?b?IsOEw7zDtiDDlsO8w6QiIDxmb2xrZXJAcGl4ZWxhdGVkLXByb2plY3Qub3Jn?=\n =?utf-8?b?PiwgRsO2bGtlciA8Zm9sa2VyQHBpeGVsYXRlZC1wcm9qZWN0Lm9yZz4=?=', + 'Subject': 'Some test mail', + } + + body = "When doing the search, 您好 أهلا" + + # when + se.index_mail(LeapMail('mailid', 'INBOX', headers=headers, body=body)) # test_helper.pixelated_mail(extra_headers=headers, chash='mailid')) + + result = se.search(u"您好") + self.assertEqual((['mailid'], 1), result) + + result = se.search(u"أهلا") + self.assertEqual((['mailid'], 1), result) diff --git a/service/test/unit/resources/test_mails_resource.py b/service/test/unit/resources/test_mails_resource.py new file mode 100644 index 00000000..02b17bf1 --- /dev/null +++ b/service/test/unit/resources/test_mails_resource.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2014 ThoughtWorks, Inc. +# +# Pixelated is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# Pixelated is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Pixelated. If not, see <http://www.gnu.org/licenses/>. +import unittest +import json +from mockito import mock, when, verify +from test.unit.resources import DummySite +from twisted.web.test.requesthelper import DummyRequest +from pixelated.resources.mails_resource import MailsResource +from twisted.internet import defer +from mock import patch + + +class TestArchiveResource(unittest.TestCase): + def setUp(self): + self.mail_service = mock() + + @patch('leap.common.events.register') + def test_render_GET_should_unicode_mails_search_query(self, mock_register): + request = DummyRequest(['/mails']) + non_unicode_search_term = 'coração' + request.addArg('q', non_unicode_search_term) + request.addArg('w', 25) + request.addArg('p', 1) + + unicodified_search_term = u'coração' + when(self.mail_service).mails(unicodified_search_term, 25, 1).thenReturn(defer.Deferred()) + + mails_resource = MailsResource(self.mail_service, mock()) + mails_resource.isLeaf = True + web = DummySite(mails_resource) + d = web.get(request) + + def assert_response(_): + verify(self.mail_service).mails(unicodified_search_term, 25, 1) + + d.addCallback(assert_response) + return d diff --git a/service/test/unit/test_utils.py b/service/test/unit/test_utils.py new file mode 100755 index 00000000..ffaf3c8d --- /dev/null +++ b/service/test/unit/test_utils.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2014 ThoughtWorks, Inc. +# +# Pixelated is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# Pixelated is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# along with Pixelated. If not, see <http://www.gnu.org/licenses/>. +# You should have received a copy of the GNU Affero General Public License + + +import unittest + +from pixelated.utils import to_unicode + + +class UtilsTest(unittest.TestCase): + + def test_to_unicode_guesses_encoding_and_unicode_text(self): + text = 'coração' + self.assertEqual(u'coração', to_unicode(text)) + + def test_to_unicode_self(self): + text = u'already unicode' + self.assertEqual(text, to_unicode(text)) + + def test_to_unicode_empty_string(self): + text = '' + self.assertEqual(text, to_unicode(text)) -- cgit v1.2.3