diff options
author | Victor Shyba <victor.shyba@gmail.com> | 2015-02-05 16:56:16 -0300 |
---|---|---|
committer | Victor Shyba <victor.shyba@gmail.com> | 2015-02-05 16:56:21 -0300 |
commit | c0974ba8fe9030d56bbfff8f93acec42af43b71e (patch) | |
tree | 1f594ca1efcebdb86117f6e5d54b179cb71cce97 | |
parent | dd8059dc6938f64947e590e170189fb751d6be4b (diff) |
for #167, deduplicate contacts by largest
-rw-r--r-- | service/pixelated/adapter/contacts.py | 30 | ||||
-rw-r--r-- | service/pixelated/adapter/search/__init__.py | 3 | ||||
-rw-r--r-- | service/test/integration/test_contacts.py | 23 | ||||
-rw-r--r-- | service/test/unit/adapter/test_contacts.py | 35 |
4 files changed, 90 insertions, 1 deletions
diff --git a/service/pixelated/adapter/contacts.py b/service/pixelated/adapter/contacts.py new file mode 100644 index 00000000..f286f220 --- /dev/null +++ b/service/pixelated/adapter/contacts.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2014 ThoughtWorks, Inc. +# +# Pixelated is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Pixelated is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Pixelated. If not, see <http://www.gnu.org/licenses/>. +from email.utils import parseaddr + + +def address_duplication_filter(contacts): + contacts_by_mail = dict() + + for contact in contacts: + mail_address = extract_mail_address(contact) + previous = contacts_by_mail.get(mail_address, '') + contacts_by_mail[mail_address] = contact if len(contact) > len(previous) else previous + return contacts_by_mail.values() + + +def extract_mail_address(text): + return parseaddr(text)[1] diff --git a/service/pixelated/adapter/search/__init__.py b/service/pixelated/adapter/search/__init__.py index 5491632f..12829c65 100644 --- a/service/pixelated/adapter/search/__init__.py +++ b/service/pixelated/adapter/search/__init__.py @@ -18,6 +18,7 @@ from pixelated.support.encrypted_file_storage import EncryptedFileStorage import os from pixelated.adapter.model.status import Status +from pixelated.adapter.contacts import address_duplication_filter from pixelated.support.functional import flatten from whoosh.index import FileIndex from whoosh.fields import * @@ -211,6 +212,6 @@ class SearchEngine(object): groupedby=sorting.FieldFacet('bcc', allow_overlap=True)).groups() sender = searcher.search(sender.parse("*%s*" % query), limit=None, mask=restrict_q, groupedby=sorting.FieldFacet('sender', allow_overlap=True)).groups() - return flatten([to, cc, bcc, sender]) + return address_duplication_filter(flatten([to, cc, bcc, sender])) return [] diff --git a/service/test/integration/test_contacts.py b/service/test/integration/test_contacts.py index 4974c471..c5baa094 100644 --- a/service/test/integration/test_contacts.py +++ b/service/test/integration/test_contacts.py @@ -57,3 +57,26 @@ class ContactsTest(SoledadTestBase): self.assertFalse('recipient@trash.com' in contacts) d.addCallback(_assert) return d + + def test_deduplication_on_same_mail_address_using_largest(self): + input_mail = MailBuilder().with_tags(['important']).build_input_mail() + + formatted_input_mail = MailBuilder().with_tags(['important']) + formatted_input_mail.with_to('Recipient Principal <recipient@to.com>') + formatted_input_mail.with_cc('Recipient Copied <recipient@cc.com>') + formatted_input_mail.with_bcc('Recipient Carbon <recipient@bcc.com>') + formatted_input_mail = formatted_input_mail.build_input_mail() + + self.client.add_mail_to_inbox(input_mail) + self.client.add_mail_to_inbox(formatted_input_mail) + + d = self.get_contacts(query='Recipient') + + def _assert(contacts): + print contacts + self.assertEquals(3, len(contacts)) + self.assertTrue('Recipient Principal <recipient@to.com>' in contacts) + self.assertTrue('Recipient Copied <recipient@cc.com>' in contacts) + self.assertTrue('Recipient Carbon <recipient@bcc.com>' in contacts) + d.addCallback(_assert) + return d diff --git a/service/test/unit/adapter/test_contacts.py b/service/test/unit/adapter/test_contacts.py new file mode 100644 index 00000000..ee9bf618 --- /dev/null +++ b/service/test/unit/adapter/test_contacts.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2014 ThoughtWorks, Inc. +# +# Pixelated is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Pixelated is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Pixelated. If not, see <http://www.gnu.org/licenses/>. +import unittest + +from pixelated.adapter.contacts import address_duplication_filter +from pixelated.adapter.contacts import extract_mail_address + + +class TestContacts(unittest.TestCase): + + def test_contacts_filter_duplication_by_largest(self): + contacts = ['John Large Name <john@name.example.com>', 'john@name.example.com', 'dont.delete@example.com'] + contacts_filtered = address_duplication_filter(contacts) + self.assertIn('dont.delete@example.com', contacts_filtered) + self.assertIn('John Large Name <john@name.example.com>', contacts_filtered) + self.assertNotIn('john@name.example.com', contacts_filtered) + + def test_extract_mail_address_from_contact(self): + full_address = 'John Large Name <john@name.example.com>' + mail_address = 'john@name.example.com' + + self.assertEquals(mail_address, extract_mail_address(full_address)) |