From c0974ba8fe9030d56bbfff8f93acec42af43b71e Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 5 Feb 2015 16:56:16 -0300 Subject: for #167, deduplicate contacts by largest --- service/pixelated/adapter/contacts.py | 30 ++++++++++++++++++++++++ service/pixelated/adapter/search/__init__.py | 3 ++- service/test/integration/test_contacts.py | 23 ++++++++++++++++++ service/test/unit/adapter/test_contacts.py | 35 ++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 service/pixelated/adapter/contacts.py create mode 100644 service/test/unit/adapter/test_contacts.py (limited to 'service') diff --git a/service/pixelated/adapter/contacts.py b/service/pixelated/adapter/contacts.py new file mode 100644 index 00000000..f286f220 --- /dev/null +++ b/service/pixelated/adapter/contacts.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2014 ThoughtWorks, Inc. +# +# Pixelated is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Pixelated is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Pixelated. If not, see <http://www.gnu.org/licenses/>.
from email.utils import parseaddr


def address_duplication_filter(contacts):
    """Drop contacts that repeat a mail address, keeping the longest spelling.

    Contacts are grouped by the address ``extract_mail_address`` finds in
    them. Within a group the longest string wins (a tie keeps the one seen
    first), so ``'John <john@example.com>'`` beats ``'john@example.com'``.

    :param contacts: iterable of contact strings. Empty or ``None`` entries
        carry no information and are skipped.
    :return: a list with one contact per distinct address, ordered by the
        first appearance of each address in ``contacts``.
    """
    longest_by_key = {}
    # Keys are tracked separately because a plain dict does not keep
    # insertion order on Python 2, which would make the result order random.
    key_order = []

    for contact in contacts:
        if not contact:
            continue
        key = _deduplication_key(contact)
        previous = longest_by_key.get(key)
        if previous is None:
            key_order.append(key)
            longest_by_key[key] = contact
        elif len(contact) > len(previous):
            longest_by_key[key] = contact

    return [longest_by_key[key] for key in key_order]


def _deduplication_key(contact):
    """Return the value two contacts must share to count as duplicates."""
    mail_address = extract_mail_address(contact)
    if not mail_address:
        # No address could be parsed: fall back to the raw text so unrelated
        # address-less entries are not all merged under a shared '' key.
        return contact
    local_part, at_sign, domain = mail_address.rpartition('@')
    if not at_sign:
        return mail_address
    # Only the domain is case-insensitive; the local part is left untouched.
    return local_part + at_sign + domain.lower()


def extract_mail_address(text):
    """Return the address part of ``text`` as parsed by ``email.utils.parseaddr``.

    :param text: a contact string such as ``'Name <user@example.com>'`` or a
        bare ``'user@example.com'``.
    :return: the second element of the ``parseaddr`` result, i.e. the address
        without the display name; ``''`` when ``parseaddr`` finds none.
    """
    return parseaddr(text)[1]
Principal ') + formatted_input_mail.with_cc('Recipient Copied ') + formatted_input_mail.with_bcc('Recipient Carbon ') + formatted_input_mail = formatted_input_mail.build_input_mail() + + self.client.add_mail_to_inbox(input_mail) + self.client.add_mail_to_inbox(formatted_input_mail) + + d = self.get_contacts(query='Recipient') + + def _assert(contacts): + print contacts + self.assertEquals(3, len(contacts)) + self.assertTrue('Recipient Principal ' in contacts) + self.assertTrue('Recipient Copied ' in contacts) + self.assertTrue('Recipient Carbon ' in contacts) + d.addCallback(_assert) + return d diff --git a/service/test/unit/adapter/test_contacts.py b/service/test/unit/adapter/test_contacts.py new file mode 100644 index 00000000..ee9bf618 --- /dev/null +++ b/service/test/unit/adapter/test_contacts.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2014 ThoughtWorks, Inc. +# +# Pixelated is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Pixelated is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Pixelated. If not, see . 
import unittest

from pixelated.adapter.contacts import address_duplication_filter
from pixelated.adapter.contacts import extract_mail_address


class TestContacts(unittest.TestCase):
    """Unit tests for the contact de-duplication helpers."""

    def test_contacts_filter_duplication_by_largest(self):
        # Two spellings of the same address plus one unrelated address: only
        # the longer spelling of the duplicated address may survive.
        contacts = ['John Large Name <john@name.example.com>', 'john@name.example.com', 'dont.delete@example.com']
        contacts_filtered = address_duplication_filter(contacts)
        self.assertIn('dont.delete@example.com', contacts_filtered)
        self.assertIn('John Large Name <john@name.example.com>', contacts_filtered)
        self.assertNotIn('john@name.example.com', contacts_filtered)

    def test_extract_mail_address_from_contact(self):
        # The display name must be stripped, leaving only the bare address.
        full_address = 'John Large Name <john@name.example.com>'
        mail_address = 'john@name.example.com'

        self.assertEqual(mail_address, extract_mail_address(full_address))