From c0974ba8fe9030d56bbfff8f93acec42af43b71e Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 5 Feb 2015 16:56:16 -0300 Subject: for #167, deduplicate contacts by largest --- service/pixelated/adapter/contacts.py | 30 ++++++++++++++++++++++++++++ service/pixelated/adapter/search/__init__.py | 3 ++- 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 service/pixelated/adapter/contacts.py (limited to 'service/pixelated/adapter') diff --git a/service/pixelated/adapter/contacts.py b/service/pixelated/adapter/contacts.py new file mode 100644 index 00000000..f286f220 --- /dev/null +++ b/service/pixelated/adapter/contacts.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2014 ThoughtWorks, Inc. +# +# Pixelated is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Pixelated is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Pixelated. If not, see <http://www.gnu.org/licenses/>.
+from email.utils import parseaddr + + +def address_duplication_filter(contacts): + contacts_by_mail = dict() + + for contact in contacts: + mail_address = extract_mail_address(contact) + previous = contacts_by_mail.get(mail_address, '') + contacts_by_mail[mail_address] = contact if len(contact) > len(previous) else previous + return contacts_by_mail.values() + + +def extract_mail_address(text): + return parseaddr(text)[1] diff --git a/service/pixelated/adapter/search/__init__.py b/service/pixelated/adapter/search/__init__.py index 5491632f..12829c65 100644 --- a/service/pixelated/adapter/search/__init__.py +++ b/service/pixelated/adapter/search/__init__.py @@ -18,6 +18,7 @@ from pixelated.support.encrypted_file_storage import EncryptedFileStorage import os from pixelated.adapter.model.status import Status +from pixelated.adapter.contacts import address_duplication_filter from pixelated.support.functional import flatten from whoosh.index import FileIndex from whoosh.fields import * @@ -211,6 +212,6 @@ class SearchEngine(object): groupedby=sorting.FieldFacet('bcc', allow_overlap=True)).groups() sender = searcher.search(sender.parse("*%s*" % query), limit=None, mask=restrict_q, groupedby=sorting.FieldFacet('sender', allow_overlap=True)).groups() - return flatten([to, cc, bcc, sender]) + return address_duplication_filter(flatten([to, cc, bcc, sender])) return [] -- cgit v1.2.3