diff options
Diffstat (limited to 'service/pixelated/adapter/search')
-rw-r--r-- | service/pixelated/adapter/search/__init__.py | 40 | ||||
-rw-r--r-- | service/pixelated/adapter/search/contacts.py | 11 | ||||
-rw-r--r-- | service/pixelated/adapter/search/index_storage_key.py | 42 |
3 files changed, 66 insertions, 27 deletions
diff --git a/service/pixelated/adapter/search/__init__.py b/service/pixelated/adapter/search/__init__.py index b8d3e7ca..56ab2255 100644 --- a/service/pixelated/adapter/search/__init__.py +++ b/service/pixelated/adapter/search/__init__.py @@ -18,6 +18,8 @@ from pixelated.support.encrypted_file_storage import EncryptedFileStorage import os import re +import dateutil.parser +import time from pixelated.adapter.model.status import Status from pixelated.adapter.search.contacts import contacts_suggestions from whoosh.index import FileIndex @@ -27,7 +29,6 @@ from whoosh.qparser import MultifieldParser from whoosh.writing import AsyncWriter from whoosh import sorting from pixelated.support.functional import unique -from pixelated.support.date import milliseconds import traceback @@ -102,7 +103,6 @@ class SearchEngine(object): to=KEYWORD(stored=False, commas=True), cc=KEYWORD(stored=False, commas=True), bcc=KEYWORD(stored=False, commas=True), - bounced=KEYWORD(stored=False, commas=True), subject=TEXT(stored=False), date=NUMERIC(stored=False, sortable=True, bits=64, signed=False), body=TEXT(stored=False), @@ -121,32 +121,38 @@ class SearchEngine(object): def _index_mail(self, writer, mail): mdict = mail.as_dict() header = mdict['header'] - tags = mdict.get('tags', []) - tags.append(mail.mailbox_name.lower()) - bounced = mail.bounced if mail.bounced else [''] + tags = set(mdict.get('tags', {})) + tags.add(mail.mailbox_name.lower()) index_data = { - 'sender': self._unicode_header_field(header.get('from', '')), - 'subject': self._unicode_header_field(header.get('subject', '')), - 'date': milliseconds(header.get('date', '')), - 'to': u','.join([h.decode('utf-8') for h in header.get('to', [''])]), - 'cc': u','.join([h.decode('utf-8') for h in header.get('cc', [''])]), - 'bcc': u','.join([h.decode('utf-8') for h in header.get('bcc', [''])]), + 'sender': self._empty_string_to_none(header.get('from', '')), + 'subject': self._empty_string_to_none(header.get('subject', '')), + 'date': self._format_utc_integer(header.get('date', '')), + 'to': self._format_recipient(header, 'to'), + 'cc': self._format_recipient(header, 'cc'), + 'bcc': self._format_recipient(header, 'bcc'), 'tag': u','.join(unique(tags)), - 'bounced': u','.join(bounced), - 'body': unicode(mdict['textPlainBody']), + 'body': unicode(mdict['textPlainBody'] if 'textPlainBody' in mdict else mdict['body']), 'ident': unicode(mdict['ident']), 'flags': unicode(','.join(unique(mail.flags))), - 'raw': unicode(mail.raw.decode('utf-8')) + 'raw': unicode(mail.raw) } writer.update_document(**index_data) - def _unicode_header_field(self, field_value): + def _format_utc_integer(self, date): + timetuple = dateutil.parser.parse(date).utctimetuple() + return time.strftime('%s', timetuple) + + def _format_recipient(self, headers, name): + list = headers.get(name, ['']) + return u','.join(list) if list else u'' + + def _empty_string_to_none(self, field_value): if not field_value: return None - - return unicode(field_value.decode('utf-8')) + else: + return field_value def index_mails(self, mails, callback=None): try: diff --git a/service/pixelated/adapter/search/contacts.py b/service/pixelated/adapter/search/contacts.py index 0dfeb15b..0729e146 100644 --- a/service/pixelated/adapter/search/contacts.py +++ b/service/pixelated/adapter/search/contacts.py @@ -31,21 +31,12 @@ def address_duplication_filter(contacts): return contacts_by_mail.values() -def bounced_addresses_filter(searcher, contacts): - query = QueryParser('bounced', searcher.schema).parse('*') - bounced_addresses = searcher.search(query, - limit=None, - groupedby=sorting.FieldFacet('bounced', - allow_overlap=True)).groups() - return set(contacts) - set(flatten([bounced_addresses])) - - def extract_mail_address(text): return parseaddr(text)[1] def contacts_suggestions(query, searcher): - return address_duplication_filter(bounced_addresses_filter(searcher, search_addresses(searcher, query))) if query else [] + return address_duplication_filter(search_addresses(searcher, query)) if query else [] def search_addresses(searcher, query): diff --git a/service/pixelated/adapter/search/index_storage_key.py b/service/pixelated/adapter/search/index_storage_key.py new file mode 100644 index 00000000..b2761849 --- /dev/null +++ b/service/pixelated/adapter/search/index_storage_key.py @@ -0,0 +1,42 @@ +# +# Copyright (c) 2015 ThoughtWorks, Inc. +# +# Pixelated is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Pixelated is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Pixelated. If not, see <http://www.gnu.org/licenses/>. +import base64 +from twisted.internet import defer +import os + + +class SearchIndexStorageKey(object): + __slots__ = '_soledad' + + def __init__(self, soledad): + self._soledad = soledad + + @defer.inlineCallbacks + def get_or_create_key(self): + docs = yield self._soledad.get_from_index('by-type', 'index_key') + + if len(docs): + key = docs[0].content['value'] + else: + key = self._new_index_key() + yield self._store_key_in_soledad(key) + defer.returnValue(key) + + def _new_index_key(self): + return os.urandom(64) # 32 for encryption, 32 for hmac + + def _store_key_in_soledad(self, index_key): + return self._soledad.create_doc(dict(type='index_key', value=base64.encodestring(index_key))) |