summary | refs | log | tree | commit | diff
path: root/service/pixelated/adapter/search
diff options
context:
space:
mode:
Diffstat (limited to 'service/pixelated/adapter/search')
-rw-r--r-- service/pixelated/adapter/search/__init__.py 40
-rw-r--r-- service/pixelated/adapter/search/contacts.py 11
-rw-r--r-- service/pixelated/adapter/search/index_storage_key.py 42
3 files changed, 66 insertions, 27 deletions
diff --git a/service/pixelated/adapter/search/__init__.py b/service/pixelated/adapter/search/__init__.py
index b8d3e7ca..56ab2255 100644
--- a/service/pixelated/adapter/search/__init__.py
+++ b/service/pixelated/adapter/search/__init__.py
@@ -18,6 +18,8 @@ from pixelated.support.encrypted_file_storage import EncryptedFileStorage
import os
import re
+import dateutil.parser
+import time
from pixelated.adapter.model.status import Status
from pixelated.adapter.search.contacts import contacts_suggestions
from whoosh.index import FileIndex
@@ -27,7 +29,6 @@ from whoosh.qparser import MultifieldParser
from whoosh.writing import AsyncWriter
from whoosh import sorting
from pixelated.support.functional import unique
-from pixelated.support.date import milliseconds
import traceback
@@ -102,7 +103,6 @@ class SearchEngine(object):
to=KEYWORD(stored=False, commas=True),
cc=KEYWORD(stored=False, commas=True),
bcc=KEYWORD(stored=False, commas=True),
- bounced=KEYWORD(stored=False, commas=True),
subject=TEXT(stored=False),
date=NUMERIC(stored=False, sortable=True, bits=64, signed=False),
body=TEXT(stored=False),
@@ -121,32 +121,38 @@ class SearchEngine(object):
def _index_mail(self, writer, mail):
mdict = mail.as_dict()
header = mdict['header']
- tags = mdict.get('tags', [])
- tags.append(mail.mailbox_name.lower())
- bounced = mail.bounced if mail.bounced else ['']
+ tags = set(mdict.get('tags', {}))
+ tags.add(mail.mailbox_name.lower())
index_data = {
- 'sender': self._unicode_header_field(header.get('from', '')),
- 'subject': self._unicode_header_field(header.get('subject', '')),
- 'date': milliseconds(header.get('date', '')),
- 'to': u','.join([h.decode('utf-8') for h in header.get('to', [''])]),
- 'cc': u','.join([h.decode('utf-8') for h in header.get('cc', [''])]),
- 'bcc': u','.join([h.decode('utf-8') for h in header.get('bcc', [''])]),
+ 'sender': self._empty_string_to_none(header.get('from', '')),
+ 'subject': self._empty_string_to_none(header.get('subject', '')),
+ 'date': self._format_utc_integer(header.get('date', '')),
+ 'to': self._format_recipient(header, 'to'),
+ 'cc': self._format_recipient(header, 'cc'),
+ 'bcc': self._format_recipient(header, 'bcc'),
'tag': u','.join(unique(tags)),
- 'bounced': u','.join(bounced),
- 'body': unicode(mdict['textPlainBody']),
+ 'body': unicode(mdict['textPlainBody'] if 'textPlainBody' in mdict else mdict['body']),
'ident': unicode(mdict['ident']),
'flags': unicode(','.join(unique(mail.flags))),
- 'raw': unicode(mail.raw.decode('utf-8'))
+ 'raw': unicode(mail.raw)
}
writer.update_document(**index_data)
- def _unicode_header_field(self, field_value):
+    def _format_utc_integer(self, date):
+        """Convert a date header string into UTC epoch seconds.
+
+        :param date: date string as found in the mail's ``date`` header; it is
+            parsed with ``dateutil.parser.parse``.
+        :return: seconds since the Unix epoch as a decimal string (the same
+            type the previous ``time.strftime('%s', ...)`` call produced).
+        """
+        import calendar  # local import keeps this change self-contained
+
+        timetuple = dateutil.parser.parse(date).utctimetuple()
+        # strftime('%s') is a non-portable libc extension that interprets the
+        # tuple as *local* time, skewing the result by the host's UTC offset.
+        # calendar.timegm() treats the tuple as UTC on every platform.
+        return str(calendar.timegm(timetuple))
+
+    def _format_recipient(self, headers, name):
+        """Join the recipients stored under ``name`` into one string.
+
+        :param headers: mapping of header names to values.
+        :param name: header to read, e.g. ``'to'``, ``'cc'`` or ``'bcc'``.
+        :return: the header's entries joined with commas, or ``u''`` when the
+            header is missing or holds an empty/falsy value.
+        """
+        recipients = headers.get(name, [''])
+        if not recipients:
+            return u''
+        return u','.join(recipients)
+
+ def _empty_string_to_none(self, field_value):
if not field_value:
return None
-
- return unicode(field_value.decode('utf-8'))
+ else:
+ return field_value
def index_mails(self, mails, callback=None):
try:
diff --git a/service/pixelated/adapter/search/contacts.py b/service/pixelated/adapter/search/contacts.py
index 0dfeb15b..0729e146 100644
--- a/service/pixelated/adapter/search/contacts.py
+++ b/service/pixelated/adapter/search/contacts.py
@@ -31,21 +31,12 @@ def address_duplication_filter(contacts):
return contacts_by_mail.values()
-def bounced_addresses_filter(searcher, contacts):
- query = QueryParser('bounced', searcher.schema).parse('*')
- bounced_addresses = searcher.search(query,
- limit=None,
- groupedby=sorting.FieldFacet('bounced',
- allow_overlap=True)).groups()
- return set(contacts) - set(flatten([bounced_addresses]))
-
-
def extract_mail_address(text):
return parseaddr(text)[1]
def contacts_suggestions(query, searcher):
- return address_duplication_filter(bounced_addresses_filter(searcher, search_addresses(searcher, query))) if query else []
+ return address_duplication_filter(search_addresses(searcher, query)) if query else []
def search_addresses(searcher, query):
diff --git a/service/pixelated/adapter/search/index_storage_key.py b/service/pixelated/adapter/search/index_storage_key.py
new file mode 100644
index 00000000..b2761849
--- /dev/null
+++ b/service/pixelated/adapter/search/index_storage_key.py
@@ -0,0 +1,42 @@
+#
+# Copyright (c) 2015 ThoughtWorks, Inc.
+#
+# Pixelated is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Pixelated is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with Pixelated. If not, see <http://www.gnu.org/licenses/>.
+import base64
+from twisted.internet import defer
+import os
+
+
+class SearchIndexStorageKey(object):
+    """Fetch, or lazily create, the key kept in the ``index_key`` document.
+
+    The key lives in a Soledad document of type ``index_key`` whose ``value``
+    field holds the base64 encoding of the raw key bytes.
+    """
+
+    __slots__ = '_soledad'
+
+    def __init__(self, soledad):
+        """:param soledad: object providing ``get_from_index`` and ``create_doc``."""
+        self._soledad = soledad
+
+    @defer.inlineCallbacks
+    def get_or_create_key(self):
+        """Return the raw index key, creating and storing one if none exists.
+
+        :return: Deferred firing with the raw key bytes.
+        """
+        docs = yield self._soledad.get_from_index('by-type', 'index_key')
+
+        if docs:
+            # The document holds the base64 text written by
+            # _store_key_in_soledad; decode it so that a stored key and a
+            # freshly generated one are returned in the same (raw) form.
+            key = base64.decodestring(docs[0].content['value'])
+        else:
+            key = self._new_index_key()
+            yield self._store_key_in_soledad(key)
+        defer.returnValue(key)
+
+    def _new_index_key(self):
+        """Generate 64 random bytes with ``os.urandom``."""
+        return os.urandom(64)  # 32 for encryption, 32 for hmac
+
+    def _store_key_in_soledad(self, index_key):
+        """Store ``index_key`` base64-encoded in a new ``index_key`` document.
+
+        :return: the result of ``create_doc`` (the caller yields on it).
+        """
+        return self._soledad.create_doc(dict(type='index_key', value=base64.encodestring(index_key)))