diff options
Diffstat (limited to 'service/pixelated/adapter/search/__init__.py')
-rw-r--r-- | service/pixelated/adapter/search/__init__.py | 27 |
1 files changed, 18 insertions, 9 deletions
diff --git a/service/pixelated/adapter/search/__init__.py b/service/pixelated/adapter/search/__init__.py index 0b1a1034..91eff4c3 100644 --- a/service/pixelated/adapter/search/__init__.py +++ b/service/pixelated/adapter/search/__init__.py @@ -17,10 +17,11 @@ from pixelated.support.encrypted_file_storage import EncryptedFileStorage import os +import re from pixelated.adapter.model.status import Status from pixelated.adapter.search.contacts import contacts_suggestions from whoosh.index import FileIndex -from whoosh.fields import * +from whoosh.fields import Schema, ID, KEYWORD, TEXT, NUMERIC from whoosh.qparser import QueryParser from whoosh.qparser import MultifieldParser from whoosh import sorting @@ -116,8 +117,9 @@ class SearchEngine(object): return FileIndex.create(storage, self._mail_schema(), indexname='mails') def index_mail(self, mail): - with self._index.writer() as writer: - self._index_mail(writer, mail) + with self._write_lock: + with self._index.writer() as writer: + self._index_mail(writer, mail) def _index_mail(self, writer, mail): mdict = mail.as_dict() @@ -125,23 +127,30 @@ class SearchEngine(object): tags = mdict.get('tags', []) tags.append(mail.mailbox_name.lower()) bounced = mail.bounced if mail.bounced else [''] + index_data = { - 'sender': unicode(header.get('from', '')), - 'subject': unicode(header.get('subject', '')), + 'sender': self._unicode_header_field(header.get('from', '')), + 'subject': self._unicode_header_field(header.get('subject', '')), 'date': milliseconds(header.get('date', '')), - 'to': u','.join(header.get('to', [''])), - 'cc': u','.join(header.get('cc', [''])), - 'bcc': u','.join(header.get('bcc', [''])), + 'to': u','.join([h.decode('utf-8') for h in header.get('to', [''])]), + 'cc': u','.join([h.decode('utf-8') for h in header.get('cc', [''])]), + 'bcc': u','.join([h.decode('utf-8') for h in header.get('bcc', [''])]), 'tag': u','.join(unique(tags)), 'bounced': u','.join(bounced), 'body': unicode(mdict['textPlainBody']), 'ident': unicode(mdict['ident']), 'flags': unicode(','.join(unique(mail.flags))), - 'raw': unicode(mail.raw) + 'raw': unicode(mail.raw.decode('utf-8')) } writer.update_document(**index_data) + def _unicode_header_field(self, field_value): + if not field_value: + return None + + return unicode(field_value.decode('utf-8')) + def index_mails(self, mails, callback=None): try: with self._write_lock: |