summary | refs | log | tree | commit | diff
path: root/service/pixelated/adapter/search.py
diff options
context:
space:
mode:
author Folker Bernitt <fbernitt@thoughtworks.com> 2014-10-08 18:06:28 +0200
committer Folker Bernitt <fbernitt@thoughtworks.com> 2014-10-09 18:24:41 +0200
commit 85264c9ab3052dd844016d3ec44cb8af43dd5869 (patch)
tree d6a5cb6511039407fdef40804c5338347b71c551 /service/pixelated/adapter/search.py
parent fb8782d3f66b1068153a9948a01864ee52e39b10 (diff)
Added whoosh as search engine (issue #94).
Diffstat (limited to 'service/pixelated/adapter/search.py')
-rw-r--r-- service/pixelated/adapter/search.py 71
1 files changed, 71 insertions, 0 deletions
diff --git a/service/pixelated/adapter/search.py b/service/pixelated/adapter/search.py
new file mode 100644
index 00000000..6c64cc40
--- /dev/null
+++ b/service/pixelated/adapter/search.py
@@ -0,0 +1,71 @@
+from whoosh.fields import *
+from whoosh.filedb.filestore import RamStorage
+from whoosh.qparser import QueryParser
+
+
class SearchEngine(object):
    """Full-text index over mails, kept in a Whoosh ``RamStorage``.

    Only the mail ident is stored in the index; every other field is
    indexed for matching but not retrievable, so ``search`` returns idents
    and callers look the mails up elsewhere.
    """

    # A tuple, not a bare string: the string form only works by accident
    # for a single slot and is easy to break when a second one is added.
    __slots__ = ('_index',)

    def __init__(self):
        self._index = self._create_index()

    def _mail_schema(self):
        """Return the Whoosh schema describing one indexed mail.

        ``Schema``, ``ID``, ``TEXT`` and ``KEYWORD`` come from the module's
        ``whoosh.fields`` import.
        """
        return Schema(
            ident=ID(stored=True, unique=True),
            sender=ID(stored=False),
            to=ID(stored=False),
            cc=ID(stored=False),
            bcc=ID(stored=False),
            subject=TEXT(stored=False),
            body=TEXT(stored=False),
            tag=KEYWORD(stored=False, commas=True))

    def _create_index(self):
        """Create a fresh in-memory index named ``mails``."""
        return RamStorage().create_index(self._mail_schema(), indexname='mails')

    def index_mail(self, mail):
        """Add ``mail`` to the index, replacing any entry with the same ident.

        If building or writing the document fails, the writer is cancelled
        before the exception is re-raised so that it is not left open.
        """
        writer = self._index.writer()
        try:
            self._index_mail(writer, mail)
        except Exception:
            writer.cancel()
            raise
        writer.commit()

    def _index_mail(self, writer, mail):
        """Write one mail through ``writer`` without committing.

        :param writer: an open index writer; the caller commits it.
        :param mail: object exposing ``as_dict()`` (with ``header``, ``body``,
            ``ident`` and optionally ``tags``) and ``mailbox_name``.
        """
        mdict = mail.as_dict()
        header = mdict['header']
        # Work on a copy: appending to the list handed back by as_dict()
        # would leak the mailbox name into the caller's tag list.
        tags = list(mdict.get('tags') or ())
        # The lower-cased mailbox name is indexed as one more tag.
        tags.append(mail.mailbox_name.lower())
        index_data = {
            'sender': unicode(header.get('from', '')),
            'subject': unicode(header.get('subject', '')),
            'to': unicode(header.get('to', '')),
            'cc': unicode(header.get('cc', '')),
            'bcc': unicode(header.get('bcc', '')),
            'tag': u','.join(tags),
            'body': unicode(mdict['body']),
            'ident': unicode(mdict['ident'])
        }

        # update_document (rather than add_document) so the unique ``ident``
        # field replaces an existing entry for the same mail.
        writer.update_document(**index_data)

    def index_mails(self, mails):
        """Index every mail in ``mails`` using a single writer and commit.

        The commit happens even if one mail fails, so the mails written
        before the failure are kept; the exception still propagates.
        """
        writer = self._index.writer()
        try:
            for mail in mails:
                self._index_mail(writer, mail)
        finally:
            writer.commit()

    def search(self, query):
        """Return the idents of all mails matching ``query``.

        Before parsing, double quotes are dropped, ``-in:<x>`` is rewritten
        to ``AND NOT tag:<x>`` and ``in:all`` to ``*``. Terms without a
        field prefix are matched against the mail body.
        """
        query = query.replace('\"', '')
        query = query.replace('-in:', 'AND NOT tag:')
        query = query.replace('in:all', '*')
        with self._index.searcher() as searcher:
            parsed = QueryParser('body', self._index.schema).parse(query)
            # limit=None: the searcher's default limit of 10 would silently
            # truncate the result list.
            results = searcher.search(parsed, limit=None)
            return [mail['ident'] for mail in results]

    def remove_from_index(self, mail_id):
        """Delete the mail whose ident equals ``mail_id`` from the index."""
        writer = self._index.writer()
        try:
            # Idents are indexed as unicode(...); convert the same way so the
            # term matches whatever type the caller holds.
            writer.delete_by_term('ident', unicode(mail_id))
        finally:
            writer.commit()