summary | refs | log | tree | commit | diff
path: root/service/pixelated/adapter/search.py
diff options
context:
space:
mode:
author: Duda Dornelles <ddornell@thoughtworks.com> 2014-10-13 19:29:36 +0200
committer: Duda Dornelles <ddornell@thoughtworks.com> 2014-10-14 17:48:27 +0200
commit: cd4dffcfcb3f7473913e2b50571a182689efeedc (patch)
tree: db8ff0748e5a63efcb3d8d82a7b8cc774376d86a /service/pixelated/adapter/search.py
parent: 451218929536b59d3e0a72a9c651a1911e1bd1cd (diff)
No more tag_index - now whoosh does everything
Diffstat (limited to 'service/pixelated/adapter/search.py')
-rw-r--r-- service/pixelated/adapter/search.py 69
1 files changed, 66 insertions, 3 deletions
diff --git a/service/pixelated/adapter/search.py b/service/pixelated/adapter/search.py
index 4a2fb097..df45b76e 100644
--- a/service/pixelated/adapter/search.py
+++ b/service/pixelated/adapter/search.py
@@ -2,18 +2,69 @@ import os
import whoosh.index
from whoosh.fields import *
from whoosh.qparser import QueryParser
+from whoosh import sorting
class SearchEngine(object):
__slots__ = '_index'
INDEX_FOLDER = os.path.join(os.environ['HOME'], '.leap', 'search_index')
+ DEFAULT_TAGS = ['inbox', 'sent', 'drafts', 'trash']
def __init__(self):
if not os.path.exists(self.INDEX_FOLDER):
os.makedirs(self.INDEX_FOLDER)
self._index = self._create_index()
+ def _add_to_tags(self, tags, seen, skip_default_tags):
+ for tag, count in seen.iteritems():
+ if skip_default_tags and tag in self.DEFAULT_TAGS:
+ continue
+ if not tags.get(tag):
+ tags[tag] = {'ident': tag, 'name': tag, 'default': False, 'counts': {'total': 0, 'read': 0}, 'mails': []}
+ tags[tag]['counts']['read'] += count
+
+ def _search_tag_groups(self, query):
+ seen = None
+ query_string = (query + '*' if query else '*').lower()
+ query_parser = QueryParser('tag', self._index.schema)
+ options = {'limit': None, 'groupedby': sorting.FieldFacet('tag', allow_overlap=True), 'maptype': sorting.Count}
+
+ with self._index.searcher() as searcher:
+ total = searcher.search(query_parser.parse(query_string), **options).groups()
+ if not query:
+ seen = searcher.search(query_parser.parse('* AND flags:\\Seen'), **options).groups()
+
+ return seen, total
+
+ def _init_tags_defaults(self):
+ tags = {}
+ for default_tag in self.DEFAULT_TAGS:
+ tags[default_tag] = {
+ 'ident': default_tag,
+ 'name': default_tag,
+ 'default': True,
+ 'counts': {
+ 'total': 0,
+ 'read': 0
+ },
+ 'mails': []
+ }
+ return tags
+
+ def _build_tags(self, seen, total, skip_default_tags):
+ tags = {}
+ if not skip_default_tags:
+ tags = self._init_tags_defaults()
+ self._add_to_tags(tags, total, skip_default_tags)
+ if seen:
+ self._add_to_tags(tags, seen, skip_default_tags)
+ return tags.values()
+
+ def tags(self, query, skip_default_tags):
+ seen, total = self._search_tag_groups(query)
+ return self._build_tags(seen, total, skip_default_tags)
+
def _mail_schema(self):
return Schema(
ident=ID(stored=True, unique=True),
@@ -23,7 +74,8 @@ class SearchEngine(object):
bcc=ID(stored=False),
subject=TEXT(stored=False),
body=TEXT(stored=False),
- tag=KEYWORD(stored=False, commas=True))
+ tag=KEYWORD(stored=False, commas=True),
+ flags=KEYWORD(stored=False, commas=True))
def _create_index(self):
return whoosh.index.create_in(self.INDEX_FOLDER, self._mail_schema(), indexname='mails')
@@ -45,8 +97,10 @@ class SearchEngine(object):
'bcc': unicode(header.get('bcc', '')),
'tag': u','.join(tags),
'body': unicode(mdict['body']),
- 'ident': unicode(mdict['ident'])
+ 'ident': unicode(mdict['ident']),
+ 'flags': unicode(','.join(mail._flags))
}
+
writer.update_document(**index_data)
def index_mails(self, mails):
@@ -54,13 +108,22 @@ class SearchEngine(object):
for mail in mails:
self._index_mail(writer, mail)
+ def _search_with_options(self, options, query):
+ with self._index.searcher() as searcher:
+ query = QueryParser('body', self._index.schema).parse(query)
+ results = searcher.search(query, **options)
+ return results
+
def search(self, query):
+ options = {'limit': 100}
+
query = query.replace('\"', '')
query = query.replace('-in:', 'AND NOT tag:')
query = query.replace('in:all', '*')
+
with self._index.searcher() as searcher:
query = QueryParser('body', self._index.schema).parse(query)
- results = searcher.search(query, limit=100)
+ results = searcher.search(query, **options)
return [mail['ident'] for mail in results]
def remove_from_index(self, mail_id):