1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
import os
import whoosh.index
from whoosh.fields import *
from whoosh.qparser import QueryParser
class SearchEngine(object):
    """Whoosh-backed full-text index over mails.

    The index lives in ``INDEX_FOLDER`` under the index name ``mails``.
    Only the ``ident`` field is stored, so searches return mail identifiers
    rather than mail contents.
    """

    __slots__ = ('_index',)

    # expanduser('~') resolves to $HOME when it is set, but does not raise
    # KeyError at import time when the variable is missing.
    INDEX_FOLDER = os.path.join(
        os.path.expanduser('~'), '.leap', 'search_index')

    def __init__(self):
        """Ensure the index folder exists and open a new index in it."""
        if not os.path.exists(self.INDEX_FOLDER):
            os.makedirs(self.INDEX_FOLDER)
        self._index = self._create_index()

    def _mail_schema(self):
        """Return the Whoosh schema describing one indexed mail.

        ``ident`` is the only stored field and is flagged unique; every
        other field is indexed for searching but not stored.
        """
        return Schema(
            ident=ID(stored=True, unique=True),
            sender=ID(stored=False),
            to=ID(stored=False),
            cc=ID(stored=False),
            bcc=ID(stored=False),
            subject=TEXT(stored=False),
            body=TEXT(stored=False),
            tag=KEYWORD(stored=False, commas=True))

    def _create_index(self):
        """Create the ``mails`` index inside ``INDEX_FOLDER``."""
        # NOTE(review): create_in builds a fresh index, so anything indexed
        # by a previous SearchEngine instance is presumably discarded here --
        # confirm that re-indexing on start-up is the intended behaviour.
        return whoosh.index.create_in(
            self.INDEX_FOLDER, self._mail_schema(), indexname='mails')

    def index_mail(self, mail):
        """Add or refresh a single mail in the index.

        :param mail: object exposing ``as_dict()`` and ``mailbox_name``.
        """
        with self._index.writer() as writer:
            self._index_mail(writer, mail)

    def _index_mail(self, writer, mail):
        """Write one mail through an already opened index writer.

        :param writer: open Whoosh index writer; the caller is responsible
            for committing it.
        :param mail: object exposing ``as_dict()`` (a dict with ``header``,
            ``body``, ``ident`` and optionally ``tags``) and
            ``mailbox_name``.
        """
        mdict = mail.as_dict()
        header = mdict['header']

        # Work on a copy: appending the mailbox name to the list returned by
        # as_dict() would leak the extra tag back into the caller's data.
        tags = list(mdict.get('tags') or [])
        tags.append(mail.mailbox_name.lower())

        index_data = {
            'sender': unicode(header.get('from', '')),
            'subject': unicode(header.get('subject', '')),
            'to': unicode(header.get('to', '')),
            'cc': unicode(header.get('cc', '')),
            'bcc': unicode(header.get('bcc', '')),
            'tag': u','.join(tags),
            'body': unicode(mdict['body']),
            'ident': unicode(mdict['ident']),
        }

        # update_document (rather than add_document) is used together with
        # the unique ``ident`` field so re-indexing replaces the old entry.
        writer.update_document(**index_data)

    def index_mails(self, mails):
        """Index several mails using one writer and a single commit.

        :param mails: iterable of mail objects accepted by ``index_mail``.
        """
        with self._index.writer() as writer:
            for mail in mails:
                self._index_mail(writer, mail)

    def search(self, query, limit=100):
        """Return the ``ident`` of every mail matching ``query``.

        Before parsing, double quotes are removed, ``-in:`` is rewritten to
        ``AND NOT tag:`` and ``in:all`` to ``*``. Terms without an explicit
        field are looked up in ``body``.

        :param query: user supplied query string.
        :param limit: maximum number of hits handed to the Whoosh searcher
            (defaults to 100, the previously hard-coded value).
        :returns: list of stored ``ident`` values, one per hit.
        """
        query = query.replace('"', '')
        # '-in:' has to be rewritten before 'in:all', otherwise '-in:all'
        # would be turned into '-*'.
        query = query.replace('-in:', 'AND NOT tag:')
        query = query.replace('in:all', '*')

        with self._index.searcher() as searcher:
            parsed = QueryParser('body', self._index.schema).parse(query)
            hits = searcher.search(parsed, limit=limit)
            # Build the list while the searcher is still open.
            return [hit['ident'] for hit in hits]

    def remove_from_index(self, mail_id):
        """Delete the mail whose ``ident`` equals ``mail_id`` from the index.

        :param mail_id: identifier passed to ``delete_by_term`` for the
            ``ident`` field.
        """
        # The writer's context manager commits on success and cancels when
        # the deletion raises, instead of committing unconditionally.
        with self._index.writer() as writer:
            writer.delete_by_term('ident', mail_id)
|