summaryrefslogtreecommitdiff
path: root/service/pixelated/adapter/search.py
blob: 4a2fb097cb4a3ea1d8966b7c74159504a34cd13f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import whoosh.index
from whoosh.fields import *
from whoosh.qparser import QueryParser


class SearchEngine(object):
    """Whoosh-backed index used to search mails by content, addresses and tags.

    The index lives in ``INDEX_FOLDER`` under the name ``mails``. Only the
    mail ``ident`` is stored in the index; every other field is indexed for
    searching but not retrievable from the search results.
    """

    __slots__ = ('_index',)

    # expanduser() resolves to $HOME when it is set and falls back to the
    # account's home directory otherwise, so importing this module does not
    # raise KeyError in environments that do not export HOME.
    INDEX_FOLDER = os.path.join(os.path.expanduser('~'), '.leap', 'search_index')

    def __init__(self):
        """Make sure the index folder exists and create the mail index."""
        try:
            os.makedirs(self.INDEX_FOLDER)
        except OSError:
            # Most likely the folder already exists (possibly created by a
            # concurrent process); anything else is a real error.
            if not os.path.isdir(self.INDEX_FOLDER):
                raise
        self._index = self._create_index()

    def _mail_schema(self):
        """Return the Whoosh schema describing one indexed mail."""
        return Schema(
            ident=ID(stored=True, unique=True),
            sender=ID(stored=False),
            to=ID(stored=False),
            cc=ID(stored=False),
            bcc=ID(stored=False),
            subject=TEXT(stored=False),
            body=TEXT(stored=False),
            tag=KEYWORD(stored=False, commas=True))

    def _create_index(self):
        """Create the ``mails`` index inside ``INDEX_FOLDER`` and return it."""
        # NOTE(review): create_in() builds a new index on every start;
        # confirm that replacing a previously persisted index is intended.
        return whoosh.index.create_in(self.INDEX_FOLDER, self._mail_schema(), indexname='mails')

    def index_mail(self, mail):
        """Add or update a single mail in the index.

        :param mail: object exposing ``as_dict()`` and ``mailbox_name``.
        """
        with self._index.writer() as writer:
            self._index_mail(writer, mail)

    def _index_mail(self, writer, mail):
        """Write the searchable fields of ``mail`` through ``writer``.

        The lower-cased mailbox name is indexed as an additional tag.

        :param writer: an open index writer; the caller commits it.
        :param mail: object exposing ``as_dict()`` and ``mailbox_name``.
        """
        mdict = mail.as_dict()
        header = mdict['header']
        # Work on a copy: appending to the list returned by as_dict() would
        # leak the mailbox name into the mail's own tags.
        tags = list(mdict.get('tags') or [])
        tags.append(mail.mailbox_name.lower())
        index_data = {
            'sender': unicode(header.get('from', '')),
            'subject': unicode(header.get('subject', '')),
            'to': unicode(header.get('to', '')),
            'cc': unicode(header.get('cc', '')),
            'bcc': unicode(header.get('bcc', '')),
            'tag': u','.join(tags),
            'body': unicode(mdict['body']),
            'ident': unicode(mdict['ident'])
        }
        # 'ident' is the unique field of the schema, so update_document
        # replaces an already indexed mail with the same ident.
        writer.update_document(**index_data)

    def index_mails(self, mails):
        """Add or update several mails using a single writer.

        :param mails: iterable of mail objects (see ``index_mail``).
        """
        with self._index.writer() as writer:
            for mail in mails:
                self._index_mail(writer, mail)

    def search(self, query):
        """Run ``query`` against the index and return the matching idents.

        Before parsing, double quotes are removed, ``-in:`` is rewritten to
        ``AND NOT tag:`` and ``in:all`` to ``*``. Terms without an explicit
        field are searched in the mail body.

        :param query: query string.
        :return: list of ``ident`` values, at most 100 entries.
        """
        query = query.replace('"', '')
        query = query.replace('-in:', 'AND NOT tag:')
        query = query.replace('in:all', '*')
        with self._index.searcher() as searcher:
            query = QueryParser('body', self._index.schema).parse(query)
            results = searcher.search(query, limit=100)
            # Collect the idents while the searcher is still open.
            return [mail['ident'] for mail in results]

    def remove_from_index(self, mail_id):
        """Delete the mail identified by ``mail_id`` from the index.

        The writer is used as a context manager so the deletion is committed
        on success and cancelled if it raises, instead of committing
        unconditionally.

        :param mail_id: value of the ``ident`` field of the mail to remove.
        """
        with self._index.writer() as writer:
            writer.delete_by_term('ident', mail_id)