summaryrefslogtreecommitdiff
path: root/service/pixelated/adapter/search.py
blob: ad5c3bbb91bdb9f2cb0746b22915123141d37c2a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import whoosh.index
from whoosh.fields import *
from whoosh.qparser import QueryParser
from whoosh import sorting


class SearchEngine(object):
    __slots__ = '_index'

    INDEX_FOLDER = os.path.join(os.environ['HOME'], '.leap', 'search_index')
    DEFAULT_TAGS = ['inbox', 'sent', 'drafts', 'trash']

    def __init__(self):
        """Ensure the index folder exists and build the index.

        The folder at ``INDEX_FOLDER`` is created (including parents) when
        it is not already present; the object returned by
        ``_create_index`` is then stored in ``self._index``.
        """
        index_dir = self.INDEX_FOLDER
        folder_missing = not os.path.exists(index_dir)
        if folder_missing:
            os.makedirs(index_dir)
        self._index = self._create_index()

    def _add_to_tags(self, tags, seen, skip_default_tags, count_type):
        """Accumulate the per-tag counts from ``seen`` into ``tags`` in place.

        :param tags: dict mapping a tag name to its tag entry; entries are
            created on demand and updated in place.
        :param seen: mapping of tag name to the count to add.
        :param skip_default_tags: when true, tags listed in
            ``DEFAULT_TAGS`` are ignored entirely.
        :param count_type: key of the entry's ``counts`` dict to increment
            (the callers in this class pass ``'total'`` or ``'read'``).
        """
        # items() is available on Python 2 and Python 3 mappings alike,
        # whereas iteritems() only exists on Python 2 dicts.
        for tag, count in seen.items():
            if skip_default_tags and tag in self.DEFAULT_TAGS:
                continue
            if not tags.get(tag):
                # First time this tag shows up: start from zeroed counters.
                tags[tag] = {
                    'ident': tag,
                    'name': tag,
                    'default': False,
                    'counts': {'total': 0, 'read': 0},
                    'mails': [],
                }
            tags[tag]['counts'][count_type] += count

    def _search_tag_groups(self, query):
        """Run the grouped tag searches and return ``(seen, total)``.

        ``total`` holds the groups of a search on the ``tag`` field for
        ``query`` lowercased with ``*`` appended, or for ``*`` alone when
        ``query`` is empty. ``seen`` holds the groups of a second search
        restricted to the Seen flag; it is only run when ``query`` is
        empty and is ``None`` otherwise.
        """
        read_groups = None
        if query:
            tag_pattern = (query + '*').lower()
        else:
            tag_pattern = '*'
        parser = QueryParser('tag', self._index.schema)
        tag_facet = sorting.FieldFacet('tag', allow_overlap=True)
        search_kwargs = {'limit': None, 'groupedby': tag_facet, 'maptype': sorting.Count}

        with self._index.searcher() as searcher:
            # groups() is read while the searcher is still open.
            total_groups = searcher.search(parser.parse(tag_pattern), **search_kwargs).groups()
            if not query:
                seen_query = parser.parse('* AND flags:\\Seen')
                read_groups = searcher.search(seen_query, **search_kwargs).groups()

        return read_groups, total_groups

    def _init_tags_defaults(self):
        """Return a fresh dict with one zeroed entry per default tag.

        Every name in ``DEFAULT_TAGS`` maps to its own entry, flagged with
        ``'default': True``, zero ``total``/``read`` counts and an empty
        ``mails`` list.
        """
        return {
            name: {
                'ident': name,
                'name': name,
                'default': True,
                'counts': {'total': 0, 'read': 0},
                'mails': [],
            }
            for name in self.DEFAULT_TAGS
        }

    def _build_tags(self, seen, total, skip_default_tags):
        """Combine ``total`` and ``seen`` counts into tag entries.

        Starts from the default tag entries unless ``skip_default_tags``
        is true, adds ``total`` to the ``'total'`` counters and, when
        ``seen`` is non-empty, adds ``seen`` to the ``'read'`` counters.
        Returns the values of the resulting tag dict.
        """
        if skip_default_tags:
            collected = {}
        else:
            collected = self._init_tags_defaults()

        self._add_to_tags(collected, total, skip_default_tags, count_type='total')
        if not seen:
            return collected.values()

        self._add_to_tags(collected, seen, skip_default_tags, count_type='read')
        return collected.values()

    def tags(self, query, skip_default_tags):
        """Return the tag entries for ``query``.

        The grouped counts come from ``_search_tag_groups`` and are turned
        into entries by ``_build_tags``; ``skip_default_tags`` is passed
        through to the latter.
        """
        groups = self._search_tag_groups(query)
        read_counts, total_counts = groups
        return self._build_tags(read_counts, total_counts, skip_default_tags)

    def _mail_schema(self):
        """Build the schema used for the mail index.

        Only ``ident`` is stored (and marked unique); every other field is
        indexed without being stored. ``tag`` and ``flags`` are
        comma-separated keyword fields.
        """
        fields = {
            'ident': ID(stored=True, unique=True),
            'sender': ID(stored=False),
            'to': ID(stored=False),
            'cc': ID(stored=False),
            'bcc': ID(stored=False),
            'subject': TEXT(stored=False),
            'body': TEXT(stored=False),
            'tag': KEYWORD(stored=False, commas=True),
            'flags': KEYWORD(stored=False, commas=True),
            'raw': TEXT(stored=False),
        }
        return Schema(**fields)

    def _create_index(self):
        """Create the index named ``mails`` inside ``INDEX_FOLDER``.

        NOTE(review): ``create_in`` is called unconditionally; confirm
        whether an index already present in the folder is meant to be
        replaced on every instantiation.
        """
        schema = self._mail_schema()
        return whoosh.index.create_in(self.INDEX_FOLDER, schema, indexname='mails')

    def index_mail(self, mail):
        """Index a single ``mail`` inside its own writer context."""
        index_writer = self._index.writer()
        with index_writer as active_writer:
            self._index_mail(active_writer, mail)

    def _index_mail(self, writer, mail):
        """Add or update the index document for ``mail`` through ``writer``.

        :param writer: object exposing ``update_document(**fields)``.
        :param mail: object providing ``as_dict()`` (with ``header``,
            ``body`` and ``ident`` keys and an optional ``tags`` key),
            ``mailbox_name``, ``flags`` and ``raw``.

        The lowercased mailbox name is indexed as an additional tag. Field
        values are converted with ``unicode`` (this module targets
        Python 2).
        """
        mdict = mail.as_dict()
        header = mdict['header']
        # Work on a copy: appending to the list handed out by as_dict()
        # would leak the mailbox name into data owned by the mail, and a
        # copy also accepts any iterable of tags, not just a list.
        tags = list(mdict.get('tags', []))
        tags.append(mail.mailbox_name.lower())
        index_data = {
            'sender': unicode(header.get('from', '')),
            'subject': unicode(header.get('subject', '')),
            'to': unicode(header.get('to', '')),
            'cc': unicode(header.get('cc', '')),
            'bcc': unicode(header.get('bcc', '')),
            'tag': u','.join(tags),
            'body': unicode(mdict['body']),
            'ident': unicode(mdict['ident']),
            'flags': unicode(','.join(mail.flags)),
            'raw': unicode(mail.raw)
        }

        writer.update_document(**index_data)

    def index_mails(self, mails):
        """Index every mail in ``mails`` inside one shared writer context."""
        with self._index.writer() as shared_writer:
            for item in mails:
                self._index_mail(shared_writer, item)

    def _search_with_options(self, options, query):
        """Parse ``query`` against the ``raw`` field and search with ``options``.

        ``options`` is expanded into keyword arguments of
        ``searcher.search``.
        """
        with self._index.searcher() as searcher:
            parsed_query = QueryParser('raw', self._index.schema).parse(query)
            found = searcher.search(parsed_query, **options)
        # NOTE(review): the results object is handed back after the
        # searcher context has exited; confirm callers can still read it.
        return found

    def search(self, query, window=25, page=1, all_mails=False):
        """Search the index for ``query``.

        The query text is first converted by ``prepare_query``. With
        ``all_mails`` set, the result of ``_search_all_mails`` is
        returned; otherwise the result of ``_paginated_search_mails`` for
        the given ``window`` and ``page``.
        """
        prepared = self.prepare_query(query)

        if all_mails:
            return self._search_all_mails(prepared)
        return self._paginated_search_mails(prepared, window, page)

    def _search_all_mails(self, query):
        """Return the ``ident`` of every hit for ``query``, with no limit."""
        idents = []
        with self._index.searcher() as searcher:
            hits = searcher.search(query, limit=None)
            # Stored fields are read while the searcher is still open.
            for hit in hits:
                idents.append(hit['ident'])
        return idents

    def _paginated_search_mails(self, query, window, page):
        """Return one page of hits for ``query`` plus a count.

        ``page`` is coerced to an int and clamped to at least 1;
        ``window`` is coerced to an int and used as the page length.
        Returns ``(idents, count)`` where ``idents`` lists the ``ident``
        of each hit on the page and ``count`` is the sum of the per-tag
        group counts of the underlying results.
        """
        page_number = max(int(page), 1)
        page_length = int(window)

        with self._index.searcher() as searcher:
            facet = sorting.FieldFacet('tag', allow_overlap=True, maptype=sorting.Count)
            result_page = searcher.search_page(query, page_number, pagelen=page_length, groupedby=facet)
            idents = [hit['ident'] for hit in result_page]
            # NOTE(review): with allow_overlap a mail carrying several tags
            # presumably counts once per tag; confirm this is the intended total.
            group_counts = result_page.results.groups()
            return idents, sum(group_counts.values())

    def prepare_query(self, query):
        """Rewrite the shorthand in ``query`` and parse it on the ``raw`` field.

        Double quotes are removed, ``-in:`` becomes ``AND NOT tag:`` and
        ``in:all`` becomes ``*``, applied in that order.
        """
        rewrites = (
            ('\"', ''),
            ('-in:', 'AND NOT tag:'),
            ('in:all', '*'),
        )
        text = query
        for needle, substitute in rewrites:
            text = text.replace(needle, substitute)
        return QueryParser('raw', self._index.schema).parse(text)

    def remove_from_index(self, mail_id):
        """Delete the document whose ``ident`` term equals ``mail_id``.

        The writer is used as a context manager, like the other write
        paths of this class, so a failing delete is no longer followed by
        an unconditional ``commit()``; the writer's own exit handling
        decides how to finish.
        """
        with self._index.writer() as writer:
            writer.delete_by_term('ident', mail_id)