summaryrefslogtreecommitdiff
path: root/src/leap/bitmask/mail/mailbox_indexer.py
blob: da123276720cdb6b2107258a49594c65034103a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# -*- coding: utf-8 -*-
# mailbox_indexer.py
# Copyright (C) 2014 LEAP
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
.. :py:module::mailbox_indexer

Local tables to store the message Unique Identifiers for a given mailbox.
"""
import re
import uuid

from leap.bitmask.mail.constants import METAMSGID_RE


def _maybe_first_query_item(thing):
    """
    Return the first item the returned query result, or None
    if empty.
    """
    try:
        return thing[0][0]
    except (TypeError, IndexError):
        return None


class WrongMetaDocIDError(Exception):
    """
    Raised when a doc_id does not match the expected MetaMsg format.
    """


def sanitize(mailbox_uuid):
    """
    Replace every hyphen in a mailbox uuid with an underscore.

    :param mailbox_uuid: the mailbox uuid
    :type mailbox_uuid: str
    :return: the uuid with all "-" turned into "_"
    :rtype: str
    """
    chunks = mailbox_uuid.split("-")
    return "_".join(chunks)


def check_good_uuid(mailbox_uuid):
    """
    Verify that the given mailbox identifier can be parsed as a UUID.

    The identifier is valid if ``uuid.UUID`` accepts ``str(mailbox_uuid)``.

    :param mailbox_uuid: the uuid to check
    :type mailbox_uuid: str
    :return: None
    :raises: AssertionError if a wrong uuid was passed.
    """
    try:
        uuid.UUID(str(mailbox_uuid))
    except (AttributeError, ValueError):
        message = "the mbox_id is not a valid uuid: %s" % mailbox_uuid
        raise AssertionError(message)
    return None


class MailboxIndexer(object):
    """
    This class contains the commands needed to create, modify and query the
    local-only UID tables for a given mailbox.

    Its purpose is to keep a local-only index with the messages in each
    mailbox, mainly to satisfy the demands of the IMAP specification, but
    useful too for any effective listing of the messages in a mailbox.

    Since the incoming mail can be processed at any time in any replica, it's
    preferred not to attempt to maintain a global chronological index.

    These indexes are Message Attributes needed for the IMAP specification (rfc
    3501), although they can be useful for other non-imap store
    implementations.

    Every mailbox gets its own table, named ``table_preffix`` followed by the
    sanitized mailbox uuid. Each table has two columns: ``uid`` (an
    autoincremented integer primary key) and ``hash`` (unique text, holding
    the doc_id of the MetaMsg).
    """
    # The uids are expected to be 32-bits values, but the ROWIDs in sqlite
    # are 64-bit values. I *don't* think it really matters for any
    # practical use, but it's good to remember we've got that difference going
    # on.

    store = None
    table_preffix = "leapmail_uid_"

    def __init__(self, store):
        """
        :param store: the storage backend. It has to provide the
                      ``raw_sqlcipher_query`` and ``raw_sqlcipher_operation``
                      methods that this class delegates to.
        """
        self.store = store

    def _query(self, *args, **kw):
        """
        Forward the arguments to ``store.raw_sqlcipher_query``.
        """
        assert self.store is not None
        return self.store.raw_sqlcipher_query(*args, **kw)

    def _operation(self, *args, **kw):
        """
        Forward the arguments to ``store.raw_sqlcipher_operation``.
        """
        assert self.store is not None
        return self.store.raw_sqlcipher_operation(*args, **kw)

    def _table_name(self, mailbox_uuid):
        """
        Build the name of the UID table for a given mailbox.

        :param mailbox_uuid: the mailbox uuid (already validated by the
                             caller)
        :type mailbox_uuid: str
        :rtype: str
        """
        return "{preffix}{name}".format(
            preffix=self.table_preffix, name=sanitize(mailbox_uuid))

    def create_table(self, mailbox_uuid):
        """
        Create the UID table for a given mailbox, if it does not exist yet.

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid was passed.
        """
        check_good_uuid(mailbox_uuid)
        sql = ("CREATE TABLE if not exists {table}( "
               "uid  INTEGER PRIMARY KEY AUTOINCREMENT, "
               "hash TEXT UNIQUE NOT NULL)".format(
                   table=self._table_name(mailbox_uuid)))
        return self._operation(sql)

    def delete_table(self, mailbox_uuid):
        """
        Delete the UID table for a given mailbox, if it exists.

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid was passed.
        """
        check_good_uuid(mailbox_uuid)
        sql = "DROP TABLE if exists {table}".format(
            table=self._table_name(mailbox_uuid))
        return self._operation(sql)

    def insert_doc(self, mailbox_uuid, doc_id):
        """
        Insert the doc_id for a MetaMsg in the UID table for a given mailbox.

        The doc_id must be in the format:

            M-<mailbox>-<content-hash-of-the-message>

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :param doc_id: the doc_id for the MetaMsg
        :type doc_id: str
        :return: a deferred that will fire with the uid of the newly inserted
                 document. If the insertion or the lookup fails, the
                 traceback is printed and the failure is not propagated.
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid or an empty doc_id was
                 passed; WrongMetaDocIDError if the doc_id does not match
                 the expected format.
        """
        check_good_uuid(mailbox_uuid)
        if not doc_id:
            # Explicit raise instead of a bare assert, so that the check is
            # not stripped when running with -O.
            raise AssertionError("a doc_id is needed")

        doc_id_pattern = METAMSGID_RE.format(mbox_uuid=sanitize(mailbox_uuid))
        if not re.findall(doc_id_pattern, doc_id):
            raise WrongMetaDocIDError("Wrong format for the MetaMsg doc_id")

        table = self._table_name(mailbox_uuid)
        sql_insert = "INSERT INTO {table} VALUES (NULL, ?)".format(
            table=table)
        # Look the uid up by the (unique) hash that was just inserted rather
        # than by MAX(rowid): the latter can return the uid of a different
        # row if another insertion lands in between the two statements.
        sql_uid = "SELECT uid FROM {table} WHERE hash=?".format(table=table)
        values = (doc_id,)

        d = self._operation(sql_insert, values)
        d.addCallback(lambda _: self._query(sql_uid, values))
        d.addCallback(_maybe_first_query_item)
        # Best-effort: print the error instead of propagating it.
        d.addErrback(lambda f: f.printTraceback())
        return d

    def delete_doc_by_uid(self, mailbox_uuid, uid):
        """
        Delete the entry for a MetaMsg in the UID table for a given mailbox.

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :param uid: the UID of the message.
        :type uid: int
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid or a falsy uid was passed.
        """
        check_good_uuid(mailbox_uuid)
        if not uid:
            raise AssertionError("a uid is needed")
        sql = "DELETE FROM {table} WHERE uid=?".format(
            table=self._table_name(mailbox_uuid))
        values = (uid,)
        return self._query(sql, values)

    def delete_doc_by_hash(self, mailbox_uuid, doc_id):
        """
        Delete the entry for a MetaMsg in the UID table for a given mailbox.

        The doc_id must be in the format:

            M-<mailbox_uuid>-<content-hash-of-the-message>

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :param doc_id: the doc_id for the MetaMsg
        :type doc_id: str
        :return: a deferred that will fire when the deletion has succeeded.
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid or an empty doc_id was
                 passed.
        """
        check_good_uuid(mailbox_uuid)
        if not doc_id:
            raise AssertionError("a doc_id is needed")
        sql = "DELETE FROM {table} WHERE hash=?".format(
            table=self._table_name(mailbox_uuid))
        values = (doc_id,)
        return self._query(sql, values)

    def get_doc_id_from_uid(self, mailbox_uuid, uid):
        """
        Get the doc_id for a MetaMsg in the UID table for a given mailbox.

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :param uid: the uid for the MetaMsg for this mailbox
        :type uid: int
        :return: a deferred that will fire with the doc_id, or with None if
                 there is no entry for that uid.
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid was passed.
        """
        check_good_uuid(mailbox_uuid)
        sql = "SELECT hash from {table} WHERE uid=?".format(
            table=self._table_name(mailbox_uuid))
        values = (uid,)
        d = self._query(sql, values)
        d.addCallback(_maybe_first_query_item)
        return d

    def get_uid_from_doc_id(self, mailbox_uuid, doc_id):
        """
        Get the uid for a MetaMsg in the UID table for a given mailbox.

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :param doc_id: the doc_id for the MetaMsg
        :type doc_id: str
        :return: a deferred that will fire with the uid, or with None if
                 there is no entry for that doc_id.
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid was passed.
        """
        check_good_uuid(mailbox_uuid)
        sql = "SELECT uid from {table} WHERE hash=?".format(
            table=self._table_name(mailbox_uuid))
        values = (doc_id,)
        d = self._query(sql, values)
        d.addCallback(_maybe_first_query_item)
        return d

    def get_doc_ids_from_uids(self, mailbox_uuid, uids):
        """
        Not implemented yet.

        :raises: NotImplementedError, always.
        """
        # For IMAP relative numbering /sequences.
        # XXX dereference the range (n,*)
        raise NotImplementedError()

    def count(self, mailbox_uuid):
        """
        Get the number of entries in the UID table for a given mailbox.

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :return: a deferred that will fire with an integer returning the
                 count, or with 0 if the query fails.
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid was passed.
        """
        check_good_uuid(mailbox_uuid)
        sql = "SELECT Count(*) FROM {table};".format(
            table=self._table_name(mailbox_uuid))
        d = self._query(sql)
        d.addCallback(_maybe_first_query_item)
        # Any failure in the query is reported as an empty mailbox.
        d.addErrback(lambda _: 0)
        return d

    def get_next_uid(self, mailbox_uuid):
        """
        Get the next integer beyond the highest UID count for a given mailbox.

        This is expected by the IMAP implementation. There are no guarantees
        that a document to be inserted in the future gets the returned UID: the
        only thing that can be assured is that it will be equal or greater than
        the value returned.

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :return: a deferred that will fire with an integer returning the next
                 uid.
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid was passed.
        """
        check_good_uuid(mailbox_uuid)
        d = self.get_last_uid(mailbox_uuid)
        d.addCallback(lambda uid: uid + 1)
        return d

    def get_last_uid(self, mailbox_uuid):
        """
        Get the highest UID for a given mailbox.

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :return: a deferred that will fire with the highest rowid in the
                 table, or with 0 if there is none.
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid was passed.
        """
        check_good_uuid(mailbox_uuid)
        sql = ("SELECT MAX(rowid) FROM {table} "
               "LIMIT 1;").format(table=self._table_name(mailbox_uuid))

        def getit(result):
            # MAX() over an empty table yields NULL (None): report it as 0.
            return _maybe_first_query_item(result) or 0

        d = self._query(sql)
        d.addCallback(getit)
        return d

    def all_uid_iter(self, mailbox_uuid):
        """
        Get a sequence of all the uids in this mailbox.

        :param mailbox_uuid: the mailbox uuid
        :type mailbox_uuid: str
        :return: a deferred that will fire with a list of uids.
        :rtype: Deferred
        :raises: AssertionError if a wrong uuid was passed.
        """
        check_good_uuid(mailbox_uuid)

        sql = "SELECT uid from {table} ".format(
            table=self._table_name(mailbox_uuid))

        def get_results(result):
            return [row[0] for row in result]

        d = self._query(sql)
        d.addCallback(get_results)
        return d