summaryrefslogtreecommitdiff
path: root/client/src/leap/soledad/client/encdecpool.py
blob: 7e807dcf7c2a48a5504ced535d9740b5fddb90b2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
# -*- coding: utf-8 -*-
# encdecpool.py
# Copyright (C) 2015 LEAP
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


"""
A pool of encryption/decryption concurrent and parallel workers for using
during synchronization.
"""


import json
import logging
from uuid import uuid4

from twisted.internet.task import LoopingCall
from twisted.internet import threads
from twisted.internet import defer
from twisted.python import log

from leap.soledad.common.document import SoledadDocument
from leap.soledad.common import soledad_assert

from leap.soledad.client.crypto import encrypt_docstr
from leap.soledad.client.crypto import decrypt_doc_dict


logger = logging.getLogger(__name__)


#
# Encrypt/decrypt pools of workers
#

class SyncEncryptDecryptPool(object):
    """
    Base class for encrypter/decrypter pools.
    """

    def __init__(self, crypto, sync_db):
        """
        Initialize the pool of encryption-workers.

        :param crypto: A SoledadCryto instance to perform the encryption.
        :type crypto: leap.soledad.crypto.SoledadCrypto

        :param sync_db: A database connection handle
        :type sync_db: pysqlcipher.dbapi2.Connection
        """
        self._crypto = crypto
        self._sync_db = sync_db
        self._delayed_call = None
        self._started = False

    def start(self):
        self._started = True

    def stop(self):
        self._started = False
        # maybe cancel the next delayed call
        if self._delayed_call \
                and not self._delayed_call.called:
            self._delayed_call.cancel()

    @property
    def running(self):
        return self._started

    def _runOperation(self, query, *args):
        """
        Run an operation on the sync db.

        :param query: The query to be executed.
        :type query: str
        :param args: A list of query arguments.
        :type args: list

        :return: A deferred that will fire when the operation in the database
                 has finished.
        :rtype: twisted.internet.defer.Deferred
        """
        return self._sync_db.runOperation(query, *args)

    def _runQuery(self, query, *args):
        """
        Run a query on the sync db.

        :param query: The query to be executed.
        :type query: str
        :param args: A list of query arguments.
        :type args: list

        :return: A deferred that will fire with the results of the database
                 query.
        :rtype: twisted.internet.defer.Deferred
        """
        return self._sync_db.runQuery(query, *args)


def encrypt_doc_task(doc_id, doc_rev, content, key, secret):
    """
    Encrypt the content of the given document.

    :param doc_id: The document id.
    :type doc_id: str
    :param doc_rev: The document revision.
    :type doc_rev: str
    :param content: The serialized content of the document.
    :type content: str
    :param key: The encryption key.
    :type key: str
    :param secret: The Soledad storage secret (used for MAC auth).
    :type secret: str

    :return: A tuple containing the doc id, revision and encrypted content.
    :rtype: tuple(str, str, str)
    """
    encrypted_content = encrypt_docstr(
        content, doc_id, doc_rev, key, secret)
    return doc_id, doc_rev, encrypted_content


class SyncEncrypterPool(SyncEncryptDecryptPool):
    """
    Pool of workers that spawn subprocesses to execute the symmetric encryption
    of documents to be synced.
    """
    TABLE_NAME = "docs_tosync"
    FIELD_NAMES = "doc_id PRIMARY KEY, rev, content"

    ENCRYPT_LOOP_PERIOD = 2

    def __init__(self, *args, **kwargs):
        """
        Initialize the sync encrypter pool.
        """
        SyncEncryptDecryptPool.__init__(self, *args, **kwargs)
        # TODO delete already synced files from database

    def start(self):
        """
        Start the encrypter pool.
        """
        SyncEncryptDecryptPool.start(self)
        logger.debug("Starting the encryption loop...")

    def stop(self):
        """
        Stop the encrypter pool.
        """

        SyncEncryptDecryptPool.stop(self)

    def encrypt_doc(self, doc):
        """
        Encrypt document asynchronously then insert it on
        local staging database.

        :param doc: The document to be encrypted.
        :type doc: SoledadDocument
        """
        soledad_assert(self._crypto is not None, "need a crypto object")
        docstr = doc.get_json()
        key = self._crypto.doc_passphrase(doc.doc_id)
        secret = self._crypto.secret
        args = doc.doc_id, doc.rev, docstr, key, secret
        # encrypt asynchronously
        # TODO use dedicated threadpool / move to ampoule
        d = threads.deferToThread(
            encrypt_doc_task, *args)
        d.addCallback(self._encrypt_doc_cb)
        return d

    def _encrypt_doc_cb(self, result):
        """
        Insert results of encryption routine into the local sync database.

        :param result: A tuple containing the doc id, revision and encrypted
                       content.
        :type result: tuple(str, str, str)
        """
        doc_id, doc_rev, content = result
        return self._insert_encrypted_local_doc(doc_id, doc_rev, content)

    def _insert_encrypted_local_doc(self, doc_id, doc_rev, content):
        """
        Insert the contents of the encrypted doc into the local sync
        database.

        :param doc_id: The document id.
        :type doc_id: str
        :param doc_rev: The document revision.
        :type doc_rev: str
        :param content: The serialized content of the document.
        :type content: str
        """
        query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?)" \
                % (self.TABLE_NAME,)
        return self._runOperation(query, (doc_id, doc_rev, content))

    @defer.inlineCallbacks
    def get_encrypted_doc(self, doc_id, doc_rev):
        """
        Get an encrypted document from the sync db.

        :param doc_id: The id of the document.
        :type doc_id: str
        :param doc_rev: The revision of the document.
        :type doc_rev: str

        :return: A deferred that will fire with the encrypted content of the
                 document or None if the document was not found in the sync
                 db.
        :rtype: twisted.internet.defer.Deferred
        """
        query = "SELECT content FROM %s WHERE doc_id=? and rev=?" \
                % self.TABLE_NAME
        result = yield self._runQuery(query, (doc_id, doc_rev))
        if result:
            logger.debug("Found doc on sync db: %s" % doc_id)
            val = result.pop()
            defer.returnValue(val[0])
        logger.debug("Did not find doc on sync db: %s" % doc_id)
        defer.returnValue(None)

    def delete_encrypted_doc(self, doc_id, doc_rev):
        """
        Delete an encrypted document from the sync db.

        :param doc_id: The id of the document.
        :type doc_id: str
        :param doc_rev: The revision of the document.
        :type doc_rev: str

        :return: A deferred that will fire when the operation in the database
                 has finished.
        :rtype: twisted.internet.defer.Deferred
        """
        query = "DELETE FROM %s WHERE doc_id=? and rev=?" \
                % self.TABLE_NAME
        self._runOperation(query, (doc_id, doc_rev))


def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret,
                     idx):
    """
    Decrypt the content of the given document.

    :param doc_id: The document id.
    :type doc_id: str
    :param doc_rev: The document revision.
    :type doc_rev: str
    :param content: The encrypted content of the document as JSON dict.
    :type content: dict
    :param gen: The generation corresponding to the modification of that
                document.
    :type gen: int
    :param trans_id: The transaction id corresponding to the modification of
                     that document.
    :type trans_id: str
    :param key: The encryption key.
    :type key: str
    :param secret: The Soledad storage secret (used for MAC auth).
    :type secret: str
    :param idx: The index of this document in the current sync process.
    :type idx: int

    :return: A tuple containing the doc id, revision and encrypted content.
    :rtype: tuple(str, str, str)
    """
    decrypted_content = decrypt_doc_dict(content, doc_id, doc_rev, key, secret)
    return doc_id, doc_rev, decrypted_content, gen, trans_id, idx


class SyncDecrypterPool(SyncEncryptDecryptPool):
    """
    Pool of workers that spawn subprocesses to execute the symmetric decryption
    of documents that were received.

    The decryption of the received documents is done in two steps:

        1. Encrypted documents are stored in the sync db by the actual soledad
           sync loop.
        2. The soledad sync loop tells us how many documents we should expect
           to process.
        3. We start a decrypt-and-process loop:

            a. Encrypted documents are fetched.
            b. Encrypted documents are decrypted.
            c. The longest possible list of decrypted documents are inserted
               in the soledad db (this depends on which documents have already
               arrived and which documents have already been decrypte, because
               the order of insertion in the local soledad db matters).
            d. Processed documents are deleted from the database.

        4. When we have processed as many documents as we should, the loop
           finishes.
    """
    TABLE_NAME = "docs_received"
    FIELD_NAMES = "doc_id PRIMARY KEY, rev, content, gen, " \
                  "trans_id, encrypted, idx, sync_id"

    """
    Period of recurrence of the periodic decrypting task, in seconds.
    """
    DECRYPT_LOOP_PERIOD = 0.5

    def __init__(self, *args, **kwargs):
        """
        Initialize the decrypter pool, and setup a dict for putting the
        results of the decrypted docs until they are picked by the insert
        routine that gets them in order.

        :param insert_doc_cb: A callback for inserting received documents from
                              target. If not overriden, this will call u1db
                              insert_doc_from_target in synchronizer, which
                              implements the TAKE OTHER semantics.
        :type insert_doc_cb: function
        :param source_replica_uid: The source replica uid, used to find the
                                   correct callback for inserting documents.
        :type source_replica_uid: str
        """
        self._insert_doc_cb = kwargs.pop("insert_doc_cb")
        self.source_replica_uid = kwargs.pop("source_replica_uid")

        SyncEncryptDecryptPool.__init__(self, *args, **kwargs)

        self._docs_to_process = None
        self._processed_docs = 0
        self._last_inserted_idx = 0

        self._loop = LoopingCall(self._decrypt_and_recurse)

    def start(self, docs_to_process):
        """
        Set the number of documents we expect to process.

        This should be called by the during the sync exchange process as soon
        as we know how many documents are arriving from the server.

        :param docs_to_process: The number of documents to process.
        :type docs_to_process: int
        """
        SyncEncryptDecryptPool.start(self)
        self._decrypted_docs_indexes = set()
        self._sync_id = uuid4().hex
        self._docs_to_process = docs_to_process
        self._deferred = defer.Deferred()
        d = self._init_db()
        d.addCallback(lambda _: self._loop.start(self.DECRYPT_LOOP_PERIOD))
        return d

    def stop(self):
        if self._loop.running:
            self._loop.stop()
        self._finish()
        SyncEncryptDecryptPool.stop(self)

    def _init_db(self):
        """
        Ensure sync_id column is present then
        Empty the received docs table of the sync database.

        :return: A deferred that will fire when the operation in the database
                 has finished.
        :rtype: twisted.internet.defer.Deferred
        """
        ensure_sync_id_column = "ALTER TABLE %s ADD COLUMN sync_id" % self.TABLE_NAME
        d = self._runQuery(ensure_sync_id_column)

        def empty_received_docs(_):
            query = "DELETE FROM %s WHERE sync_id <> ?" % (self.TABLE_NAME,)
            return self._runOperation(query, (self._sync_id,))

        d.addCallbacks(empty_received_docs, empty_received_docs)
        return d

    def _errback(self, failure):
        log.err(failure)
        self._deferred.errback(failure)
        self._processed_docs = 0
        self._last_inserted_idx = 0

    @property
    def deferred(self):
        """
        Deferred that will be fired when the decryption loop has finished
        processing all the documents.
        """
        return self._deferred

    def insert_encrypted_received_doc(
            self, doc_id, doc_rev, content, gen, trans_id, idx):
        """
        Decrypt and insert a received document into local staging area to be
        processed later on.

        :param doc_id: The document ID.
        :type doc_id: str
        :param doc_rev: The document Revision
        :param doc_rev: str
        :param content: The content of the document
        :type content: dict
        :param gen: The document Generation
        :type gen: int
        :param trans_id: Transaction ID
        :type trans_id: str
        :param idx: The index of this document in the current sync process.
        :type idx: int

        :return: A deferred that will fire after the decrypted document has
                 been inserted in the sync db.
        :rtype: twisted.internet.defer.Deferred
        """
        soledad_assert(self._crypto is not None, "need a crypto object")

        key = self._crypto.doc_passphrase(doc_id)
        secret = self._crypto.secret
        args = doc_id, doc_rev, content, gen, trans_id, key, secret, idx
        # decrypt asynchronously
        # TODO use dedicated threadpool / move to ampoule
        d = threads.deferToThread(
            decrypt_doc_task, *args)
        # callback will insert it for later processing
        d.addCallback(self._decrypt_doc_cb)
        return d

    def insert_received_doc(
            self, doc_id, doc_rev, content, gen, trans_id, idx):
        """
        Insert a document that is not symmetrically encrypted.
        We store it in the staging area (the decrypted_docs dictionary) to be
        picked up in order as the preceding documents are decrypted.

        :param doc_id: The document id
        :type doc_id: str
        :param doc_rev: The document revision
        :param doc_rev: str or dict
        :param content: The content of the document
        :type content: dict
        :param gen: The document generation
        :type gen: int
        :param trans_id: The transaction id
        :type trans_id: str
        :param idx: The index of this document in the current sync process.
        :type idx: int

        :return: A deferred that will fire when the operation in the database
                 has finished.
        :rtype: twisted.internet.defer.Deferred
        """
        if not isinstance(content, str):
            content = json.dumps(content)
        query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?, ?)" \
                % self.TABLE_NAME
        d = self._runOperation(
            query, (doc_id, doc_rev, content, gen, trans_id, 0,
                    idx, self._sync_id))
        d.addCallback(lambda _: self._decrypted_docs_indexes.add(idx))
        return d

    def _delete_received_docs(self, doc_ids):
        """
        Delete a list of received docs after get them inserted into the db.

        :param doc_id: Document ID list.
        :type doc_id: list

        :return: A deferred that will fire when the operation in the database
                 has finished.
        :rtype: twisted.internet.defer.Deferred
        """
        placeholders = ', '.join('?' for _ in doc_ids)
        query = "DELETE FROM '%s' WHERE doc_id in (%s)" \
                % (self.TABLE_NAME, placeholders)
        return self._runOperation(query, (doc_ids))

    def _decrypt_doc_cb(self, result):
        """
        Store the decryption result in the sync db from where it will later be
        picked by _process_decrypted_docs.

        :param result: A tuple containing the document's id, revision,
                       content, generation, transaction id and sync index.
        :type result: tuple(str, str, str, int, str, int)

        :return: A deferred that will fire after the document has been
                 inserted in the sync db.
        :rtype: twisted.internet.defer.Deferred
        """
        doc_id, rev, content, gen, trans_id, idx = result
        logger.debug("Sync decrypter pool: decrypted doc %s: %s %s %s"
                     % (doc_id, rev, gen, trans_id))
        return self.insert_received_doc(
            doc_id, rev, content, gen, trans_id, idx)

    def _get_docs(self, encrypted=None, sequence=None):
        """
        Get documents from the received docs table in the sync db.

        :param encrypted: If not None, only return documents with encrypted
                          field equal to given parameter.
        :type encrypted: bool or None
        :param order_by: The name of the field to order results.

        :return: A deferred that will fire with the results of the database
                 query.
        :rtype: twisted.internet.defer.Deferred
        """
        query = "SELECT doc_id, rev, content, gen, trans_id, encrypted, " \
                "idx FROM %s" % self.TABLE_NAME
        parameters = []
        if encrypted or sequence:
            query += " WHERE sync_id = ? and"
            parameters += [self._sync_id]
        if encrypted:
            query += " encrypted = ?"
            parameters += [int(encrypted)]
        if sequence:
            query += " idx in (" + ', '.join('?' * len(sequence)) + ")"
            parameters += [int(i) for i in sequence]
        query += " ORDER BY idx ASC"
        return self._runQuery(query, parameters)

    @defer.inlineCallbacks
    def _get_insertable_docs(self):
        """
        Return a list of non-encrypted documents ready to be inserted.

        :return: A deferred that will fire with the list of insertable
                 documents.
        :rtype: twisted.internet.defer.Deferred
        """
        # Here, check in memory what are the insertable indexes that can
        # form a sequence starting from the last inserted index
        sequence = []
        insertable_docs = []
        next_index = self._last_inserted_idx + 1
        while next_index in self._decrypted_docs_indexes:
            sequence.append(str(next_index))
            next_index += 1
        # Then fetch all the ones ready for insertion.
        if sequence:
            insertable_docs = yield self._get_docs(encrypted=False,
                                                   sequence=sequence)
        defer.returnValue(insertable_docs)

    @defer.inlineCallbacks
    def _process_decrypted_docs(self):
        """
        Fetch as many decrypted documents as can be taken from the expected
        order and insert them in the local replica.

        :return: A deferred that will fire with the list of inserted
                 documents.
        :rtype: twisted.internet.defer.Deferred
        """
        insertable = yield self._get_insertable_docs()
        processed_docs_ids = []
        for doc_fields in insertable:
            method = self._insert_decrypted_local_doc
            # FIXME: This is used only because SQLCipherU1DBSync is synchronous
            # When adbapi is used there is no need for an external thread
            # Without this the reactor can freeze and fail docs download
            yield threads.deferToThread(method, *doc_fields)
            processed_docs_ids.append(doc_fields[0])
        yield self._delete_received_docs(processed_docs_ids)

    def _insert_decrypted_local_doc(self, doc_id, doc_rev, content,
                                    gen, trans_id, encrypted, idx):
        """
        Insert the decrypted document into the local replica.

        Make use of the passed callback `insert_doc_cb` passed to the caller
        by u1db sync.

        :param doc_id: The document id.
        :type doc_id: str
        :param doc_rev: The document revision.
        :type doc_rev: str
        :param content: The serialized content of the document.
        :type content: str
        :param gen: The generation corresponding to the modification of that
                    document.
        :type gen: int
        :param trans_id: The transaction id corresponding to the modification
                         of that document.
        :type trans_id: str
        """
        # could pass source_replica in params for callback chain
        logger.debug("Sync decrypter pool: inserting doc in local db: "
                     "%s:%s %s" % (doc_id, doc_rev, gen))

        # convert deleted documents to avoid error on document creation
        if content == 'null':
            content = None
        doc = SoledadDocument(doc_id, doc_rev, content)
        gen = int(gen)
        self._insert_doc_cb(doc, gen, trans_id)

        # store info about processed docs
        self._last_inserted_idx = idx
        self._processed_docs += 1

    @defer.inlineCallbacks
    def _decrypt_and_recurse(self):
        """
        Decrypt the documents received from remote replica and insert them
        into the local one.

        This method implicitelly returns a defferred (see the decorator
        above). It should only be called by _launch_decrypt_and_process().
        because this way any exceptions raised here will be stored by the
        errback attached to the deferred returned.

        :return: A deferred which will fire after all decrypt, process and
                 delete operations have been executed.
        :rtype: twisted.internet.defer.Deferred
        """
        if not self.running:
            defer.returnValue(None)
        processed = self._processed_docs
        pending = self._docs_to_process

        if processed < pending:
            yield self._process_decrypted_docs()
        else:
            self._finish()

    def _finish(self):
        self._processed_docs = 0
        self._last_inserted_idx = 0
        self._decrypted_docs_indexes = set()
        if not self._deferred.called:
            self._deferred.callback(None)