From 69f5087c718cc534a969fcba0fcb35812c88ad8b Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 14 Jul 2014 20:01:01 -0300 Subject: Add encrypted field to sync db (#5895). --- client/src/leap/soledad/client/crypto.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 7133f804..89220860 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -691,7 +691,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ # TODO implement throttling to reduce cpu usage?? TABLE_NAME = "docs_received" - FIELD_NAMES = "doc_id, rev, content, gen, trans_id" + FIELD_NAMES = "doc_id, rev, content, gen, trans_id, encrypted" write_encrypted_lock = threading.Lock() @@ -733,13 +733,15 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type trans_id: str """ docstr = json.dumps(content) - sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?)" % ( + sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( self.TABLE_NAME,) con = self._sync_db with self._sync_db_write_lock: with con: - con.execute(sql_ins, (doc_id, doc_rev, docstr, gen, trans_id)) + con.execute( + sql_ins, + (doc_id, doc_rev, docstr, gen, trans_id, 1)) def insert_marker_for_received_doc(self, doc_id, doc_rev, gen): """ @@ -757,12 +759,12 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :param gen: the Document Generation :type gen: int """ - sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?)" % ( + sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( self.TABLE_NAME,) con = self._sync_db with self._sync_db_write_lock: with con: - con.execute(sql_ins, (doc_id, doc_rev, '', gen, '')) + con.execute(sql_ins, (doc_id, doc_rev, '', gen, '', 0)) def insert_received_doc(self, doc_id, doc_rev, content, gen, trans_id): """ -- cgit v1.2.3 From 95f34ccab21e36ea48e0d01a4b9ee00e6094d1ec Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 14 
Jul 2014 20:05:00 -0300 Subject: Store non-encrypted docs in the sync db (#5895). --- client/src/leap/soledad/client/crypto.py | 40 +++++++------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 89220860..128e40d7 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -743,29 +743,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): sql_ins, (doc_id, doc_rev, docstr, gen, trans_id, 1)) - def insert_marker_for_received_doc(self, doc_id, doc_rev, gen): - """ - Insert a marker with the document id, revision and generation on the - sync db. This document does not have an encrypted payload, so the - content has already been inserted into the decrypted_docs dictionary - from where it can be picked following generation order. - We need to leave here the marker to be able to calculate the expected - insertion order for a synchronization batch. - - :param doc_id: The Document ID. - :type doc_id: str - :param doc_rev: The Document Revision - :param doc_rev: str - :param gen: the Document Generation - :type gen: int - """ - sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( - self.TABLE_NAME,) - con = self._sync_db - with self._sync_db_write_lock: - with con: - con.execute(sql_ins, (doc_id, doc_rev, '', gen, '', 0)) - def insert_received_doc(self, doc_id, doc_rev, content, gen, trans_id): """ Insert a document that is not symmetrically encrypted. @@ -783,16 +760,15 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :param trans_id: Transaction ID :type trans_id: str """ - # XXX this need a deeper review / testing. - # I believe that what I'm doing here is prone to problems - # if the sync is interrupted (ie, client crash) in the worst possible - # moment. 
We would need a recover strategy in that case - # (or, insert the document in the table all the same, but with a flag - # saying if the document is sym-encrypted or not), content = json.dumps(content) - result = doc_id, doc_rev, content, gen, trans_id - self.decrypted_docs[gen] = result - self.insert_marker_for_received_doc(doc_id, doc_rev, gen) + sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( + self.TABLE_NAME,) + con = self._sync_db + with self._sync_db_write_lock: + with con: + con.execute( + sql_ins, + (doc_id, doc_rev, content, gen, trans_id, 0)) def delete_encrypted_received_doc(self, doc_id, doc_rev): """ -- cgit v1.2.3 From 51e0bf7f79a444661b10fe418af85b0a60f41afb Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 14 Jul 2014 20:09:01 -0300 Subject: Insert received docs in sync db after decryption (#5895). --- client/src/leap/soledad/client/crypto.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 128e40d7..d0a5a693 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -876,7 +876,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ doc_id, rev, content, gen, trans_id = result logger.debug("Sync decrypter pool: decrypted doc %s: %s %s" % (doc_id, rev, gen)) - self.decrypted_docs[gen] = result + self.insert_received_doc(doc_id, rev, content, gen, trans_id) def get_docs_by_generation(self): """ -- cgit v1.2.3 From 54a69eb14189e06556af15dcdf5d5ed424778fc2 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 15 Jul 2014 13:46:42 -0300 Subject: Store all received docs in sync db (#5895). 
--- client/src/leap/soledad/client/crypto.py | 156 ++++++++++++++++--------------- client/src/leap/soledad/client/target.py | 12 ++- 2 files changed, 89 insertions(+), 79 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index d0a5a693..4a73a910 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -256,11 +256,11 @@ class SoledadCrypto(object): secret = property( _get_secret, doc='The secret used for symmetric encryption') + # # Crypto utilities for a SoledadDocument. # - def mac_doc(doc_id, doc_rev, ciphertext, mac_method, secret): """ Calculate a MAC for C{doc} using C{ciphertext}. @@ -657,26 +657,6 @@ def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret): return doc_id, doc_rev, decrypted_content, gen, trans_id -def get_insertable_docs_by_gen(expected, got): - """ - Return a list of documents ready to be inserted. This list is computed - by aligning the expected list with the already gotten docs, and returning - the maximum number of docs that can be processed in the expected order - before finding a gap. - - :param expected: A list of generations to be inserted. - :type expected: list - - :param got: A dictionary whose values are the docs to be inserted. - :type got: dict - """ - ordered = [got.get(i) for i in expected] - if None in ordered: - return ordered[:ordered.index(None)] - else: - return ordered - - class SyncDecrypterPool(SyncEncryptDecryptPool): """ Pool of workers that spawn subprocesses to execute the symmetric decryption @@ -700,10 +680,18 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): Initialize the decrypter pool, and setup a dict for putting the results of the decrypted docs until they are picked by the insert routine that gets them in order. + + :param insert_doc_cb: A callback for inserting received documents from + target. 
If not overriden, this will call u1db + insert_doc_from_target in synchronizer, which + implements the TAKE OTHER semantics. + :type insert_doc_cb: function + :param last_known_generation: Target's last known generation. + :type last_known_generation: int """ self._insert_doc_cb = kwargs.pop("insert_doc_cb") + self._last_known_generation = kwargs.pop("last_known_generation") SyncEncryptDecryptPool.__init__(self, *args, **kwargs) - self.decrypted_docs = {} self.source_replica_uid = None def set_source_replica_uid(self, source_replica_uid): @@ -733,12 +721,14 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type trans_id: str """ docstr = json.dumps(content) + sql_del = "DELETE FROM '%s' WHERE doc_id=?" % (self.TABLE_NAME,) sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( self.TABLE_NAME,) con = self._sync_db with self._sync_db_write_lock: with con: + con.execute(sql_del, (doc_id, )) con.execute( sql_ins, (doc_id, doc_rev, docstr, gen, trans_id, 1)) @@ -760,20 +750,23 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :param trans_id: Transaction ID :type trans_id: str """ - content = json.dumps(content) + if not isinstance(content, str): + content = json.dumps(content) + sql_del = "DELETE FROM '%s' WHERE doc_id=?" % ( + self.TABLE_NAME,) sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( self.TABLE_NAME,) con = self._sync_db with self._sync_db_write_lock: with con: + con.execute(sql_del, (doc_id,)) con.execute( sql_ins, (doc_id, doc_rev, content, gen, trans_id, 0)) - def delete_encrypted_received_doc(self, doc_id, doc_rev): + def delete_received_doc(self, doc_id, doc_rev): """ - Delete a encrypted received doc after it was inserted into the local - db. + Delete a received doc after it was inserted into the local db. :param doc_id: Document ID. 
:type doc_id: str @@ -787,7 +780,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): with con: con.execute(sql_del, (doc_id, doc_rev)) - def decrypt_doc(self, doc_id, rev, source_replica_uid, workers=True): + def decrypt_doc(self, doc_id, rev, content, gen, trans_id, + source_replica_uid, workers=True): """ Symmetrically decrypt a document. @@ -795,6 +789,14 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type doc: str :param rev: The revision of the document. :type rev: str + :param content: The serialized content of the document. + :type content: str + :param gen: The generation corresponding to the modification of that + document. + :type gen: int + :param trans_id: The transaction id corresponding to the modification + of that document. + :type trans_id: str :param source_replica_uid: :type source_replica_uid: str @@ -813,33 +815,14 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): logger.debug("Sync decrypter pool: no insert_doc_cb() yet.") return - # XXX move to get_doc function... - c = self._sync_db.cursor() - sql = "SELECT * FROM '%s' WHERE doc_id=? AND rev=?" 
% ( - self.TABLE_NAME,) - try: - c.execute(sql, (doc_id, rev)) - res = c.fetchone() - except Exception as exc: - logger.warning("Error getting docs from syncdb: %r" % (exc,)) - return - if res is None: - logger.debug("Doc %s:%s does not exist in sync db" % (doc_id, rev)) - return - soledad_assert(self._crypto is not None, "need a crypto object") - try: - doc_id, rev, docstr, gen, trans_id = res - except ValueError: - logger.warning("Wrong entry in sync db") - return - if len(docstr) == 0: + if len(content) == 0: # not encrypted payload return try: - content = json.loads(docstr) + content = json.loads(content) except TypeError: logger.warning("Wrong type while decoding json: %s" % repr(docstr)) return @@ -867,34 +850,61 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): def decrypt_doc_cb(self, result): """ - Temporarily store the decryption result in a dictionary where it will - be picked by process_decrypted. + Store the decryption result in the sync db from where it will later be + picked by process_decrypted. :param result: A tuple containing the doc id, revision and encrypted content. :type result: tuple(str, str, str) """ doc_id, rev, content, gen, trans_id = result - logger.debug("Sync decrypter pool: decrypted doc %s: %s %s" % (doc_id, rev, gen)) + logger.debug("Sync decrypter pool: decrypted doc %s: %s %s" + % (doc_id, rev, gen)) self.insert_received_doc(doc_id, rev, content, gen, trans_id) - def get_docs_by_generation(self): + def get_docs_by_generation(self, encrypted=None): """ Get all documents in the received table from the sync db, ordered by generation. - :return: list of doc_id, rev, generation + :param encrypted: If not None, only return documents with encrypted + field equal to given parameter. 
+ :type encrypted: bool + + :return: list of doc_id, rev, generation, gen, trans_id + :rtype: list """ + sql = "SELECT doc_id, rev, content, gen, trans_id, encrypted FROM %s" \ + % self.TABLE_NAME + if encrypted is not None: + sql += " WHERE encrypted = %d" % int(encrypted) + sql += " ORDER BY gen" c = self._sync_db.cursor() - sql = "SELECT doc_id, rev, gen FROM %s ORDER BY gen" % ( - self.TABLE_NAME,) c.execute(sql) - return c.fetchall() + # TODO: due to unknown reasons, the fetchall() method may return empty + # values, so we filter them out here. We have to perform some tests to + # understand why and when this happens. + docs = filter(lambda entry: len(entry) > 0, c.fetchall()) + return docs + + def get_insertable_docs_by_gen(self): + """ + Return a list of documents ready to be inserted. + """ + docs = self.get_docs_by_generation(encrypted=False) + insertable = [] + if docs: + last_gen = self._last_known_generation + for doc_id, rev, content, gen, trans_id, _ in docs: + if gen != (last_gen + 1): + break + insertable.append((doc_id, rev, content, gen, trans_id)) + last_gen = gen + return insertable - def count_received_encrypted_docs(self): + def count_docs_in_sync_db(self): """ - Count how many documents we have in the table for received and - encrypted docs. + Count how many documents we have in the table for received docs. :return: The count of documents. :rtype: int @@ -916,11 +926,13 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): Get all the encrypted documents from the sync database and dispatch a decrypt worker to decrypt each one of them. """ - docs_by_generation = self.get_docs_by_generation() + docs_by_generation = self.get_docs_by_generation(encrypted=True) logger.debug("Sync decrypter pool: There are %d documents to " \ "decrypt." 
% len(docs_by_generation)) - for doc_id, rev, gen in filter(None, docs_by_generation): - self.decrypt_doc(doc_id, rev, self.source_replica_uid) + for doc_id, rev, content, gen, trans_id, _ \ + in filter(None, docs_by_generation): + self.decrypt_doc( + doc_id, rev, content, gen, trans_id, self.source_replica_uid) def process_decrypted(self): """ @@ -934,15 +946,9 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): # getting data from the syncing stream, to avoid InvalidGeneration # problems. with self.write_encrypted_lock: - already_decrypted = self.decrypted_docs - docs = self.get_docs_by_generation() - docs = filter(lambda entry: len(entry) > 0, docs) - expected = [gen for doc_id, rev, gen in docs] - docs_to_insert = get_insertable_docs_by_gen( - expected, already_decrypted) - for doc_fields in docs_to_insert: + for doc_fields in self.get_insertable_docs_by_gen(): self.insert_decrypted_local_doc(*doc_fields) - remaining = self.count_received_encrypted_docs() + remaining = self.count_docs_in_sync_db() return remaining == 0 def insert_decrypted_local_doc(self, doc_id, doc_rev, content, @@ -974,14 +980,14 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): if content == 'null': content = None doc = SoledadDocument(doc_id, doc_rev, content) - insert_fun(doc, int(gen), trans_id) + gen = int(gen) + insert_fun(doc, gen, trans_id) + self._last_known_generation = gen except Exception as exc: logger.error("Sync decrypter pool: error while inserting " "decrypted doc into local db.") logger.exception(exc) else: - # If no errors found, remove it from the local temporary dict - # and from the received database. - self.decrypted_docs.pop(gen) - self.delete_encrypted_received_doc(doc_id, doc_rev) + # If no errors found, remove it from the received database. 
+ self.delete_received_doc(doc_id, doc_rev) diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 70e4d3a2..089a48a0 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -804,16 +804,20 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._sync_db = sync_db self._sync_db_write_lock = sync_db_write_lock - def _setup_sync_decr_pool(self): + def _setup_sync_decr_pool(self, last_known_generation): """ Set up the SyncDecrypterPool for deferred decryption. + + :param last_known_generation: Target's last known generation. + :type last_known_generation: int """ if self._sync_decr_pool is None: # initialize syncing queue decryption pool self._sync_decr_pool = SyncDecrypterPool( self._crypto, self._sync_db, self._sync_db_write_lock, - insert_doc_cb=self._insert_doc_cb) + insert_doc_cb=self._insert_doc_cb, + last_known_generation=last_known_generation) self._sync_decr_pool.set_source_replica_uid( self.source_replica_uid) @@ -1127,7 +1131,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): if defer_decryption: self._sync_exchange_lock.acquire() - self._setup_sync_decr_pool() + self._setup_sync_decr_pool(last_known_generation) self._setup_sync_watcher() self._defer_decryption = True @@ -1402,7 +1406,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :rtype: bool """ if self._sync_decr_pool is not None: - return self._sync_decr_pool.count_received_encrypted_docs() == 0 + return self._sync_decr_pool.count_docs_in_sync_db() == 0 else: return True -- cgit v1.2.3 From 5e4dae3427f40879156ddfaaaa8f878ab2504ee3 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 23 Jul 2014 10:29:09 -0300 Subject: On sync, fetch all docs before decrypting. 
--- client/src/leap/soledad/client/crypto.py | 30 +++++++++++++++++------------- client/src/leap/soledad/client/target.py | 32 +++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 22 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 4a73a910..5ae5937f 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -690,7 +690,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type last_known_generation: int """ self._insert_doc_cb = kwargs.pop("insert_doc_cb") - self._last_known_generation = kwargs.pop("last_known_generation") SyncEncryptDecryptPool.__init__(self, *args, **kwargs) self.source_replica_uid = None @@ -858,8 +857,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type result: tuple(str, str, str) """ doc_id, rev, content, gen, trans_id = result - logger.debug("Sync decrypter pool: decrypted doc %s: %s %s" - % (doc_id, rev, gen)) + logger.debug("Sync decrypter pool: decrypted doc %s: %s %s %s" + % (doc_id, rev, gen, trans_id)) self.insert_received_doc(doc_id, rev, content, gen, trans_id) def get_docs_by_generation(self, encrypted=None): @@ -878,7 +877,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): % self.TABLE_NAME if encrypted is not None: sql += " WHERE encrypted = %d" % int(encrypted) - sql += " ORDER BY gen" + sql += " ORDER BY gen ASC" c = self._sync_db.cursor() c.execute(sql) # TODO: due to unknown reasons, the fetchall() method may return empty @@ -891,21 +890,25 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ Return a list of documents ready to be inserted. 
""" - docs = self.get_docs_by_generation(encrypted=False) + all_docs = self.get_docs_by_generation() + decrypted_docs = self.get_docs_by_generation(encrypted=False) insertable = [] - if docs: - last_gen = self._last_known_generation - for doc_id, rev, content, gen, trans_id, _ in docs: - if gen != (last_gen + 1): - break + for doc_id, rev, content, gen, trans_id, encrypted in all_docs: + next_decrypted = decrypted_docs.pop(0) + if doc_id == next_decrypted[0]: insertable.append((doc_id, rev, content, gen, trans_id)) - last_gen = gen + else: + break return insertable - def count_docs_in_sync_db(self): + def count_docs_in_sync_db(self, encrypted=None): """ Count how many documents we have in the table for received docs. + :param encrypted: If not None, return count of documents with + encrypted field equal to given parameter. + :type encrypted: bool + :return: The count of documents. :rtype: int """ @@ -914,6 +917,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): return c = self._sync_db.cursor() sql = "SELECT COUNT(*) FROM %s" % (self.TABLE_NAME,) + if encrypted is not None: + sql += " WHERE encrypted = %d" % int(encrypted) c.execute(sql) res = c.fetchone() if res is not None: @@ -982,7 +987,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): doc = SoledadDocument(doc_id, doc_rev, content) gen = int(gen) insert_fun(doc, gen, trans_id) - self._last_known_generation = gen except Exception as exc: logger.error("Sync decrypter pool: error while inserting " "decrypted doc into local db.") diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 089a48a0..032134ec 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -816,8 +816,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._sync_decr_pool = SyncDecrypterPool( self._crypto, self._sync_db, self._sync_db_write_lock, - insert_doc_cb=self._insert_doc_cb, - last_known_generation=last_known_generation) + 
insert_doc_cb=self._insert_doc_cb) self._sync_decr_pool.set_source_replica_uid( self.source_replica_uid) @@ -1251,15 +1250,26 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): sent += 1 # make sure all threads finished and we have up-to-date info + last_successful_thread = None while threads: # check if there are failures t, doc = threads.pop(0) t.join() if t.success: synced.append((doc.doc_id, doc.rev)) + last_successful_thread = t - if defer_decryption: - self._sync_watcher.start() + # delete documents from the sync database + if defer_encryption: + self.delete_encrypted_docs_from_db(synced) + + # get target gen and trans_id after docs + gen_after_send = None + trans_id_after_send = None + if last_successful_thread is not None: + response_dict = json.loads(last_successful_thread.response[0])[0] + gen_after_send = response_dict['new_generation'] + trans_id_after_send = response_dict['new_transaction_id'] # get docs from target if self.stopped is False: @@ -1268,20 +1278,24 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): last_known_generation, last_known_trans_id, headers, return_doc_cb, ensure_callback, sync_id, syncer_pool, defer_decryption=defer_decryption) - syncer_pool.cleanup() - # delete documents from the sync database - if defer_encryption: - self.delete_encrypted_docs_from_db(synced) + syncer_pool.cleanup() - # wait for deferred decryption to finish + # decrypt docs in case of deferred decryption if defer_decryption: + self._sync_watcher.start() while self.clear_to_sync() is False: sleep(self.DECRYPT_TASK_PERIOD) self._teardown_sync_watcher() self._teardown_sync_decr_pool() self._sync_exchange_lock.release() + # update gen and trans id info in case we just sent and did not + # receive docs. 
+ if gen_after_send is not None and gen_after_send > cur_target_gen: + cur_target_gen = gen_after_send + cur_target_trans_id = trans_id_after_send + self.stop() return cur_target_gen, cur_target_trans_id -- cgit v1.2.3 From 8afb79c4d2171b03270143639296cbb6d9d0fdfa Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 23 Jul 2014 10:29:39 -0300 Subject: Allow deferred decryption without deferred encryption. --- client/src/leap/soledad/client/sqlcipher.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 2df9606e..5a30b125 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -243,19 +243,14 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self._ensure_schema() self._crypto = crypto + # define sync-db attrs self._sync_db = None self._sync_db_write_lock = None self._sync_enc_pool = None + self._init_sync_db(sqlcipher_file) if self.defer_encryption: - if sqlcipher_file != ":memory:": - self._sync_db_path = "%s-sync" % sqlcipher_file - else: - self._sync_db_path = ":memory:" - # initialize sync db - self._init_sync_db() - # initialize syncing queue encryption pool self._sync_enc_pool = SyncEncrypterPool( self._crypto, self._sync_db, self._sync_db_write_lock) @@ -449,7 +444,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): # the following context manager blocks until the syncing lock can be # acquired. with self.syncer(url, creds=creds) as syncer: - # XXX could mark the critical section here... 
try: res = syncer.sync(autocreate=autocreate, @@ -542,14 +536,21 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): 'ALTER TABLE document ' 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') - def _init_sync_db(self): + def _init_sync_db(self, sqlcipher_file): """ Initialize the Symmetrically-Encrypted document to be synced database, and the queue to communicate with subprocess workers. + + :param sqlcipher_file: The path for the SQLCipher file. + :type sqlcipher_file: str """ - self._sync_db = sqlite3.connect(self._sync_db_path, + sync_db_path = None + if sqlcipher_file != ":memory:": + sync_db_path = "%s-sync" % sqlcipher_file + else: + sync_db_path = ":memory:" + self._sync_db = sqlite3.connect(sync_db_path, check_same_thread=False) - self._sync_db_write_lock = threading.Lock() self._create_sync_db_tables() self.sync_queue = multiprocessing.Queue() -- cgit v1.2.3 From 609669077b2f7223c31feed3679c8fcd74ab9ba7 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 23 Jul 2014 10:49:44 -0300 Subject: Avoid deadlocks when cancelling failed sync threads. --- client/src/leap/soledad/client/target.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 032134ec..5fe55216 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -376,6 +376,12 @@ class DocumentSyncerPool(object): t.request_lock.release() t.callback_lock.acquire(False) # just in case t.callback_lock.release() + # release any blocking semaphores + for i in xrange(DocumentSyncerPool.POOL_SIZE): + try: + self._semaphore_pool.release() + except ValueError: + break logger.warning("Soledad sync: cancelled sync threads.") def cleanup(self): -- cgit v1.2.3 From 622708945d51a1e22dde95424a6214e8e67be180 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 23 Jul 2014 16:26:24 -0300 Subject: Make sync database multiprocessing-safe. 
--- client/src/leap/soledad/client/crypto.py | 46 +++++------- client/src/leap/soledad/client/mp_safe_db.py | 101 +++++++++++++++++++++++++++ client/src/leap/soledad/client/sqlcipher.py | 22 ++++-- client/src/leap/soledad/client/target.py | 17 ++--- 4 files changed, 143 insertions(+), 43 deletions(-) create mode 100644 client/src/leap/soledad/client/mp_safe_db.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 5ae5937f..eb5a4f64 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -224,7 +224,7 @@ class SoledadCrypto(object): The password is derived using HMAC having sha256 as underlying hash function. The key used for HMAC are the first - C{soledad.REMOTE_STORAGE_SECRET_KENGTH} bytes of Soledad's storage + C{soledad.REMOTE_STORAGE_SECRET_LENGTH} bytes of Soledad's storage secret stripped from the first MAC_KEY_LENGTH characters. The HMAC message is C{doc_id}. 
@@ -623,9 +623,8 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): con = self._sync_db with self._sync_db_write_lock: - with con: - con.execute(sql_del, (doc_id, )) - con.execute(sql_ins, (doc_id, doc_rev, content)) + con.execute(sql_del, (doc_id, )) + con.execute(sql_ins, (doc_id, doc_rev, content)) def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret): @@ -726,11 +725,10 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): con = self._sync_db with self._sync_db_write_lock: - with con: - con.execute(sql_del, (doc_id, )) - con.execute( - sql_ins, - (doc_id, doc_rev, docstr, gen, trans_id, 1)) + con.execute(sql_del, (doc_id, )) + con.execute( + sql_ins, + (doc_id, doc_rev, docstr, gen, trans_id, 1)) def insert_received_doc(self, doc_id, doc_rev, content, gen, trans_id): """ @@ -757,11 +755,10 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): self.TABLE_NAME,) con = self._sync_db with self._sync_db_write_lock: - with con: - con.execute(sql_del, (doc_id,)) - con.execute( - sql_ins, - (doc_id, doc_rev, content, gen, trans_id, 0)) + con.execute(sql_del, (doc_id,)) + con.execute( + sql_ins, + (doc_id, doc_rev, content, gen, trans_id, 0)) def delete_received_doc(self, doc_id, doc_rev): """ @@ -776,8 +773,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): self.TABLE_NAME,) con = self._sync_db with self._sync_db_write_lock: - with con: - con.execute(sql_del, (doc_id, doc_rev)) + con.execute(sql_del, (doc_id, doc_rev)) def decrypt_doc(self, doc_id, rev, content, gen, trans_id, source_replica_uid, workers=True): @@ -878,12 +874,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): if encrypted is not None: sql += " WHERE encrypted = %d" % int(encrypted) sql += " ORDER BY gen ASC" - c = self._sync_db.cursor() - c.execute(sql) - # TODO: due to unknown reasons, the fetchall() method may return empty - # values, so we filter them out here. We have to perform some tests to - # understand why and when this happens. 
- docs = filter(lambda entry: len(entry) > 0, c.fetchall()) + docs = self._sync_db.select(sql) return docs def get_insertable_docs_by_gen(self): @@ -894,7 +885,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): decrypted_docs = self.get_docs_by_generation(encrypted=False) insertable = [] for doc_id, rev, content, gen, trans_id, encrypted in all_docs: - next_decrypted = decrypted_docs.pop(0) + next_decrypted = decrypted_docs.next() if doc_id == next_decrypted[0]: insertable.append((doc_id, rev, content, gen, trans_id)) else: @@ -915,14 +906,13 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): if self._sync_db is None: logger.warning("cannot return count with null sync_db") return - c = self._sync_db.cursor() sql = "SELECT COUNT(*) FROM %s" % (self.TABLE_NAME,) if encrypted is not None: sql += " WHERE encrypted = %d" % int(encrypted) - c.execute(sql) - res = c.fetchone() + res = self._sync_db.select(sql) if res is not None: - return res[0] + val = res.next() + return val[0] else: return 0 @@ -932,8 +922,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): decrypt worker to decrypt each one of them. """ docs_by_generation = self.get_docs_by_generation(encrypted=True) - logger.debug("Sync decrypter pool: There are %d documents to " \ - "decrypt." % len(docs_by_generation)) for doc_id, rev, content, gen, trans_id, _ \ in filter(None, docs_by_generation): self.decrypt_doc( diff --git a/client/src/leap/soledad/client/mp_safe_db.py b/client/src/leap/soledad/client/mp_safe_db.py new file mode 100644 index 00000000..a9ab5649 --- /dev/null +++ b/client/src/leap/soledad/client/mp_safe_db.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# crypto.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +Multiprocessing-safe SQLite database. +""" + + +from threading import Thread +from Queue import Queue +from sqlite3 import connect as sqlite3_connect + + +# Thanks to http://code.activestate.com/recipes/526618/ + +class MPSafeSQLiteDB(Thread): + """ + A multiprocessing-safe SQLite database accessor. + """ + + CLOSE = "--close--" + NO_MORE = "--no more--" + + def __init__(self, db_path): + """ + Initialize the process + """ + Thread.__init__(self) + self._db_path = db_path + self._requests = Queue() + self.start() + + def run(self): + """ + Run the multiprocessing-safe database accessor. + """ + conn = sqlite3_connect(self._db_path) + while True: + req, arg, res = self._requests.get() + if req == self.CLOSE: + break + with conn: + cursor = conn.cursor() + cursor.execute(req, arg) + if res: + for rec in cursor.fetchall(): + res.put(rec) + res.put(self.NO_MORE) + conn.close() + + def execute(self, req, arg=None, res=None): + """ + Execute a request on the database. + + :param req: The request to be executed. + :type req: str + :param arg: The arguments for the request. + :type arg: tuple + :param res: A queue to write request results. + :type res: multiprocessing.Queue + """ + self._requests.put((req, arg or tuple(), res)) + + def select(self, req, arg=None): + """ + Run a select query on the database and yield results. + + :param req: The request to be executed. + :type req: str + :param arg: The arguments for the request. 
+ :type arg: tuple + """ + res = Queue() + self.execute(req, arg, res) + while True: + rec=res.get() + if rec == self.NO_MORE: + break + yield rec + + def close(self): + """ + Close the database connection. + """ + self.execute(self.CLOSE) + self.join() diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 5a30b125..85b0391b 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -44,7 +44,6 @@ handled by Soledad should be created by SQLCipher >= 2.0. import logging import multiprocessing import os -import sqlite3 import string import threading import time @@ -63,6 +62,7 @@ from leap.soledad.client.crypto import SyncEncrypterPool, SyncDecrypterPool from leap.soledad.client.target import SoledadSyncTarget from leap.soledad.client.target import PendingReceivedDocsSyncError from leap.soledad.client.sync import SoledadSynchronizer +from leap.soledad.client.mp_safe_db import MPSafeSQLiteDB from leap.soledad.common.document import SoledadDocument @@ -549,8 +549,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): sync_db_path = "%s-sync" % sqlcipher_file else: sync_db_path = ":memory:" - self._sync_db = sqlite3.connect(sync_db_path, - check_same_thread=False) + self._sync_db = MPSafeSQLiteDB(sync_db_path) self._sync_db_write_lock = threading.Lock() self._create_sync_db_tables() self.sync_queue = multiprocessing.Queue() @@ -567,9 +566,8 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): decr.TABLE_NAME, decr.FIELD_NAMES)) with self._sync_db_write_lock: - with self._sync_db: - self._sync_db.execute(sql_encr) - self._sync_db.execute(sql_decr) + self._sync_db.execute(sql_encr) + self._sync_db.execute(sql_decr) # # Symmetric encryption of syncing docs @@ -1076,16 +1074,28 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): Close db_handle and close syncer. 
""" logger.debug("Sqlcipher backend: closing") + # stop the sync watcher for deferred encryption if self._sync_watcher is not None: self._sync_watcher.stop() self._sync_watcher.shutdown() + # close all open syncers for url in self._syncers: _, syncer = self._syncers[url] syncer.close() + # stop the encryption pool if self._sync_enc_pool is not None: self._sync_enc_pool.close() + # close the actual database if self._db_handle is not None: self._db_handle.close() + # close the sync database + if self._sync_db is not None: + self._sync_db.close() + # close the sync queue + if self.sync_queue is not None: + self.sync_queue.close() + del self.sync_queue + self.sync_queue = None @property def replica_uid(self): diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 5fe55216..01e1231a 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -1346,13 +1346,16 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :type doc_rev: str """ encr = SyncEncrypterPool - c = self._sync_db.cursor() sql = ("SELECT content FROM %s WHERE doc_id=? and rev=?" % ( encr.TABLE_NAME,)) - c.execute(sql, (doc_id, doc_rev)) - res = c.fetchall() - if len(res) != 0: - return res[0][0] + res = self._sync_db.select(sql, (doc_id, doc_rev)) + try: + val = res.next() + return val[0] + except StopIteration: + # no doc found + return None + def delete_encrypted_docs_from_db(self, docs_ids): """ @@ -1365,12 +1368,10 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): """ if docs_ids: encr = SyncEncrypterPool - c = self._sync_db.cursor() for doc_id, doc_rev in docs_ids: sql = ("DELETE FROM %s WHERE doc_id=? and rev=?" 
% ( encr.TABLE_NAME,)) - c.execute(sql, (doc_id, doc_rev)) - self._sync_db.commit() + self._sync_db.execute(sql, (doc_id, doc_rev)) def _save_encrypted_received_doc(self, doc, gen, trans_id, idx, total): """ -- cgit v1.2.3 From 074848f78bdac78328eb4de7fe72d85830da561d Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 1 Aug 2014 12:47:43 -0300 Subject: Refactor secrets out of main soledad client class. --- client/src/leap/soledad/client/__init__.py | 659 +++------------------------- client/src/leap/soledad/client/crypto.py | 21 +- client/src/leap/soledad/client/secrets.py | 681 +++++++++++++++++++++++++++++ 3 files changed, 757 insertions(+), 604 deletions(-) create mode 100644 client/src/leap/soledad/client/secrets.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 586e3389..0fd6672a 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -31,9 +31,7 @@ import os import socket import ssl import urlparse -import hmac -from hashlib import sha256 try: import cchardet as chardet @@ -43,41 +41,20 @@ except ImportError: from u1db.remote import http_client from u1db.remote.ssl_match_hostname import match_hostname -import scrypt -import simplejson as json - from leap.common.config import get_path_prefix from leap.soledad.common import ( SHARED_DB_NAME, soledad_assert, soledad_assert_type ) -from leap.soledad.common.errors import ( - InvalidTokenError, - NotLockedError, - AlreadyLockedError, - LockTimedOutError, -) -from leap.soledad.common.crypto import ( - MacMethods, - UnknownMacMethod, - WrongMac, - MAC_KEY, - MAC_METHOD_KEY, -) from leap.soledad.client.events import ( - SOLEDAD_CREATING_KEYS, - SOLEDAD_DONE_CREATING_KEYS, - SOLEDAD_DOWNLOADING_KEYS, - SOLEDAD_DONE_DOWNLOADING_KEYS, - SOLEDAD_UPLOADING_KEYS, - SOLEDAD_DONE_UPLOADING_KEYS, SOLEDAD_NEW_DATA_TO_SYNC, SOLEDAD_DONE_DATA_SYNC, signal, ) from leap.soledad.common.document 
import SoledadDocument from leap.soledad.client.crypto import SoledadCrypto +from leap.soledad.client.secrets import SoledadSecrets from leap.soledad.client.shared_db import SoledadSharedDatabase from leap.soledad.client.sqlcipher import open as sqlcipher_open from leap.soledad.client.sqlcipher import SQLCipherDatabase @@ -102,27 +79,6 @@ Soledad client and server. # Soledad: local encrypted storage and remote encrypted sync. # -class NoStorageSecret(Exception): - """ - Raised when trying to use a storage secret but none is available. - """ - pass - - -class PassphraseTooShort(Exception): - """ - Raised when trying to change the passphrase but the provided passphrase is - too short. - """ - - -class BootstrapSequenceError(Exception): - """ - Raised when an attempt to generate a secret and store it in a recovery - documents on server failed. - """ - - class Soledad(object): """ Soledad provides encrypted data storage and sync. @@ -166,57 +122,6 @@ class Soledad(object): The name of the file where the storage secrets will be stored. """ - GENERATED_SECRET_LENGTH = 1024 - """ - The length of the generated secret used to derive keys for symmetric - encryption for local and remote storage. - """ - - LOCAL_STORAGE_SECRET_LENGTH = 512 - """ - The length of the secret used to derive a passphrase for the SQLCipher - database. - """ - - REMOTE_STORAGE_SECRET_LENGTH = \ - GENERATED_SECRET_LENGTH - LOCAL_STORAGE_SECRET_LENGTH - """ - The length of the secret used to derive an encryption key and a MAC auth - key for remote storage. - """ - - SALT_LENGTH = 64 - """ - The length of the salt used to derive the key for the storage secret - encryption. - """ - - MINIMUM_PASSPHRASE_LENGTH = 6 - """ - The minimum length for a passphrase. The passphrase length is only checked - when the user changes her passphrase, not when she instantiates Soledad. - """ - - IV_SEPARATOR = ":" - """ - A separator used for storing the encryption initial value prepended to the - ciphertext. 
- """ - - UUID_KEY = 'uuid' - STORAGE_SECRETS_KEY = 'storage_secrets' - SECRET_KEY = 'secret' - CIPHER_KEY = 'cipher' - LENGTH_KEY = 'length' - KDF_KEY = 'kdf' - KDF_SALT_KEY = 'kdf_salt' - KDF_LENGTH_KEY = 'kdf_length' - KDF_SCRYPT = 'scrypt' - CIPHER_AES256 = 'aes256' - """ - Keys used to access storage secrets in recovery documents. - """ - DEFAULT_PREFIX = os.path.join(get_path_prefix(), 'leap', 'soledad') """ Prefix for default values for path. @@ -266,41 +171,49 @@ class Soledad(object): storage on server sequence has failed for some reason. """ - # get config params + # store config params self._uuid = uuid - soledad_assert_type(passphrase, unicode) self._passphrase = passphrase - # init crypto variables - self._secrets = {} - self._secret_id = secret_id + self._secrets_path = secrets_path + self._local_db_path = local_db_path + self._server_url = server_url + # configure SSL certificate + global SOLEDAD_CERT + SOLEDAD_CERT = cert_file + self._set_token(auth_token) self._defer_encryption = defer_encryption - self._init_config(secrets_path, local_db_path, server_url) + self._init_config() + self._init_dirs() - self._set_token(auth_token) + # init crypto variables self._shared_db_instance = None - # configure SSL certificate - global SOLEDAD_CERT - SOLEDAD_CERT = cert_file + self._crypto = SoledadCrypto(self) + self._secrets = SoledadSecrets( + self._uuid, + self._passphrase, + self._secrets_path, + self._shared_db, + self._crypto, + secret_id=secret_id) + # initiate bootstrap sequence self._bootstrap() # might raise BootstrapSequenceError() - def _init_config(self, secrets_path, local_db_path, server_url): + def _init_config(self): """ Initialize configuration using default values for missing params. 
""" + soledad_assert_type(self._passphrase, unicode) # initialize secrets_path - self._secrets_path = secrets_path if self._secrets_path is None: self._secrets_path = os.path.join( self.DEFAULT_PREFIX, self.STORAGE_SECRETS_FILE_NAME) # initialize local_db_path - self._local_db_path = local_db_path if self._local_db_path is None: self._local_db_path = os.path.join( self.DEFAULT_PREFIX, self.LOCAL_DATABASE_FILE_NAME) # initialize server_url - self._server_url = server_url soledad_assert( self._server_url is not None, 'Missing URL for Soledad server.') @@ -309,129 +222,18 @@ class Soledad(object): # initialization/destruction methods # - def _get_or_gen_crypto_secrets(self): - """ - Retrieves or generates the crypto secrets. - - Might raise BootstrapSequenceError - """ - doc = self._get_secrets_from_shared_db() - - if doc: - logger.info( - 'Found cryptographic secrets in shared recovery ' - 'database.') - _, mac = self.import_recovery_document(doc.content) - if mac is False: - self.put_secrets_in_shared_db() - self._store_secrets() # save new secrets in local file - if self._secret_id is None: - self._set_secret_id(self._secrets.items()[0][0]) - else: - # STAGE 3 - there are no secrets in server also, so - # generate a secret and store it in remote db. - logger.info( - 'No cryptographic secrets found, creating new ' - ' secrets...') - self._set_secret_id(self._gen_secret()) - try: - self._put_secrets_in_shared_db() - except Exception as ex: - # storing generated secret in shared db failed for - # some reason, so we erase the generated secret and - # raise. - try: - os.unlink(self._secrets_path) - except OSError as e: - if e.errno != errno.ENOENT: # no such file or directory - logger.exception(e) - logger.exception(ex) - raise BootstrapSequenceError( - 'Could not store generated secret in the shared ' - 'database, bailing out...') - def _bootstrap(self): """ Bootstrap local Soledad instance. 
- Soledad Client bootstrap is the following sequence of stages: - - * stage 0 - local environment setup. - - directory initialization. - - crypto submodule initialization - * stage 1 - local secret loading: - - if secrets exist locally, load them. - * stage 2 - remote secret loading: - - else, if secrets exist in server, download them. - * stage 3 - secret generation: - - else, generate a new secret and store in server. - * stage 4 - database initialization. - - This method decides which bootstrap stages have already been performed - and performs the missing ones in order. - :raise BootstrapSequenceError: Raised when the secret generation and storage on server sequence has failed for some reason. """ - # STAGE 0 - local environment setup - self._init_dirs() - self._crypto = SoledadCrypto(self) - - secrets_problem = None - - # STAGE 1 - verify if secrets exist locally - if not self._has_secret(): # try to load from local storage. - - # STAGE 2 - there are no secrets in local storage, so try to fetch - # encrypted secrets from server. - logger.info( - 'Trying to fetch cryptographic secrets from shared recovery ' - 'database...') - - # --- start of atomic operation in shared db --- - - # obtain lock on shared db - token = timeout = None - try: - token, timeout = self._shared_db.lock() - except AlreadyLockedError: - raise BootstrapSequenceError('Database is already locked.') - except LockTimedOutError: - raise BootstrapSequenceError('Lock operation timed out.') - - try: - self._get_or_gen_crypto_secrets() - except Exception as e: - secrets_problem = e - - # release the lock on shared db - try: - self._shared_db.unlock(token) - self._shared_db.close() - except NotLockedError: - # for some reason the lock expired. Despite that, secret - # loading or generation/storage must have been executed - # successfully, so we pass. 
- pass - except InvalidTokenError: - # here, our lock has not only expired but also some other - # client application has obtained a new lock and is currently - # doing its thing in the shared database. Using the same - # reasoning as above, we assume everything went smooth and - # pass. - pass - except Exception as e: - logger.error("Unhandled exception when unlocking shared " - "database.") - logger.exception(e) - - # --- end of atomic operation in shared db --- - - # STAGE 4 - local database initialization - if secrets_problem is None: + try: + self._secrets.bootstrap() self._init_db() - else: - raise secrets_problem + except: + raise def _init_dirs(self): """ @@ -460,27 +262,8 @@ class Soledad(object): Currently, Soledad uses the default SQLCipher cipher, i.e. 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key and uses the 'raw PRAGMA key' format to handle the key to SQLCipher. - - The first C{self.REMOTE_STORAGE_SECRET_LENGTH} bytes of the storage - secret are used for remote storage encryption. We use the next - C{self.LOCAL_STORAGE_SECRET} bytes to derive a key for local storage. - From these bytes, the first C{self.SALT_LENGTH} are used as the salt - and the rest as the password for the scrypt hashing. 
- """ - # salt indexes - salt_start = self.REMOTE_STORAGE_SECRET_LENGTH - salt_end = salt_start + self.SALT_LENGTH - # password indexes - pwd_start = salt_end - pwd_end = salt_start + self.LOCAL_STORAGE_SECRET_LENGTH - # calculate the key for local encryption - secret = self._get_storage_secret() - key = scrypt.hash( - secret[pwd_start:pwd_end], # the password - secret[salt_start:salt_end], # the salt - buflen=32, # we need a key with 256 bits (32 bytes) - ) - + """ + key = self._secrets.get_local_storage_key() self._db = sqlcipher_open( self._local_db_path, binascii.b2a_hex(key), # sqlcipher only accepts the hex version @@ -501,186 +284,6 @@ class Soledad(object): self._db.stop_sync() self._db.close() - # - # Management of secret for symmetric encryption. - # - - def _get_storage_secret(self): - """ - Return the storage secret. - - Storage secret is encrypted before being stored. This method decrypts - and returns the stored secret. - - :return: The storage secret. - :rtype: str - """ - # calculate the encryption key - key = scrypt.hash( - self._passphrase_as_string(), - # the salt is stored base64 encoded - binascii.a2b_base64( - self._secrets[self._secret_id][self.KDF_SALT_KEY]), - buflen=32, # we need a key with 256 bits (32 bytes). - ) - # recover the initial value and ciphertext - iv, ciphertext = self._secrets[self._secret_id][self.SECRET_KEY].split( - self.IV_SEPARATOR, 1) - ciphertext = binascii.a2b_base64(ciphertext) - return self._crypto.decrypt_sym(ciphertext, key, iv=iv) - - def _set_secret_id(self, secret_id): - """ - Define the id of the storage secret to be used. - - This method will also replace the secret in the crypto object. - - :param secret_id: The id of the storage secret to be used. - :type secret_id: str - """ - self._secret_id = secret_id - - def _load_secrets(self): - """ - Load storage secrets from local file. - """ - # does the file exist in disk? 
- if not os.path.isfile(self._secrets_path): - raise IOError('File does not exist: %s' % self._secrets_path) - # read storage secrets from file - content = None - with open(self._secrets_path, 'r') as f: - content = json.loads(f.read()) - _, mac = self.import_recovery_document(content) - if mac is False: - self._store_secrets() - self._put_secrets_in_shared_db() - # choose first secret if no secret_id was given - if self._secret_id is None: - self._set_secret_id(self._secrets.items()[0][0]) - - def _has_secret(self): - """ - Return whether there is a storage secret available for use or not. - - :return: Whether there's a storage secret for symmetric encryption. - :rtype: bool - """ - if self._secret_id is None or self._secret_id not in self._secrets: - try: - self._load_secrets() # try to load from disk - except IOError, e: - logger.warning('IOError: %s' % str(e)) - try: - self._get_storage_secret() - return True - except Exception: - return False - - def _gen_secret(self): - """ - Generate a secret for symmetric encryption and store in a local - encrypted file. - - This method emits the following signals: - - * SOLEDAD_CREATING_KEYS - * SOLEDAD_DONE_CREATING_KEYS - - A secret has the following structure: - - { - '': { - 'kdf': 'scrypt', - 'kdf_salt': '' - 'kdf_length': - 'cipher': 'aes256', - 'length': , - 'secret': '', - } - } - - :return: The id of the generated secret. - :rtype: str - """ - signal(SOLEDAD_CREATING_KEYS, self._uuid) - # generate random secret - secret = os.urandom(self.GENERATED_SECRET_LENGTH) - secret_id = sha256(secret).hexdigest() - # generate random salt - salt = os.urandom(self.SALT_LENGTH) - # get a 256-bit key - key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) - iv, ciphertext = self._crypto.encrypt_sym(secret, key) - self._secrets[secret_id] = { - # leap.soledad.crypto submodule uses AES256 for symmetric - # encryption. 
- self.KDF_KEY: self.KDF_SCRYPT, - self.KDF_SALT_KEY: binascii.b2a_base64(salt), - self.KDF_LENGTH_KEY: len(key), - self.CIPHER_KEY: self.CIPHER_AES256, - self.LENGTH_KEY: len(secret), - self.SECRET_KEY: '%s%s%s' % ( - str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), - } - self._store_secrets() - signal(SOLEDAD_DONE_CREATING_KEYS, self._uuid) - return secret_id - - def _store_secrets(self): - """ - Store secrets in C{Soledad.STORAGE_SECRETS_FILE_PATH}. - """ - with open(self._secrets_path, 'w') as f: - f.write( - json.dumps( - self.export_recovery_document())) - - def change_passphrase(self, new_passphrase): - """ - Change the passphrase that encrypts the storage secret. - - :param new_passphrase: The new passphrase. - :type new_passphrase: unicode - - :raise NoStorageSecret: Raised if there's no storage secret available. - """ - # maybe we want to add more checks to guarantee passphrase is - # reasonable? - soledad_assert_type(new_passphrase, unicode) - if len(new_passphrase) < self.MINIMUM_PASSPHRASE_LENGTH: - raise PassphraseTooShort( - 'Passphrase must be at least %d characters long!' % - self.MINIMUM_PASSPHRASE_LENGTH) - # ensure there's a secret for which the passphrase will be changed. - if not self._has_secret(): - raise NoStorageSecret() - secret = self._get_storage_secret() - # generate random salt - new_salt = os.urandom(self.SALT_LENGTH) - # get a 256-bit key - key = scrypt.hash(new_passphrase.encode('utf-8'), new_salt, buflen=32) - iv, ciphertext = self._crypto.encrypt_sym(secret, key) - # XXX update all secrets in the dict - self._secrets[self._secret_id] = { - # leap.soledad.crypto submodule uses AES256 for symmetric - # encryption. 
- self.KDF_KEY: self.KDF_SCRYPT, # TODO: remove hard coded kdf - self.KDF_SALT_KEY: binascii.b2a_base64(new_salt), - self.KDF_LENGTH_KEY: len(key), - self.CIPHER_KEY: self.CIPHER_AES256, - self.LENGTH_KEY: len(secret), - self.SECRET_KEY: '%s%s%s' % ( - str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), - } - self._passphrase = new_passphrase - self._store_secrets() - self._put_secrets_in_shared_db() - - # - # General crypto utility methods. - # - @property def _shared_db(self): """ @@ -697,63 +300,6 @@ class Soledad(object): creds=self._creds) return self._shared_db_instance - def _shared_db_doc_id(self): - """ - Calculate the doc_id of the document in the shared db that stores key - material. - - :return: the hash - :rtype: str - """ - return sha256( - '%s%s' % - (self._passphrase_as_string(), self.uuid)).hexdigest() - - def _get_secrets_from_shared_db(self): - """ - Retrieve the document with encrypted key material from the shared - database. - - :return: a document with encrypted key material in its contents - :rtype: SoledadDocument - """ - signal(SOLEDAD_DOWNLOADING_KEYS, self._uuid) - db = self._shared_db - if not db: - logger.warning('No shared db found') - return - doc = db.get_doc(self._shared_db_doc_id()) - signal(SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid) - return doc - - def _put_secrets_in_shared_db(self): - """ - Assert local keys are the same as shared db's ones. - - Try to fetch keys from shared recovery database. If they already exist - in the remote db, assert that that data is the same as local data. - Otherwise, upload keys to shared recovery database. 
- """ - soledad_assert( - self._has_secret(), - 'Tried to send keys to server but they don\'t exist in local ' - 'storage.') - # try to get secrets doc from server, otherwise create it - doc = self._get_secrets_from_shared_db() - if doc is None: - doc = SoledadDocument( - doc_id=self._shared_db_doc_id()) - # fill doc with encrypted secrets - doc.content = self.export_recovery_document() - # upload secrets to server - signal(SOLEDAD_UPLOADING_KEYS, self._uuid) - db = self._shared_db - if not db: - logger.warning('No shared db found') - return - db.put_doc(doc) - signal(SOLEDAD_DONE_UPLOADING_KEYS, self._uuid) - # # Document storage, retrieval and sync. # @@ -1152,104 +698,6 @@ class Soledad(object): token = property(_get_token, _set_token, doc='The authentication Token.') - # - # Recovery document export and import methods - # - - def export_recovery_document(self): - """ - Export the storage secrets. - - A recovery document has the following structure: - - { - 'storage_secrets': { - '': { - 'kdf': 'scrypt', - 'kdf_salt': '' - 'kdf_length': - 'cipher': 'aes256', - 'length': , - 'secret': '', - }, - }, - 'kdf': 'scrypt', - 'kdf_salt': '', - 'kdf_length: , - '_mac_method': 'hmac', - '_mac': '' - } - - Note that multiple storage secrets might be stored in one recovery - document. This method will also calculate a MAC of a string - representation of the secrets dictionary. - - :return: The recovery document. 
- :rtype: dict - """ - # create salt and key for calculating MAC - salt = os.urandom(self.SALT_LENGTH) - key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) - data = { - self.STORAGE_SECRETS_KEY: self._secrets, - self.KDF_KEY: self.KDF_SCRYPT, - self.KDF_SALT_KEY: binascii.b2a_base64(salt), - self.KDF_LENGTH_KEY: len(key), - MAC_METHOD_KEY: MacMethods.HMAC, - MAC_KEY: hmac.new( - key, - json.dumps(self._secrets), - sha256).hexdigest(), - } - return data - - def import_recovery_document(self, data): - """ - Import storage secrets for symmetric encryption and uuid (if present) - from a recovery document. - - Note that this method does not store the imported data on disk. For - that, use C{self._store_secrets()}. - - :param data: The recovery document. - :type data: dict - - :return: A tuple containing the number of imported secrets and whether - there was MAC informationa available for authenticating. - :rtype: (int, bool) - """ - soledad_assert(self.STORAGE_SECRETS_KEY in data) - # check mac of the recovery document - mac = None - if MAC_KEY in data: - soledad_assert(data[MAC_KEY] is not None) - soledad_assert(MAC_METHOD_KEY in data) - soledad_assert(self.KDF_KEY in data) - soledad_assert(self.KDF_SALT_KEY in data) - soledad_assert(self.KDF_LENGTH_KEY in data) - if data[MAC_METHOD_KEY] == MacMethods.HMAC: - key = scrypt.hash( - self._passphrase_as_string(), - binascii.a2b_base64(data[self.KDF_SALT_KEY]), - buflen=32) - mac = hmac.new( - key, - json.dumps(data[self.STORAGE_SECRETS_KEY]), - sha256).hexdigest() - else: - raise UnknownMacMethod('Unknown MAC method: %s.' % - data[MAC_METHOD_KEY]) - if mac != data[MAC_KEY]: - raise WrongMac('Could not authenticate recovery document\'s ' - 'contents.') - # include secrets in the secret pool. 
- secrets = 0 - for secret_id, secret_data in data[self.STORAGE_SECRETS_KEY].items(): - if secret_id not in self._secrets: - secrets += 1 - self._secrets[secret_id] = secret_data - return secrets, mac - # # Setters/getters # @@ -1259,18 +707,26 @@ class Soledad(object): uuid = property(_get_uuid, doc='The user uuid.') - def _get_secret_id(self): - return self._secret_id + def get_secret_id(self): + return self._secrets.secret_id + + def set_secret_id(self, secret_id): + self._secrets.set_secret_id(secret_id) secret_id = property( - _get_secret_id, + get_secret_id, + set_secret_id, doc='The active secret id.') + def _set_secrets_path(self, secrets_path): + self._secrets.secrets_path = secrets_path + def _get_secrets_path(self): - return self._secrets_path + return self._secrets.secrets_path secrets_path = property( _get_secrets_path, + _set_secrets_path, doc='The path for the file containing the encrypted symmetric secret.') def _get_local_db_path(self): @@ -1287,20 +743,31 @@ class Soledad(object): _get_server_url, doc='The URL of the Soledad server.') - storage_secret = property( - _get_storage_secret, - doc='The secret used for symmetric encryption.') + @property + def storage_secret(self): + """ + Return the secret used for symmetric encryption. + """ + return self._secrets.storage_secret + + @property + def secrets(self): + return self._secrets - def _get_passphrase(self): - return self._passphrase + @property + def passphrase(self): + return self._secrets.passphrase - passphrase = property( - _get_passphrase, - doc='The passphrase for locking and unlocking encryption secrets for ' - 'local and remote storage.') + def change_passphrase(self, new_passphrase): + """ + Change the passphrase that encrypts the storage secret. - def _passphrase_as_string(self): - return self._passphrase.encode('utf-8') + :param new_passphrase: The new passphrase. + :type new_passphrase: unicode + + :raise NoStorageSecret: Raised if there's no storage secret available. 
+ """ + self._secrets.change_passphrase(new_passphrase) # ---------------------------------------------------------------------------- diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index eb5a4f64..4a64b5a8 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -242,7 +242,7 @@ class SoledadCrypto(object): return hmac.new( self.secret[ MAC_KEY_LENGTH: - self._soledad.REMOTE_STORAGE_SECRET_LENGTH], + self._soledad.secrets.REMOTE_STORAGE_SECRET_LENGTH], doc_id, hashlib.sha256).digest() @@ -819,7 +819,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): try: content = json.loads(content) except TypeError: - logger.warning("Wrong type while decoding json: %s" % repr(docstr)) + logger.warning("Wrong type while decoding json: %s" + % repr(content)) return key = self._crypto.doc_passphrase(doc_id) @@ -884,11 +885,15 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): all_docs = self.get_docs_by_generation() decrypted_docs = self.get_docs_by_generation(encrypted=False) insertable = [] - for doc_id, rev, content, gen, trans_id, encrypted in all_docs: - next_decrypted = decrypted_docs.next() - if doc_id == next_decrypted[0]: - insertable.append((doc_id, rev, content, gen, trans_id)) - else: + for doc_id, rev, _, gen, trans_id, encrypted in all_docs: + try: + next_decrypted = decrypted_docs.next() + if doc_id == next_decrypted[0]: + content = next_decrypted[2] + insertable.append((doc_id, rev, content, gen, trans_id)) + else: + break + except StopIteration: break return insertable @@ -966,7 +971,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ # could pass source_replica in params for callback chain insert_fun = self._insert_doc_cb[self.source_replica_uid] - logger.debug("Sync decrypter pool: inserting doc in local db: " \ + logger.debug("Sync decrypter pool: inserting doc in local db: " "%s:%s %s" % (doc_id, doc_rev, gen)) try: # convert deleted documents to avoid 
error on document creation diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py new file mode 100644 index 00000000..3c6fc569 --- /dev/null +++ b/client/src/leap/soledad/client/secrets.py @@ -0,0 +1,681 @@ +# -*- coding: utf-8 -*- +# secrets.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +Soledad secrets handling. +""" + + +import os +import scrypt +import hmac +import logging +import binascii +import errno + + +from hashlib import sha256 +import simplejson as json + + +from leap.soledad.common import ( + soledad_assert, + soledad_assert_type +) +from leap.soledad.common.document import SoledadDocument +from leap.soledad.common.crypto import ( + MacMethods, + UnknownMacMethod, + WrongMac, + MAC_KEY, + MAC_METHOD_KEY, +) +from leap.soledad.common.errors import ( + InvalidTokenError, + NotLockedError, + AlreadyLockedError, + LockTimedOutError, +) +from leap.soledad.client.events import ( + SOLEDAD_CREATING_KEYS, + SOLEDAD_DONE_CREATING_KEYS, + SOLEDAD_DOWNLOADING_KEYS, + SOLEDAD_DONE_DOWNLOADING_KEYS, + SOLEDAD_UPLOADING_KEYS, + SOLEDAD_DONE_UPLOADING_KEYS, + signal, +) + + +logger = logging.getLogger(name=__name__) + + +# +# Exceptions +# + +class NoStorageSecret(Exception): + """ + Raised when trying to use a storage secret but none is available. 
+ """ + pass + + +class PassphraseTooShort(Exception): + """ + Raised when trying to change the passphrase but the provided passphrase is + too short. + """ + + +class BootstrapSequenceError(Exception): + """ + Raised when an attempt to generate a secret and store it in a recovery + document on server failed. + """ + + +# +# Secrets handler +# + +class SoledadSecrets(object): + """ + Soledad secrets handler. + + The first C{self.REMOTE_STORAGE_SECRET_LENGTH} bytes of the storage + secret are used for remote storage encryption. We use the next + C{self.LOCAL_STORAGE_SECRET} bytes to derive a key for local storage. + From these bytes, the first C{self.SALT_LENGTH} bytes are used as the + salt and the rest as the password for the scrypt hashing. + """ + + LOCAL_STORAGE_SECRET_LENGTH = 512 + """ + The length of the secret used to derive a passphrase for the SQLCipher + database. + """ + + REMOTE_STORAGE_SECRET_LENGTH = 512 + """ + The length of the secret used to derive an encryption key and a MAC auth + key for remote storage. + """ + + SALT_LENGTH = 64 + """ + The length of the salt used to derive the key for the storage secret + encryption. + """ + + MINIMUM_PASSPHRASE_LENGTH = 6 + """ + The minimum length for a passphrase. The passphrase length is only checked + when the user changes her passphrase, not when she instantiates Soledad. + """ + + IV_SEPARATOR = ":" + """ + A separator used for storing the encryption initial value prepended to the + ciphertext. + """ + + UUID_KEY = 'uuid' + STORAGE_SECRETS_KEY = 'storage_secrets' + SECRET_KEY = 'secret' + CIPHER_KEY = 'cipher' + LENGTH_KEY = 'length' + KDF_KEY = 'kdf' + KDF_SALT_KEY = 'kdf_salt' + KDF_LENGTH_KEY = 'kdf_length' + KDF_SCRYPT = 'scrypt' + CIPHER_AES256 = 'aes256' + """ + Keys used to access storage secrets in recovery documents. + """ + + def __init__(self, uuid, passphrase, secrets_path, shared_db, crypto, + secret_id=None): + """ + Initialize the secrets manager. + + :param uuid: User's unique id. 
+ :type uuid: str + :param passphrase: The passphrase for locking and unlocking encryption + secrets for local and remote storage. + :type passphrase: unicode + :param secrets_path: Path for storing encrypted key used for + symmetric encryption. + :type secrets_path: str + :param shared_db: The shared database that stores user secrets. + :type shared_db: leap.soledad.client.shared_db.SoledadSharedDatabase + :param crypto: A soledad crypto object. + :type crypto: SoledadCrypto + :param secret_id: The id of the storage secret to be used. + :type secret_id: str + """ + self._uuid = uuid + self._passphrase = passphrase + self._secrets_path = secrets_path + self._shared_db = shared_db + self._crypto = crypto + self._secret_id = secret_id + self._secrets = {} + + def bootstrap(self): + """ + Bootstrap secrets. + + Soledad secrets bootstrap is the following sequence of stages: + + * stage 1 - local secret loading: + - if secrets exist locally, load them. + * stage 2 - remote secret loading: + - else, if secrets exist in server, download them. + * stage 3 - secret generation: + - else, generate a new secret and store in server. + + This method decides which bootstrap stages have already been performed + and performs the missing ones in order. + + :raise BootstrapSequenceError: Raised when the secret generation and + storage on server sequence has failed for some reason. + """ + # STAGE 1 - verify if secrets exist locally + if not self._has_secret(): # try to load from local storage. + + # STAGE 2 - there are no secrets in local storage, so try to fetch + # encrypted secrets from server. 
+ logger.info( + 'Trying to fetch cryptographic secrets from shared recovery ' + 'database...') + + # --- start of atomic operation in shared db --- + + # obtain lock on shared db + token = timeout = None + try: + token, timeout = self._shared_db.lock() + except AlreadyLockedError: + raise BootstrapSequenceError('Database is already locked.') + except LockTimedOutError: + raise BootstrapSequenceError('Lock operation timed out.') + + self._get_or_gen_crypto_secrets() + + # release the lock on shared db + try: + self._shared_db.unlock(token) + self._shared_db.close() + except NotLockedError: + # for some reason the lock expired. Despite that, secret + # loading or generation/storage must have been executed + # successfully, so we pass. + pass + except InvalidTokenError: + # here, our lock has not only expired but also some other + # client application has obtained a new lock and is currently + # doing its thing in the shared database. Using the same + # reasoning as above, we assume everything went smooth and + # pass. + pass + except Exception as e: + logger.error("Unhandled exception when unlocking shared " + "database.") + logger.exception(e) + + # --- end of atomic operation in shared db --- + + def _has_secret(self): + """ + Return whether there is a storage secret available for use or not. + + :return: Whether there's a storage secret for symmetric encryption. + :rtype: bool + """ + if self._secret_id is None or self._secret_id not in self._secrets: + try: + self._load_secrets() # try to load from disk + except IOError as e: + logger.warning('IOError: %s' % str(e)) + try: + self.storage_secret + return True + except Exception as e: + logger.warning("Couldn't load storage secret: %s" % str(e)) + return False + + def _load_secrets(self): + """ + Load storage secrets from local file. + """ + # does the file exist in disk? 
+ if not os.path.isfile(self._secrets_path): + raise IOError('File does not exist: %s' % self._secrets_path) + # read storage secrets from file + content = None + with open(self._secrets_path, 'r') as f: + content = json.loads(f.read()) + _, mac = self._import_recovery_document(content) + if mac is False: + self._store_secrets() + self._put_secrets_in_shared_db() + # choose first secret if no secret_id was given + if self._secret_id is None: + self.set_secret_id(self._secrets.items()[0][0]) + + def _get_or_gen_crypto_secrets(self): + """ + Retrieves or generates the crypto secrets. + + :raises BootstrapSequenceError: Raised when unable to store secrets in + shared database. + """ + doc = self._get_secrets_from_shared_db() + + if doc: + logger.info( + 'Found cryptographic secrets in shared recovery ' + 'database.') + _, mac = self._import_recovery_document(doc.content) + if mac is False: + self.put_secrets_in_shared_db() + self._store_secrets() # save new secrets in local file + if self._secret_id is None: + self.set_secret_id(self._secrets.items()[0][0]) + else: + # STAGE 3 - there are no secrets in server also, so + # generate a secret and store it in remote db. + logger.info( + 'No cryptographic secrets found, creating new ' + ' secrets...') + self.set_secret_id(self._gen_secret()) + try: + self._put_secrets_in_shared_db() + except Exception as ex: + # storing generated secret in shared db failed for + # some reason, so we erase the generated secret and + # raise. + try: + os.unlink(self._secrets_path) + except OSError as e: + if e.errno != errno.ENOENT: # no such file or directory + logger.exception(e) + logger.exception(ex) + raise BootstrapSequenceError( + 'Could not store generated secret in the shared ' + 'database, bailing out...') + + # + # Shared DB related methods + # + + def _shared_db_doc_id(self): + """ + Calculate the doc_id of the document in the shared db that stores key + material. 
+ + :return: the hash + :rtype: str + """ + return sha256( + '%s%s' % + (self._passphrase_as_string(), self._uuid)).hexdigest() + + def _export_recovery_document(self): + """ + Export the storage secrets. + + A recovery document has the following structure: + + { + 'storage_secrets': { + '': { + 'kdf': 'scrypt', + 'kdf_salt': '' + 'kdf_length': + 'cipher': 'aes256', + 'length': , + 'secret': '', + }, + }, + 'kdf': 'scrypt', + 'kdf_salt': '', + 'kdf_length: , + '_mac_method': 'hmac', + '_mac': '' + } + + Note that multiple storage secrets might be stored in one recovery + document. This method will also calculate a MAC of a string + representation of the secrets dictionary. + + :return: The recovery document. + :rtype: dict + """ + # create salt and key for calculating MAC + salt = os.urandom(self.SALT_LENGTH) + key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) + data = { + self.STORAGE_SECRETS_KEY: self._secrets, + self.KDF_KEY: self.KDF_SCRYPT, + self.KDF_SALT_KEY: binascii.b2a_base64(salt), + self.KDF_LENGTH_KEY: len(key), + MAC_METHOD_KEY: MacMethods.HMAC, + MAC_KEY: hmac.new( + key, + json.dumps(self._secrets), + sha256).hexdigest(), + } + return data + + def _import_recovery_document(self, data): + """ + Import storage secrets for symmetric encryption and uuid (if present) + from a recovery document. + + Note that this method does not store the imported data on disk. For + that, use C{self._store_secrets()}. + + :param data: The recovery document. + :type data: dict + + :return: A tuple containing the number of imported secrets and whether + there was MAC informationa available for authenticating. 
+ :rtype: (int, bool) + """ + soledad_assert(self.STORAGE_SECRETS_KEY in data) + # check mac of the recovery document + mac = None + if MAC_KEY in data: + soledad_assert(data[MAC_KEY] is not None) + soledad_assert(MAC_METHOD_KEY in data) + soledad_assert(self.KDF_KEY in data) + soledad_assert(self.KDF_SALT_KEY in data) + soledad_assert(self.KDF_LENGTH_KEY in data) + if data[MAC_METHOD_KEY] == MacMethods.HMAC: + key = scrypt.hash( + self._passphrase_as_string(), + binascii.a2b_base64(data[self.KDF_SALT_KEY]), + buflen=32) + mac = hmac.new( + key, + json.dumps(data[self.STORAGE_SECRETS_KEY]), + sha256).hexdigest() + else: + raise UnknownMacMethod('Unknown MAC method: %s.' % + data[MAC_METHOD_KEY]) + if mac != data[MAC_KEY]: + raise WrongMac('Could not authenticate recovery document\'s ' + 'contents.') + # include secrets in the secret pool. + secrets = 0 + for secret_id, secret_data in data[self.STORAGE_SECRETS_KEY].items(): + if secret_id not in self._secrets: + secrets += 1 + self._secrets[secret_id] = secret_data + return secrets, mac + + def _get_secrets_from_shared_db(self): + """ + Retrieve the document with encrypted key material from the shared + database. + + :return: a document with encrypted key material in its contents + :rtype: SoledadDocument + """ + signal(SOLEDAD_DOWNLOADING_KEYS, self._uuid) + db = self._shared_db + if not db: + logger.warning('No shared db found') + return + doc = db.get_doc(self._shared_db_doc_id()) + signal(SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid) + return doc + + def _put_secrets_in_shared_db(self): + """ + Assert local keys are the same as shared db's ones. + + Try to fetch keys from shared recovery database. If they already exist + in the remote db, assert that that data is the same as local data. + Otherwise, upload keys to shared recovery database. 
+ """ + soledad_assert( + self._has_secret(), + 'Tried to send keys to server but they don\'t exist in local ' + 'storage.') + # try to get secrets doc from server, otherwise create it + doc = self._get_secrets_from_shared_db() + if doc is None: + doc = SoledadDocument( + doc_id=self._shared_db_doc_id()) + # fill doc with encrypted secrets + doc.content = self._export_recovery_document() + # upload secrets to server + signal(SOLEDAD_UPLOADING_KEYS, self._uuid) + db = self._shared_db + if not db: + logger.warning('No shared db found') + return + db.put_doc(doc) + signal(SOLEDAD_DONE_UPLOADING_KEYS, self._uuid) + + # + # Management of secret for symmetric encryption. + # + + @property + def storage_secret(self): + """ + Return the storage secret. + + Storage secret is encrypted before being stored. This method decrypts + and returns the stored secret. + + :return: The storage secret. + :rtype: str + """ + # calculate the encryption key + key = scrypt.hash( + self._passphrase_as_string(), + # the salt is stored base64 encoded + binascii.a2b_base64( + self._secrets[self._secret_id][self.KDF_SALT_KEY]), + buflen=32, # we need a key with 256 bits (32 bytes). + ) + # recover the initial value and ciphertext + iv, ciphertext = self._secrets[self._secret_id][self.SECRET_KEY].split( + self.IV_SEPARATOR, 1) + ciphertext = binascii.a2b_base64(ciphertext) + return self._crypto.decrypt_sym(ciphertext, key, iv=iv) + + def set_secret_id(self, secret_id): + """ + Define the id of the storage secret to be used. + + This method will also replace the secret in the crypto object. + + :param secret_id: The id of the storage secret to be used. + :type secret_id: str + """ + self._secret_id = secret_id + + def _gen_secret(self): + """ + Generate a secret for symmetric encryption and store in a local + encrypted file. 
+ + This method emits the following signals: + + * SOLEDAD_CREATING_KEYS + * SOLEDAD_DONE_CREATING_KEYS + + A secret has the following structure: + + { + '': { + 'kdf': 'scrypt', + 'kdf_salt': '' + 'kdf_length': + 'cipher': 'aes256', + 'length': , + 'secret': '', + } + } + + :return: The id of the generated secret. + :rtype: str + """ + signal(SOLEDAD_CREATING_KEYS, self._uuid) + # generate random secret + secret = os.urandom( + self.LOCAL_STORAGE_SECRET_LENGTH + + self.REMOTE_STORAGE_SECRET_LENGTH) + secret_id = sha256(secret).hexdigest() + # generate random salt + salt = os.urandom(self.SALT_LENGTH) + # get a 256-bit key + key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) + iv, ciphertext = self._crypto.encrypt_sym(secret, key) + self._secrets[secret_id] = { + # leap.soledad.crypto submodule uses AES256 for symmetric + # encryption. + self.KDF_KEY: self.KDF_SCRYPT, + self.KDF_SALT_KEY: binascii.b2a_base64(salt), + self.KDF_LENGTH_KEY: len(key), + self.CIPHER_KEY: self.CIPHER_AES256, + self.LENGTH_KEY: len(secret), + self.SECRET_KEY: '%s%s%s' % ( + str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), + } + self._store_secrets() + signal(SOLEDAD_DONE_CREATING_KEYS, self._uuid) + return secret_id + + def _store_secrets(self): + """ + Store secrets in C{Soledad.STORAGE_SECRETS_FILE_PATH}. + """ + with open(self._secrets_path, 'w') as f: + f.write( + json.dumps( + self._export_recovery_document())) + + def change_passphrase(self, new_passphrase): + """ + Change the passphrase that encrypts the storage secret. + + :param new_passphrase: The new passphrase. + :type new_passphrase: unicode + + :raise NoStorageSecret: Raised if there's no storage secret available. + """ + # TODO: maybe we want to add more checks to guarantee passphrase is + # reasonable? + soledad_assert_type(new_passphrase, unicode) + if len(new_passphrase) < self.MINIMUM_PASSPHRASE_LENGTH: + raise PassphraseTooShort( + 'Passphrase must be at least %d characters long!' 
% + self.MINIMUM_PASSPHRASE_LENGTH) + # ensure there's a secret for which the passphrase will be changed. + if not self._has_secret(): + raise NoStorageSecret() + secret = self.storage_secret + # generate random salt + new_salt = os.urandom(self.SALT_LENGTH) + # get a 256-bit key + key = scrypt.hash(new_passphrase.encode('utf-8'), new_salt, buflen=32) + iv, ciphertext = self._crypto.encrypt_sym(secret, key) + # XXX update all secrets in the dict + self._secrets[self._secret_id] = { + # leap.soledad.crypto submodule uses AES256 for symmetric + # encryption. + self.KDF_KEY: self.KDF_SCRYPT, # TODO: remove hard coded kdf + self.KDF_SALT_KEY: binascii.b2a_base64(new_salt), + self.KDF_LENGTH_KEY: len(key), + self.CIPHER_KEY: self.CIPHER_AES256, + self.LENGTH_KEY: len(secret), + self.SECRET_KEY: '%s%s%s' % ( + str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), + } + self._passphrase = new_passphrase + self._store_secrets() + self._put_secrets_in_shared_db() + + # + # Setters and getters + # + + @property + def secret_id(self): + return self._secret_id + + def _get_secrets_path(self): + return self._secrets_path + + def _set_secrets_path(self, secrets_path): + self._secrets_path = secrets_path + + secrets_path = property( + _get_secrets_path, + _set_secrets_path, + doc='The path for the file containing the encrypted symmetric secret.') + + @property + def passphrase(self): + """ + Return the passphrase for locking and unlocking encryption secrets for + local and remote storage. + """ + return self._passphrase + + def _passphrase_as_string(self): + return self._passphrase.encode('utf-8') + + def get_syncdb_secret(self): + """ + Return the secret for sync db. + """ + # TODO: implement. + pass + + def get_remote_secret(self): + """ + Return the secret for remote storage. + """ + # TODO: implement + pass + + def get_local_storage_key(self): + """ + Return the local storage key derived from the local storage secret. 
+ """ + # salt indexes + salt_start = self.REMOTE_STORAGE_SECRET_LENGTH + salt_end = salt_start + self.SALT_LENGTH + # password indexes + pwd_start = salt_end + pwd_end = salt_start + self.LOCAL_STORAGE_SECRET_LENGTH + # calculate the key for local encryption + secret = self.storage_secret + return scrypt.hash( + secret[pwd_start:pwd_end], # the password + secret[salt_start:salt_end], # the salt + buflen=32, # we need a key with 256 bits (32 bytes) + ) -- cgit v1.2.3 From 21a3f854c07c1d40d50da8c922e956d3247a08b2 Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 1 Aug 2014 12:53:05 -0300 Subject: Fix tests after many changes in client. --- client/src/leap/soledad/client/sync.py | 2 -- client/src/leap/soledad/client/target.py | 34 +++++++++++++++----------------- 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 5d545a77..c158f2a7 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -29,8 +29,6 @@ Extend u1db Synchronizer with the ability to: """ -import json - import logging import traceback from threading import Lock diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 01e1231a..12175f48 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -28,12 +28,10 @@ import logging import re import urllib import threading -import urlparse from collections import defaultdict from time import sleep from uuid import uuid4 -from contextlib import contextmanager import simplejson as json from taskthread import TimerTask @@ -44,7 +42,6 @@ from u1db.remote.http_client import _encode_query_parameter, HTTPClientBase from zope.proxy import ProxyBase from zope.proxy import sameProxiedObjects, setProxiedObject -from leap.soledad.common import soledad_assert from leap.soledad.common.document import SoledadDocument 
from leap.soledad.client.auth import TokenBasedAuth from leap.soledad.client.crypto import is_symmetrically_encrypted @@ -87,7 +84,7 @@ class DocumentSyncerThread(threading.Thread): """ def __init__(self, doc_syncer, release_method, failed_method, - idx, total, last_request_lock=None, last_callback_lock=None): + idx, total, last_request_lock=None, last_callback_lock=None): """ Initialize a new syncer thread. @@ -246,7 +243,7 @@ class DocumentSyncerPool(object): """ def __init__(self, raw_url, raw_creds, query_string, headers, - ensure_callback, stop_method): + ensure_callback, stop_method): """ Initialize the document syncer pool. @@ -279,7 +276,7 @@ class DocumentSyncerPool(object): self._threads = [] def new_syncer_thread(self, idx, total, last_request_lock=None, - last_callback_lock=None): + last_callback_lock=None): """ Yield a new document syncer thread. @@ -619,7 +616,7 @@ class HTTPDocumentSyncer(HTTPClientBase, TokenBasedAuth): self._conn.endheaders() def _get_doc(self, received, sync_id, last_known_generation, - last_known_trans_id): + last_known_trans_id): """ Get a sync document from server by means of a POST request. @@ -658,7 +655,7 @@ class HTTPDocumentSyncer(HTTPClientBase, TokenBasedAuth): return self._response() def _put_doc(self, sync_id, last_known_generation, last_known_trans_id, - id, rev, content, gen, trans_id, number_of_docs, doc_idx): + id, rev, content, gen, trans_id, number_of_docs, doc_idx): """ Put a sync document on server by means of a POST request. @@ -765,7 +762,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # def __init__(self, url, source_replica_uid=None, creds=None, crypto=None, - sync_db=None, sync_db_write_lock=None): + sync_db=None, sync_db_write_lock=None): """ Initialize the SoledadSyncTarget. 
@@ -925,7 +922,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): """ new_generation, new_transaction_id, number_of_changes, doc_id, \ rev, content, gen, trans_id = \ - self._parse_received_doc_response(response) + self._parse_received_doc_response(response) if doc_id is not None: # decrypt incoming document and insert into local database # ------------------------------------------------------------- @@ -1134,11 +1131,14 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): """ self._ensure_callback = ensure_callback - if defer_decryption: + if defer_decryption and self._sync_db is not None: self._sync_exchange_lock.acquire() self._setup_sync_decr_pool(last_known_generation) self._setup_sync_watcher() self._defer_decryption = True + else: + # fall back + defer_decryption = False self.start() @@ -1149,7 +1149,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): setProxiedObject(self._insert_doc_cb[source_replica_uid], return_doc_cb) - if not self.clear_to_sync(): + if defer_decryption is True and not self.clear_to_sync(): raise PendingReceivedDocsSyncError self._ensure_connection() @@ -1171,7 +1171,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._raw_url, self._raw_creds, url, headers, ensure_callback, self.stop) threads = [] - last_request_lock = None last_callback_lock = None sent = 0 total = len(docs_by_generations) @@ -1227,7 +1226,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): t.doc_syncer.set_request_method( 'put', sync_id, cur_target_gen, cur_target_trans_id, id=doc.doc_id, rev=doc.rev, content=doc_json, gen=gen, - trans_id=trans_id, number_of_docs=number_of_docs, doc_idx=sent + 1) + trans_id=trans_id, number_of_docs=number_of_docs, + doc_idx=sent + 1) # set the success calback def _success_callback(idx, total, response): @@ -1251,7 +1251,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # save thread and append t.start() threads.append((t, doc)) - last_request_lock = t.request_lock 
last_callback_lock = t.callback_lock sent += 1 @@ -1275,7 +1274,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): if last_successful_thread is not None: response_dict = json.loads(last_successful_thread.response[0])[0] gen_after_send = response_dict['new_generation'] - trans_id_after_send = response_dict['new_transaction_id'] + trans_id_after_send = response_dict['new_transaction_id'] # get docs from target if self.stopped is False: @@ -1356,7 +1355,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # no doc found return None - def delete_encrypted_docs_from_db(self, docs_ids): """ Delete several encrypted documents from the database of symmetrically @@ -1467,7 +1465,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): decrypter = self._sync_decr_pool decrypter.decrypt_received_docs() - done = decrypter.process_decrypted() + decrypter.process_decrypted() def _sign_request(self, method, url_query, params): """ -- cgit v1.2.3 From bb4ef28014b7846df8982f0008635f4d05b5a0b8 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 4 Aug 2014 10:34:20 -0300 Subject: Add instructions for closing SQLCipher db on docstrings. --- client/src/leap/soledad/client/sqlcipher.py | 37 ++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 85b0391b..7823e235 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -92,7 +92,16 @@ SQLITE_ISOLATION_LEVEL = None def open(path, password, create=True, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, defer_encryption=False): - """Open a database at the given location. + """ + Open a database at the given location. 
+ + *** IMPORTANT *** + + Don't forget to close the database after use by calling the close() + method otherwise some resources might not be freed and you may experience + several kinds of leakages. + + *** IMPORTANT *** Will raise u1db.errors.DatabaseDoesNotExist if create=False and the database does not already exist. @@ -195,6 +204,14 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): Connect to an existing SQLCipher database, creating a new sqlcipher database file if needed. + *** IMPORTANT *** + + Don't forget to close the database after use by calling the close() + method otherwise some resources might not be freed and you may + experience several kinds of leakages. + + *** IMPORTANT *** + :param sqlcipher_file: The path for the SQLCipher file. :type sqlcipher_file: str :param password: The password that protects the SQLCipher db. @@ -356,6 +373,14 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ Open a SQLCipher database. + *** IMPORTANT *** + + Don't forget to close the database after use by calling the close() + method otherwise some resources might not be freed and you may + experience several kinds of leakages. + + *** IMPORTANT *** + :param sqlcipher_file: The path for the SQLCipher file. :type sqlcipher_file: str @@ -1097,6 +1122,16 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): del self.sync_queue self.sync_queue = None + def __del__(self): + """ + Free resources when deleting or garbage collecting the database. + + This is only here to minimze problems if someone ever forgets to call + the close() method after using the database; you should not rely on + garbage collecting to free up the database resources. + """ + self.close() + @property def replica_uid(self): return self._get_replica_uid() -- cgit v1.2.3 From 9f455ab44d8f229840a5c6a75e0e7b6a88b04f57 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 4 Aug 2014 11:40:37 -0300 Subject: Store decrypted storage secret in memory. 
--- client/src/leap/soledad/client/secrets.py | 182 +++++++++++++++++------------- 1 file changed, 105 insertions(+), 77 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 3c6fc569..621e2d99 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -252,13 +252,9 @@ class SoledadSecrets(object): try: self._load_secrets() # try to load from disk except IOError as e: - logger.warning('IOError: %s' % str(e)) - try: - self.storage_secret - return True - except Exception as e: - logger.warning("Couldn't load storage secret: %s" % str(e)) - return False + logger.warning('IOError while loading secrets from disk: %s' % str(e)) + return False + return self.storage_secret is not None def _load_secrets(self): """ @@ -371,15 +367,21 @@ class SoledadSecrets(object): # create salt and key for calculating MAC salt = os.urandom(self.SALT_LENGTH) key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) + # encrypt secrets + encrypted_secrets = {} + for secret_id in self._secrets: + encrypted_secrets[secret_id] = self._encrypt_storage_secret( + self._secrets[secret_id]) + # create the recovery document data = { - self.STORAGE_SECRETS_KEY: self._secrets, + self.STORAGE_SECRETS_KEY: encrypted_secrets, self.KDF_KEY: self.KDF_SCRYPT, self.KDF_SALT_KEY: binascii.b2a_base64(salt), self.KDF_LENGTH_KEY: len(key), MAC_METHOD_KEY: MacMethods.HMAC, MAC_KEY: hmac.new( key, - json.dumps(self._secrets), + json.dumps(encrypted_secrets), sha256).hexdigest(), } return data @@ -425,10 +427,11 @@ class SoledadSecrets(object): 'contents.') # include secrets in the secret pool. 
secrets = 0 - for secret_id, secret_data in data[self.STORAGE_SECRETS_KEY].items(): + for secret_id, encrypted_secret in data[self.STORAGE_SECRETS_KEY].items(): if secret_id not in self._secrets: secrets += 1 - self._secrets[secret_id] = secret_data + self._secrets[secret_id] = \ + self._decrypt_storage_secret(encrypted_secret) return secrets, mac def _get_secrets_from_shared_db(self): @@ -480,30 +483,92 @@ class SoledadSecrets(object): # Management of secret for symmetric encryption. # - @property - def storage_secret(self): + def _decrypt_storage_secret(self, encrypted_secret_dict): """ - Return the storage secret. + Decrypt the storage secret. Storage secret is encrypted before being stored. This method decrypts - and returns the stored secret. + and returns the decrypted storage secret. - :return: The storage secret. + :param encrypted_secret_dict: The encrypted storage secret. + :type encrypted_secret_dict: dict + + :return: The decrypted storage secret. :rtype: str """ # calculate the encryption key + if encrypted_secret_dict[self.KDF_KEY] != self.KDF_SCRYPT: + raise Exception("Unknown KDF in stored secret.") key = scrypt.hash( self._passphrase_as_string(), # the salt is stored base64 encoded binascii.a2b_base64( - self._secrets[self._secret_id][self.KDF_SALT_KEY]), + encrypted_secret_dict[self.KDF_SALT_KEY]), buflen=32, # we need a key with 256 bits (32 bytes). 
) + if encrypted_secret_dict[self.KDF_LENGTH_KEY] != len(key): + raise Exception("Wrong length of decryption key.") + if encrypted_secret_dict[self.CIPHER_KEY] != self.CIPHER_AES256: + raise Exception("Unknown cipher in stored secret.") # recover the initial value and ciphertext - iv, ciphertext = self._secrets[self._secret_id][self.SECRET_KEY].split( + iv, ciphertext = encrypted_secret_dict[self.SECRET_KEY].split( self.IV_SEPARATOR, 1) ciphertext = binascii.a2b_base64(ciphertext) - return self._crypto.decrypt_sym(ciphertext, key, iv=iv) + decrypted_secret = self._crypto.decrypt_sym(ciphertext, key, iv=iv) + if encrypted_secret_dict[self.LENGTH_KEY] != len(decrypted_secret): + raise Exception("Wrong length of decrypted secret.") + return decrypted_secret + + def _encrypt_storage_secret(self, decrypted_secret): + """ + Encrypt the storage secret. + + An encrypted secret has the following structure: + + { + '': { + 'kdf': 'scrypt', + 'kdf_salt': '' + 'kdf_length': + 'cipher': 'aes256', + 'length': , + 'secret': '', + } + } + + :param decrypted_secret: The decrypted storage secret. + :type decrypted_secret: str + + :return: The encrypted storage secret. + :rtype: dict + """ + # generate random salt + salt = os.urandom(self.SALT_LENGTH) + # get a 256-bit key + key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) + iv, ciphertext = self._crypto.encrypt_sym(decrypted_secret, key) + encrypted_secret_dict = { + # leap.soledad.crypto submodule uses AES256 for symmetric + # encryption. + self.KDF_KEY: self.KDF_SCRYPT, + self.KDF_SALT_KEY: binascii.b2a_base64(salt), + self.KDF_LENGTH_KEY: len(key), + self.CIPHER_KEY: self.CIPHER_AES256, + self.LENGTH_KEY: len(decrypted_secret), + self.SECRET_KEY: '%s%s%s' % ( + str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), + } + return encrypted_secret_dict + + @property + def storage_secret(self): + """ + Return the storage secret. + + :return: The decrypted storage secret. 
+ :rtype: str + """ + return self._secrets.get(self._secret_id) def set_secret_id(self, secret_id): """ @@ -526,19 +591,6 @@ class SoledadSecrets(object): * SOLEDAD_CREATING_KEYS * SOLEDAD_DONE_CREATING_KEYS - A secret has the following structure: - - { - '': { - 'kdf': 'scrypt', - 'kdf_salt': '' - 'kdf_length': - 'cipher': 'aes256', - 'length': , - 'secret': '', - } - } - :return: The id of the generated secret. :rtype: str """ @@ -548,22 +600,7 @@ class SoledadSecrets(object): self.LOCAL_STORAGE_SECRET_LENGTH + self.REMOTE_STORAGE_SECRET_LENGTH) secret_id = sha256(secret).hexdigest() - # generate random salt - salt = os.urandom(self.SALT_LENGTH) - # get a 256-bit key - key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) - iv, ciphertext = self._crypto.encrypt_sym(secret, key) - self._secrets[secret_id] = { - # leap.soledad.crypto submodule uses AES256 for symmetric - # encryption. - self.KDF_KEY: self.KDF_SCRYPT, - self.KDF_SALT_KEY: binascii.b2a_base64(salt), - self.KDF_LENGTH_KEY: len(key), - self.CIPHER_KEY: self.CIPHER_AES256, - self.LENGTH_KEY: len(secret), - self.SECRET_KEY: '%s%s%s' % ( - str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), - } + self._secrets[secret_id] = secret self._store_secrets() signal(SOLEDAD_DONE_CREATING_KEYS, self._uuid) return secret_id @@ -596,24 +633,6 @@ class SoledadSecrets(object): # ensure there's a secret for which the passphrase will be changed. if not self._has_secret(): raise NoStorageSecret() - secret = self.storage_secret - # generate random salt - new_salt = os.urandom(self.SALT_LENGTH) - # get a 256-bit key - key = scrypt.hash(new_passphrase.encode('utf-8'), new_salt, buflen=32) - iv, ciphertext = self._crypto.encrypt_sym(secret, key) - # XXX update all secrets in the dict - self._secrets[self._secret_id] = { - # leap.soledad.crypto submodule uses AES256 for symmetric - # encryption. 
- self.KDF_KEY: self.KDF_SCRYPT, # TODO: remove hard coded kdf - self.KDF_SALT_KEY: binascii.b2a_base64(new_salt), - self.KDF_LENGTH_KEY: len(key), - self.CIPHER_KEY: self.CIPHER_AES256, - self.LENGTH_KEY: len(secret), - self.SECRET_KEY: '%s%s%s' % ( - str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), - } self._passphrase = new_passphrase self._store_secrets() self._put_secrets_in_shared_db() @@ -655,27 +674,36 @@ class SoledadSecrets(object): # TODO: implement. pass - def get_remote_secret(self): + def _get_remote_storage_secret(self): """ Return the secret for remote storage. """ # TODO: implement pass - def get_local_storage_key(self): + + def _get_local_storage_secret(self): """ - Return the local storage key derived from the local storage secret. + Return the local storage secret. + """ + pwd_start = self.REMOTE_STORAGE_SECRET_LENGTH + self.SALT_LENGTH + pwd_end = self.REMOTE_STORAGE_SECRET_LENGTH + self.LOCAL_STORAGE_SECRET_LENGTH + return self.storage_secret[pwd_start:pwd_end] + + def _get_local_storage_salt(self): + """ + Return the local storage salt. """ - # salt indexes salt_start = self.REMOTE_STORAGE_SECRET_LENGTH salt_end = salt_start + self.SALT_LENGTH - # password indexes - pwd_start = salt_end - pwd_end = salt_start + self.LOCAL_STORAGE_SECRET_LENGTH - # calculate the key for local encryption - secret = self.storage_secret + return self.storage_secret[salt_start:salt_end] + + def get_local_storage_key(self): + """ + Return the local storage key derived from the local storage secret. + """ return scrypt.hash( - secret[pwd_start:pwd_end], # the password - secret[salt_start:salt_end], # the salt + self._get_local_storage_secret(), # the password + self._get_local_storage_salt(), # the salt buflen=32, # we need a key with 256 bits (32 bytes) ) -- cgit v1.2.3 From aa8fcba828bc917eaf8e6b0dacb76f0de904bf59 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 4 Aug 2014 16:17:09 -0300 Subject: Add salt for sync sb key derivation. 
--- client/src/leap/soledad/client/__init__.py | 7 +++ client/src/leap/soledad/client/crypto.py | 6 +-- client/src/leap/soledad/client/secrets.py | 82 ++++++++++++++++++++++++------ 3 files changed, 76 insertions(+), 19 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 0fd6672a..e66055e0 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -750,6 +750,13 @@ class Soledad(object): """ return self._secrets.storage_secret + @property + def remote_storage_secret(self): + """ + Return the secret used for encryption of remotelly stored data. + """ + return self._secrets.remote_storage_secret + @property def secrets(self): return self._secrets diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 4a64b5a8..1b01913d 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -240,9 +240,7 @@ class SoledadCrypto(object): if self.secret is None: raise NoSymmetricSecret() return hmac.new( - self.secret[ - MAC_KEY_LENGTH: - self._soledad.secrets.REMOTE_STORAGE_SECRET_LENGTH], + self.secret[MAC_KEY_LENGTH:], doc_id, hashlib.sha256).digest() @@ -251,7 +249,7 @@ class SoledadCrypto(object): # def _get_secret(self): - return self._soledad.storage_secret + return self._soledad.secrets.remote_storage_secret secret = property( _get_secret, doc='The secret used for symmetric encryption') diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 621e2d99..55580692 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -123,6 +123,14 @@ class SoledadSecrets(object): encryption. """ + GEN_SECRET_LENGTH = LOCAL_STORAGE_SECRET_LENGTH \ + + REMOTE_STORAGE_SECRET_LENGTH \ + + SALT_LENGTH # for sync db + """ + The length of the secret to be generated. 
This includes local and remote + secrets, and the salt for deriving the sync db secret. + """ + MINIMUM_PASSPHRASE_LENGTH = 6 """ The minimum length for a passphrase. The passphrase length is only checked @@ -268,12 +276,21 @@ class SoledadSecrets(object): with open(self._secrets_path, 'r') as f: content = json.loads(f.read()) _, mac = self._import_recovery_document(content) - if mac is False: - self._store_secrets() - self._put_secrets_in_shared_db() # choose first secret if no secret_id was given if self._secret_id is None: self.set_secret_id(self._secrets.items()[0][0]) + # enlarge secret if needed + enlarged = False + if len(self._secrets[self._secret_id]) < self.GEN_SECRET_LENGTH: + gen_len = self.GEN_SECRET_LENGTH \ + - len(self._secrets[self._secret_id]) + new_piece = os.urandom(gen_len) + self._secrets[self._secret_id] += new_piece + enlarged = True + # store and save in shared db if needed + if mac is False or enlarged is True: + self._store_secrets() + self._put_secrets_in_shared_db() def _get_or_gen_crypto_secrets(self): """ @@ -596,9 +613,7 @@ class SoledadSecrets(object): """ signal(SOLEDAD_CREATING_KEYS, self._uuid) # generate random secret - secret = os.urandom( - self.LOCAL_STORAGE_SECRET_LENGTH - + self.REMOTE_STORAGE_SECRET_LENGTH) + secret = os.urandom(self.GEN_SECRET_LENGTH) secret_id = sha256(secret).hexdigest() self._secrets[secret_id] = secret self._store_secrets() @@ -667,24 +682,29 @@ class SoledadSecrets(object): def _passphrase_as_string(self): return self._passphrase.encode('utf-8') - def get_syncdb_secret(self): - """ - Return the secret for sync db. - """ - # TODO: implement. - pass + # + # remote storage secret + # - def _get_remote_storage_secret(self): + @property + def remote_storage_secret(self): """ Return the secret for remote storage. 
""" - # TODO: implement - pass + key_start = 0 + key_end = self.REMOTE_STORAGE_SECRET_LENGTH + return self.storage_secret[key_start:key_end] + # + # local storage key + # def _get_local_storage_secret(self): """ Return the local storage secret. + + :return: The local storage secret. + :rtype: str """ pwd_start = self.REMOTE_STORAGE_SECRET_LENGTH + self.SALT_LENGTH pwd_end = self.REMOTE_STORAGE_SECRET_LENGTH + self.LOCAL_STORAGE_SECRET_LENGTH @@ -693,6 +713,9 @@ class SoledadSecrets(object): def _get_local_storage_salt(self): """ Return the local storage salt. + + :return: The local storage salt. + :rtype: str """ salt_start = self.REMOTE_STORAGE_SECRET_LENGTH salt_end = salt_start + self.SALT_LENGTH @@ -701,9 +724,38 @@ class SoledadSecrets(object): def get_local_storage_key(self): """ Return the local storage key derived from the local storage secret. + + :return: The key for protecting the local database. + :rtype: str """ return scrypt.hash( self._get_local_storage_secret(), # the password self._get_local_storage_salt(), # the salt buflen=32, # we need a key with 256 bits (32 bytes) ) + + # + # sync db key + # + + def _get_sync_db_salt(self): + """ + Return the salt for sync db. + """ + salt_start = self.LOCAL_STORAGE_SECRET_LENGTH \ + + self.REMOTE_STORAGE_SECRET_LENGTH + salt_end = salt_start + self.SALT_LENGTH + return self.storage_secret[salt_start:salt_end] + + def get_sync_db_key(self): + """ + Return the key for protecting the sync database. + + :return: The key for protecting the sync database. + :rtype: str + """ + return scrypt.hash( + self._get_local_storage_secret(), # the password + self._get_sync_db_salt(), # the salt + buflen=32, # we need a key with 256 bits (32 bytes) + ) -- cgit v1.2.3 From 30aa5c040c093aa82be09e94dd403c18597320e5 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 4 Aug 2014 16:42:56 -0300 Subject: Protect sync db with a password. 
--- client/src/leap/soledad/client/__init__.py | 4 +++- client/src/leap/soledad/client/mp_safe_db.py | 15 +++++++++++++-- client/src/leap/soledad/client/sqlcipher.py | 28 ++++++++++++++++++---------- 3 files changed, 34 insertions(+), 13 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index e66055e0..0b72be27 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -264,6 +264,7 @@ class Soledad(object): uses the 'raw PRAGMA key' format to handle the key to SQLCipher. """ key = self._secrets.get_local_storage_key() + sync_db_key = self._secrets.get_sync_db_key() self._db = sqlcipher_open( self._local_db_path, binascii.b2a_hex(key), # sqlcipher only accepts the hex version @@ -271,7 +272,8 @@ class Soledad(object): document_factory=SoledadDocument, crypto=self._crypto, raw_key=True, - defer_encryption=self._defer_encryption) + defer_encryption=self._defer_encryption, + sync_db_key=binascii.b2a_hex(sync_db_key)) def close(self): """ diff --git a/client/src/leap/soledad/client/mp_safe_db.py b/client/src/leap/soledad/client/mp_safe_db.py index a9ab5649..2c6b7e24 100644 --- a/client/src/leap/soledad/client/mp_safe_db.py +++ b/client/src/leap/soledad/client/mp_safe_db.py @@ -23,7 +23,7 @@ Multiprocessing-safe SQLite database. from threading import Thread from Queue import Queue -from sqlite3 import connect as sqlite3_connect +from pysqlcipher import dbapi2 # Thanks to http://code.activestate.com/recipes/526618/ @@ -49,7 +49,7 @@ class MPSafeSQLiteDB(Thread): """ Run the multiprocessing-safe database accessor. """ - conn = sqlite3_connect(self._db_path) + conn = dbapi2.connect(self._db_path) while True: req, arg, res = self._requests.get() if req == self.CLOSE: @@ -99,3 +99,14 @@ class MPSafeSQLiteDB(Thread): """ self.execute(self.CLOSE) self.join() + + def cursor(self): + """ + Return a fake cursor object. 
+ + Not really a cursor, but allows for calling db.cursor().execute(). + + :return: Self. + :rtype: MPSafeSQLiteDatabase + """ + return self diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 7823e235..a7ddab24 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -91,7 +91,7 @@ SQLITE_ISOLATION_LEVEL = None def open(path, password, create=True, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, - cipher_page_size=1024, defer_encryption=False): + cipher_page_size=1024, defer_encryption=False, sync_db_key=None): """ Open a database at the given location. @@ -136,7 +136,8 @@ def open(path, password, create=True, document_factory=None, crypto=None, return SQLCipherDatabase.open_database( path, password, create=create, document_factory=document_factory, crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, - cipher_page_size=cipher_page_size, defer_encryption=defer_encryption) + cipher_page_size=cipher_page_size, defer_encryption=defer_encryption, + sync_db_key=sync_db_key) # @@ -199,7 +200,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): def __init__(self, sqlcipher_file, password, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', - kdf_iter=4000, cipher_page_size=1024): + kdf_iter=4000, cipher_page_size=1024, sync_db_key=None): """ Connect to an existing SQLCipher database, creating a new sqlcipher database file if needed. 
@@ -264,7 +265,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self._sync_db = None self._sync_db_write_lock = None self._sync_enc_pool = None - self._init_sync_db(sqlcipher_file) + self._init_sync_db(sqlcipher_file, sync_db_key) if self.defer_encryption: # initialize sync db @@ -293,7 +294,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): def _open_database(cls, sqlcipher_file, password, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, - defer_encryption=False): + defer_encryption=False, sync_db_key=None): """ Open a SQLCipher database. @@ -363,13 +364,14 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): return SQLCipherDatabase._sqlite_registry[v]( sqlcipher_file, password, document_factory=document_factory, crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, - cipher_page_size=cipher_page_size) + cipher_page_size=cipher_page_size, sync_db_key=sync_db_key) @classmethod def open_database(cls, sqlcipher_file, password, create, backend_cls=None, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, - cipher_page_size=1024, defer_encryption=False): + cipher_page_size=1024, defer_encryption=False, + sync_db_key=None): """ Open a SQLCipher database. 
@@ -429,7 +431,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): sqlcipher_file, password, document_factory=document_factory, crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, cipher_page_size=cipher_page_size, - defer_encryption=defer_encryption) + defer_encryption=defer_encryption, sync_db_key=sync_db_key) except u1db_errors.DatabaseDoesNotExist: if not create: raise @@ -440,7 +442,8 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): return backend_cls( sqlcipher_file, password, document_factory=document_factory, crypto=crypto, raw_key=raw_key, cipher=cipher, - kdf_iter=kdf_iter, cipher_page_size=cipher_page_size) + kdf_iter=kdf_iter, cipher_page_size=cipher_page_size, + sync_db_key=sync_db_key) def sync(self, url, creds=None, autocreate=True, defer_decryption=True): """ @@ -561,7 +564,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): 'ALTER TABLE document ' 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') - def _init_sync_db(self, sqlcipher_file): + def _init_sync_db(self, sqlcipher_file, sync_db_password): """ Initialize the Symmetrically-Encrypted document to be synced database, and the queue to communicate with subprocess workers. @@ -575,6 +578,11 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): else: sync_db_path = ":memory:" self._sync_db = MPSafeSQLiteDB(sync_db_path) + # protect the sync db with a password + if sync_db_password is not None: + self._set_crypto_pragmas( + self._sync_db, sync_db_password, True, + 'aes-256-cbc', 4000, 1024) self._sync_db_write_lock = threading.Lock() self._create_sync_db_tables() self.sync_queue = multiprocessing.Queue() -- cgit v1.2.3 From afdb1cefe605cabfe325df3124b9beb3174568ff Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 4 Aug 2014 16:48:21 -0300 Subject: Delete the received docs from sync db before starting a new sync. 
--- client/src/leap/soledad/client/crypto.py | 7 +++++++ client/src/leap/soledad/client/target.py | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 1b01913d..a24f2053 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -986,3 +986,10 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): else: # If no errors found, remove it from the received database. self.delete_received_doc(doc_id, doc_rev) + + def empty(self): + """ + Empty the received docs table of the sync database. + """ + sql = "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,) + res = self._sync_db.execute(sql) diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 12175f48..1cb02856 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -1149,8 +1149,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): setProxiedObject(self._insert_doc_cb[source_replica_uid], return_doc_cb) + # empty the database before starting a new sync if defer_decryption is True and not self.clear_to_sync(): - raise PendingReceivedDocsSyncError + self._sync_decr_pool.empty() self._ensure_connection() if self._trace_hook: # for tests -- cgit v1.2.3 From ab7850bbdcded8b0e36cb27a2468f55d1910c218 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 12 Aug 2014 11:06:23 -0300 Subject: Fix bits from pullreq review. 
--- client/src/leap/soledad/client/__init__.py | 2 +- client/src/leap/soledad/client/crypto.py | 19 +++++--- client/src/leap/soledad/client/mp_safe_db.py | 2 +- client/src/leap/soledad/client/secrets.py | 69 +++++++++++++++++----------- client/src/leap/soledad/client/sqlcipher.py | 51 ++++++++++++-------- client/src/leap/soledad/client/target.py | 7 +-- 6 files changed, 90 insertions(+), 60 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 0b72be27..c76e4a4a 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -755,7 +755,7 @@ class Soledad(object): @property def remote_storage_secret(self): """ - Return the secret used for encryption of remotelly stored data. + Return the secret used for encryption of remotely stored data. """ return self._secrets.remote_storage_secret diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index a24f2053..5e3760b3 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -863,7 +863,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :param encrypted: If not None, only return documents with encrypted field equal to given parameter. - :type encrypted: bool + :type encrypted: bool or None :return: list of doc_id, rev, generation, gen, trans_id :rtype: list @@ -878,16 +878,23 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): def get_insertable_docs_by_gen(self): """ - Return a list of documents ready to be inserted. + Return a list of non-encrypted documents ready to be inserted. """ + # here, we compare the list of all available docs with the list of + # decrypted docs and find the longest common prefix between these two + # lists. 
Note that the order of lists fetch matters: if instead we + # first fetch the list of decrypted docs and then the list of all + # docs, then some document might have been decrypted between these two + # calls, and if it is just the right doc then it might not be caught + # by the next loop. all_docs = self.get_docs_by_generation() decrypted_docs = self.get_docs_by_generation(encrypted=False) insertable = [] for doc_id, rev, _, gen, trans_id, encrypted in all_docs: try: - next_decrypted = decrypted_docs.next() - if doc_id == next_decrypted[0]: - content = next_decrypted[2] + next_doc_id, _, next_content, _, _, _ = decrypted_docs.next() + if doc_id == next_doc_id: + content = next_content insertable.append((doc_id, rev, content, gen, trans_id)) else: break @@ -901,7 +908,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :param encrypted: If not None, return count of documents with encrypted field equal to given parameter. - :type encrypted: bool + :type encrypted: bool or None :return: The count of documents. :rtype: int diff --git a/client/src/leap/soledad/client/mp_safe_db.py b/client/src/leap/soledad/client/mp_safe_db.py index 2c6b7e24..780b7153 100644 --- a/client/src/leap/soledad/client/mp_safe_db.py +++ b/client/src/leap/soledad/client/mp_safe_db.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# crypto.py +# mp_safe_db.py # Copyright (C) 2014 LEAP # # This program is free software: you can redistribute it and/or modify diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 55580692..b1c22371 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -69,21 +69,28 @@ logger = logging.getLogger(name=__name__) # Exceptions # -class NoStorageSecret(Exception): + +class SecretsException(Exception): + """ + Generic exception type raised by this module. + """ + + +class NoStorageSecret(SecretsException): """ Raised when trying to use a storage secret but none is available. 
""" pass -class PassphraseTooShort(Exception): +class PassphraseTooShort(SecretsException): """ Raised when trying to change the passphrase but the provided passphrase is too short. """ -class BootstrapSequenceError(Exception): +class BootstrapSequenceError(SecretsException): """ Raised when an attempt to generate a secret and store it in a recovery document on server failed. @@ -107,34 +114,35 @@ class SoledadSecrets(object): LOCAL_STORAGE_SECRET_LENGTH = 512 """ - The length of the secret used to derive a passphrase for the SQLCipher - database. + The length, in bytes, of the secret used to derive a passphrase for the + SQLCipher database. """ REMOTE_STORAGE_SECRET_LENGTH = 512 """ - The length of the secret used to derive an encryption key and a MAC auth - key for remote storage. + The length, in bytes, of the secret used to derive an encryption key and a + MAC auth key for remote storage. """ SALT_LENGTH = 64 """ - The length of the salt used to derive the key for the storage secret - encryption. + The length, in bytes, of the salt used to derive the key for the storage + secret encryption. """ GEN_SECRET_LENGTH = LOCAL_STORAGE_SECRET_LENGTH \ + REMOTE_STORAGE_SECRET_LENGTH \ + SALT_LENGTH # for sync db """ - The length of the secret to be generated. This includes local and remote - secrets, and the salt for deriving the sync db secret. + The length, in bytes, of the secret to be generated. This includes local + and remote secrets, and the salt for deriving the sync db secret. """ MINIMUM_PASSPHRASE_LENGTH = 6 """ - The minimum length for a passphrase. The passphrase length is only checked - when the user changes her passphrase, not when she instantiates Soledad. + The minimum length, in bytes, for a passphrase. The passphrase length is + only checked when the user changes her passphrase, not when she + instantiates Soledad. 
""" IV_SEPARATOR = ":" @@ -288,7 +296,7 @@ class SoledadSecrets(object): self._secrets[self._secret_id] += new_piece enlarged = True # store and save in shared db if needed - if mac is False or enlarged is True: + if not mac or enlarged: self._store_secrets() self._put_secrets_in_shared_db() @@ -443,13 +451,17 @@ class SoledadSecrets(object): raise WrongMac('Could not authenticate recovery document\'s ' 'contents.') # include secrets in the secret pool. - secrets = 0 + secret_count = 0 for secret_id, encrypted_secret in data[self.STORAGE_SECRETS_KEY].items(): if secret_id not in self._secrets: - secrets += 1 - self._secrets[secret_id] = \ - self._decrypt_storage_secret(encrypted_secret) - return secrets, mac + try: + self._secrets[secret_id] = \ + self._decrypt_storage_secret(encrypted_secret) + secret_count += 1 + except SecretsException as e: + logger.error("Failed to decrypt storage secret: %s" + % str(e)) + return secret_count, mac def _get_secrets_from_shared_db(self): """ @@ -512,10 +524,13 @@ class SoledadSecrets(object): :return: The decrypted storage secret. :rtype: str + + :raise SecretsException: Raised in case the decryption of the storage + secret fails for some reason. """ # calculate the encryption key if encrypted_secret_dict[self.KDF_KEY] != self.KDF_SCRYPT: - raise Exception("Unknown KDF in stored secret.") + raise SecretsException("Unknown KDF in stored secret.") key = scrypt.hash( self._passphrase_as_string(), # the salt is stored base64 encoded @@ -524,16 +539,16 @@ class SoledadSecrets(object): buflen=32, # we need a key with 256 bits (32 bytes). 
) if encrypted_secret_dict[self.KDF_LENGTH_KEY] != len(key): - raise Exception("Wrong length of decryption key.") + raise SecretsException("Wrong length of decryption key.") if encrypted_secret_dict[self.CIPHER_KEY] != self.CIPHER_AES256: - raise Exception("Unknown cipher in stored secret.") + raise SecretsException("Unknown cipher in stored secret.") # recover the initial value and ciphertext iv, ciphertext = encrypted_secret_dict[self.SECRET_KEY].split( self.IV_SEPARATOR, 1) ciphertext = binascii.a2b_base64(ciphertext) decrypted_secret = self._crypto.decrypt_sym(ciphertext, key, iv=iv) if encrypted_secret_dict[self.LENGTH_KEY] != len(decrypted_secret): - raise Exception("Wrong length of decrypted secret.") + raise SecretsException("Wrong length of decrypted secret.") return decrypted_secret def _encrypt_storage_secret(self, decrypted_secret): @@ -729,8 +744,8 @@ class SoledadSecrets(object): :rtype: str """ return scrypt.hash( - self._get_local_storage_secret(), # the password - self._get_local_storage_salt(), # the salt + password=self._get_local_storage_secret(), + salt=self._get_local_storage_salt(), buflen=32, # we need a key with 256 bits (32 bytes) ) @@ -755,7 +770,7 @@ class SoledadSecrets(object): :rtype: str """ return scrypt.hash( - self._get_local_storage_secret(), # the password - self._get_sync_db_salt(), # the salt + password=self._get_local_storage_secret(), + salt=self._get_sync_db_salt(), buflen=32, # we need a key with 256 bits (32 bytes) ) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index a7ddab24..b7de2fba 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -63,6 +63,7 @@ from leap.soledad.client.target import SoledadSyncTarget from leap.soledad.client.target import PendingReceivedDocsSyncError from leap.soledad.client.sync import SoledadSynchronizer from leap.soledad.client.mp_safe_db import MPSafeSQLiteDB +from 
leap.soledad.common import soledad_assert from leap.soledad.common.document import SoledadDocument @@ -262,13 +263,16 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self._crypto = crypto # define sync-db attrs + self._sqlcipher_file = sqlcipher_file + self._sync_db_key = sync_db_key self._sync_db = None self._sync_db_write_lock = None self._sync_enc_pool = None - self._init_sync_db(sqlcipher_file, sync_db_key) + self.sync_queue = None if self.defer_encryption: # initialize sync db + self._init_sync_db() # initialize syncing queue encryption pool self._sync_enc_pool = SyncEncrypterPool( self._crypto, self._sync_db, self._sync_db_write_lock) @@ -471,6 +475,8 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): res = None # the following context manager blocks until the syncing lock can be # acquired. + if defer_decryption: + self._init_sync_db() with self.syncer(url, creds=creds) as syncer: # XXX could mark the critical section here... try: @@ -564,28 +570,27 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): 'ALTER TABLE document ' 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') - def _init_sync_db(self, sqlcipher_file, sync_db_password): + def _init_sync_db(self): """ Initialize the Symmetrically-Encrypted document to be synced database, and the queue to communicate with subprocess workers. - - :param sqlcipher_file: The path for the SQLCipher file. 
- :type sqlcipher_file: str """ - sync_db_path = None - if sqlcipher_file != ":memory:": - sync_db_path = "%s-sync" % sqlcipher_file - else: - sync_db_path = ":memory:" - self._sync_db = MPSafeSQLiteDB(sync_db_path) - # protect the sync db with a password - if sync_db_password is not None: - self._set_crypto_pragmas( - self._sync_db, sync_db_password, True, - 'aes-256-cbc', 4000, 1024) - self._sync_db_write_lock = threading.Lock() - self._create_sync_db_tables() - self.sync_queue = multiprocessing.Queue() + if self._sync_db is None: + soledad_assert(self._sync_db_key is not None) + sync_db_path = None + if self._sqlcipher_file != ":memory:": + sync_db_path = "%s-sync" % self._sqlcipher_file + else: + sync_db_path = ":memory:" + self._sync_db = MPSafeSQLiteDB(sync_db_path) + # protect the sync db with a password + if self._sync_db_key is not None: + self._set_crypto_pragmas( + self._sync_db, self._sync_db_key, False, + 'aes-256-cbc', 4000, 1024) + self._sync_db_write_lock = threading.Lock() + self._create_sync_db_tables() + self.sync_queue = multiprocessing.Queue() def _create_sync_db_tables(self): """ @@ -1106,24 +1111,30 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ Close db_handle and close syncer. 
""" - logger.debug("Sqlcipher backend: closing") + if logger is not None: # logger might be none if called from __del__ + logger.debug("Sqlcipher backend: closing") # stop the sync watcher for deferred encryption if self._sync_watcher is not None: self._sync_watcher.stop() self._sync_watcher.shutdown() + self._sync_watcher = None # close all open syncers for url in self._syncers: _, syncer = self._syncers[url] syncer.close() + self._syncers = [] # stop the encryption pool if self._sync_enc_pool is not None: self._sync_enc_pool.close() + self._sync_enc_pool = None # close the actual database if self._db_handle is not None: self._db_handle.close() + self._db_handle = None # close the sync database if self._sync_db is not None: self._sync_db.close() + self._sync_db = None # close the sync queue if self.sync_queue is not None: self.sync_queue.close() diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 1cb02856..ae2010a6 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -807,12 +807,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._sync_db = sync_db self._sync_db_write_lock = sync_db_write_lock - def _setup_sync_decr_pool(self, last_known_generation): + def _setup_sync_decr_pool(self): """ Set up the SyncDecrypterPool for deferred decryption. - - :param last_known_generation: Target's last known generation. 
- :type last_known_generation: int """ if self._sync_decr_pool is None: # initialize syncing queue decryption pool @@ -1133,7 +1130,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): if defer_decryption and self._sync_db is not None: self._sync_exchange_lock.acquire() - self._setup_sync_decr_pool(last_known_generation) + self._setup_sync_decr_pool() self._setup_sync_watcher() self._defer_decryption = True else: -- cgit v1.2.3 From 22d3a8d4c6a1e652109378245989f4f6a71d1f42 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 16 Sep 2014 11:41:31 -0500 Subject: comments + pep8 --- client/src/leap/soledad/client/crypto.py | 3 +++ client/src/leap/soledad/client/sync.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 5e3760b3..d68f3089 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -616,6 +616,9 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): :param content: The encrypted document. :type content: str """ + # FIXME --- callback should complete immediately since otherwise the + # thread which handles the results will get blocked + # Right now we're blocking the dispatcher with the writes to sqlite. sql_del = "DELETE FROM '%s' WHERE doc_id=?" 
% (self.TABLE_NAME,) sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?)" % (self.TABLE_NAME,) diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index c158f2a7..0297c75c 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -120,7 +120,7 @@ class SoledadSynchronizer(Synchronizer): " target my gen: %d\n" " target my trans_id: %s" % (self.target_replica_uid, target_gen, target_trans_id, - target_my_gen, target_my_trans_id)) + target_my_gen, target_my_trans_id)) # make sure we'll have access to target replica uid once it exists if self.target_replica_uid is None: @@ -138,7 +138,7 @@ class SoledadSynchronizer(Synchronizer): # what's changed since that generation and this current gen my_gen, _, changes = self.source.whats_changed(target_my_gen) - logger.debug("Soledad sync: there are %d documents to send." \ + logger.debug("Soledad sync: there are %d documents to send." % len(changes)) # get source last-seen database generation for the target -- cgit v1.2.3 From 19f28c432f36022c5f1c0558f4742c864e7202c8 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 29 Sep 2014 11:19:16 -0300 Subject: Wait for last post request to finish before starting a new one during sync (#5975). 
--- client/src/leap/soledad/client/target.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index ae2010a6..651d3ee5 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -1176,6 +1176,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): synced = [] number_of_docs = len(docs_by_generations) + last_request_lock = None for doc, gen, trans_id in docs_by_generations: # allow for interrupting the sync process if self.stopped is True: @@ -1212,7 +1213,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # end of symmetric encryption # ------------------------------------------------------------- t = syncer_pool.new_syncer_thread( - sent + 1, total, last_request_lock=None, + sent + 1, total, last_request_lock=last_request_lock, last_callback_lock=last_callback_lock) # bail out if any thread failed @@ -1249,7 +1250,12 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # save thread and append t.start() threads.append((t, doc)) + + # update lock references so they can be used in next call to + # syncer_pool.new_syncer_thread() above last_callback_lock = t.callback_lock + last_request_lock = t.request_lock + sent += 1 # make sure all threads finished and we have up-to-date info -- cgit v1.2.3 From 5d8e1e4e210410c6f5702a4348fceba80ba03af6 Mon Sep 17 00:00:00 2001 From: Duda Dornelles Date: Thu, 27 Nov 2014 15:43:19 -0200 Subject: If the client loses and restores it connection we must reset the u1db sync_target connection for it to be able to sync again --- client/src/leap/soledad/client/sqlcipher.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index b7de2fba..26e74ef5 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ 
b/client/src/leap/soledad/client/sqlcipher.py @@ -52,6 +52,7 @@ import json from hashlib import sha256 from contextlib import contextmanager from collections import defaultdict +from httplib import CannotSendRequest from pysqlcipher import dbapi2 from u1db.backends import sqlite_backend @@ -486,6 +487,11 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): except PendingReceivedDocsSyncError: logger.warning("Local sync db is not clear, skipping sync...") return + except CannotSendRequest: + logger.warning("Connection with sync target couldn't be established. Resetting connection...") + # closing the connection it will get it recreated in the next try + syncer.sync_target.close() + return return res -- cgit v1.2.3 From 3526d37350c27487fb1e4c6664dc346006ef72f4 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 27 Nov 2014 16:50:20 -0200 Subject: Fix pep8 style. --- client/src/leap/soledad/client/sqlcipher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 26e74ef5..45629045 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -488,8 +488,9 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): logger.warning("Local sync db is not clear, skipping sync...") return except CannotSendRequest: - logger.warning("Connection with sync target couldn't be established. Resetting connection...") - # closing the connection it will get it recreated in the next try + logger.warning("Connection with sync target couldn't be " + "established. Resetting connection...") + # closing the connection it will be recreated in the next try syncer.sync_target.close() return -- cgit v1.2.3 From 4e90feb613da4f1f5221f3fed401d52dbf8f5e2b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 26 Nov 2014 21:06:25 +0100 Subject: force tls v1 in soledad client. 
Partially fixes #6437 --- client/src/leap/soledad/client/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index c76e4a4a..7267180b 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -811,7 +811,8 @@ class VerifiedHTTPSConnection(httplib.HTTPSConnection): self.sock = ssl.wrap_socket(sock, ca_certs=SOLEDAD_CERT, - cert_reqs=ssl.CERT_REQUIRED) + cert_reqs=ssl.CERT_REQUIRED, + ssl_version=ssl.PROTOCOL_TLSv1) match_hostname(self.sock.getpeercert(), self.host) -- cgit v1.2.3 From 31eeafd715f407c61d8de4e6555241a1de33fba1 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 3 Dec 2014 00:22:18 +0100 Subject: Use SSL negotiation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although the API can be misleading, PROTOCOL_SSLv23 selects the highest protocol version that both the client and server support. Despite the name, this option can select “TLS” protocols as well as “SSL”. In this way, we can use TLSv1.2 (PROTOCOL_TLSv1 will *only* give us TLS v1.0) In the client side, we try to disable SSLv2 and SSLv3 options explicitely. 
The python version in wheezy does not offer PROTOCOL_TLSv1_2 nor OP_NO_SSLv2 or OP_NO_SSLv3 (It's new in 2.7.9) --- client/src/leap/soledad/client/__init__.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 7267180b..a4030d88 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -809,10 +809,23 @@ class VerifiedHTTPSConnection(httplib.HTTPSConnection): self.sock = sock self._tunnel() - self.sock = ssl.wrap_socket(sock, - ca_certs=SOLEDAD_CERT, - cert_reqs=ssl.CERT_REQUIRED, - ssl_version=ssl.PROTOCOL_TLSv1) + # negotiate the best availabe version... + ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + + # but if possible, we want to disable bad ones + # needs python 2.7.9+ + try: + ctx.options |= ssl.OP_NO_SSLv2 + ctx.options |= ssl.OP_NO_SSLv3 + except AttributeError: + pass + + ctx.load_cert_chain(certfile=SOLEDAD_CERT) + ctx.verify_mode = ssl.CERT_REQUIRED + + self.sock = ctx.wrap_socket( + sock, server_side=True, server_hostname=self.host) + match_hostname(self.sock.getpeercert(), self.host) -- cgit v1.2.3 From aafa79c0f5e3d05c28d8f41804ae692931e67d7e Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 4 Dec 2014 18:13:06 +0100 Subject: fix ssl negotiation since ssl.SSLContext does not exist prior to python 2.7.9 --- client/src/leap/soledad/client/__init__.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index a4030d88..d7d01b57 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -809,22 +809,25 @@ class VerifiedHTTPSConnection(httplib.HTTPSConnection): self.sock = sock self._tunnel() - # negotiate the best availabe version... 
- ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + highest_supported = ssl.PROTOCOL_SSLv23 - # but if possible, we want to disable bad ones - # needs python 2.7.9+ try: + # needs python 2.7.9+ + # negotiate the best available version, + # but explicitely disabled bad ones. + ctx = ssl.SSLContext(highest_supported) ctx.options |= ssl.OP_NO_SSLv2 ctx.options |= ssl.OP_NO_SSLv3 - except AttributeError: - pass - ctx.load_cert_chain(certfile=SOLEDAD_CERT) - ctx.verify_mode = ssl.CERT_REQUIRED + ctx.load_cert_chain(certfile=SOLEDAD_CERT) + ctx.verify_mode = ssl.CERT_REQUIRED + self.sock = ctx.wrap_socket( + sock, server_side=True, server_hostname=self.host) - self.sock = ctx.wrap_socket( - sock, server_side=True, server_hostname=self.host) + except AttributeError: + self.sock = ssl.wrap_socket( + sock, ca_certs=SOLEDAD_CERT, cert_reqs=ssl.CERT_REQUIRED, + ssl_version=highest_supported) match_hostname(self.sock.getpeercert(), self.host) -- cgit v1.2.3 From b761bfc3f95bc87461c8cc8ec8462b1a995ebddb Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 28 Nov 2014 11:41:25 -0200 Subject: Refactor client crypto for better code readability. 
--- client/src/leap/soledad/client/crypto.py | 233 ++++++++++++++++--------------- 1 file changed, 120 insertions(+), 113 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index d68f3089..681bf4f7 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -33,59 +33,39 @@ from zope.proxy import sameProxiedObjects from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type from leap.soledad.common.document import SoledadDocument +from leap.soledad.common.crypto import EncryptionSchemes +from leap.soledad.common.crypto import EncryptionMethods +from leap.soledad.common.crypto import MacMethods +from leap.soledad.common.crypto import UnknownMacMethod +from leap.soledad.common.crypto import WrongMac +from leap.soledad.common.crypto import ENC_JSON_KEY +from leap.soledad.common.crypto import ENC_SCHEME_KEY +from leap.soledad.common.crypto import ENC_METHOD_KEY +from leap.soledad.common.crypto import ENC_IV_KEY +from leap.soledad.common.crypto import MAC_KEY +from leap.soledad.common.crypto import MAC_METHOD_KEY -from leap.soledad.common.crypto import ( - EncryptionSchemes, - UnknownEncryptionScheme, - MacMethods, - UnknownMacMethod, - WrongMac, - ENC_JSON_KEY, - ENC_SCHEME_KEY, - ENC_METHOD_KEY, - ENC_IV_KEY, - MAC_KEY, - MAC_METHOD_KEY, -) - logger = logging.getLogger(__name__) MAC_KEY_LENGTH = 64 -class EncryptionMethods(object): - """ - Representation of encryption methods that can be used. - """ - - AES_256_CTR = 'aes-256-ctr' - XSALSA20 = 'xsalsa20' - -# -# Exceptions -# - - -class DocumentNotEncrypted(Exception): - """ - Raised for failures in document encryption. +def _assert_known_encryption_method(method): """ - pass + Assert that we can encrypt/decrypt the given C{method} + :param method: The encryption method to assert. 
+ :type method: str -class UnknownEncryptionMethod(Exception): - """ - Raised when trying to encrypt/decrypt with unknown method. - """ - pass - - -class NoSymmetricSecret(Exception): - """ - Raised when trying to get a hashed passphrase. + :raise AssertionError: Raised if C{method} is unknown. """ + valid_methods = [ + EncryptionMethods.AES_256_CTR, + EncryptionMethods.XSALSA20, + ] + soledad_assert(method in valid_methods) def encrypt_sym(data, key, method): @@ -104,13 +84,16 @@ def encrypt_sym(data, key, method): :return: A tuple with the initial value and the encrypted data. :rtype: (long, str) + + :raise AssertionError: Raised if C{method} is unknown. """ soledad_assert_type(key, str) - soledad_assert( len(key) == 32, # 32 x 8 = 256 bits. 'Wrong key size: %s bits (must be 256 bits long).' % (len(key) * 8)) + _assert_known_encryption_method(method) + iv = None # AES-256 in CTR mode if method == EncryptionMethods.AES_256_CTR: @@ -120,9 +103,7 @@ def encrypt_sym(data, key, method): elif method == EncryptionMethods.XSALSA20: iv = os.urandom(24) ciphertext = XSalsa20(key=key, iv=iv).process(data) - else: - # raise if method is unknown - raise UnknownEncryptionMethod('Unkwnown method: %s' % method) + return binascii.b2a_base64(iv), ciphertext @@ -143,6 +124,8 @@ def decrypt_sym(data, key, method, **kwargs): :return: The decrypted data. :rtype: str + + :raise AssertionError: Raised if C{method} is unknown. """ soledad_assert_type(key, str) # assert params @@ -152,6 +135,7 @@ def decrypt_sym(data, key, method, **kwargs): soledad_assert( 'iv' in kwargs, '%s needs an initial value.' 
% method) + _assert_known_encryption_method(method) # AES-256 in CTR mode if method == EncryptionMethods.AES_256_CTR: return AES( @@ -160,9 +144,6 @@ def decrypt_sym(data, key, method, **kwargs): return XSalsa20( key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) - # raise if method is unknown - raise UnknownEncryptionMethod('Unkwnown method: %s' % method) - def doc_mac_key(doc_id, secret): """ @@ -176,17 +157,13 @@ def doc_mac_key(doc_id, secret): :param doc_id: The id of the document. :type doc_id: str - :param secret: soledad secret storage - :type secret: Soledad.storage_secret + :param secret: The Soledad storage secret + :type secret: str :return: The key. :rtype: str - - :raise NoSymmetricSecret: if no symmetric secret was supplied. """ - if secret is None: - raise NoSymmetricSecret() - + soledad_assert(secret is not None) return hmac.new( secret[:MAC_KEY_LENGTH], doc_id, @@ -234,11 +211,8 @@ class SoledadCrypto(object): :return: The passphrase. :rtype: str - - :raise NoSymmetricSecret: if no symmetric secret was supplied. """ - if self.secret is None: - raise NoSymmetricSecret() + soledad_assert(self.secret is not None) return hmac.new( self.secret[MAC_KEY_LENGTH:], doc_id, @@ -277,19 +251,25 @@ def mac_doc(doc_id, doc_rev, ciphertext, mac_method, secret): :type ciphertext: str :param mac_method: The MAC method to use. :type mac_method: str - :param secret: soledad secret - :type secret: Soledad.secret_storage + :param secret: The Soledad storage secret + :type secret: str :return: The calculated MAC. :rtype: str + + :raise UnknownMacMethod: Raised when C{mac_method} is unknown. """ - if mac_method == MacMethods.HMAC: - return hmac.new( - doc_mac_key(doc_id, secret), - str(doc_id) + str(doc_rev) + ciphertext, - hashlib.sha256).digest() - # raise if we do not know how to handle this MAC method - raise UnknownMacMethod('Unknown MAC method: %s.' 
% mac_method) + try: + soledad_assert(mac_method == MacMethods.HMAC) + except AssertionError: + raise UnknownMacMethod + content = str(doc_id) \ + + str(doc_rev) \ + + ciphertext + return hmac.new( + doc_mac_key(doc_id, secret), + content, + hashlib.sha256).digest() def encrypt_doc(crypto, doc): @@ -337,30 +317,37 @@ def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): :param key: The key used to encrypt ``data`` (must be 256 bits long). :type key: str - :param secret: The Soledad secret (used for MAC auth). + :param secret: The Soledad storage secret (used for MAC auth). :type secret: str :return: The JSON serialization of the dict representing the encrypted content. :rtype: str """ - # encrypt content using AES-256 CTR mode + enc_scheme = EncryptionSchemes.SYMKEY + enc_method = EncryptionMethods.AES_256_CTR + mac_method = MacMethods.HMAC iv, ciphertext = encrypt_sym( str(docstr), # encryption/decryption routines expect str - key, method=EncryptionMethods.AES_256_CTR) + key, method=enc_method) + mac = binascii.b2a_hex( # store the mac as hex. + mac_doc( + doc_id, + doc_rev, + ciphertext, + mac_method, + secret)) # Return a representation for the encrypted content. In the following, we # convert binary data to hexadecimal representation so the JSON # serialization does not complain about what it tries to serialize. hex_ciphertext = binascii.b2a_hex(ciphertext) return json.dumps({ ENC_JSON_KEY: hex_ciphertext, - ENC_SCHEME_KEY: EncryptionSchemes.SYMKEY, - ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR, + ENC_SCHEME_KEY: enc_scheme, + ENC_METHOD_KEY: enc_method, ENC_IV_KEY: iv, - MAC_KEY: binascii.b2a_hex(mac_doc( # store the mac as hex. 
- doc_id, doc_rev, ciphertext, - MacMethods.HMAC, secret)), - MAC_METHOD_KEY: MacMethods.HMAC, + MAC_KEY: mac, + MAC_METHOD_KEY: mac_method, }) @@ -382,9 +369,47 @@ def decrypt_doc(crypto, doc): return decrypt_doc_dict(doc.content, doc.doc_id, doc.rev, key, secret) +def _verify_doc_mac(doc_id, doc_rev, ciphertext, mac_method, secret, doc_mac): + """ + Verify that C{doc_mac} is a correct MAC for the given document. + + :param doc_id: The id of the document. + :type doc_id: str + :param doc_rev: The revision of the document. + :type doc_rev: str + :param ciphertext: The content of the document. + :type ciphertext: str + :param mac_method: The MAC method to use. + :type mac_method: str + :param secret: The Soledad storage secret + :type secret: str + :param doc_mac: The MAC to be verified against. + :type doc_mac: str + + :raise UnknownMacMethod: Raised when C{mac_method} is unknown. + """ + calculated_mac = mac_doc( + doc_id, + doc_rev, + ciphertext, + mac_method, + secret) + # we compare mac's hashes to avoid possible timing attacks that might + # exploit python's builtin comparison operator behaviour, which fails + # immediatelly when non-matching bytes are found. + doc_mac_hash = hashlib.sha256( + binascii.a2b_hex( # the mac is stored as hex + doc_mac)).digest() + calculated_mac_hash = hashlib.sha256(calculated_mac).digest() + + if doc_mac_hash != calculated_mac_hash: + logger.warning("Wrong MAC while decrypting doc...") + raise WrongMac('Could not authenticate document\'s contents.') + + def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): """ - Decrypt C{doc}'s content. + Decrypt a symmetrically encrypted C{doc}'s content. Return the JSON string representation of the document's decrypted content. @@ -421,48 +446,30 @@ def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): :return: The JSON serialization of the decrypted content. :rtype: str + + :raise UnknownEncryptionMethod: Raised when trying to decrypt from an + unknown encryption method. 
""" + # assert document dictionary structure soledad_assert(ENC_JSON_KEY in doc_dict) soledad_assert(ENC_SCHEME_KEY in doc_dict) soledad_assert(ENC_METHOD_KEY in doc_dict) + soledad_assert(ENC_IV_KEY in doc_dict) soledad_assert(MAC_KEY in doc_dict) soledad_assert(MAC_METHOD_KEY in doc_dict) - # verify MAC - ciphertext = binascii.a2b_hex( # content is stored as hex. - doc_dict[ENC_JSON_KEY]) - mac = mac_doc( - doc_id, doc_rev, - ciphertext, - doc_dict[MAC_METHOD_KEY], secret) - # we compare mac's hashes to avoid possible timing attacks that might - # exploit python's builtin comparison operator behaviour, which fails - # immediatelly when non-matching bytes are found. - doc_mac_hash = hashlib.sha256( - binascii.a2b_hex( # the mac is stored as hex - doc_dict[MAC_KEY])).digest() - calculated_mac_hash = hashlib.sha256(mac).digest() - - if doc_mac_hash != calculated_mac_hash: - logger.warning("Wrong MAC while decrypting doc...") - raise WrongMac('Could not authenticate document\'s contents.') - # decrypt doc's content + ciphertext = binascii.a2b_hex(doc_dict[ENC_JSON_KEY]) enc_scheme = doc_dict[ENC_SCHEME_KEY] - plainjson = None - if enc_scheme == EncryptionSchemes.SYMKEY: - enc_method = doc_dict[ENC_METHOD_KEY] - if enc_method == EncryptionMethods.AES_256_CTR: - soledad_assert(ENC_IV_KEY in doc_dict) - plainjson = decrypt_sym( - ciphertext, key, - method=enc_method, - iv=doc_dict[ENC_IV_KEY]) - else: - raise UnknownEncryptionMethod(enc_method) - else: - raise UnknownEncryptionScheme(enc_scheme) + enc_method = doc_dict[ENC_METHOD_KEY] + enc_iv = doc_dict[ENC_IV_KEY] + doc_mac = doc_dict[MAC_KEY] + mac_method = doc_dict[MAC_METHOD_KEY] + + soledad_assert(enc_scheme == EncryptionSchemes.SYMKEY) + + _verify_doc_mac(doc_id, doc_rev, ciphertext, mac_method, secret, doc_mac) - return plainjson + return decrypt_sym(ciphertext, key, method=enc_method, iv=enc_iv) def is_symmetrically_encrypted(doc): @@ -540,7 +547,7 @@ def encrypt_doc_task(doc_id, doc_rev, content, key, secret): 
:type content: str :param key: The encryption key. :type key: str - :param secret: The Soledad secret (used for MAC auth). + :param secret: The Soledad storage secret (used for MAC auth). :type secret: str :return: A tuple containing the doc id, revision and encrypted content. @@ -646,7 +653,7 @@ def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret): :type trans_id: str :param key: The encryption key. :type key: str - :param secret: The Soledad secret (used for MAC auth). + :param secret: The Soledad storage secret (used for MAC auth). :type secret: str :return: A tuple containing the doc id, revision and encrypted content. -- cgit v1.2.3 From ce0d421e41cfb75a3957541d6c88fcd7b26e8cd6 Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 28 Nov 2014 11:50:15 -0200 Subject: Add encryption scheme, method and iv to symmetrically encrypted document MAC (#6400). --- client/src/leap/soledad/client/crypto.py | 174 +++++++++++++++++------------- client/src/leap/soledad/client/secrets.py | 79 +++++--------- 2 files changed, 129 insertions(+), 124 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 681bf4f7..d6d9a618 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -32,18 +32,8 @@ from zope.proxy import sameProxiedObjects from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type +from leap.soledad.common import crypto from leap.soledad.common.document import SoledadDocument -from leap.soledad.common.crypto import EncryptionSchemes -from leap.soledad.common.crypto import EncryptionMethods -from leap.soledad.common.crypto import MacMethods -from leap.soledad.common.crypto import UnknownMacMethod -from leap.soledad.common.crypto import WrongMac -from leap.soledad.common.crypto import ENC_JSON_KEY -from leap.soledad.common.crypto import ENC_SCHEME_KEY -from 
leap.soledad.common.crypto import ENC_METHOD_KEY -from leap.soledad.common.crypto import ENC_IV_KEY -from leap.soledad.common.crypto import MAC_KEY -from leap.soledad.common.crypto import MAC_METHOD_KEY logger = logging.getLogger(__name__) @@ -59,13 +49,16 @@ def _assert_known_encryption_method(method): :param method: The encryption method to assert. :type method: str - :raise AssertionError: Raised if C{method} is unknown. + :raise UnknownEncryptionMethodError: Raised when C{method} is unknown. """ valid_methods = [ - EncryptionMethods.AES_256_CTR, - EncryptionMethods.XSALSA20, + crypto.EncryptionMethods.AES_256_CTR, + crypto.EncryptionMethods.XSALSA20, ] - soledad_assert(method in valid_methods) + try: + soledad_assert(method in valid_methods) + except AssertionError: + raise crypto.UnknownEncryptionMethodError def encrypt_sym(data, key, method): @@ -96,11 +89,11 @@ def encrypt_sym(data, key, method): iv = None # AES-256 in CTR mode - if method == EncryptionMethods.AES_256_CTR: + if method == crypto.EncryptionMethods.AES_256_CTR: iv = os.urandom(16) ciphertext = AES(key=key, iv=iv).process(data) # XSalsa20 - elif method == EncryptionMethods.XSALSA20: + elif method == crypto.EncryptionMethods.XSALSA20: iv = os.urandom(24) ciphertext = XSalsa20(key=key, iv=iv).process(data) @@ -125,7 +118,7 @@ def decrypt_sym(data, key, method, **kwargs): :return: The decrypted data. :rtype: str - :raise AssertionError: Raised if C{method} is unknown. + :raise UnknownEncryptionMethodError: Raised when C{method} is unknown. """ soledad_assert_type(key, str) # assert params @@ -137,10 +130,10 @@ def decrypt_sym(data, key, method, **kwargs): '%s needs an initial value.' 
% method) _assert_known_encryption_method(method) # AES-256 in CTR mode - if method == EncryptionMethods.AES_256_CTR: + if method == crypto.EncryptionMethods.AES_256_CTR: return AES( key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) - elif method == EncryptionMethods.XSALSA20: + elif method == crypto.EncryptionMethods.XSALSA20: return XSalsa20( key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) @@ -185,11 +178,11 @@ class SoledadCrypto(object): self._soledad = soledad def encrypt_sym(self, data, key, - method=EncryptionMethods.AES_256_CTR): + method=crypto.EncryptionMethods.AES_256_CTR): return encrypt_sym(data, key, method) def decrypt_sym(self, data, key, - method=EncryptionMethods.AES_256_CTR, **kwargs): + method=crypto.EncryptionMethods.AES_256_CTR, **kwargs): return decrypt_sym(data, key, method, **kwargs) def doc_mac_key(self, doc_id, secret): @@ -233,7 +226,8 @@ class SoledadCrypto(object): # Crypto utilities for a SoledadDocument. # -def mac_doc(doc_id, doc_rev, ciphertext, mac_method, secret): +def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, + mac_method, secret): """ Calculate a MAC for C{doc} using C{ciphertext}. @@ -249,6 +243,12 @@ def mac_doc(doc_id, doc_rev, ciphertext, mac_method, secret): :type doc_rev: str :param ciphertext: The content of the document. :type ciphertext: str + :param enc_scheme: The encryption scheme. + :type enc_scheme: str + :param enc_method: The encryption method. + :type enc_method: str + :param enc_iv: The encryption initialization vector. + :type enc_iv: str :param mac_method: The MAC method to use. :type mac_method: str :param secret: The Soledad storage secret @@ -257,15 +257,20 @@ def mac_doc(doc_id, doc_rev, ciphertext, mac_method, secret): :return: The calculated MAC. :rtype: str - :raise UnknownMacMethod: Raised when C{mac_method} is unknown. + :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown. 
""" try: - soledad_assert(mac_method == MacMethods.HMAC) + soledad_assert(mac_method == crypto.MacMethods.HMAC) except AssertionError: - raise UnknownMacMethod - content = str(doc_id) \ - + str(doc_rev) \ - + ciphertext + raise crypto.UnknownMacMethodError + template = "{doc_id}{doc_rev}{ciphertext}{enc_scheme}{enc_method}{enc_iv}" + content = template.format( + doc_id=doc_id, + doc_rev=doc_rev, + ciphertext=ciphertext, + enc_scheme=enc_scheme, + enc_method=enc_method, + enc_iv=enc_iv) return hmac.new( doc_mac_key(doc_id, secret), content, @@ -297,12 +302,12 @@ def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): string representing the following: { - ENC_JSON_KEY: '', - ENC_SCHEME_KEY: 'symkey', - ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR, - ENC_IV_KEY: '', + crypto.ENC_JSON_KEY: '', + crypto.ENC_SCHEME_KEY: 'symkey', + crypto.ENC_METHOD_KEY: crypto.EncryptionMethods.AES_256_CTR, + crypto.ENC_IV_KEY: '', MAC_KEY: '' - MAC_METHOD_KEY: 'hmac' + crypto.MAC_METHOD_KEY: 'hmac' } :param docstr: A representation of the document to be encrypted. @@ -324,10 +329,10 @@ def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): content. :rtype: str """ - enc_scheme = EncryptionSchemes.SYMKEY - enc_method = EncryptionMethods.AES_256_CTR - mac_method = MacMethods.HMAC - iv, ciphertext = encrypt_sym( + enc_scheme = crypto.EncryptionSchemes.SYMKEY + enc_method = crypto.EncryptionMethods.AES_256_CTR + mac_method = crypto.MacMethods.HMAC + enc_iv, ciphertext = encrypt_sym( str(docstr), # encryption/decryption routines expect str key, method=enc_method) mac = binascii.b2a_hex( # store the mac as hex. @@ -335,6 +340,9 @@ def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): doc_id, doc_rev, ciphertext, + enc_scheme, + enc_method, + enc_iv, mac_method, secret)) # Return a representation for the encrypted content. 
In the following, we @@ -342,12 +350,12 @@ def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): # serialization does not complain about what it tries to serialize. hex_ciphertext = binascii.b2a_hex(ciphertext) return json.dumps({ - ENC_JSON_KEY: hex_ciphertext, - ENC_SCHEME_KEY: enc_scheme, - ENC_METHOD_KEY: enc_method, - ENC_IV_KEY: iv, - MAC_KEY: mac, - MAC_METHOD_KEY: mac_method, + crypto.ENC_JSON_KEY: hex_ciphertext, + crypto.ENC_SCHEME_KEY: enc_scheme, + crypto.ENC_METHOD_KEY: enc_method, + crypto.ENC_IV_KEY: enc_iv, + crypto.MAC_KEY: mac, + crypto.MAC_METHOD_KEY: mac_method, }) @@ -369,7 +377,8 @@ def decrypt_doc(crypto, doc): return decrypt_doc_dict(doc.content, doc.doc_id, doc.rev, key, secret) -def _verify_doc_mac(doc_id, doc_rev, ciphertext, mac_method, secret, doc_mac): +def _verify_doc_mac(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, + enc_iv, mac_method, secret, doc_mac): """ Verify that C{doc_mac} is a correct MAC for the given document. @@ -379,6 +388,12 @@ def _verify_doc_mac(doc_id, doc_rev, ciphertext, mac_method, secret, doc_mac): :type doc_rev: str :param ciphertext: The content of the document. :type ciphertext: str + :param enc_scheme: The encryption scheme. + :type enc_scheme: str + :param enc_method: The encryption method. + :type enc_method: str + :param enc_iv: The encryption initialization vector. + :type enc_iv: str :param mac_method: The MAC method to use. :type mac_method: str :param secret: The Soledad storage secret @@ -386,12 +401,16 @@ def _verify_doc_mac(doc_id, doc_rev, ciphertext, mac_method, secret, doc_mac): :param doc_mac: The MAC to be verified against. :type doc_mac: str - :raise UnknownMacMethod: Raised when C{mac_method} is unknown. + :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown. + :raise crypto.WrongMacError: Raised when MAC could not be verified. 
""" calculated_mac = mac_doc( doc_id, doc_rev, ciphertext, + enc_scheme, + enc_method, + enc_iv, mac_method, secret) # we compare mac's hashes to avoid possible timing attacks that might @@ -404,7 +423,8 @@ def _verify_doc_mac(doc_id, doc_rev, ciphertext, mac_method, secret, doc_mac): if doc_mac_hash != calculated_mac_hash: logger.warning("Wrong MAC while decrypting doc...") - raise WrongMac('Could not authenticate document\'s contents.') + raise crypto.WrongMacError("Could not authenticate document's " + "contents.") def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): @@ -416,18 +436,18 @@ def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): The passed doc_dict argument should have the following structure: { - ENC_JSON_KEY: '', - ENC_SCHEME_KEY: '', - ENC_METHOD_KEY: '', - ENC_IV_KEY: '', # (optional) + crypto.ENC_JSON_KEY: '', + crypto.ENC_SCHEME_KEY: '', + crypto.ENC_METHOD_KEY: '', + crypto.ENC_IV_KEY: '', # (optional) MAC_KEY: '' - MAC_METHOD_KEY: 'hmac' + crypto.MAC_METHOD_KEY: 'hmac' } C{enc_blob} is the encryption of the JSON serialization of the document's content. For now Soledad just deals with documents whose C{enc_scheme} is - EncryptionSchemes.SYMKEY and C{enc_method} is - EncryptionMethods.AES_256_CTR. + crypto.EncryptionSchemes.SYMKEY and C{enc_method} is + crypto.EncryptionMethods.AES_256_CTR. :param doc_dict: The content of the document to be decrypted. :type doc_dict: dict @@ -447,27 +467,32 @@ def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): :return: The JSON serialization of the decrypted content. :rtype: str - :raise UnknownEncryptionMethod: Raised when trying to decrypt from an + :raise UnknownEncryptionMethodError: Raised when trying to decrypt from an unknown encryption method. 
""" # assert document dictionary structure - soledad_assert(ENC_JSON_KEY in doc_dict) - soledad_assert(ENC_SCHEME_KEY in doc_dict) - soledad_assert(ENC_METHOD_KEY in doc_dict) - soledad_assert(ENC_IV_KEY in doc_dict) - soledad_assert(MAC_KEY in doc_dict) - soledad_assert(MAC_METHOD_KEY in doc_dict) - - ciphertext = binascii.a2b_hex(doc_dict[ENC_JSON_KEY]) - enc_scheme = doc_dict[ENC_SCHEME_KEY] - enc_method = doc_dict[ENC_METHOD_KEY] - enc_iv = doc_dict[ENC_IV_KEY] - doc_mac = doc_dict[MAC_KEY] - mac_method = doc_dict[MAC_METHOD_KEY] - - soledad_assert(enc_scheme == EncryptionSchemes.SYMKEY) - - _verify_doc_mac(doc_id, doc_rev, ciphertext, mac_method, secret, doc_mac) + expected_keys = set([ + crypto.ENC_JSON_KEY, + crypto.ENC_SCHEME_KEY, + crypto.ENC_METHOD_KEY, + crypto.ENC_IV_KEY, + crypto.MAC_KEY, + crypto.MAC_METHOD_KEY, + ]) + soledad_assert(expected_keys.issubset(set(doc_dict.keys()))) + + ciphertext = binascii.a2b_hex(doc_dict[crypto.ENC_JSON_KEY]) + enc_scheme = doc_dict[crypto.ENC_SCHEME_KEY] + enc_method = doc_dict[crypto.ENC_METHOD_KEY] + enc_iv = doc_dict[crypto.ENC_IV_KEY] + doc_mac = doc_dict[crypto.MAC_KEY] + mac_method = doc_dict[crypto.MAC_METHOD_KEY] + + soledad_assert(enc_scheme == crypto.EncryptionSchemes.SYMKEY) + + _verify_doc_mac( + doc_id, doc_rev, ciphertext, enc_scheme, enc_method, + enc_iv, mac_method, secret, doc_mac) return decrypt_sym(ciphertext, key, method=enc_method, iv=enc_iv) @@ -481,8 +506,9 @@ def is_symmetrically_encrypted(doc): :rtype: bool """ - if doc.content and ENC_SCHEME_KEY in doc.content: - if doc.content[ENC_SCHEME_KEY] == EncryptionSchemes.SYMKEY: + if doc.content and crypto.ENC_SCHEME_KEY in doc.content: + if doc.content[crypto.ENC_SCHEME_KEY] \ + == crypto.EncryptionSchemes.SYMKEY: return True return False diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index b1c22371..970ac82f 100644 --- a/client/src/leap/soledad/client/secrets.py +++ 
b/client/src/leap/soledad/client/secrets.py @@ -33,33 +33,12 @@ from hashlib import sha256 import simplejson as json -from leap.soledad.common import ( - soledad_assert, - soledad_assert_type -) -from leap.soledad.common.document import SoledadDocument -from leap.soledad.common.crypto import ( - MacMethods, - UnknownMacMethod, - WrongMac, - MAC_KEY, - MAC_METHOD_KEY, -) -from leap.soledad.common.errors import ( - InvalidTokenError, - NotLockedError, - AlreadyLockedError, - LockTimedOutError, -) -from leap.soledad.client.events import ( - SOLEDAD_CREATING_KEYS, - SOLEDAD_DONE_CREATING_KEYS, - SOLEDAD_DOWNLOADING_KEYS, - SOLEDAD_DONE_DOWNLOADING_KEYS, - SOLEDAD_UPLOADING_KEYS, - SOLEDAD_DONE_UPLOADING_KEYS, - signal, -) +from leap.soledad.common import soledad_assert +from leap.soledad.common import soledad_assert_type +from leap.soledad.common import document +from leap.soledad.common import errors +from leap.soledad.common import crypto +from leap.soledad.client import events logger = logging.getLogger(name=__name__) @@ -227,9 +206,9 @@ class SoledadSecrets(object): token = timeout = None try: token, timeout = self._shared_db.lock() - except AlreadyLockedError: + except errors.AlreadyLockedError: raise BootstrapSequenceError('Database is already locked.') - except LockTimedOutError: + except errors.LockTimedOutError: raise BootstrapSequenceError('Lock operation timed out.') self._get_or_gen_crypto_secrets() @@ -238,12 +217,12 @@ class SoledadSecrets(object): try: self._shared_db.unlock(token) self._shared_db.close() - except NotLockedError: + except errors.NotLockedError: # for some reason the lock expired. Despite that, secret # loading or generation/storage must have been executed # successfully, so we pass. pass - except InvalidTokenError: + except errors.InvalidTokenError: # here, our lock has not only expired but also some other # client application has obtained a new lock and is currently # doing its thing in the shared database. 
Using the same @@ -403,8 +382,8 @@ class SoledadSecrets(object): self.KDF_KEY: self.KDF_SCRYPT, self.KDF_SALT_KEY: binascii.b2a_base64(salt), self.KDF_LENGTH_KEY: len(key), - MAC_METHOD_KEY: MacMethods.HMAC, - MAC_KEY: hmac.new( + crypto.MAC_METHOD_KEY: crypto.MacMethods.HMAC, + crypto.MAC_KEY: hmac.new( key, json.dumps(encrypted_secrets), sha256).hexdigest(), @@ -429,13 +408,13 @@ class SoledadSecrets(object): soledad_assert(self.STORAGE_SECRETS_KEY in data) # check mac of the recovery document mac = None - if MAC_KEY in data: - soledad_assert(data[MAC_KEY] is not None) - soledad_assert(MAC_METHOD_KEY in data) + if crypto.MAC_KEY in data: + soledad_assert(data[crypto.MAC_KEY] is not None) + soledad_assert(crypto.MAC_METHOD_KEY in data) soledad_assert(self.KDF_KEY in data) soledad_assert(self.KDF_SALT_KEY in data) soledad_assert(self.KDF_LENGTH_KEY in data) - if data[MAC_METHOD_KEY] == MacMethods.HMAC: + if data[crypto.MAC_METHOD_KEY] == crypto.MacMethods.HMAC: key = scrypt.hash( self._passphrase_as_string(), binascii.a2b_base64(data[self.KDF_SALT_KEY]), @@ -445,10 +424,10 @@ class SoledadSecrets(object): json.dumps(data[self.STORAGE_SECRETS_KEY]), sha256).hexdigest() else: - raise UnknownMacMethod('Unknown MAC method: %s.' % - data[MAC_METHOD_KEY]) - if mac != data[MAC_KEY]: - raise WrongMac('Could not authenticate recovery document\'s ' + raise crypto.UnknownMacMethodError('Unknown MAC method: %s.' % + data[crypto.MAC_METHOD_KEY]) + if mac != data[crypto.MAC_KEY]: + raise crypto.WrongMacError('Could not authenticate recovery document\'s ' 'contents.') # include secrets in the secret pool. secret_count = 0 @@ -469,15 +448,15 @@ class SoledadSecrets(object): database. 
:return: a document with encrypted key material in its contents - :rtype: SoledadDocument + :rtype: document.SoledadDocument """ - signal(SOLEDAD_DOWNLOADING_KEYS, self._uuid) + events.signal(events.SOLEDAD_DOWNLOADING_KEYS, self._uuid) db = self._shared_db if not db: logger.warning('No shared db found') return doc = db.get_doc(self._shared_db_doc_id()) - signal(SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid) + events.signal(events.SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid) return doc def _put_secrets_in_shared_db(self): @@ -495,18 +474,18 @@ class SoledadSecrets(object): # try to get secrets doc from server, otherwise create it doc = self._get_secrets_from_shared_db() if doc is None: - doc = SoledadDocument( + doc = document.SoledadDocument( doc_id=self._shared_db_doc_id()) # fill doc with encrypted secrets doc.content = self._export_recovery_document() # upload secrets to server - signal(SOLEDAD_UPLOADING_KEYS, self._uuid) + events.signal(events.SOLEDAD_UPLOADING_KEYS, self._uuid) db = self._shared_db if not db: logger.warning('No shared db found') return db.put_doc(doc) - signal(SOLEDAD_DONE_UPLOADING_KEYS, self._uuid) + events.signal(events.SOLEDAD_DONE_UPLOADING_KEYS, self._uuid) # # Management of secret for symmetric encryption. @@ -618,7 +597,7 @@ class SoledadSecrets(object): Generate a secret for symmetric encryption and store in a local encrypted file. - This method emits the following signals: + This method emits the following events.signals: * SOLEDAD_CREATING_KEYS * SOLEDAD_DONE_CREATING_KEYS @@ -626,13 +605,13 @@ class SoledadSecrets(object): :return: The id of the generated secret. 
:rtype: str """ - signal(SOLEDAD_CREATING_KEYS, self._uuid) + events.signal(events.SOLEDAD_CREATING_KEYS, self._uuid) # generate random secret secret = os.urandom(self.GEN_SECRET_LENGTH) secret_id = sha256(secret).hexdigest() self._secrets[secret_id] = secret self._store_secrets() - signal(SOLEDAD_DONE_CREATING_KEYS, self._uuid) + events.signal(events.SOLEDAD_DONE_CREATING_KEYS, self._uuid) return secret_id def _store_secrets(self): -- cgit v1.2.3 From d234ec94734219116b1190232b6ba9c1a118e1d6 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 9 Dec 2014 16:07:17 -0600 Subject: Fix incorrect ssl context setup The changes introduced in aafa79c0f5 having to do with the cert verification are incorrect, regarding the use of the newest ssl context api introduced in python 2.7.9. There the use of the server setup was taken, instead of the correct client options. I hereby apologize for the insuficient testing on that fix. It happens that I wrongly tested in an evironment that did the fallback to pre-2.7.9 interpreter. 
--- client/src/leap/soledad/client/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index d7d01b57..0750dfbe 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -819,10 +819,9 @@ class VerifiedHTTPSConnection(httplib.HTTPSConnection): ctx.options |= ssl.OP_NO_SSLv2 ctx.options |= ssl.OP_NO_SSLv3 - ctx.load_cert_chain(certfile=SOLEDAD_CERT) + ctx.load_verify_locations(cafile=SOLEDAD_CERT) ctx.verify_mode = ssl.CERT_REQUIRED - self.sock = ctx.wrap_socket( - sock, server_side=True, server_hostname=self.host) + self.sock = ctx.wrap_socket(sock) except AttributeError: self.sock = ssl.wrap_socket( -- cgit v1.2.3 From 4c918100110029ccbab463bd77b3565383fc409b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 16 Sep 2014 22:09:06 -0500 Subject: remove unused imports --- client/src/leap/soledad/client/shared_db.py | 8 -------- 1 file changed, 8 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/shared_db.py b/client/src/leap/soledad/client/shared_db.py index 52e51c6f..31c4e8e8 100644 --- a/client/src/leap/soledad/client/shared_db.py +++ b/client/src/leap/soledad/client/shared_db.py @@ -14,19 +14,11 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . - - """ A shared database for storing/retrieving encrypted key material. 
""" - -import simplejson as json - - from u1db.remote import http_database - -from leap.soledad.common import SHARED_DB_LOCK_DOC_ID_PREFIX from leap.soledad.client.auth import TokenBasedAuth -- cgit v1.2.3 From 238822f869f8210883a82f87ae66a48751a7321b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 23 Sep 2014 12:43:14 -0500 Subject: use max cpu_count workers on pool --- client/src/leap/soledad/client/crypto.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index d6d9a618..aa8135c0 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -521,7 +521,7 @@ class SyncEncryptDecryptPool(object): """ Base class for encrypter/decrypter pools. """ - WORKERS = 5 + WORKERS = multiprocessing.cpu_count() def __init__(self, crypto, sync_db, write_lock): """ @@ -590,7 +590,7 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): of documents to be synced. """ # TODO implement throttling to reduce cpu usage?? 
- WORKERS = 5 + WORKERS = multiprocessing.cpu_count() TABLE_NAME = "docs_tosync" FIELD_NAMES = "doc_id, rev, content" -- cgit v1.2.3 From 091c2034ee08956541c1111673ebe2f69673f9f8 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 16 Sep 2014 23:23:20 -0500 Subject: reorganize pragmas, stub SQLCipherOptions object --- client/src/leap/soledad/client/__init__.py | 3 +- client/src/leap/soledad/client/adbapi.py | 77 ++++ client/src/leap/soledad/client/mp_safe_db.py | 2 +- client/src/leap/soledad/client/pragmas.py | 349 +++++++++++++++++ client/src/leap/soledad/client/sqlcipher.py | 565 ++++++--------------------- 5 files changed, 538 insertions(+), 458 deletions(-) create mode 100644 client/src/leap/soledad/client/adbapi.py create mode 100644 client/src/leap/soledad/client/pragmas.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 0750dfbe..50fcff2c 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -374,7 +374,8 @@ class Soledad(object): include_deleted=include_deleted) def get_all_docs(self, include_deleted=False): - """Get the JSON content for all documents in the database. + """ + Get the JSON content for all documents in the database. :param include_deleted: If set to True, deleted documents will be returned with empty content. Otherwise deleted diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py new file mode 100644 index 00000000..730999a3 --- /dev/null +++ b/client/src/leap/soledad/client/adbapi.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# sqlcipher.py +# Copyright (C) 2013, 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +An asyncrhonous interface to soledad using sqlcipher backend. +It uses twisted.enterprise.adbapi. + +""" +import os +import sys + +from twisted.enterprise import adbapi +from twisted.python import log + +DEBUG_SQL = os.environ.get("LEAP_DEBUG_SQL") +if DEBUG_SQL: + log.startLogging(sys.stdout) + + +def getConnectionPool(db=None, key=None): + return SQLCipherConnectionPool( + "pysqlcipher.dbapi2", database=db, key=key, check_same_thread=False) + + +class SQLCipherConnectionPool(adbapi.ConnectionPool): + + key = None + + def connect(self): + """ + Return a database connection when one becomes available. + + This method blocks and should be run in a thread from the internal + threadpool. Don't call this method directly from non-threaded code. + Using this method outside the external threadpool may exceed the + maximum number of connections in the pool. + + :return: a database connection from the pool. + """ + self.noisy = DEBUG_SQL + + tid = self.threadID() + conn = self.connections.get(tid) + + if self.key is None: + self.key = self.connkw.pop('key', None) + + if conn is None: + if self.noisy: + log.msg('adbapi connecting: %s %s%s' % (self.dbapiName, + self.connargs or '', + self.connkw or '')) + conn = self.dbapi.connect(*self.connargs, **self.connkw) + + # XXX we should hook here all OUR SOLEDAD pragmas ----- + conn.cursor().execute("PRAGMA key=%s" % self.key) + conn.commit() + # ----------------------------------------------------- + # XXX profit of openfun isntead??? 
+ + if self.openfun is not None: + self.openfun(conn) + self.connections[tid] = conn + return conn diff --git a/client/src/leap/soledad/client/mp_safe_db.py b/client/src/leap/soledad/client/mp_safe_db.py index 780b7153..9ed0bef4 100644 --- a/client/src/leap/soledad/client/mp_safe_db.py +++ b/client/src/leap/soledad/client/mp_safe_db.py @@ -88,7 +88,7 @@ class MPSafeSQLiteDB(Thread): res = Queue() self.execute(req, arg, res) while True: - rec=res.get() + rec = res.get() if rec == self.NO_MORE: break yield rec diff --git a/client/src/leap/soledad/client/pragmas.py b/client/src/leap/soledad/client/pragmas.py new file mode 100644 index 00000000..a21e68a8 --- /dev/null +++ b/client/src/leap/soledad/client/pragmas.py @@ -0,0 +1,349 @@ +# -*- coding: utf-8 -*- +# pragmas.py +# Copyright (C) 2013, 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Different pragmas used in the SQLCIPHER database. +""" +# TODO --------------------------------------------------------------- +# Work In Progress. +# We need to reduce the impedance mismatch between the current soledad +# implementation and the eventually asynchronous api. +# So... how to plug it in, allowing for an optional sync / async coexistence? +# One of the first things is to isolate all the pragmas work that has to be +# done during initialization. 
+# And, instead of having all of them passed the db_handle and executing that, +# we could have just a string returned, that can be chained to a deferred. +# --------------------------------------------------------------------- +import logging +import string + +logger = logging.getLogger(__name__) + + +def set_crypto_pragmas(db_handle, sqlcipher_opts): + """ + Set cryptographic params (key, cipher, KDF number of iterations and + cipher page size). + + :param db_handle: + :type db_handle: + :param sqlcipher_opts: options for the SQLCipherDatabase + :type sqlcipher_opts: SQLCipherOpts instance + """ + # XXX assert CryptoOptions + opts = sqlcipher_opts + _set_key(db_handle, opts.key, opts.is_raw_key) + _set_cipher(db_handle, opts.cipher) + _set_kdf_iter(db_handle, opts.kdf_iter) + _set_cipher_page_size(db_handle, opts.cipher_page_size) + + +def _set_key(db_handle, key, is_raw_key): + """ + Set the C{key} for use with the database. + + The process of creating a new, encrypted database is called 'keying' + the database. SQLCipher uses just-in-time key derivation at the point + it is first needed for an operation. This means that the key (and any + options) must be set before the first operation on the database. As + soon as the database is touched (e.g. SELECT, CREATE TABLE, UPDATE, + etc.) and pages need to be read or written, the key is prepared for + use. + + Implementation Notes: + + * PRAGMA key should generally be called as the first operation on a + database. + + :param key: The key for use with the database. + :type key: str + :param is_raw_key: Whether C{key} is a raw 64-char hex string or a + passphrase that should be hashed to obtain the + encyrption key. + :type is_raw_key: bool + """ + if is_raw_key: + _set_key_raw(db_handle, key) + else: + _set_key_passphrase(db_handle, key) + + +def _set_key_passphrase(cls, db_handle, passphrase): + """ + Set a passphrase for encryption key derivation. 
+ + The key itself can be a passphrase, which is converted to a key using + PBKDF2 key derivation. The result is used as the encryption key for + the database. By using this method, there is no way to alter the KDF; + if you want to do so you should use a raw key instead and derive the + key using your own KDF. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param passphrase: The passphrase used to derive the encryption key. + :type passphrase: str + """ + db_handle.cursor().execute("PRAGMA key = '%s'" % passphrase) + + +def _set_key_raw(db_handle, key): + """ + Set a raw hexadecimal encryption key. + + It is possible to specify an exact byte sequence using a blob literal. + With this method, it is the calling application's responsibility to + ensure that the data provided is a 64 character hex string, which will + be converted directly to 32 bytes (256 bits) of key data. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param key: A 64 character hex string. + :type key: str + """ + if not all(c in string.hexdigits for c in key): + raise NotAnHexString(key) + db_handle.cursor().execute('PRAGMA key = "x\'%s"' % key) + + +def _set_cipher(db_handle, cipher='aes-256-cbc'): + """ + Set the cipher and mode to use for symmetric encryption. + + SQLCipher uses aes-256-cbc as the default cipher and mode of + operation. It is possible to change this, though not generally + recommended, using PRAGMA cipher. + + SQLCipher makes direct use of libssl, so all cipher options available + to libssl are also available for use with SQLCipher. See `man enc` for + OpenSSL's supported ciphers. + + Implementation Notes: + + * PRAGMA cipher must be called after PRAGMA key and before the first + actual database operation or it will have no effect. + + * If a non-default value is used PRAGMA cipher to create a database, + it must also be called every time that database is opened. 
+ + * SQLCipher does not implement its own encryption. Instead it uses the + widely available and peer-reviewed OpenSSL libcrypto for all + cryptographic functions. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param cipher: The cipher and mode to use. + :type cipher: str + """ + db_handle.cursor().execute("PRAGMA cipher = '%s'" % cipher) + + +def _set_kdf_iter(db_handle, kdf_iter=4000): + """ + Set the number of iterations for the key derivation function. + + SQLCipher uses PBKDF2 key derivation to strengthen the key and make it + resistent to brute force and dictionary attacks. The default + configuration uses 4000 PBKDF2 iterations (effectively 16,000 SHA1 + operations). PRAGMA kdf_iter can be used to increase or decrease the + number of iterations used. + + Implementation Notes: + + * PRAGMA kdf_iter must be called after PRAGMA key and before the first + actual database operation or it will have no effect. + + * If a non-default value is used PRAGMA kdf_iter to create a database, + it must also be called every time that database is opened. + + * It is not recommended to reduce the number of iterations if a + passphrase is in use. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param kdf_iter: The number of iterations to use. + :type kdf_iter: int + """ + db_handle.cursor().execute("PRAGMA kdf_iter = '%d'" % kdf_iter) + + +def _set_cipher_page_size(db_handle, cipher_page_size=1024): + """ + Set the page size of the encrypted database. + + SQLCipher 2 introduced the new PRAGMA cipher_page_size that can be + used to adjust the page size for the encrypted database. The default + page size is 1024 bytes, but it can be desirable for some applications + to use a larger page size for increased performance. 
For instance, + some recent testing shows that increasing the page size can noticeably + improve performance (5-30%) for certain queries that manipulate a + large number of pages (e.g. selects without an index, large inserts in + a transaction, big deletes). + + To adjust the page size, call the pragma immediately after setting the + key for the first time and each subsequent time that you open the + database. + + Implementation Notes: + + * PRAGMA cipher_page_size must be called after PRAGMA key and before + the first actual database operation or it will have no effect. + + * If a non-default value is used PRAGMA cipher_page_size to create a + database, it must also be called every time that database is opened. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param cipher_page_size: The page size. + :type cipher_page_size: int + """ + db_handle.cursor().execute( + "PRAGMA cipher_page_size = '%d'" % cipher_page_size) + + +# XXX UNUSED ? +def set_rekey(db_handle, new_key, is_raw_key): + """ + Change the key of an existing encrypted database. + + To change the key on an existing encrypted database, it must first be + unlocked with the current encryption key. Once the database is + readable and writeable, PRAGMA rekey can be used to re-encrypt every + page in the database with a new key. + + * PRAGMA rekey must be called after PRAGMA key. It can be called at any + time once the database is readable. + + * PRAGMA rekey can not be used to encrypted a standard SQLite + database! It is only useful for changing the key on an existing + database. + + * Previous versions of SQLCipher provided a PRAGMA rekey_cipher and + code>PRAGMA rekey_kdf_iter. These are deprecated and should not be + used. Instead, use sqlcipher_export(). + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param new_key: The new key. 
+ :type new_key: str + :param is_raw_key: Whether C{password} is a raw 64-char hex string or a + passphrase that should be hashed to obtain the encyrption + key. + :type is_raw_key: bool + """ + if is_raw_key: + _set_rekey_raw(db_handle, new_key) + else: + _set_rekey_passphrase(db_handle, new_key) + + +def _set_rekey_passphrase(db_handle, passphrase): + """ + Change the passphrase for encryption key derivation. + + The key itself can be a passphrase, which is converted to a key using + PBKDF2 key derivation. The result is used as the encryption key for + the database. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param passphrase: The passphrase used to derive the encryption key. + :type passphrase: str + """ + db_handle.cursor().execute("PRAGMA rekey = '%s'" % passphrase) + + +def _set_rekey_raw(cls, db_handle, key): + """ + Change the raw hexadecimal encryption key. + + It is possible to specify an exact byte sequence using a blob literal. + With this method, it is the calling application's responsibility to + ensure that the data provided is a 64 character hex string, which will + be converted directly to 32 bytes (256 bits) of key data. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param key: A 64 character hex string. + :type key: str + """ + if not all(c in string.hexdigits for c in key): + raise NotAnHexString(key) + db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % key) + + +def set_synchronous_off(db_handle): + """ + Change the setting of the "synchronous" flag to OFF. + """ + logger.debug("SQLCIPHER: SETTING SYNCHRONOUS OFF") + db_handle.cursor().execute('PRAGMA synchronous=OFF') + + +def set_synchronous_normal(db_handle): + """ + Change the setting of the "synchronous" flag to NORMAL. 
+ """ + logger.debug("SQLCIPHER: SETTING SYNCHRONOUS NORMAL") + db_handle.cursor().execute('PRAGMA synchronous=NORMAL') + + +def set_mem_temp_store(cls, db_handle): + """ + Use a in-memory store for temporary tables. + """ + logger.debug("SQLCIPHER: SETTING TEMP_STORE MEMORY") + db_handle.cursor().execute('PRAGMA temp_store=MEMORY') + + +def set_write_ahead_logging(cls, db_handle): + """ + Enable write-ahead logging, and set the autocheckpoint to 50 pages. + + Setting the autocheckpoint to a small value, we make the reads not + suffer too much performance degradation. + + From the sqlite docs: + + "There is a tradeoff between average read performance and average write + performance. To maximize the read performance, one wants to keep the + WAL as small as possible and hence run checkpoints frequently, perhaps + as often as every COMMIT. To maximize write performance, one wants to + amortize the cost of each checkpoint over as many writes as possible, + meaning that one wants to run checkpoints infrequently and let the WAL + grow as large as possible before each checkpoint. The decision of how + often to run checkpoints may therefore vary from one application to + another depending on the relative read and write performance + requirements of the application. The default strategy is to run a + checkpoint once the WAL reaches 1000 pages" + """ + logger.debug("SQLCIPHER: SETTING WRITE-AHEAD LOGGING") + db_handle.cursor().execute('PRAGMA journal_mode=WAL') + # The optimum value can still use a little bit of tuning, but we favor + # small sizes of the WAL file to get fast reads, since we assume that + # the writes will be quick enough to not block too much. 
+ + # TODO + # As a further improvement, we might want to set autocheckpoint to 0 + # here and do the checkpoints manually in a separate thread, to avoid + # any blocks in the main thread (we should run a loopingcall from here) + db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50') + + +class NotAnHexString(Exception): + """ + Raised when trying to (raw) key the database with a non-hex string. + """ + pass diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 45629045..613903f7 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -44,7 +44,6 @@ handled by Soledad should be created by SQLCipher >= 2.0. import logging import multiprocessing import os -import string import threading import time import json @@ -64,6 +63,7 @@ from leap.soledad.client.target import SoledadSyncTarget from leap.soledad.client.target import PendingReceivedDocsSyncError from leap.soledad.client.sync import SoledadSynchronizer from leap.soledad.client.mp_safe_db import MPSafeSQLiteDB +from leap.soledad.client import pragmas from leap.soledad.common import soledad_assert from leap.soledad.common.document import SoledadDocument @@ -91,6 +91,55 @@ SQLITE_CHECK_SAME_THREAD = False SQLITE_ISOLATION_LEVEL = None +class SQLCipherOptions(object): + def __init__(self, path, key, create=True, is_raw_key=False, + cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, + document_factory=None, defer_encryption=False, + sync_db_key=None): + """ + Options for the initialization of an SQLCipher database. + + :param path: The filesystem path for the database to open. + :type path: str + :param create: + True/False, should the database be created if it doesn't + already exist? + :param create: bool + :param document_factory: + A function that will be called with the same parameters as + Document.__init__. 
+ :type document_factory: callable + :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt + document contents when syncing. + :type crypto: soledad.crypto.SoledadCrypto + :param is_raw_key: + Whether ``password`` is a raw 64-char hex string or a passphrase + that should be hashed to obtain the encyrption key. + :type raw_key: bool + :param cipher: The cipher and mode to use. + :type cipher: str + :param kdf_iter: The number of iterations to use. + :type kdf_iter: int + :param cipher_page_size: The page size. + :type cipher_page_size: int + :param defer_encryption: + Whether to defer encryption/decryption of documents, or do it + inline while syncing. + :type defer_encryption: bool + """ + self.path = path + self.key = key + self.is_raw_key = is_raw_key + self.create = create + self.cipher = cipher + self.kdf_iter = kdf_iter + self.cipher_page_size = cipher_page_size + self.defer_encryption = defer_encryption + self.sync_db_key = sync_db_key + self.document_factory = None + + +# XXX Use SQLCIpherOptions instead def open(path, password, create=True, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, defer_encryption=False, sync_db_key=None): @@ -108,56 +157,22 @@ def open(path, password, create=True, document_factory=None, crypto=None, Will raise u1db.errors.DatabaseDoesNotExist if create=False and the database does not already exist. - :param path: The filesystem path for the database to open. - :type path: str - :param create: True/False, should the database be created if it doesn't - already exist? - :param create: bool - :param document_factory: A function that will be called with the same - parameters as Document.__init__. - :type document_factory: callable - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. 
- :type crypto: soledad.crypto.SoledadCrypto - :param raw_key: Whether C{password} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. - :type raw_key: bool - :param cipher: The cipher and mode to use. - :type cipher: str - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int - :param cipher_page_size: The page size. - :type cipher_page_size: int - :param defer_encryption: Whether to defer encryption/decryption of - documents, or do it inline while syncing. - :type defer_encryption: bool - :return: An instance of Database. :rtype SQLCipherDatabase """ - return SQLCipherDatabase.open_database( - path, password, create=create, document_factory=document_factory, - crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, - cipher_page_size=cipher_page_size, defer_encryption=defer_encryption, - sync_db_key=sync_db_key) - - -# -# Exceptions -# - -class DatabaseIsNotEncrypted(Exception): - """ - Exception raised when trying to open non-encrypted databases. - """ - pass - - -class NotAnHexString(Exception): - """ - Raised when trying to (raw) key the database with a non-hex string. - """ - pass + args = (path, password) + kwargs = { + 'create': create, + 'document_factory': document_factory, + 'crypto': crypto, + 'raw_key': raw_key, + 'cipher': cipher, + 'kdf_iter': kdf_iter, + 'cipher_page_size': cipher_page_size, + 'defer_encryption': defer_encryption, + 'sync_db_key': sync_db_key} + # XXX pass only a CryptoOptions object around + return SQLCipherDatabase.open_database(*args, **kwargs) # @@ -200,6 +215,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): same database replica. 
""" + # XXX Use SQLCIpherOptions instead def __init__(self, sqlcipher_file, password, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, sync_db_key=None): @@ -214,30 +230,10 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): experience several kinds of leakages. *** IMPORTANT *** - - :param sqlcipher_file: The path for the SQLCipher file. - :type sqlcipher_file: str - :param password: The password that protects the SQLCipher db. - :type password: str - :param document_factory: A function that will be called with the same - parameters as Document.__init__. - :type document_factory: callable - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto - :param raw_key: Whether password is a raw 64-char hex string or a - passphrase that should be hashed to obtain the - encyrption key. - :type raw_key: bool - :param cipher: The cipher and mode to use. - :type cipher: str - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int - :param cipher_page_size: The page size. - :type cipher_page_size: int """ # ensure the db is encrypted if the file already exists if os.path.exists(sqlcipher_file): + # XXX pass only a CryptoOptions object around self.assert_db_is_encrypted( sqlcipher_file, password, raw_key, cipher, kdf_iter, cipher_page_size) @@ -249,16 +245,19 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): isolation_level=SQLITE_ISOLATION_LEVEL, check_same_thread=SQLITE_CHECK_SAME_THREAD) # set SQLCipher cryptographic parameters - self._set_crypto_pragmas( + + # XXX allow optiona deferredChain here ? 
+ pragmas.set_crypto_pragmas( self._db_handle, password, raw_key, cipher, kdf_iter, cipher_page_size) if os.environ.get('LEAP_SQLITE_NOSYNC'): - self._pragma_synchronous_off(self._db_handle) + pragmas.set_synchronous_off(self._db_handle) else: - self._pragma_synchronous_normal(self._db_handle) + pragmas.set_synchronous_normal(self._db_handle) if os.environ.get('LEAP_SQLITE_MEMSTORE'): - self._pragma_mem_temp_store(self._db_handle) - self._pragma_write_ahead_logging(self._db_handle) + pragmas.set_mem_temp_store(self._db_handle) + pragmas.set_write_ahead_logging(self._db_handle) + self._real_replica_uid = None self._ensure_schema() self._crypto = crypto @@ -296,6 +295,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self._syncers = {} @classmethod + # XXX Use SQLCIpherOptions instead def _open_database(cls, sqlcipher_file, password, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, @@ -303,29 +303,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ Open a SQLCipher database. - :param sqlcipher_file: The path for the SQLCipher file. - :type sqlcipher_file: str - :param password: The password that protects the SQLCipher db. - :type password: str - :param document_factory: A function that will be called with the same - parameters as Document.__init__. - :type document_factory: callable - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto - :param raw_key: Whether C{password} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. - :type raw_key: bool - :param cipher: The cipher and mode to use. - :type cipher: str - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int - :param cipher_page_size: The page size. 
- :type cipher_page_size: int - :param defer_encryption: Whether to defer encryption/decryption of - documents, or do it inline while syncing. - :type defer_encryption: bool - :return: The database object. :rtype: SQLCipherDatabase """ @@ -346,7 +323,9 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): try: # set cryptographic params - cls._set_crypto_pragmas( + + # XXX pass only a CryptoOptions object around + pragmas.set_crypto_pragmas( db_handle, password, raw_key, cipher, kdf_iter, cipher_page_size) c = db_handle.cursor() @@ -372,11 +351,12 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): cipher_page_size=cipher_page_size, sync_db_key=sync_db_key) @classmethod - def open_database(cls, sqlcipher_file, password, create, backend_cls=None, + def open_database(cls, sqlcipher_file, password, create, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, defer_encryption=False, sync_db_key=None): + # XXX pass only a CryptoOptions object around """ Open a SQLCipher database. @@ -388,67 +368,29 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): *** IMPORTANT *** - :param sqlcipher_file: The path for the SQLCipher file. - :type sqlcipher_file: str - - :param password: The password that protects the SQLCipher db. - :type password: str - - :param create: Should the datbase be created if it does not already - exist? - :type create: bool - - :param backend_cls: A class to use as backend. - :type backend_cls: type - - :param document_factory: A function that will be called with the same - parameters as Document.__init__. - :type document_factory: callable - - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto - - :param raw_key: Whether C{password} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the - encyrption key. 
- :type raw_key: bool - - :param cipher: The cipher and mode to use. - :type cipher: str - - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int - - :param cipher_page_size: The page size. - :type cipher_page_size: int - - :param defer_encryption: Whether to defer encryption/decryption of - documents, or do it inline while syncing. - :type defer_encryption: bool - :return: The database object. :rtype: SQLCipherDatabase """ cls.defer_encryption = defer_encryption + args = sqlcipher_file, password + kwargs = { + 'crypto': crypto, + 'raw_key': raw_key, + 'cipher': cipher, + 'kdf_iter': kdf_iter, + 'cipher_page_size': cipher_page_size, + 'defer_encryption': defer_encryption, + 'sync_db_key': sync_db_key, + 'document_factory': document_factory, + } try: - return cls._open_database( - sqlcipher_file, password, document_factory=document_factory, - crypto=crypto, raw_key=raw_key, cipher=cipher, - kdf_iter=kdf_iter, cipher_page_size=cipher_page_size, - defer_encryption=defer_encryption, sync_db_key=sync_db_key) + return cls._open_database(*args, **kwargs) except u1db_errors.DatabaseDoesNotExist: if not create: raise - # TODO: remove backend class from here. - if backend_cls is None: - # default is SQLCipherPartialExpandDatabase - backend_cls = SQLCipherDatabase - return backend_cls( - sqlcipher_file, password, document_factory=document_factory, - crypto=crypto, raw_key=raw_key, cipher=cipher, - kdf_iter=kdf_iter, cipher_page_size=cipher_page_size, - sync_db_key=sync_db_key) + + # XXX here we were missing sync_db_key, intentional? 
+ return SQLCipherDatabase(*args, **kwargs) def sync(self, url, creds=None, autocreate=True, defer_decryption=True): """ @@ -592,7 +534,8 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self._sync_db = MPSafeSQLiteDB(sync_db_path) # protect the sync db with a password if self._sync_db_key is not None: - self._set_crypto_pragmas( + # XXX pass only a CryptoOptions object around + pragmas.set_crypto_pragmas( self._sync_db, self._sync_db_key, False, 'aes-256-cbc', 4000, 1024) self._sync_db_write_lock = threading.Lock() @@ -712,6 +655,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): # SQLCipher API methods # + # XXX Use SQLCIpherOptions instead @classmethod def assert_db_is_encrypted(cls, sqlcipher_file, key, raw_key, cipher, kdf_iter, cipher_page_size): @@ -755,314 +699,12 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): sqlcipher_file, isolation_level=SQLITE_ISOLATION_LEVEL, check_same_thread=SQLITE_CHECK_SAME_THREAD) - cls._set_crypto_pragmas( + pragmas.set_crypto_pragmas( db_handle, key, raw_key, cipher, kdf_iter, cipher_page_size) db_handle.cursor().execute( 'SELECT count(*) FROM sqlite_master') - @classmethod - def _set_crypto_pragmas(cls, db_handle, key, raw_key, cipher, kdf_iter, - cipher_page_size): - """ - Set cryptographic params (key, cipher, KDF number of iterations and - cipher page size). - """ - cls._pragma_key(db_handle, key, raw_key) - cls._pragma_cipher(db_handle, cipher) - cls._pragma_kdf_iter(db_handle, kdf_iter) - cls._pragma_cipher_page_size(db_handle, cipher_page_size) - - @classmethod - def _pragma_key(cls, db_handle, key, raw_key): - """ - Set the C{key} for use with the database. - - The process of creating a new, encrypted database is called 'keying' - the database. SQLCipher uses just-in-time key derivation at the point - it is first needed for an operation. This means that the key (and any - options) must be set before the first operation on the database. 
As - soon as the database is touched (e.g. SELECT, CREATE TABLE, UPDATE, - etc.) and pages need to be read or written, the key is prepared for - use. - - Implementation Notes: - - * PRAGMA key should generally be called as the first operation on a - database. - - :param key: The key for use with the database. - :type key: str - :param raw_key: Whether C{key} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. - :type raw_key: bool - """ - if raw_key: - cls._pragma_key_raw(db_handle, key) - else: - cls._pragma_key_passphrase(db_handle, key) - - @classmethod - def _pragma_key_passphrase(cls, db_handle, passphrase): - """ - Set a passphrase for encryption key derivation. - - The key itself can be a passphrase, which is converted to a key using - PBKDF2 key derivation. The result is used as the encryption key for - the database. By using this method, there is no way to alter the KDF; - if you want to do so you should use a raw key instead and derive the - key using your own KDF. - - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param passphrase: The passphrase used to derive the encryption key. - :type passphrase: str - """ - db_handle.cursor().execute("PRAGMA key = '%s'" % passphrase) - - @classmethod - def _pragma_key_raw(cls, db_handle, key): - """ - Set a raw hexadecimal encryption key. - - It is possible to specify an exact byte sequence using a blob literal. - With this method, it is the calling application's responsibility to - ensure that the data provided is a 64 character hex string, which will - be converted directly to 32 bytes (256 bits) of key data. - - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param key: A 64 character hex string. 
- :type key: str - """ - if not all(c in string.hexdigits for c in key): - raise NotAnHexString(key) - db_handle.cursor().execute('PRAGMA key = "x\'%s"' % key) - - @classmethod - def _pragma_cipher(cls, db_handle, cipher='aes-256-cbc'): - """ - Set the cipher and mode to use for symmetric encryption. - - SQLCipher uses aes-256-cbc as the default cipher and mode of - operation. It is possible to change this, though not generally - recommended, using PRAGMA cipher. - - SQLCipher makes direct use of libssl, so all cipher options available - to libssl are also available for use with SQLCipher. See `man enc` for - OpenSSL's supported ciphers. - - Implementation Notes: - - * PRAGMA cipher must be called after PRAGMA key and before the first - actual database operation or it will have no effect. - - * If a non-default value is used PRAGMA cipher to create a database, - it must also be called every time that database is opened. - - * SQLCipher does not implement its own encryption. Instead it uses the - widely available and peer-reviewed OpenSSL libcrypto for all - cryptographic functions. - - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param cipher: The cipher and mode to use. - :type cipher: str - """ - db_handle.cursor().execute("PRAGMA cipher = '%s'" % cipher) - - @classmethod - def _pragma_kdf_iter(cls, db_handle, kdf_iter=4000): - """ - Set the number of iterations for the key derivation function. - - SQLCipher uses PBKDF2 key derivation to strengthen the key and make it - resistent to brute force and dictionary attacks. The default - configuration uses 4000 PBKDF2 iterations (effectively 16,000 SHA1 - operations). PRAGMA kdf_iter can be used to increase or decrease the - number of iterations used. - - Implementation Notes: - - * PRAGMA kdf_iter must be called after PRAGMA key and before the first - actual database operation or it will have no effect. 
- - * If a non-default value is used PRAGMA kdf_iter to create a database, - it must also be called every time that database is opened. - - * It is not recommended to reduce the number of iterations if a - passphrase is in use. - - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int - """ - db_handle.cursor().execute("PRAGMA kdf_iter = '%d'" % kdf_iter) - - @classmethod - def _pragma_cipher_page_size(cls, db_handle, cipher_page_size=1024): - """ - Set the page size of the encrypted database. - - SQLCipher 2 introduced the new PRAGMA cipher_page_size that can be - used to adjust the page size for the encrypted database. The default - page size is 1024 bytes, but it can be desirable for some applications - to use a larger page size for increased performance. For instance, - some recent testing shows that increasing the page size can noticeably - improve performance (5-30%) for certain queries that manipulate a - large number of pages (e.g. selects without an index, large inserts in - a transaction, big deletes). - - To adjust the page size, call the pragma immediately after setting the - key for the first time and each subsequent time that you open the - database. - - Implementation Notes: - - * PRAGMA cipher_page_size must be called after PRAGMA key and before - the first actual database operation or it will have no effect. - - * If a non-default value is used PRAGMA cipher_page_size to create a - database, it must also be called every time that database is opened. - - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param cipher_page_size: The page size. - :type cipher_page_size: int - """ - db_handle.cursor().execute( - "PRAGMA cipher_page_size = '%d'" % cipher_page_size) - - @classmethod - def _pragma_rekey(cls, db_handle, new_key, raw_key): - """ - Change the key of an existing encrypted database. 
- - To change the key on an existing encrypted database, it must first be - unlocked with the current encryption key. Once the database is - readable and writeable, PRAGMA rekey can be used to re-encrypt every - page in the database with a new key. - - * PRAGMA rekey must be called after PRAGMA key. It can be called at any - time once the database is readable. - - * PRAGMA rekey can not be used to encrypted a standard SQLite - database! It is only useful for changing the key on an existing - database. - - * Previous versions of SQLCipher provided a PRAGMA rekey_cipher and - code>PRAGMA rekey_kdf_iter. These are deprecated and should not be - used. Instead, use sqlcipher_export(). - - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param new_key: The new key. - :type new_key: str - :param raw_key: Whether C{password} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. - :type raw_key: bool - """ - # XXX change key param! - if raw_key: - cls._pragma_rekey_raw(db_handle, key) - else: - cls._pragma_rekey_passphrase(db_handle, key) - - @classmethod - def _pragma_rekey_passphrase(cls, db_handle, passphrase): - """ - Change the passphrase for encryption key derivation. - - The key itself can be a passphrase, which is converted to a key using - PBKDF2 key derivation. The result is used as the encryption key for - the database. - - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param passphrase: The passphrase used to derive the encryption key. - :type passphrase: str - """ - db_handle.cursor().execute("PRAGMA rekey = '%s'" % passphrase) - - @classmethod - def _pragma_rekey_raw(cls, db_handle, key): - """ - Change the raw hexadecimal encryption key. - - It is possible to specify an exact byte sequence using a blob literal. 
- With this method, it is the calling application's responsibility to - ensure that the data provided is a 64 character hex string, which will - be converted directly to 32 bytes (256 bits) of key data. - - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param key: A 64 character hex string. - :type key: str - """ - if not all(c in string.hexdigits for c in key): - raise NotAnHexString(key) - # XXX change passphrase param! - db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % passphrase) - - @classmethod - def _pragma_synchronous_off(cls, db_handle): - """ - Change the setting of the "synchronous" flag to OFF. - """ - logger.debug("SQLCIPHER: SETTING SYNCHRONOUS OFF") - db_handle.cursor().execute('PRAGMA synchronous=OFF') - - @classmethod - def _pragma_synchronous_normal(cls, db_handle): - """ - Change the setting of the "synchronous" flag to NORMAL. - """ - logger.debug("SQLCIPHER: SETTING SYNCHRONOUS NORMAL") - db_handle.cursor().execute('PRAGMA synchronous=NORMAL') - - @classmethod - def _pragma_mem_temp_store(cls, db_handle): - """ - Use a in-memory store for temporary tables. - """ - logger.debug("SQLCIPHER: SETTING TEMP_STORE MEMORY") - db_handle.cursor().execute('PRAGMA temp_store=MEMORY') - - @classmethod - def _pragma_write_ahead_logging(cls, db_handle): - """ - Enable write-ahead logging, and set the autocheckpoint to 50 pages. - - Setting the autocheckpoint to a small value, we make the reads not - suffer too much performance degradation. - - From the sqlite docs: - - "There is a tradeoff between average read performance and average write - performance. To maximize the read performance, one wants to keep the - WAL as small as possible and hence run checkpoints frequently, perhaps - as often as every COMMIT. 
To maximize write performance, one wants to - amortize the cost of each checkpoint over as many writes as possible, - meaning that one wants to run checkpoints infrequently and let the WAL - grow as large as possible before each checkpoint. The decision of how - often to run checkpoints may therefore vary from one application to - another depending on the relative read and write performance - requirements of the application. The default strategy is to run a - checkpoint once the WAL reaches 1000 pages" - """ - logger.debug("SQLCIPHER: SETTING WRITE-AHEAD LOGGING") - db_handle.cursor().execute('PRAGMA journal_mode=WAL') - # The optimum value can still use a little bit of tuning, but we favor - # small sizes of the WAL file to get fast reads, since we assume that - # the writes will be quick enough to not block too much. - - # TODO - # As a further improvement, we might want to set autocheckpoint to 0 - # here and do the checkpoints manually in a separate thread, to avoid - # any blocks in the main thread (we should run a loopingcall from here) - db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50') - # Extra query methods: extensions to the base sqlite implmentation. def get_count_from_index(self, index_name, *key_values): @@ -1162,5 +804,16 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): def replica_uid(self): return self._get_replica_uid() +# +# Exceptions +# + + +class DatabaseIsNotEncrypted(Exception): + """ + Exception raised when trying to open non-encrypted databases. 
+ """ + pass + sqlite_backend.SQLiteDatabase.register_implementation(SQLCipherDatabase) -- cgit v1.2.3 From 9c56adfd27e96c44c12ad5295c42e6b8d9bcad98 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 22 Sep 2014 20:03:45 -0500 Subject: move public api to its own file --- client/src/leap/soledad/client/__init__.py | 824 +-------------------- client/src/leap/soledad/client/api.py | 822 ++++++++++++++++++++ client/src/leap/soledad/client/mp_safe_db.py | 112 --- .../src/leap/soledad/client/mp_safe_db_TOREMOVE.py | 112 +++ client/src/leap/soledad/client/sqlcipher.py | 323 ++++---- client/src/leap/soledad/client/sync.py | 4 - 6 files changed, 1107 insertions(+), 1090 deletions(-) create mode 100644 client/src/leap/soledad/client/api.py delete mode 100644 client/src/leap/soledad/client/mp_safe_db.py create mode 100644 client/src/leap/soledad/client/mp_safe_db_TOREMOVE.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 50fcff2c..245a8971 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -16,828 +16,12 @@ # along with this program. If not, see . """ Soledad - Synchronization Of Locally Encrypted Data Among Devices. - -Soledad is the part of LEAP that manages storage and synchronization of -application data. It is built on top of U1DB reference Python API and -implements (1) a SQLCipher backend for local storage in the client, (2) a -SyncTarget that encrypts data before syncing, and (3) a CouchDB backend for -remote storage in the server side. 
-""" -import binascii -import errno -import httplib -import logging -import os -import socket -import ssl -import urlparse - - -try: - import cchardet as chardet -except ImportError: - import chardet - -from u1db.remote import http_client -from u1db.remote.ssl_match_hostname import match_hostname - -from leap.common.config import get_path_prefix -from leap.soledad.common import ( - SHARED_DB_NAME, - soledad_assert, - soledad_assert_type -) -from leap.soledad.client.events import ( - SOLEDAD_NEW_DATA_TO_SYNC, - SOLEDAD_DONE_DATA_SYNC, - signal, -) -from leap.soledad.common.document import SoledadDocument -from leap.soledad.client.crypto import SoledadCrypto -from leap.soledad.client.secrets import SoledadSecrets -from leap.soledad.client.shared_db import SoledadSharedDatabase -from leap.soledad.client.sqlcipher import open as sqlcipher_open -from leap.soledad.client.sqlcipher import SQLCipherDatabase -from leap.soledad.client.target import SoledadSyncTarget - - -logger = logging.getLogger(name=__name__) - - -# -# Constants -# - -SOLEDAD_CERT = None """ -Path to the certificate file used to certify the SSL connection between -Soledad client and server. -""" - - -# -# Soledad: local encrypted storage and remote encrypted sync. -# - -class Soledad(object): - """ - Soledad provides encrypted data storage and sync. - - A Soledad instance is used to store and retrieve data in a local encrypted - database and synchronize this database with Soledad server. - - This class is also responsible for bootstrapping users' account by - creating cryptographic secrets and/or storing/fetching them on Soledad - server. - - Soledad uses C{leap.common.events} to signal events. The possible events - to be signaled are: - - SOLEDAD_CREATING_KEYS: emitted during bootstrap sequence when key - generation starts. - SOLEDAD_DONE_CREATING_KEYS: emitted during bootstrap sequence when key - generation finishes. 
- SOLEDAD_UPLOADING_KEYS: emitted during bootstrap sequence when soledad - starts sending keys to server. - SOLEDAD_DONE_UPLOADING_KEYS: emitted during bootstrap sequence when - soledad finishes sending keys to server. - SOLEDAD_DOWNLOADING_KEYS: emitted during bootstrap sequence when - soledad starts to retrieve keys from server. - SOLEDAD_DONE_DOWNLOADING_KEYS: emitted during bootstrap sequence when - soledad finishes downloading keys from server. - SOLEDAD_NEW_DATA_TO_SYNC: emitted upon call to C{need_sync()} when - there's indeed new data to be synchronized between local database - replica and server's replica. - SOLEDAD_DONE_DATA_SYNC: emitted inside C{sync()} method when it has - finished synchronizing with remote replica. - """ - - LOCAL_DATABASE_FILE_NAME = 'soledad.u1db' - """ - The name of the local SQLCipher U1DB database file. - """ - - STORAGE_SECRETS_FILE_NAME = "soledad.json" - """ - The name of the file where the storage secrets will be stored. - """ - - DEFAULT_PREFIX = os.path.join(get_path_prefix(), 'leap', 'soledad') - """ - Prefix for default values for path. - """ - - def __init__(self, uuid, passphrase, secrets_path, local_db_path, - server_url, cert_file, - auth_token=None, secret_id=None, defer_encryption=False): - """ - Initialize configuration, cryptographic keys and dbs. - - :param uuid: User's uuid. - :type uuid: str - - :param passphrase: The passphrase for locking and unlocking encryption - secrets for local and remote storage. - :type passphrase: unicode - - :param secrets_path: Path for storing encrypted key used for - symmetric encryption. - :type secrets_path: str - - :param local_db_path: Path for local encrypted storage db. - :type local_db_path: str - - :param server_url: URL for Soledad server. This is used either to sync - with the user's remote db and to interact with the - shared recovery database. 
- :type server_url: str - - :param cert_file: Path to the certificate of the ca used - to validate the SSL certificate used by the remote - soledad server. - :type cert_file: str - - :param auth_token: Authorization token for accessing remote databases. - :type auth_token: str - - :param secret_id: The id of the storage secret to be used. - :type secret_id: str - - :param defer_encryption: Whether to defer encryption/decryption of - documents, or do it inline while syncing. - :type defer_encryption: bool - - :raise BootstrapSequenceError: Raised when the secret generation and - storage on server sequence has failed - for some reason. - """ - # store config params - self._uuid = uuid - self._passphrase = passphrase - self._secrets_path = secrets_path - self._local_db_path = local_db_path - self._server_url = server_url - # configure SSL certificate - global SOLEDAD_CERT - SOLEDAD_CERT = cert_file - self._set_token(auth_token) - self._defer_encryption = defer_encryption - - self._init_config() - self._init_dirs() - - # init crypto variables - self._shared_db_instance = None - self._crypto = SoledadCrypto(self) - self._secrets = SoledadSecrets( - self._uuid, - self._passphrase, - self._secrets_path, - self._shared_db, - self._crypto, - secret_id=secret_id) - - # initiate bootstrap sequence - self._bootstrap() # might raise BootstrapSequenceError() - - def _init_config(self): - """ - Initialize configuration using default values for missing params. 
- """ - soledad_assert_type(self._passphrase, unicode) - # initialize secrets_path - if self._secrets_path is None: - self._secrets_path = os.path.join( - self.DEFAULT_PREFIX, self.STORAGE_SECRETS_FILE_NAME) - # initialize local_db_path - if self._local_db_path is None: - self._local_db_path = os.path.join( - self.DEFAULT_PREFIX, self.LOCAL_DATABASE_FILE_NAME) - # initialize server_url - soledad_assert( - self._server_url is not None, - 'Missing URL for Soledad server.') - - # - # initialization/destruction methods - # - - def _bootstrap(self): - """ - Bootstrap local Soledad instance. - - :raise BootstrapSequenceError: Raised when the secret generation and - storage on server sequence has failed for some reason. - """ - try: - self._secrets.bootstrap() - self._init_db() - except: - raise - - def _init_dirs(self): - """ - Create work directories. - - :raise OSError: in case file exists and is not a dir. - """ - paths = map( - lambda x: os.path.dirname(x), - [self._local_db_path, self._secrets_path]) - for path in paths: - try: - if not os.path.isdir(path): - logger.info('Creating directory: %s.' % path) - os.makedirs(path) - except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir(path): - pass - else: - raise - - def _init_db(self): - """ - Initialize the U1DB SQLCipher database for local storage. - - Currently, Soledad uses the default SQLCipher cipher, i.e. - 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key and - uses the 'raw PRAGMA key' format to handle the key to SQLCipher. - """ - key = self._secrets.get_local_storage_key() - sync_db_key = self._secrets.get_sync_db_key() - self._db = sqlcipher_open( - self._local_db_path, - binascii.b2a_hex(key), # sqlcipher only accepts the hex version - create=True, - document_factory=SoledadDocument, - crypto=self._crypto, - raw_key=True, - defer_encryption=self._defer_encryption, - sync_db_key=binascii.b2a_hex(sync_db_key)) - - def close(self): - """ - Close underlying U1DB database. 
- """ - logger.debug("Closing soledad") - if hasattr(self, '_db') and isinstance( - self._db, - SQLCipherDatabase): - self._db.stop_sync() - self._db.close() - - @property - def _shared_db(self): - """ - Return an instance of the shared recovery database object. - - :return: The shared database. - :rtype: SoledadSharedDatabase - """ - if self._shared_db_instance is None: - self._shared_db_instance = SoledadSharedDatabase.open_database( - urlparse.urljoin(self.server_url, SHARED_DB_NAME), - self._uuid, - False, # db should exist at this point. - creds=self._creds) - return self._shared_db_instance - - # - # Document storage, retrieval and sync. - # - - def put_doc(self, doc): - """ - Update a document in the local encrypted database. - - ============================== WARNING ============================== - This method converts the document's contents to unicode in-place. This - means that after calling C{put_doc(doc)}, the contents of the - document, i.e. C{doc.content}, might be different from before the - call. - ============================== WARNING ============================== - - :param doc: the document to update - :type doc: SoledadDocument - - :return: the new revision identifier for the document - :rtype: str - """ - doc.content = self._convert_to_unicode(doc.content) - return self._db.put_doc(doc) - - def delete_doc(self, doc): - """ - Delete a document from the local encrypted database. - - :param doc: the document to delete - :type doc: SoledadDocument - - :return: the new revision identifier for the document - :rtype: str - """ - return self._db.delete_doc(doc) - - def get_doc(self, doc_id, include_deleted=False): - """ - Retrieve a document from the local encrypted database. 
- - :param doc_id: the unique document identifier - :type doc_id: str - :param include_deleted: if True, deleted documents will be - returned with empty content; otherwise asking - for a deleted document will return None - :type include_deleted: bool - - :return: the document object or None - :rtype: SoledadDocument - """ - return self._db.get_doc(doc_id, include_deleted=include_deleted) - - def get_docs(self, doc_ids, check_for_conflicts=True, - include_deleted=False): - """ - Get the content for many documents. - - :param doc_ids: a list of document identifiers - :type doc_ids: list - :param check_for_conflicts: if set False, then the conflict check will - be skipped, and 'None' will be returned instead of True/False - :type check_for_conflicts: bool - - :return: iterable giving the Document object for each document id - in matching doc_ids order. - :rtype: generator - """ - return self._db.get_docs( - doc_ids, check_for_conflicts=check_for_conflicts, - include_deleted=include_deleted) - - def get_all_docs(self, include_deleted=False): - """ - Get the JSON content for all documents in the database. - - :param include_deleted: If set to True, deleted documents will be - returned with empty content. Otherwise deleted - documents will not be included in the results. - :return: (generation, [Document]) - The current generation of the database, followed by a list of - all the documents in the database. - """ - return self._db.get_all_docs(include_deleted) - - def _convert_to_unicode(self, content): - """ - Converts content to unicode (or all the strings in content) - - NOTE: Even though this method supports any type, it will - currently ignore contents of lists, tuple or any other - iterable than dict. 
We don't need support for these at the - moment - - :param content: content to convert - :type content: object - - :rtype: object - """ - if isinstance(content, unicode): - return content - elif isinstance(content, str): - result = chardet.detect(content) - default = "utf-8" - encoding = result["encoding"] or default - try: - content = content.decode(encoding) - except UnicodeError as e: - logger.error("Unicode error: {0!r}. Using 'replace'".format(e)) - content = content.decode(encoding, 'replace') - return content - else: - if isinstance(content, dict): - for key in content.keys(): - content[key] = self._convert_to_unicode(content[key]) - return content - - def create_doc(self, content, doc_id=None): - """ - Create a new document in the local encrypted database. - - :param content: the contents of the new document - :type content: dict - :param doc_id: an optional identifier specifying the document id - :type doc_id: str - - :return: the new document - :rtype: SoledadDocument - """ - return self._db.create_doc( - self._convert_to_unicode(content), doc_id=doc_id) - - def create_doc_from_json(self, json, doc_id=None): - """ - Create a new document. - - You can optionally specify the document identifier, but the document - must not already exist. See 'put_doc' if you want to override an - existing document. - If the database specifies a maximum document size and the document - exceeds it, create will fail and raise a DocumentTooBig exception. - - :param json: The JSON document string - :type json: str - :param doc_id: An optional identifier specifying the document id. - :type doc_id: - :return: The new document - :rtype: SoledadDocument - """ - return self._db.create_doc_from_json(json, doc_id=doc_id) - - def create_index(self, index_name, *index_expressions): - """ - Create an named index, which can then be queried for future lookups. - Creating an index which already exists is not an error, and is cheap. 
- Creating an index which does not match the index_expressions of the - existing index is an error. - Creating an index will block until the expressions have been evaluated - and the index generated. - - :param index_name: A unique name which can be used as a key prefix - :type index_name: str - :param index_expressions: index expressions defining the index - information. - :type index_expressions: dict - - Examples: - - "fieldname", or "fieldname.subfieldname" to index alphabetically - sorted on the contents of a field. - - "number(fieldname, width)", "lower(fieldname)" - """ - if self._db: - return self._db.create_index( - index_name, *index_expressions) - - def delete_index(self, index_name): - """ - Remove a named index. - - :param index_name: The name of the index we are removing - :type index_name: str - """ - if self._db: - return self._db.delete_index(index_name) - - def list_indexes(self): - """ - List the definitions of all known indexes. - - :return: A list of [('index-name', ['field', 'field2'])] definitions. - :rtype: list - """ - if self._db: - return self._db.list_indexes() - - def get_from_index(self, index_name, *key_values): - """ - Return documents that match the keys supplied. - - You must supply exactly the same number of values as have been defined - in the index. It is possible to do a prefix match by using '*' to - indicate a wildcard match. You can only supply '*' to trailing entries, - (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) - It is also possible to append a '*' to the last supplied value (eg - 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') - - :param index_name: The index to query - :type index_name: str - :param key_values: values to match. 
eg, if you have - an index with 3 fields then you would have: - get_from_index(index_name, val1, val2, val3) - :type key_values: tuple - :return: List of [Document] - :rtype: list - """ - if self._db: - return self._db.get_from_index(index_name, *key_values) - - def get_count_from_index(self, index_name, *key_values): - """ - Return the count of the documents that match the keys and - values supplied. - - :param index_name: The index to query - :type index_name: str - :param key_values: values to match. eg, if you have - an index with 3 fields then you would have: - get_from_index(index_name, val1, val2, val3) - :type key_values: tuple - :return: count. - :rtype: int - """ - if self._db: - return self._db.get_count_from_index(index_name, *key_values) - - def get_range_from_index(self, index_name, start_value, end_value): - """ - Return documents that fall within the specified range. - - Both ends of the range are inclusive. For both start_value and - end_value, one must supply exactly the same number of values as have - been defined in the index, or pass None. In case of a single column - index, a string is accepted as an alternative for a tuple with a single - value. It is possible to do a prefix match by using '*' to indicate - a wildcard match. You can only supply '*' to trailing entries, (eg - 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also - possible to append a '*' to the last supplied value (eg 'val*', '*', - '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') - - :param index_name: The index to query - :type index_name: str - :param start_values: tuples of values that define the lower bound of - the range. eg, if you have an index with 3 fields then you would - have: (val1, val2, val3) - :type start_values: tuple - :param end_values: tuples of values that define the upper bound of the - range. 
eg, if you have an index with 3 fields then you would have: - (val1, val2, val3) - :type end_values: tuple - :return: List of [Document] - :rtype: list - """ - if self._db: - return self._db.get_range_from_index( - index_name, start_value, end_value) - - def get_index_keys(self, index_name): - """ - Return all keys under which documents are indexed in this index. - - :param index_name: The index to query - :type index_name: str - :return: [] A list of tuples of indexed keys. - :rtype: list - """ - if self._db: - return self._db.get_index_keys(index_name) - - def get_doc_conflicts(self, doc_id): - """ - Get the list of conflicts for the given document. - - :param doc_id: the document id - :type doc_id: str - - :return: a list of the document entries that are conflicted - :rtype: list - """ - if self._db: - return self._db.get_doc_conflicts(doc_id) - - def resolve_doc(self, doc, conflicted_doc_revs): - """ - Mark a document as no longer conflicted. - - :param doc: a document with the new content to be inserted. - :type doc: SoledadDocument - :param conflicted_doc_revs: a list of revisions that the new content - supersedes. - :type conflicted_doc_revs: list - """ - if self._db: - return self._db.resolve_doc(doc, conflicted_doc_revs) - - def sync(self, defer_decryption=True): - """ - Synchronize the local encrypted replica with a remote replica. - - This method blocks until a syncing lock is acquired, so there are no - attempts of concurrent syncs from the same client replica. - - :param url: the url of the target replica to sync with - :type url: str - - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. - :type defer_decryption: bool - - :return: The local generation before the synchronisation was - performed. 
- :rtype: str - """ - if self._db: - try: - local_gen = self._db.sync( - urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), - creds=self._creds, autocreate=False, - defer_decryption=defer_decryption) - signal(SOLEDAD_DONE_DATA_SYNC, self._uuid) - return local_gen - except Exception as e: - logger.error("Soledad exception when syncing: %s" % str(e)) - - def stop_sync(self): - """ - Stop the current syncing process. - """ - if self._db: - self._db.stop_sync() - - def need_sync(self, url): - """ - Return if local db replica differs from remote url's replica. - - :param url: The remote replica to compare with local replica. - :type url: str - - :return: Whether remote replica and local replica differ. - :rtype: bool - """ - target = SoledadSyncTarget( - url, self._db._get_replica_uid(), creds=self._creds, - crypto=self._crypto) - info = target.get_sync_info(self._db._get_replica_uid()) - # compare source generation with target's last known source generation - if self._db._get_generation() != info[4]: - signal(SOLEDAD_NEW_DATA_TO_SYNC, self._uuid) - return True - return False - - @property - def syncing(self): - """ - Property, True if the syncer is syncing. - """ - return self._db.syncing - - def _set_token(self, token): - """ - Set the authentication token for remote database access. - - Build the credentials dictionary with the following format: - - self._{ - 'token': { - 'uuid': '' - 'token': '' - } - - :param token: The authentication token. - :type token: str - """ - self._creds = { - 'token': { - 'uuid': self._uuid, - 'token': token, - } - } - - def _get_token(self): - """ - Return current token from credentials dictionary. 
- """ - return self._creds['token']['token'] - - token = property(_get_token, _set_token, doc='The authentication Token.') - - # - # Setters/getters - # - - def _get_uuid(self): - return self._uuid - - uuid = property(_get_uuid, doc='The user uuid.') - - def get_secret_id(self): - return self._secrets.secret_id - - def set_secret_id(self, secret_id): - self._secrets.set_secret_id(secret_id) - - secret_id = property( - get_secret_id, - set_secret_id, - doc='The active secret id.') - - def _set_secrets_path(self, secrets_path): - self._secrets.secrets_path = secrets_path - - def _get_secrets_path(self): - return self._secrets.secrets_path - - secrets_path = property( - _get_secrets_path, - _set_secrets_path, - doc='The path for the file containing the encrypted symmetric secret.') - - def _get_local_db_path(self): - return self._local_db_path - - local_db_path = property( - _get_local_db_path, - doc='The path for the local database replica.') - - def _get_server_url(self): - return self._server_url - - server_url = property( - _get_server_url, - doc='The URL of the Soledad server.') - - @property - def storage_secret(self): - """ - Return the secret used for symmetric encryption. - """ - return self._secrets.storage_secret - - @property - def remote_storage_secret(self): - """ - Return the secret used for encryption of remotely stored data. - """ - return self._secrets.remote_storage_secret - - @property - def secrets(self): - return self._secrets - - @property - def passphrase(self): - return self._secrets.passphrase - - def change_passphrase(self, new_passphrase): - """ - Change the passphrase that encrypts the storage secret. - - :param new_passphrase: The new passphrase. - :type new_passphrase: unicode - - :raise NoStorageSecret: Raised if there's no storage secret available. 
- """ - self._secrets.change_passphrase(new_passphrase) - - -# ---------------------------------------------------------------------------- -# Monkey patching u1db to be able to provide a custom SSL cert -# ---------------------------------------------------------------------------- - -# We need a more reasonable timeout (in seconds) -SOLEDAD_TIMEOUT = 120 - - -class VerifiedHTTPSConnection(httplib.HTTPSConnection): - """ - HTTPSConnection verifying server side certificates. - """ - # derived from httplib.py - - def connect(self): - """ - Connect to a host on a given (SSL) port. - """ - try: - source = self.source_address - sock = socket.create_connection((self.host, self.port), - SOLEDAD_TIMEOUT, source) - except AttributeError: - # source_address was introduced in 2.7 - sock = socket.create_connection((self.host, self.port), - SOLEDAD_TIMEOUT) - if self._tunnel_host: - self.sock = sock - self._tunnel() - - highest_supported = ssl.PROTOCOL_SSLv23 - - try: - # needs python 2.7.9+ - # negotiate the best available version, - # but explicitely disabled bad ones. 
- ctx = ssl.SSLContext(highest_supported) - ctx.options |= ssl.OP_NO_SSLv2 - ctx.options |= ssl.OP_NO_SSLv3 - - ctx.load_verify_locations(cafile=SOLEDAD_CERT) - ctx.verify_mode = ssl.CERT_REQUIRED - self.sock = ctx.wrap_socket(sock) - - except AttributeError: - self.sock = ssl.wrap_socket( - sock, ca_certs=SOLEDAD_CERT, cert_reqs=ssl.CERT_REQUIRED, - ssl_version=highest_supported) - - match_hostname(self.sock.getpeercert(), self.host) - - -old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection -http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection - - -__all__ = ['soledad_assert', 'Soledad'] +from leap.soledad.client.api import Soledad +from leap.soledad.common import soledad_assert from ._version import get_versions __version__ = get_versions()['version'] del get_versions + +__all__ = ['soledad_assert', 'Soledad', '__version__'] diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py new file mode 100644 index 00000000..bfb6c703 --- /dev/null +++ b/client/src/leap/soledad/client/api.py @@ -0,0 +1,822 @@ +# -*- coding: utf-8 -*- +# api.py +# Copyright (C) 2013, 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Soledad - Synchronization Of Locally Encrypted Data Among Devices. + +This module holds the public api for Soledad. + +Soledad is the part of LEAP that manages storage and synchronization of +application data. 
It is built on top of U1DB reference Python API and +implements (1) a SQLCipher backend for local storage in the client, (2) a +SyncTarget that encrypts data before syncing, and (3) a CouchDB backend for +remote storage in the server side. +""" +import binascii +import errno +import httplib +import logging +import os +import socket +import ssl +import urlparse + + +try: + import cchardet as chardet +except ImportError: + import chardet + +from u1db.remote import http_client +from u1db.remote.ssl_match_hostname import match_hostname + +from leap.common.config import get_path_prefix +from leap.soledad.common import SHARED_DB_NAME +from leap.soledad.common import soledad_assert +from leap.soledad.common import soledad_assert_type +from leap.soledad.common.document import SoledadDocument + +from leap.soledad.client import events as soledad_events +from leap.soledad.client.crypto import SoledadCrypto +from leap.soledad.client.secrets import SoledadSecrets +from leap.soledad.client.shared_db import SoledadSharedDatabase +from leap.soledad.client.sqlcipher import SQLCipherDatabase +from leap.soledad.client.target import SoledadSyncTarget +from leap.soledad.client.sqlcipher import SQLCipherDB, SQLCipherOptions + +logger = logging.getLogger(name=__name__) + +# +# Constants +# + +SOLEDAD_CERT = None +""" +Path to the certificate file used to certify the SSL connection between +Soledad client and server. +""" + + +# +# Soledad: local encrypted storage and remote encrypted sync. +# + +class Soledad(object): + """ + Soledad provides encrypted data storage and sync. + + A Soledad instance is used to store and retrieve data in a local encrypted + database and synchronize this database with Soledad server. + + This class is also responsible for bootstrapping users' account by + creating cryptographic secrets and/or storing/fetching them on Soledad + server. + + Soledad uses ``leap.common.events`` to signal events. 
The possible events + to be signaled are: + + SOLEDAD_CREATING_KEYS: emitted during bootstrap sequence when key + generation starts. + SOLEDAD_DONE_CREATING_KEYS: emitted during bootstrap sequence when key + generation finishes. + SOLEDAD_UPLOADING_KEYS: emitted during bootstrap sequence when soledad + starts sending keys to server. + SOLEDAD_DONE_UPLOADING_KEYS: emitted during bootstrap sequence when + soledad finishes sending keys to server. + SOLEDAD_DOWNLOADING_KEYS: emitted during bootstrap sequence when + soledad starts to retrieve keys from server. + SOLEDAD_DONE_DOWNLOADING_KEYS: emitted during bootstrap sequence when + soledad finishes downloading keys from server. + SOLEDAD_NEW_DATA_TO_SYNC: emitted upon call to C{need_sync()} when + there's indeed new data to be synchronized between local database + replica and server's replica. + SOLEDAD_DONE_DATA_SYNC: emitted inside C{sync()} method when it has + finished synchronizing with remote replica. + """ + + LOCAL_DATABASE_FILE_NAME = 'soledad.u1db' + """ + The name of the local SQLCipher U1DB database file. + """ + + STORAGE_SECRETS_FILE_NAME = "soledad.json" + """ + The name of the file where the storage secrets will be stored. + """ + + DEFAULT_PREFIX = os.path.join(get_path_prefix(), 'leap', 'soledad') + """ + Prefix for default values for path. + """ + + def __init__(self, uuid, passphrase, secrets_path, local_db_path, + server_url, cert_file, + auth_token=None, secret_id=None, defer_encryption=False): + """ + Initialize configuration, cryptographic keys and dbs. + + :param uuid: User's uuid. + :type uuid: str + + :param passphrase: The passphrase for locking and unlocking encryption + secrets for local and remote storage. + :type passphrase: unicode + + :param secrets_path: Path for storing encrypted key used for + symmetric encryption. + :type secrets_path: str + + :param local_db_path: Path for local encrypted storage db. + :type local_db_path: str + + :param server_url: URL for Soledad server. 
This is used either to sync + with the user's remote db and to interact with the + shared recovery database. + :type server_url: str + + :param cert_file: Path to the certificate of the ca used + to validate the SSL certificate used by the remote + soledad server. + :type cert_file: str + + :param auth_token: Authorization token for accessing remote databases. + :type auth_token: str + + :param secret_id: The id of the storage secret to be used. + :type secret_id: str + + :param defer_encryption: Whether to defer encryption/decryption of + documents, or do it inline while syncing. + :type defer_encryption: bool + + :raise BootstrapSequenceError: Raised when the secret generation and + storage on server sequence has failed + for some reason. + """ + # store config params + self._uuid = uuid + self._passphrase = passphrase + self._secrets_path = secrets_path + self._local_db_path = local_db_path + self._server_url = server_url + # configure SSL certificate + global SOLEDAD_CERT + SOLEDAD_CERT = cert_file + self._set_token(auth_token) + self._defer_encryption = defer_encryption + + self._init_config() + self._init_dirs() + + # init crypto variables + self._shared_db_instance = None + self._crypto = SoledadCrypto(self) + self._secrets = SoledadSecrets( + self._uuid, + self._passphrase, + self._secrets_path, + self._shared_db, + self._crypto, + secret_id=secret_id) + + # initiate bootstrap sequence + self._bootstrap() # might raise BootstrapSequenceError() + + def _init_config(self): + """ + Initialize configuration using default values for missing params. 
+ """ + soledad_assert_type(self._passphrase, unicode) + # initialize secrets_path + if self._secrets_path is None: + self._secrets_path = os.path.join( + self.DEFAULT_PREFIX, self.STORAGE_SECRETS_FILE_NAME) + # initialize local_db_path + if self._local_db_path is None: + self._local_db_path = os.path.join( + self.DEFAULT_PREFIX, self.LOCAL_DATABASE_FILE_NAME) + # initialize server_url + soledad_assert( + self._server_url is not None, + 'Missing URL for Soledad server.') + + # + # initialization/destruction methods + # + + def _bootstrap(self): + """ + Bootstrap local Soledad instance. + + :raise BootstrapSequenceError: Raised when the secret generation and + storage on server sequence has failed for some reason. + """ + try: + self._secrets.bootstrap() + self._init_db() + except: + raise + + def _init_dirs(self): + """ + Create work directories. + + :raise OSError: in case file exists and is not a dir. + """ + paths = map( + lambda x: os.path.dirname(x), + [self._local_db_path, self._secrets_path]) + for path in paths: + try: + if not os.path.isdir(path): + logger.info('Creating directory: %s.' % path) + os.makedirs(path) + except OSError as exc: + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + def _init_db(self): + """ + Initialize the U1DB SQLCipher database for local storage. + + Currently, Soledad uses the default SQLCipher cipher, i.e. + 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key and + uses the 'raw PRAGMA key' format to handle the key to SQLCipher. 
+ """ + tohex = binascii.b2a_hex + # sqlcipher only accepts the hex version + key = tohex(self._secrets.get_local_storage_key()) + sync_db_key = tohex(self._secrets.get_sync_db_key()) + + opts = SQLCipherOptions( + self._local_db_path, key, + is_raw_key=True, + create=True, + defer_encryption=self._defer_encryption, + sync_db_key=sync_db_key, + crypto=self._crypto, # XXX add this + document_factory=SoledadDocument, + ) + self._db = SQLCipherDB(opts) + + def close(self): + """ + Close underlying U1DB database. + """ + logger.debug("Closing soledad") + if hasattr(self, '_db') and isinstance( + self._db, + SQLCipherDatabase): + self._db.stop_sync() + self._db.close() + + @property + def _shared_db(self): + """ + Return an instance of the shared recovery database object. + + :return: The shared database. + :rtype: SoledadSharedDatabase + """ + if self._shared_db_instance is None: + self._shared_db_instance = SoledadSharedDatabase.open_database( + urlparse.urljoin(self.server_url, SHARED_DB_NAME), + self._uuid, + False, # db should exist at this point. + creds=self._creds) + return self._shared_db_instance + + # + # Document storage, retrieval and sync. + # + + def put_doc(self, doc): + """ + Update a document in the local encrypted database. + + ============================== WARNING ============================== + This method converts the document's contents to unicode in-place. This + means that after calling C{put_doc(doc)}, the contents of the + document, i.e. C{doc.content}, might be different from before the + call. + ============================== WARNING ============================== + + :param doc: the document to update + :type doc: SoledadDocument + + :return: the new revision identifier for the document + :rtype: str + """ + doc.content = self._convert_to_unicode(doc.content) + return self._db.put_doc(doc) + + def delete_doc(self, doc): + """ + Delete a document from the local encrypted database. 
+ + :param doc: the document to delete + :type doc: SoledadDocument + + :return: the new revision identifier for the document + :rtype: str + """ + return self._db.delete_doc(doc) + + def get_doc(self, doc_id, include_deleted=False): + """ + Retrieve a document from the local encrypted database. + + :param doc_id: the unique document identifier + :type doc_id: str + :param include_deleted: if True, deleted documents will be + returned with empty content; otherwise asking + for a deleted document will return None + :type include_deleted: bool + + :return: the document object or None + :rtype: SoledadDocument + """ + return self._db.get_doc(doc_id, include_deleted=include_deleted) + + def get_docs(self, doc_ids, check_for_conflicts=True, + include_deleted=False): + """ + Get the content for many documents. + + :param doc_ids: a list of document identifiers + :type doc_ids: list + :param check_for_conflicts: if set False, then the conflict check will + be skipped, and 'None' will be returned instead of True/False + :type check_for_conflicts: bool + + :return: iterable giving the Document object for each document id + in matching doc_ids order. + :rtype: generator + """ + return self._db.get_docs( + doc_ids, check_for_conflicts=check_for_conflicts, + include_deleted=include_deleted) + + def get_all_docs(self, include_deleted=False): + """ + Get the JSON content for all documents in the database. + + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted + documents will not be included in the results. + :return: (generation, [Document]) + The current generation of the database, followed by a list of + all the documents in the database. 
+ """ + return self._db.get_all_docs(include_deleted) + + def _convert_to_unicode(self, content): + """ + Converts content to unicode (or all the strings in content) + + NOTE: Even though this method supports any type, it will + currently ignore contents of lists, tuple or any other + iterable than dict. We don't need support for these at the + moment + + :param content: content to convert + :type content: object + + :rtype: object + """ + if isinstance(content, unicode): + return content + elif isinstance(content, str): + result = chardet.detect(content) + default = "utf-8" + encoding = result["encoding"] or default + try: + content = content.decode(encoding) + except UnicodeError as e: + logger.error("Unicode error: {0!r}. Using 'replace'".format(e)) + content = content.decode(encoding, 'replace') + return content + else: + if isinstance(content, dict): + for key in content.keys(): + content[key] = self._convert_to_unicode(content[key]) + return content + + def create_doc(self, content, doc_id=None): + """ + Create a new document in the local encrypted database. + + :param content: the contents of the new document + :type content: dict + :param doc_id: an optional identifier specifying the document id + :type doc_id: str + + :return: the new document + :rtype: SoledadDocument + """ + return self._db.create_doc( + self._convert_to_unicode(content), doc_id=doc_id) + + def create_doc_from_json(self, json, doc_id=None): + """ + Create a new document. + + You can optionally specify the document identifier, but the document + must not already exist. See 'put_doc' if you want to override an + existing document. + If the database specifies a maximum document size and the document + exceeds it, create will fail and raise a DocumentTooBig exception. + + :param json: The JSON document string + :type json: str + :param doc_id: An optional identifier specifying the document id. 
+ :type doc_id: + :return: The new document + :rtype: SoledadDocument + """ + return self._db.create_doc_from_json(json, doc_id=doc_id) + + def create_index(self, index_name, *index_expressions): + """ + Create an named index, which can then be queried for future lookups. + Creating an index which already exists is not an error, and is cheap. + Creating an index which does not match the index_expressions of the + existing index is an error. + Creating an index will block until the expressions have been evaluated + and the index generated. + + :param index_name: A unique name which can be used as a key prefix + :type index_name: str + :param index_expressions: index expressions defining the index + information. + :type index_expressions: dict + + Examples: + + "fieldname", or "fieldname.subfieldname" to index alphabetically + sorted on the contents of a field. + + "number(fieldname, width)", "lower(fieldname)" + """ + if self._db: + return self._db.create_index( + index_name, *index_expressions) + + def delete_index(self, index_name): + """ + Remove a named index. + + :param index_name: The name of the index we are removing + :type index_name: str + """ + if self._db: + return self._db.delete_index(index_name) + + def list_indexes(self): + """ + List the definitions of all known indexes. + + :return: A list of [('index-name', ['field', 'field2'])] definitions. + :rtype: list + """ + if self._db: + return self._db.list_indexes() + + def get_from_index(self, index_name, *key_values): + """ + Return documents that match the keys supplied. + + You must supply exactly the same number of values as have been defined + in the index. It is possible to do a prefix match by using '*' to + indicate a wildcard match. You can only supply '*' to trailing entries, + (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) 
+ It is also possible to append a '*' to the last supplied value (eg + 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: List of [Document] + :rtype: list + """ + if self._db: + return self._db.get_from_index(index_name, *key_values) + + def get_count_from_index(self, index_name, *key_values): + """ + Return the count of the documents that match the keys and + values supplied. + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: count. + :rtype: int + """ + if self._db: + return self._db.get_count_from_index(index_name, *key_values) + + def get_range_from_index(self, index_name, start_value, end_value): + """ + Return documents that fall within the specified range. + + Both ends of the range are inclusive. For both start_value and + end_value, one must supply exactly the same number of values as have + been defined in the index, or pass None. In case of a single column + index, a string is accepted as an alternative for a tuple with a single + value. It is possible to do a prefix match by using '*' to indicate + a wildcard match. You can only supply '*' to trailing entries, (eg + 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also + possible to append a '*' to the last supplied value (eg 'val*', '*', + '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :type index_name: str + :param start_values: tuples of values that define the lower bound of + the range. 
eg, if you have an index with 3 fields then you would + have: (val1, val2, val3) + :type start_values: tuple + :param end_values: tuples of values that define the upper bound of the + range. eg, if you have an index with 3 fields then you would have: + (val1, val2, val3) + :type end_values: tuple + :return: List of [Document] + :rtype: list + """ + if self._db: + return self._db.get_range_from_index( + index_name, start_value, end_value) + + def get_index_keys(self, index_name): + """ + Return all keys under which documents are indexed in this index. + + :param index_name: The index to query + :type index_name: str + :return: [] A list of tuples of indexed keys. + :rtype: list + """ + if self._db: + return self._db.get_index_keys(index_name) + + def get_doc_conflicts(self, doc_id): + """ + Get the list of conflicts for the given document. + + :param doc_id: the document id + :type doc_id: str + + :return: a list of the document entries that are conflicted + :rtype: list + """ + if self._db: + return self._db.get_doc_conflicts(doc_id) + + def resolve_doc(self, doc, conflicted_doc_revs): + """ + Mark a document as no longer conflicted. + + :param doc: a document with the new content to be inserted. + :type doc: SoledadDocument + :param conflicted_doc_revs: a list of revisions that the new content + supersedes. + :type conflicted_doc_revs: list + """ + if self._db: + return self._db.resolve_doc(doc, conflicted_doc_revs) + + def sync(self, defer_decryption=True): + """ + Synchronize the local encrypted replica with a remote replica. + + This method blocks until a syncing lock is acquired, so there are no + attempts of concurrent syncs from the same client replica. + + :param url: the url of the target replica to sync with + :type url: str + + :param defer_decryption: Whether to defer the decryption process using + the intermediate database. If False, + decryption will be done inline. 
+ :type defer_decryption: bool + + :return: The local generation before the synchronisation was + performed. + :rtype: str + """ + if self._db: + try: + local_gen = self._db.sync( + urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), + creds=self._creds, autocreate=False, + defer_decryption=defer_decryption) + soledad_events.signal( + soledad_events.SOLEDAD_DONE_DATA_SYNC, self._uuid) + return local_gen + except Exception as e: + logger.error("Soledad exception when syncing: %s" % str(e)) + + def stop_sync(self): + """ + Stop the current syncing process. + """ + if self._db: + self._db.stop_sync() + + def need_sync(self, url): + """ + Return if local db replica differs from remote url's replica. + + :param url: The remote replica to compare with local replica. + :type url: str + + :return: Whether remote replica and local replica differ. + :rtype: bool + """ + target = SoledadSyncTarget( + url, self._db._get_replica_uid(), creds=self._creds, + crypto=self._crypto) + info = target.get_sync_info(self._db._get_replica_uid()) + # compare source generation with target's last known source generation + if self._db._get_generation() != info[4]: + soledad_events.signal( + soledad_events.SOLEDAD_NEW_DATA_TO_SYNC, self._uuid) + return True + return False + + @property + def syncing(self): + """ + Property, True if the syncer is syncing. + """ + return self._db.syncing + + def _set_token(self, token): + """ + Set the authentication token for remote database access. + + Build the credentials dictionary with the following format: + + self._{ + 'token': { + 'uuid': '<uuid>' + 'token': '<token>' + } + + :param token: The authentication token. + :type token: str + """ + self._creds = { + 'token': { + 'uuid': self._uuid, + 'token': token, + } + } + + def _get_token(self): + """ + Return current token from credentials dictionary.
+ """ + return self._creds['token']['token'] + + token = property(_get_token, _set_token, doc='The authentication Token.') + + # + # Setters/getters + # + + def _get_uuid(self): + return self._uuid + + uuid = property(_get_uuid, doc='The user uuid.') + + def get_secret_id(self): + return self._secrets.secret_id + + def set_secret_id(self, secret_id): + self._secrets.set_secret_id(secret_id) + + secret_id = property( + get_secret_id, + set_secret_id, + doc='The active secret id.') + + def _set_secrets_path(self, secrets_path): + self._secrets.secrets_path = secrets_path + + def _get_secrets_path(self): + return self._secrets.secrets_path + + secrets_path = property( + _get_secrets_path, + _set_secrets_path, + doc='The path for the file containing the encrypted symmetric secret.') + + def _get_local_db_path(self): + return self._local_db_path + + local_db_path = property( + _get_local_db_path, + doc='The path for the local database replica.') + + def _get_server_url(self): + return self._server_url + + server_url = property( + _get_server_url, + doc='The URL of the Soledad server.') + + @property + def storage_secret(self): + """ + Return the secret used for symmetric encryption. + """ + return self._secrets.storage_secret + + @property + def remote_storage_secret(self): + """ + Return the secret used for encryption of remotely stored data. + """ + return self._secrets.remote_storage_secret + + @property + def secrets(self): + return self._secrets + + @property + def passphrase(self): + return self._secrets.passphrase + + def change_passphrase(self, new_passphrase): + """ + Change the passphrase that encrypts the storage secret. + + :param new_passphrase: The new passphrase. + :type new_passphrase: unicode + + :raise NoStorageSecret: Raised if there's no storage secret available. 
+ """ + self._secrets.change_passphrase(new_passphrase) + + +# ---------------------------------------------------------------------------- +# Monkey patching u1db to be able to provide a custom SSL cert +# ---------------------------------------------------------------------------- + +# We need a more reasonable timeout (in seconds) +SOLEDAD_TIMEOUT = 120 + + +class VerifiedHTTPSConnection(httplib.HTTPSConnection): + """ + HTTPSConnection verifying server side certificates. + """ + # derived from httplib.py + + def connect(self): + """ + Connect to a host on a given (SSL) port. + """ + try: + source = self.source_address + sock = socket.create_connection((self.host, self.port), + SOLEDAD_TIMEOUT, source) + except AttributeError: + # source_address was introduced in 2.7 + sock = socket.create_connection((self.host, self.port), + SOLEDAD_TIMEOUT) + if self._tunnel_host: + self.sock = sock + self._tunnel() + + self.sock = ssl.wrap_socket(sock, + ca_certs=SOLEDAD_CERT, + cert_reqs=ssl.CERT_REQUIRED) + match_hostname(self.sock.getpeercert(), self.host) + + +old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection +http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection + diff --git a/client/src/leap/soledad/client/mp_safe_db.py b/client/src/leap/soledad/client/mp_safe_db.py deleted file mode 100644 index 9ed0bef4..00000000 --- a/client/src/leap/soledad/client/mp_safe_db.py +++ /dev/null @@ -1,112 +0,0 @@ -# -*- coding: utf-8 -*- -# mp_safe_db.py -# Copyright (C) 2014 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - - -""" -Multiprocessing-safe SQLite database. -""" - - -from threading import Thread -from Queue import Queue -from pysqlcipher import dbapi2 - - -# Thanks to http://code.activestate.com/recipes/526618/ - -class MPSafeSQLiteDB(Thread): - """ - A multiprocessing-safe SQLite database accessor. - """ - - CLOSE = "--close--" - NO_MORE = "--no more--" - - def __init__(self, db_path): - """ - Initialize the process - """ - Thread.__init__(self) - self._db_path = db_path - self._requests = Queue() - self.start() - - def run(self): - """ - Run the multiprocessing-safe database accessor. - """ - conn = dbapi2.connect(self._db_path) - while True: - req, arg, res = self._requests.get() - if req == self.CLOSE: - break - with conn: - cursor = conn.cursor() - cursor.execute(req, arg) - if res: - for rec in cursor.fetchall(): - res.put(rec) - res.put(self.NO_MORE) - conn.close() - - def execute(self, req, arg=None, res=None): - """ - Execute a request on the database. - - :param req: The request to be executed. - :type req: str - :param arg: The arguments for the request. - :type arg: tuple - :param res: A queue to write request results. - :type res: multiprocessing.Queue - """ - self._requests.put((req, arg or tuple(), res)) - - def select(self, req, arg=None): - """ - Run a select query on the database and yield results. - - :param req: The request to be executed. - :type req: str - :param arg: The arguments for the request. - :type arg: tuple - """ - res = Queue() - self.execute(req, arg, res) - while True: - rec = res.get() - if rec == self.NO_MORE: - break - yield rec - - def close(self): - """ - Close the database connection. - """ - self.execute(self.CLOSE) - self.join() - - def cursor(self): - """ - Return a fake cursor object. - - Not really a cursor, but allows for calling db.cursor().execute().
- - :return: Self. - :rtype: MPSafeSQLiteDatabase - """ - return self diff --git a/client/src/leap/soledad/client/mp_safe_db_TOREMOVE.py b/client/src/leap/soledad/client/mp_safe_db_TOREMOVE.py new file mode 100644 index 00000000..9ed0bef4 --- /dev/null +++ b/client/src/leap/soledad/client/mp_safe_db_TOREMOVE.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +# mp_safe_db.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +""" +Multiprocessing-safe SQLite database. +""" + + +from threading import Thread +from Queue import Queue +from pysqlcipher import dbapi2 + + +# Thanks to http://code.activestate.com/recipes/526618/ + +class MPSafeSQLiteDB(Thread): + """ + A multiprocessing-safe SQLite database accessor. + """ + + CLOSE = "--close--" + NO_MORE = "--no more--" + + def __init__(self, db_path): + """ + Initialize the process + """ + Thread.__init__(self) + self._db_path = db_path + self._requests = Queue() + self.start() + + def run(self): + """ + Run the multiprocessing-safe database accessor. + """ + conn = dbapi2.connect(self._db_path) + while True: + req, arg, res = self._requests.get() + if req == self.CLOSE: + break + with conn: + cursor = conn.cursor() + cursor.execute(req, arg) + if res: + for rec in cursor.fetchall(): + res.put(rec) + res.put(self.NO_MORE) + conn.close() + + def execute(self, req, arg=None, res=None): + """ + Execute a request on the database.
+ + :param req: The request to be executed. + :type req: str + :param arg: The arguments for the request. + :type arg: tuple + :param res: A queue to write request results. + :type res: multiprocessing.Queue + """ + self._requests.put((req, arg or tuple(), res)) + + def select(self, req, arg=None): + """ + Run a select query on the database and yield results. + + :param req: The request to be executed. + :type req: str + :param arg: The arguments for the request. + :type arg: tuple + """ + res = Queue() + self.execute(req, arg, res) + while True: + rec = res.get() + if rec == self.NO_MORE: + break + yield rec + + def close(self): + """ + Close the database connection. + """ + self.execute(self.CLOSE) + self.join() + + def cursor(self): + """ + Return a fake cursor object. + + Not really a cursor, but allows for calling db.cursor().execute(). + + :return: Self. + :rtype: MPSafeSQLiteDatabase + """ + return self diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 613903f7..fcef592d 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -53,7 +53,7 @@ from contextlib import contextmanager from collections import defaultdict from httplib import CannotSendRequest -from pysqlcipher import dbapi2 +from pysqlcipher import dbapi2 as sqlcipher_dbapi2 from u1db.backends import sqlite_backend from u1db import errors as u1db_errors from taskthread import TimerTask @@ -71,7 +71,7 @@ from leap.soledad.common.document import SoledadDocument logger = logging.getLogger(__name__) # Monkey-patch u1db.backends.sqlite_backend with pysqlcipher.dbapi2 -sqlite_backend.dbapi2 = dbapi2 +sqlite_backend.dbapi2 = sqlcipher_dbapi2 # It seems that, as long as we are not using old sqlite versions, serialized # mode is enabled by default at compile time. 
So accessing db connections from @@ -91,11 +91,12 @@ SQLITE_CHECK_SAME_THREAD = False SQLITE_ISOLATION_LEVEL = None +# TODO accept cyrpto object too.... or pass it along.. class SQLCipherOptions(object): def __init__(self, path, key, create=True, is_raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, - document_factory=None, defer_encryption=False, - sync_db_key=None): + document_factory=None, + defer_encryption=False, sync_db_key=None): """ Options for the initialization of an SQLCipher database. @@ -140,39 +141,39 @@ class SQLCipherOptions(object): # XXX Use SQLCIpherOptions instead -def open(path, password, create=True, document_factory=None, crypto=None, - raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, - cipher_page_size=1024, defer_encryption=False, sync_db_key=None): - """ - Open a database at the given location. - - *** IMPORTANT *** - - Don't forget to close the database after use by calling the close() - method otherwise some resources might not be freed and you may experience - several kinds of leakages. - - *** IMPORTANT *** - - Will raise u1db.errors.DatabaseDoesNotExist if create=False and the - database does not already exist. - - :return: An instance of Database. - :rtype SQLCipherDatabase - """ - args = (path, password) - kwargs = { - 'create': create, - 'document_factory': document_factory, - 'crypto': crypto, - 'raw_key': raw_key, - 'cipher': cipher, - 'kdf_iter': kdf_iter, - 'cipher_page_size': cipher_page_size, - 'defer_encryption': defer_encryption, - 'sync_db_key': sync_db_key} +#def open(path, password, create=True, document_factory=None, crypto=None, + #raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, + #cipher_page_size=1024, defer_encryption=False, sync_db_key=None): + #""" + #Open a database at the given location. 
+# + #*** IMPORTANT *** +# + #Don't forget to close the database after use by calling the close() + #method otherwise some resources might not be freed and you may experience + #several kinds of leakages. +# + #*** IMPORTANT *** +# + #Will raise u1db.errors.DatabaseDoesNotExist if create=False and the + #database does not already exist. +# + #:return: An instance of Database. + #:rtype SQLCipherDatabase + #""" + #args = (path, password) + #kwargs = { + #'create': create, + #'document_factory': document_factory, + #'crypto': crypto, + #'raw_key': raw_key, + #'cipher': cipher, + #'kdf_iter': kdf_iter, + #'cipher_page_size': cipher_page_size, + #'defer_encryption': defer_encryption, + #'sync_db_key': sync_db_key} # XXX pass only a CryptoOptions object around - return SQLCipherDatabase.open_database(*args, **kwargs) + #return SQLCipherDatabase.open_database(*args, **kwargs) # @@ -216,9 +217,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ # XXX Use SQLCIpherOptions instead - def __init__(self, sqlcipher_file, password, document_factory=None, - crypto=None, raw_key=False, cipher='aes-256-cbc', - kdf_iter=4000, cipher_page_size=1024, sync_db_key=None): + def __init__(self, opts): """ Connect to an existing SQLCipher database, creating a new sqlcipher database file if needed. @@ -230,23 +229,28 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): experience several kinds of leakages. 
*** IMPORTANT *** + + :param opts: + :type opts: SQLCipherOptions """ # ensure the db is encrypted if the file already exists - if os.path.exists(sqlcipher_file): - # XXX pass only a CryptoOptions object around - self.assert_db_is_encrypted( - sqlcipher_file, password, raw_key, cipher, kdf_iter, - cipher_page_size) + if os.path.exists(opts.sqlcipher_file): + self.assert_db_is_encrypted(opts) # connect to the sqlcipher database + # XXX this lock should not be needed ----------------- + # u1db holds a mutex over sqlite internally for the initialization. with self.k_lock: - self._db_handle = dbapi2.connect( - sqlcipher_file, + self._db_handle = sqlcipher_dbapi2.connect( + + # TODO ----------------------------------------------- + # move the init to a single function + opts.sqlcipher_file, isolation_level=SQLITE_ISOLATION_LEVEL, check_same_thread=SQLITE_CHECK_SAME_THREAD) # set SQLCipher cryptographic parameters - # XXX allow optiona deferredChain here ? + # XXX allow optional deferredChain here ? 
pragmas.set_crypto_pragmas( self._db_handle, password, raw_key, cipher, kdf_iter, cipher_page_size) @@ -260,8 +264,11 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self._real_replica_uid = None self._ensure_schema() - self._crypto = crypto + self._crypto = opts.crypto + + # TODO ------------------------------------------------ + # Move syncdb to another class ------------------------ # define sync-db attrs self._sqlcipher_file = sqlcipher_file self._sync_db_key = sync_db_key @@ -294,103 +301,122 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): # self._syncers = {'': ('', syncer), ...} self._syncers = {} - @classmethod - # XXX Use SQLCIpherOptions instead - def _open_database(cls, sqlcipher_file, password, document_factory=None, - crypto=None, raw_key=False, cipher='aes-256-cbc', - kdf_iter=4000, cipher_page_size=1024, - defer_encryption=False, sync_db_key=None): + def _extra_schema_init(self, c): """ - Open a SQLCipher database. + Add any extra fields, etc to the basic table definitions. - :return: The database object. - :rtype: SQLCipherDatabase + This method is called by u1db.backends.sqlite_backend._initialize() + method, which is executed when the database schema is created. Here, + we use it to include the "syncable" property for LeapDocuments. + + :param c: The cursor for querying the database. + :type c: dbapi2.cursor """ - cls.defer_encryption = defer_encryption - if not os.path.isfile(sqlcipher_file): - raise u1db_errors.DatabaseDoesNotExist() + c.execute( + 'ALTER TABLE document ' + 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') + - tries = 2 + # TODO ---- rescue the fix for the windows case from here... + #@classmethod + # XXX Use SQLCIpherOptions instead + #def _open_database(cls, sqlcipher_file, password, document_factory=None, + #crypto=None, raw_key=False, cipher='aes-256-cbc', + #kdf_iter=4000, cipher_page_size=1024, + #defer_encryption=False, sync_db_key=None): + #""" + #Open a SQLCipher database. 
+# + #:return: The database object. + #:rtype: SQLCipherDatabase + #""" + #cls.defer_encryption = defer_encryption + #if not os.path.isfile(sqlcipher_file): + #raise u1db_errors.DatabaseDoesNotExist() +# + #tries = 2 # Note: There seems to be a bug in sqlite 3.5.9 (with python2.6) # where without re-opening the database on Windows, it # doesn't see the transaction that was just committed - while True: - - with cls.k_lock: - db_handle = dbapi2.connect( - sqlcipher_file, - check_same_thread=SQLITE_CHECK_SAME_THREAD) - - try: + #while True: +# + #with cls.k_lock: + #db_handle = dbapi2.connect( + #sqlcipher_file, + #check_same_thread=SQLITE_CHECK_SAME_THREAD) +# + #try: # set cryptographic params - +# # XXX pass only a CryptoOptions object around - pragmas.set_crypto_pragmas( - db_handle, password, raw_key, cipher, kdf_iter, - cipher_page_size) - c = db_handle.cursor() + #pragmas.set_crypto_pragmas( + #db_handle, password, raw_key, cipher, kdf_iter, + #cipher_page_size) + #c = db_handle.cursor() # XXX if we use it here, it should be public - v, err = cls._which_index_storage(c) - except Exception as exc: - logger.warning("ERROR OPENING DATABASE!") - logger.debug("error was: %r" % exc) - v, err = None, exc - finally: - db_handle.close() - if v is not None: - break + #v, err = cls._which_index_storage(c) + #except Exception as exc: + #logger.warning("ERROR OPENING DATABASE!") + #logger.debug("error was: %r" % exc) + #v, err = None, exc + #finally: + #db_handle.close() + #if v is not None: + #break # possibly another process is initializing it, wait for it to be # done - if tries == 0: - raise err # go for the richest error? 
- tries -= 1 - time.sleep(cls.WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL) - return SQLCipherDatabase._sqlite_registry[v]( - sqlcipher_file, password, document_factory=document_factory, - crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, - cipher_page_size=cipher_page_size, sync_db_key=sync_db_key) - - @classmethod - def open_database(cls, sqlcipher_file, password, create, - document_factory=None, crypto=None, raw_key=False, - cipher='aes-256-cbc', kdf_iter=4000, - cipher_page_size=1024, defer_encryption=False, - sync_db_key=None): + #if tries == 0: + #raise err # go for the richest error? + #tries -= 1 + #time.sleep(cls.WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL) + #return SQLCipherDatabase._sqlite_registry[v]( + #sqlcipher_file, password, document_factory=document_factory, + #crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, + #cipher_page_size=cipher_page_size, sync_db_key=sync_db_key) + + #@classmethod + #def open_database(cls, sqlcipher_file, password, create, + #document_factory=None, crypto=None, raw_key=False, + #cipher='aes-256-cbc', kdf_iter=4000, + #cipher_page_size=1024, defer_encryption=False, + #sync_db_key=None): # XXX pass only a CryptoOptions object around - """ - Open a SQLCipher database. - - *** IMPORTANT *** - - Don't forget to close the database after use by calling the close() - method otherwise some resources might not be freed and you may - experience several kinds of leakages. - - *** IMPORTANT *** - - :return: The database object. 
- :rtype: SQLCipherDatabase - """ - cls.defer_encryption = defer_encryption - args = sqlcipher_file, password - kwargs = { - 'crypto': crypto, - 'raw_key': raw_key, - 'cipher': cipher, - 'kdf_iter': kdf_iter, - 'cipher_page_size': cipher_page_size, - 'defer_encryption': defer_encryption, - 'sync_db_key': sync_db_key, - 'document_factory': document_factory, - } - try: - return cls._open_database(*args, **kwargs) - except u1db_errors.DatabaseDoesNotExist: - if not create: - raise - + #""" + #Open a SQLCipher database. +# + #*** IMPORTANT *** +# + #Don't forget to close the database after use by calling the close() + #method otherwise some resources might not be freed and you may + #experience several kinds of leakages. +# + #*** IMPORTANT *** +# + #:return: The database object. + #:rtype: SQLCipherDatabase + #""" + #cls.defer_encryption = defer_encryption + #args = sqlcipher_file, password + #kwargs = { + #'crypto': crypto, + #'raw_key': raw_key, + #'cipher': cipher, + #'kdf_iter': kdf_iter, + #'cipher_page_size': cipher_page_size, + #'defer_encryption': defer_encryption, + #'sync_db_key': sync_db_key, + #'document_factory': document_factory, + #} + #try: + #return cls._open_database(*args, **kwargs) + #except u1db_errors.DatabaseDoesNotExist: + #if not create: + #raise +# # XXX here we were missing sync_db_key, intentional? - return SQLCipherDatabase(*args, **kwargs) + #return SQLCipherDatabase(*args, **kwargs) + + # BEGIN SYNC FOO ---------------------------------------------------------- def sync(self, url, creds=None, autocreate=True, defer_decryption=True): """ @@ -471,7 +497,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): def _get_syncer(self, url, creds=None): """ - Get a synchronizer for C{url} using C{creds}. + Get a synchronizer for ``url`` using ``creds``. :param url: The url of the target replica to sync with. 
:type url: str @@ -504,20 +530,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): syncer.num_inserted = 0 return syncer - def _extra_schema_init(self, c): - """ - Add any extra fields, etc to the basic table definitions. - - This method is called by u1db.backends.sqlite_backend._initialize() - method, which is executed when the database schema is created. Here, - we use it to include the "syncable" property for LeapDocuments. - - :param c: The cursor for querying the database. - :type c: dbapi2.cursor - """ - c.execute( - 'ALTER TABLE document ' - 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') + # END SYNC FOO ---------------------------------------------------------- def _init_sync_db(self): """ @@ -601,8 +614,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :return: The new document revision. :rtype: str """ - doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc( - self, doc) + doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(self, doc) if self.defer_encryption: self.sync_queue.put_nowait(doc) return doc_rev @@ -644,8 +656,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self, doc_id, check_for_conflicts) if doc: c = self._db_handle.cursor() - c.execute('SELECT syncable FROM document ' - 'WHERE doc_id=?', + c.execute('SELECT syncable FROM document WHERE doc_id=?', (doc.doc_id,)) result = c.fetchone() doc.syncable = bool(result[0]) @@ -691,11 +702,11 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): # backend should raise a DatabaseError exception. 
sqlite_backend.SQLitePartialExpandDatabase(sqlcipher_file) raise DatabaseIsNotEncrypted() - except dbapi2.DatabaseError: + except sqlcipher_dbapi2.DatabaseError: # assert that we can access it using SQLCipher with the given # key with cls.k_lock: - db_handle = dbapi2.connect( + db_handle = sqlcipher_dbapi2.connect( sqlcipher_file, isolation_level=SQLITE_ISOLATION_LEVEL, check_same_thread=SQLITE_CHECK_SAME_THREAD) @@ -750,8 +761,8 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): )) try: c.execute(statement, tuple(args)) - except dbapi2.OperationalError, e: - raise dbapi2.OperationalError( + except sqlcipher_dbapi2.OperationalError, e: + raise sqlcipher_dbapi2.OperationalError( str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args)) res = c.fetchall() return res[0][0] @@ -760,6 +771,8 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ Close db_handle and close syncer. """ + # TODO separate db from syncers -------------- + if logger is not None: # logger might be none if called from __del__ logger.debug("Sqlcipher backend: closing") # stop the sync watcher for deferred encryption @@ -780,6 +793,8 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): if self._db_handle is not None: self._db_handle.close() self._db_handle = None + + # --------------------------------------- # close the sync database if self._sync_db is not None: self._sync_db.close() diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 0297c75c..a47afbb6 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -14,8 +14,6 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. - - """ Soledad synchronization utilities. @@ -27,8 +25,6 @@ Extend u1db Synchronizer with the ability to: * Be interrupted and recovered.
""" - - import logging import traceback from threading import Lock -- cgit v1.2.3 From e0f70a342deccbb53a6ea7215b3322388bb18461 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 23 Sep 2014 13:38:06 -0500 Subject: Refactor soledad api to use async db * add examples and benchmarks * remove autocommit mode, allow wal disabling * lock initialization * make api use async calls --- client/src/leap/soledad/client/adbapi.py | 146 +++- client/src/leap/soledad/client/api.py | 323 ++++---- client/src/leap/soledad/client/examples/README | 4 + .../src/leap/soledad/client/examples/compare.txt | 8 + .../src/leap/soledad/client/examples/manifest.phk | 50 ++ .../leap/soledad/client/examples/plot-async-db.py | 45 ++ .../leap/soledad/client/examples/run_benchmark.py | 28 + .../src/leap/soledad/client/examples/use_adbapi.py | 103 +++ client/src/leap/soledad/client/examples/use_api.py | 67 ++ .../src/leap/soledad/client/mp_safe_db_TOREMOVE.py | 112 --- client/src/leap/soledad/client/pragmas.py | 20 +- client/src/leap/soledad/client/sqlcipher.py | 845 ++++++++++----------- 12 files changed, 990 insertions(+), 761 deletions(-) create mode 100644 client/src/leap/soledad/client/examples/README create mode 100644 client/src/leap/soledad/client/examples/compare.txt create mode 100644 client/src/leap/soledad/client/examples/manifest.phk create mode 100644 client/src/leap/soledad/client/examples/plot-async-db.py create mode 100644 client/src/leap/soledad/client/examples/run_benchmark.py create mode 100644 client/src/leap/soledad/client/examples/use_adbapi.py create mode 100644 client/src/leap/soledad/client/examples/use_api.py delete mode 100644 client/src/leap/soledad/client/mp_safe_db_TOREMOVE.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py index 730999a3..3b15509b 100644 --- a/client/src/leap/soledad/client/adbapi.py +++ b/client/src/leap/soledad/client/adbapi.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- 
-# sqlcipher.py +# adbapi.py # Copyright (C) 2013, 2014 LEAP # # This program is free software: you can redistribute it and/or modify @@ -17,61 +17,135 @@ """ An asyncrhonous interface to soledad using sqlcipher backend. It uses twisted.enterprise.adbapi. - """ +import re import os import sys +from functools import partial + +import u1db +from u1db.backends import sqlite_backend + from twisted.enterprise import adbapi from twisted.python import log +from leap.soledad.client.sqlcipher import set_init_pragmas + + DEBUG_SQL = os.environ.get("LEAP_DEBUG_SQL") if DEBUG_SQL: log.startLogging(sys.stdout) -def getConnectionPool(db=None, key=None): - return SQLCipherConnectionPool( - "pysqlcipher.dbapi2", database=db, key=key, check_same_thread=False) +def getConnectionPool(opts, openfun=None, driver="pysqlcipher"): + if openfun is None and driver == "pysqlcipher": + openfun = partial(set_init_pragmas, opts=opts) + return U1DBConnectionPool( + "%s.dbapi2" % driver, database=opts.path, + check_same_thread=False, cp_openfun=openfun) -class SQLCipherConnectionPool(adbapi.ConnectionPool): +# XXX work in progress -------------------------------------------- - key = None - def connect(self): - """ - Return a database connection when one becomes available. +class U1DBSqliteWrapper(sqlite_backend.SQLitePartialExpandDatabase): + """ + A very simple wrapper around sqlcipher backend. - This method blocks and should be run in a thread from the internal - threadpool. Don't call this method directly from non-threaded code. - Using this method outside the external threadpool may exceed the - maximum number of connections in the pool. + Instead of initializing the database on the fly, it just uses an existing + connection that is passed to it in the initializer. + """ - :return: a database connection from the pool. 
- """ - self.noisy = DEBUG_SQL + def __init__(self, conn): + self._db_handle = conn + self._real_replica_uid = None + self._ensure_schema() + self._factory = u1db.Document - tid = self.threadID() - conn = self.connections.get(tid) - if self.key is None: - self.key = self.connkw.pop('key', None) +class U1DBConnection(adbapi.Connection): - if conn is None: - if self.noisy: - log.msg('adbapi connecting: %s %s%s' % (self.dbapiName, - self.connargs or '', - self.connkw or '')) - conn = self.dbapi.connect(*self.connargs, **self.connkw) + u1db_wrapper = U1DBSqliteWrapper + + def __init__(self, pool, init_u1db=False): + self.init_u1db = init_u1db + adbapi.Connection.__init__(self, pool) + + def reconnect(self): + if self._connection is not None: + self._pool.disconnect(self._connection) + self._connection = self._pool.connect() + + if self.init_u1db: + self._u1db = self.u1db_wrapper(self._connection) + + def __getattr__(self, name): + if name.startswith('u1db_'): + meth = re.sub('^u1db_', '', name) + return getattr(self._u1db, meth) + else: + return getattr(self._connection, name) - # XXX we should hook here all OUR SOLEDAD pragmas ----- - conn.cursor().execute("PRAGMA key=%s" % self.key) - conn.commit() - # ----------------------------------------------------- - # XXX profit of openfun isntead??? 
- if self.openfun is not None: - self.openfun(conn) - self.connections[tid] = conn - return conn +class U1DBTransaction(adbapi.Transaction): + + def __getattr__(self, name): + if name.startswith('u1db_'): + meth = re.sub('^u1db_', '', name) + return getattr(self._connection._u1db, meth) + else: + return getattr(self._cursor, name) + + +class U1DBConnectionPool(adbapi.ConnectionPool): + + connectionFactory = U1DBConnection + transactionFactory = U1DBTransaction + + def __init__(self, *args, **kwargs): + adbapi.ConnectionPool.__init__(self, *args, **kwargs) + # all u1db connections, hashed by thread-id + self.u1dbconnections = {} + + def runU1DBQuery(self, meth, *args, **kw): + meth = "u1db_%s" % meth + return self.runInteraction(self._runU1DBQuery, meth, *args, **kw) + + def _runU1DBQuery(self, trans, meth, *args, **kw): + meth = getattr(trans, meth) + return meth(*args, **kw) + + def _runInteraction(self, interaction, *args, **kw): + tid = self.threadID() + u1db = self.u1dbconnections.get(tid) + conn = self.connectionFactory(self, init_u1db=not bool(u1db)) + + if u1db is None: + self.u1dbconnections[tid] = conn._u1db + else: + conn._u1db = u1db + + trans = self.transactionFactory(self, conn) + try: + result = interaction(trans, *args, **kw) + trans.close() + conn.commit() + return result + except: + excType, excValue, excTraceback = sys.exc_info() + try: + conn.rollback() + except: + log.err(None, "Rollback failed") + raise excType, excValue, excTraceback + + def finalClose(self): + self.shutdownID = None + self.threadpool.stop() + self.running = False + for conn in self.connections.values(): + self._close(conn) + for u1db in self.u1dbconnections.values(): + self._close(u1db) + self.connections.clear() diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index bfb6c703..703b9516 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -34,7 +34,6 @@ import socket import ssl import 
urlparse - try: import cchardet as chardet except ImportError: @@ -47,15 +46,14 @@ from leap.common.config import get_path_prefix from leap.soledad.common import SHARED_DB_NAME from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type -from leap.soledad.common.document import SoledadDocument +from leap.soledad.client import adbapi from leap.soledad.client import events as soledad_events from leap.soledad.client.crypto import SoledadCrypto from leap.soledad.client.secrets import SoledadSecrets from leap.soledad.client.shared_db import SoledadSharedDatabase -from leap.soledad.client.sqlcipher import SQLCipherDatabase from leap.soledad.client.target import SoledadSyncTarget -from leap.soledad.client.sqlcipher import SQLCipherDB, SQLCipherOptions +from leap.soledad.client.sqlcipher import SQLCipherOptions logger = logging.getLogger(name=__name__) @@ -200,18 +198,19 @@ class Soledad(object): Initialize configuration using default values for missing params. """ soledad_assert_type(self._passphrase, unicode) + initialize = lambda attr, val: attr is None and setattr(attr, val) + # initialize secrets_path - if self._secrets_path is None: - self._secrets_path = os.path.join( - self.DEFAULT_PREFIX, self.STORAGE_SECRETS_FILE_NAME) + initialize(self._secrets_path, os.path.join( + self.DEFAULT_PREFIX, self.STORAGE_SECRETS_FILE_NAME)) + # initialize local_db_path - if self._local_db_path is None: - self._local_db_path = os.path.join( - self.DEFAULT_PREFIX, self.LOCAL_DATABASE_FILE_NAME) + initialize(self._local_db_path, os.path.join( + self.DEFAULT_PREFIX, self.LOCAL_DATABASE_FILE_NAME)) + # initialize server_url - soledad_assert( - self._server_url is not None, - 'Missing URL for Soledad server.') + soledad_assert(self._server_url is not None, + 'Missing URL for Soledad server.') # # initialization/destruction methods @@ -221,14 +220,13 @@ class Soledad(object): """ Bootstrap local Soledad instance. 
- :raise BootstrapSequenceError: Raised when the secret generation and - storage on server sequence has failed for some reason. + :raise BootstrapSequenceError: + Raised when the secret generation and storage on server sequence + has failed for some reason. """ - try: - self._secrets.bootstrap() - self._init_db() - except: - raise + self._secrets.bootstrap() + self._init_db() + # XXX initialize syncers? def _init_dirs(self): """ @@ -255,8 +253,9 @@ class Soledad(object): Initialize the U1DB SQLCipher database for local storage. Currently, Soledad uses the default SQLCipher cipher, i.e. - 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key and - uses the 'raw PRAGMA key' format to handle the key to SQLCipher. + 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key, + and internally the SQLCipherDatabase initialization uses the 'raw + PRAGMA key' format to handle the key to SQLCipher. """ tohex = binascii.b2a_hex # sqlcipher only accepts the hex version @@ -265,25 +264,28 @@ class Soledad(object): opts = SQLCipherOptions( self._local_db_path, key, - is_raw_key=True, - create=True, + is_raw_key=True, create=True, defer_encryption=self._defer_encryption, sync_db_key=sync_db_key, - crypto=self._crypto, # XXX add this - document_factory=SoledadDocument, ) - self._db = SQLCipherDB(opts) + self._dbpool = adbapi.getConnectionPool(opts) def close(self): """ Close underlying U1DB database. """ logger.debug("Closing soledad") - if hasattr(self, '_db') and isinstance( - self._db, - SQLCipherDatabase): - self._db.stop_sync() - self._db.close() + self._dbpool.close() + + # TODO close syncers >>>>>> + + #if hasattr(self, '_db') and isinstance( + #self._db, + #SQLCipherDatabase): + #self._db.close() +# + # XXX stop syncers + # self._db.stop_sync() @property def _shared_db(self): @@ -306,24 +308,29 @@ class Soledad(object): # def put_doc(self, doc): + # TODO what happens with this warning during the deferred life cycle? 
+ # Isn't it better to defend ourselves from the mutability, to avoid + # nasty surprises? """ Update a document in the local encrypted database. ============================== WARNING ============================== This method converts the document's contents to unicode in-place. This - means that after calling C{put_doc(doc)}, the contents of the - document, i.e. C{doc.content}, might be different from before the + means that after calling `put_doc(doc)`, the contents of the + document, i.e. `doc.content`, might be different from before the call. ============================== WARNING ============================== :param doc: the document to update :type doc: SoledadDocument - :return: the new revision identifier for the document - :rtype: str + :return: + a deferred that will fire with the new revision identifier for + the document + :rtype: Deferred """ doc.content = self._convert_to_unicode(doc.content) - return self._db.put_doc(doc) + return self._dbpool.put_doc(doc) def delete_doc(self, doc): """ @@ -332,10 +339,12 @@ class Soledad(object): :param doc: the document to delete :type doc: SoledadDocument - :return: the new revision identifier for the document - :rtype: str + :return: + a deferred that will fire with ... + :rtype: Deferred """ - return self._db.delete_doc(doc) + # XXX what does this do when fired??? 
+ return self._dbpool.delete_doc(doc) def get_doc(self, doc_id, include_deleted=False): """ @@ -343,15 +352,17 @@ class Soledad(object): :param doc_id: the unique document identifier :type doc_id: str - :param include_deleted: if True, deleted documents will be - returned with empty content; otherwise asking - for a deleted document will return None + :param include_deleted: + if True, deleted documents will be returned with empty content; + otherwise asking for a deleted document will return None :type include_deleted: bool - :return: the document object or None - :rtype: SoledadDocument + :return: + A deferred that will fire with the document object, containing a + SoledadDocument, or None if it could not be found + :rtype: Deferred """ - return self._db.get_doc(doc_id, include_deleted=include_deleted) + return self._dbpool.get_doc(doc_id, include_deleted=include_deleted) def get_docs(self, doc_ids, check_for_conflicts=True, include_deleted=False): @@ -364,11 +375,12 @@ class Soledad(object): be skipped, and 'None' will be returned instead of True/False :type check_for_conflicts: bool - :return: iterable giving the Document object for each document id - in matching doc_ids order. - :rtype: generator + :return: + A deferred that will fire with an iterable giving the Document + object for each document id in matching doc_ids order. + :rtype: Deferred """ - return self._db.get_docs( + return self._dbpool.get_docs( doc_ids, check_for_conflicts=check_for_conflicts, include_deleted=include_deleted) @@ -379,43 +391,13 @@ class Soledad(object): :param include_deleted: If set to True, deleted documents will be returned with empty content. Otherwise deleted documents will not be included in the results. - :return: (generation, [Document]) - The current generation of the database, followed by a list of - all the documents in the database. 
+ :return: + A deferred that will fire with (generation, [Document]): that is, + the current generation of the database, followed by a list of all + the documents in the database. + :rtype: Deferred """ - return self._db.get_all_docs(include_deleted) - - def _convert_to_unicode(self, content): - """ - Converts content to unicode (or all the strings in content) - - NOTE: Even though this method supports any type, it will - currently ignore contents of lists, tuple or any other - iterable than dict. We don't need support for these at the - moment - - :param content: content to convert - :type content: object - - :rtype: object - """ - if isinstance(content, unicode): - return content - elif isinstance(content, str): - result = chardet.detect(content) - default = "utf-8" - encoding = result["encoding"] or default - try: - content = content.decode(encoding) - except UnicodeError as e: - logger.error("Unicode error: {0!r}. Using 'replace'".format(e)) - content = content.decode(encoding, 'replace') - return content - else: - if isinstance(content, dict): - for key in content.keys(): - content[key] = self._convert_to_unicode(content[key]) - return content + return self._dbpool.get_all_docs(include_deleted) def create_doc(self, content, doc_id=None): """ @@ -426,11 +408,13 @@ class Soledad(object): :param doc_id: an optional identifier specifying the document id :type doc_id: str - :return: the new document - :rtype: SoledadDocument + :return: + A deferred tht will fire with the new document (SoledadDocument + instance). + :rtype: Deferred """ - return self._db.create_doc( - self._convert_to_unicode(content), doc_id=doc_id) + return self._dbpool.create_doc( + _convert_to_unicode(content), doc_id=doc_id) def create_doc_from_json(self, json, doc_id=None): """ @@ -446,10 +430,12 @@ class Soledad(object): :type json: str :param doc_id: An optional identifier specifying the document id. 
:type doc_id: - :return: The new document - :rtype: SoledadDocument + :return: + A deferred that will fire with the new document (A SoledadDocument + instance) + :rtype: Deferred """ - return self._db.create_doc_from_json(json, doc_id=doc_id) + return self._dbpool.create_doc_from_json(json, doc_id=doc_id) def create_index(self, index_name, *index_expressions): """ @@ -462,8 +448,8 @@ class Soledad(object): :param index_name: A unique name which can be used as a key prefix :type index_name: str - :param index_expressions: index expressions defining the index - information. + :param index_expressions: + index expressions defining the index information. :type index_expressions: dict Examples: @@ -473,9 +459,7 @@ class Soledad(object): "number(fieldname, width)", "lower(fieldname)" """ - if self._db: - return self._db.create_index( - index_name, *index_expressions) + return self._dbpool.create_index(index_name, *index_expressions) def delete_index(self, index_name): """ @@ -484,8 +468,7 @@ class Soledad(object): :param index_name: The name of the index we are removing :type index_name: str """ - if self._db: - return self._db.delete_index(index_name) + return self._dbpool.delete_index(index_name) def list_indexes(self): """ @@ -494,8 +477,7 @@ class Soledad(object): :return: A list of [('index-name', ['field', 'field2'])] definitions. :rtype: list """ - if self._db: - return self._db.list_indexes() + return self._dbpool.list_indexes() def get_from_index(self, index_name, *key_values): """ @@ -517,8 +499,7 @@ class Soledad(object): :return: List of [Document] :rtype: list """ - if self._db: - return self._db.get_from_index(index_name, *key_values) + return self._dbpool.get_from_index(index_name, *key_values) def get_count_from_index(self, index_name, *key_values): """ @@ -534,8 +515,7 @@ class Soledad(object): :return: count. 
:rtype: int """ - if self._db: - return self._db.get_count_from_index(index_name, *key_values) + return self._dbpool.get_count_from_index(index_name, *key_values) def get_range_from_index(self, index_name, start_value, end_value): """ @@ -561,12 +541,11 @@ class Soledad(object): range. eg, if you have an index with 3 fields then you would have: (val1, val2, val3) :type end_values: tuple - :return: List of [Document] - :rtype: list + :return: A deferred that will fire with a list of [Document] + :rtype: Deferred """ - if self._db: - return self._db.get_range_from_index( - index_name, start_value, end_value) + return self._dbpool.get_range_from_index( + index_name, start_value, end_value) def get_index_keys(self, index_name): """ @@ -574,11 +553,11 @@ class Soledad(object): :param index_name: The index to query :type index_name: str - :return: [] A list of tuples of indexed keys. - :rtype: list + :return: + A deferred that will fire with a list of tuples of indexed keys. + :rtype: Deferred """ - if self._db: - return self._db.get_index_keys(index_name) + return self._dbpool.get_index_keys(index_name) def get_doc_conflicts(self, doc_id): """ @@ -587,11 +566,12 @@ class Soledad(object): :param doc_id: the document id :type doc_id: str - :return: a list of the document entries that are conflicted - :rtype: list + :return: + A deferred that will fire with a list of the document entries that + are conflicted. + :rtype: Deferred """ - if self._db: - return self._db.get_doc_conflicts(doc_id) + return self._dbpool.get_doc_conflicts(doc_id) def resolve_doc(self, doc, conflicted_doc_revs): """ @@ -599,12 +579,18 @@ class Soledad(object): :param doc: a document with the new content to be inserted. :type doc: SoledadDocument - :param conflicted_doc_revs: a list of revisions that the new content - supersedes. + :param conflicted_doc_revs: + A deferred that will fire with a list of revisions that the new + content supersedes. 
:type conflicted_doc_revs: list """ - if self._db: - return self._db.resolve_doc(doc, conflicted_doc_revs) + return self._dbpool.resolve_doc(doc, conflicted_doc_revs) + + # + # Sync API + # + + # TODO have interfaces, and let it implement it. def sync(self, defer_decryption=True): """ @@ -616,33 +602,38 @@ class Soledad(object): :param url: the url of the target replica to sync with :type url: str - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. + :param defer_decryption: + Whether to defer the decryption process using the intermediate + database. If False, decryption will be done inline. :type defer_decryption: bool - :return: The local generation before the synchronisation was - performed. + :return: + A deferred that will fire with the local generation before the + synchronisation was performed. :rtype: str """ - if self._db: - try: - local_gen = self._db.sync( - urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), - creds=self._creds, autocreate=False, - defer_decryption=defer_decryption) - soledad_events.signal( - soledad_events.SOLEDAD_DONE_DATA_SYNC, self._uuid) - return local_gen - except Exception as e: - logger.error("Soledad exception when syncing: %s" % str(e)) + # TODO this needs work. + # Should: + # (1) Defer to the syncer pool + # (2) Return a deferred (the deferToThreadpool can be good) + # (3) Add the callback for signaling the event + # (4) Let the local gen be returned from the thread + try: + local_gen = self._dbsyncer.sync( + urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), + creds=self._creds, autocreate=False, + defer_decryption=defer_decryption) + soledad_events.signal( + soledad_events.SOLEDAD_DONE_DATA_SYNC, self._uuid) + return local_gen + except Exception as e: + logger.error("Soledad exception when syncing: %s" % str(e)) def stop_sync(self): """ Stop the current syncing process. 
""" - if self._db: - self._db.stop_sync() + self._dbsyncer.stop_sync() def need_sync(self, url): """ @@ -654,12 +645,18 @@ class Soledad(object): :return: Whether remote replica and local replica differ. :rtype: bool """ + # XXX pass the get_replica_uid ------------------------ + # From where? initialize with that? + replica_uid = self._db._get_replica_uid() target = SoledadSyncTarget( - url, self._db._get_replica_uid(), creds=self._creds, - crypto=self._crypto) - info = target.get_sync_info(self._db._get_replica_uid()) + url, replica_uid, creds=self._creds, crypto=self._crypto) + + generation = self._db._get_generation() + # XXX better unpack it? + info = target.get_sync_info(replica_uid) + # compare source generation with target's last known source generation - if self._db._get_generation() != info[4]: + if generation != info[4]: soledad_events.signal( soledad_events.SOLEDAD_NEW_DATA_TO_SYNC, self._uuid) return True @@ -670,7 +667,7 @@ class Soledad(object): """ Property, True if the syncer is syncing. """ - return self._db.syncing + return self._dbsyncer.syncing def _set_token(self, token): """ @@ -781,6 +778,39 @@ class Soledad(object): self._secrets.change_passphrase(new_passphrase) +def _convert_to_unicode(content): + """ + Convert content to unicode (or all the strings in content) + + NOTE: Even though this method supports any type, it will + currently ignore contents of lists, tuple or any other + iterable than dict. We don't need support for these at the + moment + + :param content: content to convert + :type content: object + + :rtype: object + """ + if isinstance(content, unicode): + return content + elif isinstance(content, str): + result = chardet.detect(content) + default = "utf-8" + encoding = result["encoding"] or default + try: + content = content.decode(encoding) + except UnicodeError as e: + logger.error("Unicode error: {0!r}. 
Using 'replace'".format(e)) + content = content.decode(encoding, 'replace') + return content + else: + if isinstance(content, dict): + for key in content.keys(): + content[key] = _convert_to_unicode(content[key]) + return content + + # ---------------------------------------------------------------------------- # Monkey patching u1db to be able to provide a custom SSL cert # ---------------------------------------------------------------------------- @@ -819,4 +849,3 @@ class VerifiedHTTPSConnection(httplib.HTTPSConnection): old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection - diff --git a/client/src/leap/soledad/client/examples/README b/client/src/leap/soledad/client/examples/README new file mode 100644 index 00000000..3aed8377 --- /dev/null +++ b/client/src/leap/soledad/client/examples/README @@ -0,0 +1,4 @@ +Right now, you can find here both an example of use +and the benchmarking scripts. +TODO move benchmark scripts to root scripts/ folder, +and leave here only a minimal example. 
diff --git a/client/src/leap/soledad/client/examples/compare.txt b/client/src/leap/soledad/client/examples/compare.txt new file mode 100644 index 00000000..19a1325a --- /dev/null +++ b/client/src/leap/soledad/client/examples/compare.txt @@ -0,0 +1,8 @@ +TIMES=100 TMPDIR=/media/sdb5/leap python use_adbapi.py 1.34s user 0.16s system 53% cpu 2.832 total +TIMES=100 TMPDIR=/media/sdb5/leap python use_api.py 1.22s user 0.14s system 62% cpu 2.181 total + +TIMES=1000 TMPDIR=/media/sdb5/leap python use_api.py 2.18s user 0.34s system 27% cpu 9.213 total +TIMES=1000 TMPDIR=/media/sdb5/leap python use_adbapi.py 2.40s user 0.34s system 39% cpu 7.004 total + +TIMES=5000 TMPDIR=/media/sdb5/leap python use_api.py 6.63s user 1.27s system 13% cpu 57.882 total +TIMES=5000 TMPDIR=/media/sdb5/leap python use_adbapi.py 6.84s user 1.26s system 36% cpu 22.367 total diff --git a/client/src/leap/soledad/client/examples/manifest.phk b/client/src/leap/soledad/client/examples/manifest.phk new file mode 100644 index 00000000..2c86c07d --- /dev/null +++ b/client/src/leap/soledad/client/examples/manifest.phk @@ -0,0 +1,50 @@ +The Hacker's Manifesto + +The Hacker's Manifesto +by: The Mentor + +Another one got caught today, it's all over the papers. "Teenager +Arrested in Computer Crime Scandal", "Hacker Arrested after Bank +Tampering." "Damn kids. They're all alike." But did you, in your +three-piece psychology and 1950's technobrain, ever take a look behind +the eyes of the hacker? Did you ever wonder what made him tick, what +forces shaped him, what may have molded him? I am a hacker, enter my +world. Mine is a world that begins with school. I'm smarter than most of +the other kids, this crap they teach us bores me. "Damn underachiever. +They're all alike." I'm in junior high or high school. I've listened to +teachers explain for the fifteenth time how to reduce a fraction. I +understand it. "No, Ms. Smith, I didn't show my work. I did it in +my head." "Damn kid. Probably copied it. 
They're all alike." I made a +discovery today. I found a computer. Wait a second, this is cool. It does +what I want it to. If it makes a mistake, it's because I screwed it up. +Not because it doesn't like me, or feels threatened by me, or thinks I'm +a smart ass, or doesn't like teaching and shouldn't be here. Damn kid. +All he does is play games. They're all alike. And then it happened... a +door opened to a world... rushing through the phone line like heroin +through an addict's veins, an electronic pulse is sent out, a refuge from +the day-to-day incompetencies is sought... a board is found. "This is +it... this is where I belong..." I know everyone here... even if I've +never met them, never talked to them, may never hear from them again... I +know you all... Damn kid. Tying up the phone line again. They're all +alike... You bet your ass we're all alike... we've been spoon-fed baby +food at school when we hungered for steak... the bits of meat that you +did let slip through were pre-chewed and tasteless. We've been dominated +by sadists, or ignored by the apathetic. The few that had something to +teach found us willing pupils, but those few are like drops of water in +the desert. This is our world now... the world of the electron and the +switch, the beauty of the baud. We make use of a service already existing +without paying for what could be dirt-cheap if it wasn't run by +profiteering gluttons, and you call us criminals. We explore... and you +call us criminals. We seek after knowledge... and you call us criminals. +We exist without skin color, without nationality, without religious +bias... and you call us criminals. You build atomic bombs, you wage wars, +you murder, cheat, and lie to us and try to make us believe it's for our +own good, yet we're the criminals. Yes, I am a criminal. My crime is that +of curiosity. My crime is that of judging people by what they say and +think, not what they look like. 
My crime is that of outsmarting you, +something that you will never forgive me for. I am a hacker, and this is +my manifesto. You may stop this individual, but you can't stop us all... +after all, we're all alike. + +This was the last published file written by The Mentor. Shortly after +releasing it, he was busted by the FBI. The Mentor, sadly missed. diff --git a/client/src/leap/soledad/client/examples/plot-async-db.py b/client/src/leap/soledad/client/examples/plot-async-db.py new file mode 100644 index 00000000..018a1a1d --- /dev/null +++ b/client/src/leap/soledad/client/examples/plot-async-db.py @@ -0,0 +1,45 @@ +import csv +from matplotlib import pyplot as plt + +FILE = "bench.csv" + +# config the plot +plt.xlabel('number of inserts') +plt.ylabel('time (seconds)') +plt.title('SQLCipher parallelization') + +kwargs = { + 'linewidth': 1.0, + 'linestyle': '-', +} + +series = (('sync', 'r'), + ('async', 'g')) + +data = {'mark': [], + 'sync': [], + 'async': []} + +with open(FILE, 'rb') as csvfile: + series_reader = csv.reader(csvfile, delimiter=',') + for m, s, a in series_reader: + data['mark'].append(int(m)) + data['sync'].append(float(s)) + data['async'].append(float(a)) + +xmax = max(data['mark']) +xmin = min(data['mark']) +ymax = max(data['sync'] + data['async']) +ymin = min(data['sync'] + data['async']) + +for run in series: + name = run[0] + color = run[1] + plt.plot(data['mark'], data[name], label=name, color=color, **kwargs) + +plt.axes().annotate("", xy=(xmax, ymax)) +plt.axes().annotate("", xy=(xmin, ymin)) + +plt.grid() +plt.legend() +plt.show() diff --git a/client/src/leap/soledad/client/examples/run_benchmark.py b/client/src/leap/soledad/client/examples/run_benchmark.py new file mode 100644 index 00000000..a112cf45 --- /dev/null +++ b/client/src/leap/soledad/client/examples/run_benchmark.py @@ -0,0 +1,28 @@ +""" +Run a mini-benchmark between regular api and dbapi +""" +import commands +import os +import time + +TMPDIR = os.environ.get("TMPDIR", "/tmp") 
+CSVFILE = 'bench.csv' + +cmd = "SILENT=1 TIMES={times} TMPDIR={tmpdir} python ./use_{version}api.py" + +parse_time = lambda r: r.split('\n')[-1] + + +with open(CSVFILE, 'w') as log: + + for times in range(0, 10000, 500): + cmd1 = cmd.format(times=times, tmpdir=TMPDIR, version="") + sync_time = parse_time(commands.getoutput(cmd1)) + + cmd2 = cmd.format(times=times, tmpdir=TMPDIR, version="adb") + async_time = parse_time(commands.getoutput(cmd2)) + + print times, sync_time, async_time + log.write("%s, %s, %s\n" % (times, sync_time, async_time)) + log.flush() + time.sleep(2) diff --git a/client/src/leap/soledad/client/examples/use_adbapi.py b/client/src/leap/soledad/client/examples/use_adbapi.py new file mode 100644 index 00000000..d3ee8527 --- /dev/null +++ b/client/src/leap/soledad/client/examples/use_adbapi.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +# use_adbapi.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Example of use of the asynchronous soledad api. 
+""" +from __future__ import print_function +import datetime +import os + +import u1db +from twisted.internet import defer, reactor + +from leap.soledad.client import adbapi +from leap.soledad.client.sqlcipher import SQLCipherOptions + + +folder = os.environ.get("TMPDIR", "tmp") +times = int(os.environ.get("TIMES", "1000")) +silent = os.environ.get("SILENT", False) + +tmpdb = os.path.join(folder, "test.soledad") + + +def debug(*args): + if not silent: + print(*args) + +debug("[+] db path:", tmpdb) +debug("[+] times", times) + +if os.path.isfile(tmpdb): + debug("[+] Removing existing db file...") + os.remove(tmpdb) + +start_time = datetime.datetime.now() + +opts = SQLCipherOptions(tmpdb, "secret", create=True) +dbpool = adbapi.getConnectionPool(opts) + + +def createDoc(doc): + return dbpool.runU1DBQuery("create_doc", doc) + + +def getAllDocs(): + return dbpool.runU1DBQuery("get_all_docs") + + +def countDocs(_): + debug("counting docs...") + d = getAllDocs() + d.addCallbacks(printResult, lambda e: e.printTraceback()) + d.addBoth(allDone) + + +def printResult(r): + if isinstance(r, u1db.Document): + debug(r.doc_id, r.content['number']) + else: + len_results = len(r[1]) + debug("GOT %s results" % len(r[1])) + + if len_results == times: + debug("ALL GOOD") + else: + raise ValueError("We didn't expect this result len") + + +def allDone(_): + debug("ALL DONE!") + if silent: + end_time = datetime.datetime.now() + print((end_time - start_time).total_seconds()) + reactor.stop() + +deferreds = [] + +for i in range(times): + doc = {"number": i, + "payload": open('manifest.phk').read()} + d = createDoc(doc) + d.addCallbacks(printResult, lambda e: e.printTraceback()) + deferreds.append(d) + + +all_done = defer.gatherResults(deferreds, consumeErrors=True) +all_done.addCallback(countDocs) + +reactor.run() diff --git a/client/src/leap/soledad/client/examples/use_api.py b/client/src/leap/soledad/client/examples/use_api.py new file mode 100644 index 00000000..fd0a100c --- /dev/null 
+++ b/client/src/leap/soledad/client/examples/use_api.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# use_api.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Example of use of the soledad api. +""" +from __future__ import print_function +import datetime +import os + +from leap.soledad.client import sqlcipher +from leap.soledad.client.sqlcipher import SQLCipherOptions + + +folder = os.environ.get("TMPDIR", "tmp") +times = int(os.environ.get("TIMES", "1000")) +silent = os.environ.get("SILENT", False) + +tmpdb = os.path.join(folder, "test.soledad") + + +def debug(*args): + if not silent: + print(*args) + +debug("[+] db path:", tmpdb) +debug("[+] times", times) + +if os.path.isfile(tmpdb): + debug("[+] Removing existing db file...") + os.remove(tmpdb) + +start_time = datetime.datetime.now() + +opts = SQLCipherOptions(tmpdb, "secret", create=True) +db = sqlcipher.SQLCipherDatabase(None, opts) + + +def allDone(): + debug("ALL DONE!") + + +for i in range(times): + doc = {"number": i, + "payload": open('manifest.phk').read()} + d = db.create_doc(doc) + debug(d.doc_id, d.content['number']) + +debug("Count", len(db.get_all_docs()[1])) +if silent: + end_time = datetime.datetime.now() + print((end_time - start_time).total_seconds()) + +allDone() diff --git a/client/src/leap/soledad/client/mp_safe_db_TOREMOVE.py b/client/src/leap/soledad/client/mp_safe_db_TOREMOVE.py deleted file mode 
100644 index 9ed0bef4..00000000 --- a/client/src/leap/soledad/client/mp_safe_db_TOREMOVE.py +++ /dev/null @@ -1,112 +0,0 @@ -# -*- coding: utf-8 -*- -# mp_safe_db.py -# Copyright (C) 2014 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -""" -Multiprocessing-safe SQLite database. -""" - - -from threading import Thread -from Queue import Queue -from pysqlcipher import dbapi2 - - -# Thanks to http://code.activestate.com/recipes/526618/ - -class MPSafeSQLiteDB(Thread): - """ - A multiprocessing-safe SQLite database accessor. - """ - - CLOSE = "--close--" - NO_MORE = "--no more--" - - def __init__(self, db_path): - """ - Initialize the process - """ - Thread.__init__(self) - self._db_path = db_path - self._requests = Queue() - self.start() - - def run(self): - """ - Run the multiprocessing-safe database accessor. - """ - conn = dbapi2.connect(self._db_path) - while True: - req, arg, res = self._requests.get() - if req == self.CLOSE: - break - with conn: - cursor = conn.cursor() - cursor.execute(req, arg) - if res: - for rec in cursor.fetchall(): - res.put(rec) - res.put(self.NO_MORE) - conn.close() - - def execute(self, req, arg=None, res=None): - """ - Execute a request on the database. - - :param req: The request to be executed. - :type req: str - :param arg: The arguments for the request. - :type arg: tuple - :param res: A queue to write request results. 
- :type res: multiprocessing.Queue - """ - self._requests.put((req, arg or tuple(), res)) - - def select(self, req, arg=None): - """ - Run a select query on the database and yield results. - - :param req: The request to be executed. - :type req: str - :param arg: The arguments for the request. - :type arg: tuple - """ - res = Queue() - self.execute(req, arg, res) - while True: - rec = res.get() - if rec == self.NO_MORE: - break - yield rec - - def close(self): - """ - Close the database connection. - """ - self.execute(self.CLOSE) - self.join() - - def cursor(self): - """ - Return a fake cursor object. - - Not really a cursor, but allows for calling db.cursor().execute(). - - :return: Self. - :rtype: MPSafeSQLiteDatabase - """ - return self diff --git a/client/src/leap/soledad/client/pragmas.py b/client/src/leap/soledad/client/pragmas.py index a21e68a8..7a13a694 100644 --- a/client/src/leap/soledad/client/pragmas.py +++ b/client/src/leap/soledad/client/pragmas.py @@ -15,18 +15,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . """ -Different pragmas used in the SQLCIPHER database. +Different pragmas used in the initialization of the SQLCipher database. """ -# TODO --------------------------------------------------------------- -# Work In Progress. -# We need to reduce the impedance mismatch between the current soledad -# implementation and the eventually asynchronous api. -# So... how to plug it in, allowing for an optional sync / async coexistence? -# One of the first things is to isolate all the pragmas work that has to be -# done during initialization. -# And, instead of having all of them passed the db_handle and executing that, -# we could have just a string returned, that can be chained to a deferred. 
-# --------------------------------------------------------------------- import logging import string @@ -81,7 +71,7 @@ def _set_key(db_handle, key, is_raw_key): _set_key_passphrase(db_handle, key) -def _set_key_passphrase(cls, db_handle, passphrase): +def _set_key_passphrase(db_handle, passphrase): """ Set a passphrase for encryption key derivation. @@ -265,7 +255,7 @@ def _set_rekey_passphrase(db_handle, passphrase): db_handle.cursor().execute("PRAGMA rekey = '%s'" % passphrase) -def _set_rekey_raw(cls, db_handle, key): +def _set_rekey_raw(db_handle, key): """ Change the raw hexadecimal encryption key. @@ -300,7 +290,7 @@ def set_synchronous_normal(db_handle): db_handle.cursor().execute('PRAGMA synchronous=NORMAL') -def set_mem_temp_store(cls, db_handle): +def set_mem_temp_store(db_handle): """ Use a in-memory store for temporary tables. """ @@ -308,7 +298,7 @@ def set_mem_temp_store(cls, db_handle): db_handle.cursor().execute('PRAGMA temp_store=MEMORY') -def set_write_ahead_logging(cls, db_handle): +def set_write_ahead_logging(db_handle): """ Enable write-ahead logging, and set the autocheckpoint to 50 pages. 
diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index fcef592d..c9e69c73 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -45,7 +45,7 @@ import logging import multiprocessing import os import threading -import time +# import time --- needed for the win initialization hack import json from hashlib import sha256 @@ -58,11 +58,13 @@ from u1db.backends import sqlite_backend from u1db import errors as u1db_errors from taskthread import TimerTask -from leap.soledad.client.crypto import SyncEncrypterPool, SyncDecrypterPool +from leap.soledad.client import crypto from leap.soledad.client.target import SoledadSyncTarget from leap.soledad.client.target import PendingReceivedDocsSyncError from leap.soledad.client.sync import SoledadSynchronizer -from leap.soledad.client.mp_safe_db import MPSafeSQLiteDB + +# TODO use adbapi too +from leap.soledad.client.mp_safe_db_TOREMOVE import MPSafeSQLiteDB from leap.soledad.client import pragmas from leap.soledad.common import soledad_assert from leap.soledad.common.document import SoledadDocument @@ -80,36 +82,81 @@ sqlite_backend.dbapi2 = sqlcipher_dbapi2 # See https://sqlite.org/threadsafe.html # and http://bugs.python.org/issue16509 -SQLITE_CHECK_SAME_THREAD = False +# TODO this no longer needed ------------- +#SQLITE_CHECK_SAME_THREAD = False + + +def initialize_sqlcipher_db(opts, on_init=None): + """ + Initialize a SQLCipher database. 
+ + :param opts: + :type opts: SQLCipherOptions + :param on_init: a tuple of queries to be executed on initialization + :type on_init: tuple + :return: a SQLCipher connection + """ + conn = sqlcipher_dbapi2.connect( + opts.path) + + # XXX not needed -- check + #check_same_thread=SQLITE_CHECK_SAME_THREAD) + + set_init_pragmas(conn, opts, extra_queries=on_init) + return conn + +_db_init_lock = threading.Lock() + + +def set_init_pragmas(conn, opts=None, extra_queries=None): + """ + Set the initialization pragmas. + + This includes the crypto pragmas, and any other options that must + be passed early to sqlcipher db. + """ + assert opts is not None + extra_queries = [] if extra_queries is None else extra_queries + with _db_init_lock: + # only one execution path should initialize the db + _set_init_pragmas(conn, opts, extra_queries) + + +def _set_init_pragmas(conn, opts, extra_queries): -# We set isolation_level to None to setup autocommit mode. -# See: http://docs.python.org/2/library/sqlite3.html#controlling-transactions -# This avoids problems with sequential operations using the same soledad object -# trying to open new transactions -# (The error was: -# OperationalError:cannot start a transaction within a transaction.) -SQLITE_ISOLATION_LEVEL = None + sync_off = os.environ.get('LEAP_SQLITE_NOSYNC') + memstore = os.environ.get('LEAP_SQLITE_MEMSTORE') + nowal = os.environ.get('LEAP_SQLITE_NOWAL') + + pragmas.set_crypto_pragmas(conn, opts) + + if not nowal: + pragmas.set_write_ahead_logging(conn) + if sync_off: + pragmas.set_synchronous_off(conn) + else: + pragmas.set_synchronous_normal(conn) + if memstore: + pragmas.set_mem_temp_store(conn) + + for query in extra_queries: + conn.cursor().execute(query) -# TODO accept cyrpto object too.... or pass it along.. class SQLCipherOptions(object): + """ + A container with options for the initialization of an SQLCipher database. 
+ """ def __init__(self, path, key, create=True, is_raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, - document_factory=None, defer_encryption=False, sync_db_key=None): """ - Options for the initialization of an SQLCipher database. - :param path: The filesystem path for the database to open. :type path: str :param create: True/False, should the database be created if it doesn't already exist? :param create: bool - :param document_factory: - A function that will be called with the same parameters as - Document.__init__. - :type document_factory: callable :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt document contents when syncing. :type crypto: soledad.crypto.SoledadCrypto @@ -137,87 +184,22 @@ class SQLCipherOptions(object): self.cipher_page_size = cipher_page_size self.defer_encryption = defer_encryption self.sync_db_key = sync_db_key - self.document_factory = None - - -# XXX Use SQLCIpherOptions instead -#def open(path, password, create=True, document_factory=None, crypto=None, - #raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, - #cipher_page_size=1024, defer_encryption=False, sync_db_key=None): - #""" - #Open a database at the given location. -# - #*** IMPORTANT *** -# - #Don't forget to close the database after use by calling the close() - #method otherwise some resources might not be freed and you may experience - #several kinds of leakages. -# - #*** IMPORTANT *** -# - #Will raise u1db.errors.DatabaseDoesNotExist if create=False and the - #database does not already exist. -# - #:return: An instance of Database. 
- #:rtype SQLCipherDatabase - #""" - #args = (path, password) - #kwargs = { - #'create': create, - #'document_factory': document_factory, - #'crypto': crypto, - #'raw_key': raw_key, - #'cipher': cipher, - #'kdf_iter': kdf_iter, - #'cipher_page_size': cipher_page_size, - #'defer_encryption': defer_encryption, - #'sync_db_key': sync_db_key} - # XXX pass only a CryptoOptions object around - #return SQLCipherDatabase.open_database(*args, **kwargs) - # # The SQLCipher database # + class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ A U1DB implementation that uses SQLCipher as its persistence layer. """ defer_encryption = False - _index_storage_value = 'expand referenced encrypted' - k_lock = threading.Lock() - create_doc_lock = threading.Lock() - update_indexes_lock = threading.Lock() - _sync_watcher = None - _sync_enc_pool = None - - """ - The name of the local symmetrically encrypted documents to - sync database file. - """ - LOCAL_SYMMETRIC_SYNC_FILE_NAME = 'sync.u1db' - - """ - A dictionary that hold locks which avoid multiple sync attempts from the - same database replica. - """ - encrypting_lock = threading.Lock() - - """ - Period or recurrence of the periodic encrypting task, in seconds. - """ - ENCRYPT_TASK_PERIOD = 1 + # XXX not used afaik: + # _index_storage_value = 'expand referenced encrypted' - syncing_lock = defaultdict(threading.Lock) - """ - A dictionary that hold locks which avoid multiple sync attempts from the - same database replica. - """ - - # XXX Use SQLCIpherOptions instead - def __init__(self, opts): + def __init__(self, soledad_crypto, opts): """ Connect to an existing SQLCipher database, creating a new sqlcipher database file if needed. 
@@ -230,76 +212,23 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): *** IMPORTANT *** + :param soledad_crypto: + :type soldead_crypto: :param opts: :type opts: SQLCipherOptions """ + # TODO ------ we don't need any soledad crypto in here + # ensure the db is encrypted if the file already exists - if os.path.exists(opts.sqlcipher_file): + if os.path.isfile(opts.path): self.assert_db_is_encrypted(opts) # connect to the sqlcipher database - # XXX this lock should not be needed ----------------- - # u1db holds a mutex over sqlite internally for the initialization. - with self.k_lock: - self._db_handle = sqlcipher_dbapi2.connect( - - # TODO ----------------------------------------------- - # move the init to a single function - opts.sqlcipher_file, - isolation_level=SQLITE_ISOLATION_LEVEL, - check_same_thread=SQLITE_CHECK_SAME_THREAD) - # set SQLCipher cryptographic parameters - - # XXX allow optional deferredChain here ? - pragmas.set_crypto_pragmas( - self._db_handle, password, raw_key, cipher, kdf_iter, - cipher_page_size) - if os.environ.get('LEAP_SQLITE_NOSYNC'): - pragmas.set_synchronous_off(self._db_handle) - else: - pragmas.set_synchronous_normal(self._db_handle) - if os.environ.get('LEAP_SQLITE_MEMSTORE'): - pragmas.set_mem_temp_store(self._db_handle) - pragmas.set_write_ahead_logging(self._db_handle) - - self._real_replica_uid = None - self._ensure_schema() - self._crypto = opts.crypto - - - # TODO ------------------------------------------------ - # Move syncdb to another class ------------------------ - # define sync-db attrs - self._sqlcipher_file = sqlcipher_file - self._sync_db_key = sync_db_key - self._sync_db = None - self._sync_db_write_lock = None - self._sync_enc_pool = None - self.sync_queue = None + self._db_handle = initialize_sqlcipher_db(opts) + self._real_replica_uid = None + self._ensure_schema() - if self.defer_encryption: - # initialize sync db - self._init_sync_db() - # initialize syncing queue encryption pool - 
self._sync_enc_pool = SyncEncrypterPool( - self._crypto, self._sync_db, self._sync_db_write_lock) - self._sync_watcher = TimerTask(self._encrypt_syncing_docs, - self.ENCRYPT_TASK_PERIOD) - self._sync_watcher.start() - - def factory(doc_id=None, rev=None, json='{}', has_conflicts=False, - syncable=True): - return SoledadDocument(doc_id=doc_id, rev=rev, json=json, - has_conflicts=has_conflicts, - syncable=syncable) - self.set_document_factory(factory) - # we store syncers in a dictionary indexed by the target URL. We also - # store a hash of the auth info in case auth info expires and we need - # to rebuild the syncer for that target. The final self._syncers - # format is the following: - # - # self._syncers = {'': ('', syncer), ...} - self._syncers = {} + self.set_document_factory(soledad_doc_factory) def _extra_schema_init(self, c): """ @@ -312,40 +241,212 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :param c: The cursor for querying the database. :type c: dbapi2.cursor """ + print "CALLING EXTRA SCHEMA INIT...." c.execute( 'ALTER TABLE document ' 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') + # + # Document operations + # + + def put_doc(self, doc): + """ + Overwrite the put_doc method, to enqueue the modified document for + encryption before sync. + + :param doc: The document to be put. + :type doc: u1db.Document + + :return: The new document revision. + :rtype: str + """ + doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(self, doc) + + # XXX move to API + if self.defer_encryption: + self.sync_queue.put_nowait(doc) + return doc_rev + + # + # SQLCipher API methods + # + + # TODO this doesn't need to be an instance method + def assert_db_is_encrypted(self, opts): + """ + Assert that the sqlcipher file contains an encrypted database. + + When opening an existing database, PRAGMA key will not immediately + throw an error if the key provided is incorrect. 
To test that the + database can be successfully opened with the provided key, it is + necessary to perform some operation on the database (i.e. read from + it) and confirm it is success. + + The easiest way to do this is select off the sqlite_master table, + which will attempt to read the first page of the database and will + parse the schema. + + :param opts: + """ + # We try to open an encrypted database with the regular u1db + # backend should raise a DatabaseError exception. + # If the regular backend succeeds, then we need to stop because + # the database was not properly initialized. + try: + sqlite_backend.SQLitePartialExpandDatabase(opts.path) + except sqlcipher_dbapi2.DatabaseError: + # assert that we can access it using SQLCipher with the given + # key + dummy_query = ('SELECT count(*) FROM sqlite_master',) + initialize_sqlcipher_db(opts, on_init=dummy_query) + else: + raise DatabaseIsNotEncrypted() + + # Extra query methods: extensions to the base u1db sqlite implmentation. + + def get_count_from_index(self, index_name, *key_values): + """ + Return the count for a given combination of index_name + and key values. + + Extension method made from similar methods in u1db version 13.09 + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: count. + :rtype: int + """ + c = self._db_handle.cursor() + definition = self._get_index_definition(index_name) + + if len(key_values) != len(definition): + raise u1db_errors.InvalidValueForIndex() + tables = ["document_fields d%d" % i for i in range(len(definition))] + novalue_where = ["d.doc_id = d%d.doc_id" + " AND d%d.field_name = ?" + % (i, i) for i in range(len(definition))] + exact_where = [novalue_where[i] + + (" AND d%d.value = ?" 
% (i,)) + for i in range(len(definition))] + args = [] + where = [] + for idx, (field, value) in enumerate(zip(definition, key_values)): + args.append(field) + where.append(exact_where[idx]) + args.append(value) + + tables = ["document_fields d%d" % i for i in range(len(definition))] + statement = ( + "SELECT COUNT(*) FROM document d, %s WHERE %s " % ( + ', '.join(tables), + ' AND '.join(where), + )) + try: + c.execute(statement, tuple(args)) + except sqlcipher_dbapi2.OperationalError, e: + raise sqlcipher_dbapi2.OperationalError( + str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args)) + res = c.fetchall() + return res[0][0] + + def close(self): + """ + Close db connections. + """ + # TODO should be handled by adbapi instead + # TODO syncdb should be stopped first + + if logger is not None: # logger might be none if called from __del__ + logger.debug("SQLCipher backend: closing") + + # close the actual database + if self._db_handle is not None: + self._db_handle.close() + self._db_handle = None + + # indexes + + def _put_and_update_indexes(self, old_doc, doc): + """ + Update a document and all indexes related to it. + + :param old_doc: The old version of the document. + :type old_doc: u1db.Document + :param doc: The new version of the document. + :type doc: u1db.Document + """ + sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes( + self, old_doc, doc) + c = self._db_handle.cursor() + c.execute('UPDATE document SET syncable=? WHERE doc_id=?', + (doc.syncable, doc.doc_id)) + + def _get_doc(self, doc_id, check_for_conflicts=False): + """ + Get just the document content, without fancy handling. + + :param doc_id: The unique document identifier + :type doc_id: str + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise asking for a deleted + document will return None. + :type include_deleted: bool + + :return: a Document object. 
+ :type: u1db.Document + """ + doc = sqlite_backend.SQLitePartialExpandDatabase._get_doc( + self, doc_id, check_for_conflicts) + if doc: + c = self._db_handle.cursor() + c.execute('SELECT syncable FROM document WHERE doc_id=?', + (doc.doc_id,)) + result = c.fetchone() + doc.syncable = bool(result[0]) + return doc + + def __del__(self): + """ + Free resources when deleting or garbage collecting the database. + + This is only here to minimze problems if someone ever forgets to call + the close() method after using the database; you should not rely on + garbage collecting to free up the database resources. + """ + self.close() # TODO ---- rescue the fix for the windows case from here... - #@classmethod - # XXX Use SQLCIpherOptions instead - #def _open_database(cls, sqlcipher_file, password, document_factory=None, - #crypto=None, raw_key=False, cipher='aes-256-cbc', - #kdf_iter=4000, cipher_page_size=1024, - #defer_encryption=False, sync_db_key=None): - #""" - #Open a SQLCipher database. + # @classmethod + # def _open_database(cls, sqlcipher_file, password, document_factory=None, + # crypto=None, raw_key=False, cipher='aes-256-cbc', + # kdf_iter=4000, cipher_page_size=1024, + # defer_encryption=False, sync_db_key=None): + # """ + # Open a SQLCipher database. # - #:return: The database object. - #:rtype: SQLCipherDatabase - #""" - #cls.defer_encryption = defer_encryption - #if not os.path.isfile(sqlcipher_file): - #raise u1db_errors.DatabaseDoesNotExist() + # :return: The database object. 
+ # :rtype: SQLCipherDatabase + # """ + # cls.defer_encryption = defer_encryption + # if not os.path.isfile(sqlcipher_file): + # raise u1db_errors.DatabaseDoesNotExist() # - #tries = 2 + # tries = 2 # Note: There seems to be a bug in sqlite 3.5.9 (with python2.6) # where without re-opening the database on Windows, it # doesn't see the transaction that was just committed - #while True: -# - #with cls.k_lock: - #db_handle = dbapi2.connect( - #sqlcipher_file, - #check_same_thread=SQLITE_CHECK_SAME_THREAD) + # while True: + # with cls.k_lock: + # db_handle = dbapi2.connect( + # sqlcipher_file, + # check_same_thread=SQLITE_CHECK_SAME_THREAD) # - #try: + # try: # set cryptographic params # # XXX pass only a CryptoOptions object around @@ -374,49 +475,108 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): #crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, #cipher_page_size=cipher_page_size, sync_db_key=sync_db_key) - #@classmethod - #def open_database(cls, sqlcipher_file, password, create, - #document_factory=None, crypto=None, raw_key=False, - #cipher='aes-256-cbc', kdf_iter=4000, - #cipher_page_size=1024, defer_encryption=False, - #sync_db_key=None): - # XXX pass only a CryptoOptions object around - #""" - #Open a SQLCipher database. -# - #*** IMPORTANT *** -# - #Don't forget to close the database after use by calling the close() - #method otherwise some resources might not be freed and you may - #experience several kinds of leakages. -# - #*** IMPORTANT *** -# - #:return: The database object. 
- #:rtype: SQLCipherDatabase - #""" - #cls.defer_encryption = defer_encryption - #args = sqlcipher_file, password - #kwargs = { - #'crypto': crypto, - #'raw_key': raw_key, - #'cipher': cipher, - #'kdf_iter': kdf_iter, - #'cipher_page_size': cipher_page_size, - #'defer_encryption': defer_encryption, - #'sync_db_key': sync_db_key, - #'document_factory': document_factory, - #} - #try: - #return cls._open_database(*args, **kwargs) - #except u1db_errors.DatabaseDoesNotExist: - #if not create: - #raise -# - # XXX here we were missing sync_db_key, intentional? - #return SQLCipherDatabase(*args, **kwargs) - # BEGIN SYNC FOO ---------------------------------------------------------- +class SQLCipherU1DBSync(object): + + _sync_watcher = None + _sync_enc_pool = None + + """ + The name of the local symmetrically encrypted documents to + sync database file. + """ + LOCAL_SYMMETRIC_SYNC_FILE_NAME = 'sync.u1db' + + """ + A dictionary that hold locks which avoid multiple sync attempts from the + same database replica. + """ + # XXX We do not need the lock here now. Remove. + encrypting_lock = threading.Lock() + + """ + Period or recurrence of the periodic encrypting task, in seconds. + """ + # XXX use LoopingCall. + # Just use fucking deferreds, do not waste time looping. + ENCRYPT_TASK_PERIOD = 1 + + """ + A dictionary that hold locks which avoid multiple sync attempts from the + same database replica. 
+ """ + syncing_lock = defaultdict(threading.Lock) + + def _init_sync(self, opts, soledad_crypto, defer_encryption=False): + + self._crypto = soledad_crypto + + # TODO ----- have to decide what to do with syncer + self._sync_db_key = opts.sync_db_key + self._sync_db = None + self._sync_db_write_lock = None + self._sync_enc_pool = None + self.sync_queue = None + + if self.defer_encryption: + # initialize sync db + self._init_sync_db() + # initialize syncing queue encryption pool + self._sync_enc_pool = crypto.SyncEncrypterPool( + self._crypto, self._sync_db, self._sync_db_write_lock) + self._sync_watcher = TimerTask(self._encrypt_syncing_docs, + self.ENCRYPT_TASK_PERIOD) + self._sync_watcher.start() + + # TODO move to class attribute? + # we store syncers in a dictionary indexed by the target URL. We also + # store a hash of the auth info in case auth info expires and we need + # to rebuild the syncer for that target. The final self._syncers + # format is the following:: + # + # self._syncers = {'': ('', syncer), ...} + self._syncers = {} + self._sync_db_write_lock = threading.Lock() + self.sync_queue = multiprocessing.Queue() + + def _init_sync_db(self, opts): + """ + Initialize the Symmetrically-Encrypted document to be synced database, + and the queue to communicate with subprocess workers. + + :param opts: + :type opts: SQLCipherOptions + """ + soledad_assert(opts.sync_db_key is not None) + sync_db_path = None + if opts.path != ":memory:": + sync_db_path = "%s-sync" % opts.path + else: + sync_db_path = ":memory:" + + # XXX use initialize_sqlcipher_db here too + # TODO pass on_init queries to initialize_sqlcipher_db + self._sync_db = MPSafeSQLiteDB(sync_db_path) + pragmas.set_crypto_pragmas(self._sync_db, opts) + + # create sync tables + self._create_sync_db_tables() + + def _create_sync_db_tables(self): + """ + Create tables for the local sync documents db if needed. 
+ """ + # TODO use adbapi --------------------------------- + encr = crypto.SyncEncrypterPool + decr = crypto.SyncDecrypterPool + sql_encr = ("CREATE TABLE IF NOT EXISTS %s (%s)" % ( + encr.TABLE_NAME, encr.FIELD_NAMES)) + sql_decr = ("CREATE TABLE IF NOT EXISTS %s (%s)" % ( + decr.TABLE_NAME, decr.FIELD_NAMES)) + + with self._sync_db_write_lock: + self._sync_db.execute(sql_encr) + self._sync_db.execute(sql_decr) def sync(self, url, creds=None, autocreate=True, defer_decryption=True): """ @@ -428,14 +588,15 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :param url: The url of the target replica to sync with. :type url: str - :param creds: optional dictionary giving credentials. + :param creds: + optional dictionary giving credentials. to authorize the operation with the server. :type creds: dict :param autocreate: Ask the target to create the db if non-existent. :type autocreate: bool - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. + :param defer_decryption: + Whether to defer the decryption process using the intermediate + database. If False, decryption will be done inline. :type defer_decryption: bool :return: The local generation before the synchronisation was performed. @@ -482,13 +643,13 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): Because of that, this method blocks until the syncing lock can be acquired. 
""" - with SQLCipherDatabase.syncing_lock[self._get_replica_uid()]: + with self.syncing_lock[self._get_replica_uid()]: syncer = self._get_syncer(url, creds=creds) yield syncer @property def syncing(self): - lock = SQLCipherDatabase.syncing_lock[self._get_replica_uid()] + lock = self.syncing_lock[self._get_replica_uid()] acquired_lock = lock.acquire(False) if acquired_lock is False: return True @@ -530,46 +691,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): syncer.num_inserted = 0 return syncer - # END SYNC FOO ---------------------------------------------------------- - - def _init_sync_db(self): - """ - Initialize the Symmetrically-Encrypted document to be synced database, - and the queue to communicate with subprocess workers. - """ - if self._sync_db is None: - soledad_assert(self._sync_db_key is not None) - sync_db_path = None - if self._sqlcipher_file != ":memory:": - sync_db_path = "%s-sync" % self._sqlcipher_file - else: - sync_db_path = ":memory:" - self._sync_db = MPSafeSQLiteDB(sync_db_path) - # protect the sync db with a password - if self._sync_db_key is not None: - # XXX pass only a CryptoOptions object around - pragmas.set_crypto_pragmas( - self._sync_db, self._sync_db_key, False, - 'aes-256-cbc', 4000, 1024) - self._sync_db_write_lock = threading.Lock() - self._create_sync_db_tables() - self.sync_queue = multiprocessing.Queue() - - def _create_sync_db_tables(self): - """ - Create tables for the local sync documents db if needed. 
- """ - encr = SyncEncrypterPool - decr = SyncDecrypterPool - sql_encr = ("CREATE TABLE IF NOT EXISTS %s (%s)" % ( - encr.TABLE_NAME, encr.FIELD_NAMES)) - sql_decr = ("CREATE TABLE IF NOT EXISTS %s (%s)" % ( - decr.TABLE_NAME, decr.FIELD_NAMES)) - - with self._sync_db_write_lock: - self._sync_db.execute(sql_encr) - self._sync_db.execute(sql_decr) - # # Symmetric encryption of syncing docs # @@ -599,182 +720,14 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): finally: lock.release() - # - # Document operations - # - - def put_doc(self, doc): - """ - Overwrite the put_doc method, to enqueue the modified document for - encryption before sync. - - :param doc: The document to be put. - :type doc: u1db.Document - - :return: The new document revision. - :rtype: str - """ - doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(self, doc) - if self.defer_encryption: - self.sync_queue.put_nowait(doc) - return doc_rev - - # indexes - - def _put_and_update_indexes(self, old_doc, doc): - """ - Update a document and all indexes related to it. - - :param old_doc: The old version of the document. - :type old_doc: u1db.Document - :param doc: The new version of the document. - :type doc: u1db.Document - """ - with self.update_indexes_lock: - sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes( - self, old_doc, doc) - c = self._db_handle.cursor() - c.execute('UPDATE document SET syncable=? ' - 'WHERE doc_id=?', - (doc.syncable, doc.doc_id)) - - def _get_doc(self, doc_id, check_for_conflicts=False): - """ - Get just the document content, without fancy handling. - - :param doc_id: The unique document identifier - :type doc_id: str - :param include_deleted: If set to True, deleted documents will be - returned with empty content. Otherwise asking for a deleted - document will return None. - :type include_deleted: bool - - :return: a Document object. 
- :type: u1db.Document - """ - doc = sqlite_backend.SQLitePartialExpandDatabase._get_doc( - self, doc_id, check_for_conflicts) - if doc: - c = self._db_handle.cursor() - c.execute('SELECT syncable FROM document WHERE doc_id=?', - (doc.doc_id,)) - result = c.fetchone() - doc.syncable = bool(result[0]) - return doc - - # - # SQLCipher API methods - # - - # XXX Use SQLCIpherOptions instead - @classmethod - def assert_db_is_encrypted(cls, sqlcipher_file, key, raw_key, cipher, - kdf_iter, cipher_page_size): - """ - Assert that C{sqlcipher_file} contains an encrypted database. - - When opening an existing database, PRAGMA key will not immediately - throw an error if the key provided is incorrect. To test that the - database can be successfully opened with the provided key, it is - necessary to perform some operation on the database (i.e. read from - it) and confirm it is success. - - The easiest way to do this is select off the sqlite_master table, - which will attempt to read the first page of the database and will - parse the schema. - - :param sqlcipher_file: The path for the SQLCipher file. - :type sqlcipher_file: str - :param key: The key that protects the SQLCipher db. - :type key: str - :param raw_key: Whether C{key} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. - :type raw_key: bool - :param cipher: The cipher and mode to use. - :type cipher: str - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int - :param cipher_page_size: The page size. - :type cipher_page_size: int - """ - try: - # try to open an encrypted database with the regular u1db - # backend should raise a DatabaseError exception. 
- sqlite_backend.SQLitePartialExpandDatabase(sqlcipher_file) - raise DatabaseIsNotEncrypted() - except sqlcipher_dbapi2.DatabaseError: - # assert that we can access it using SQLCipher with the given - # key - with cls.k_lock: - db_handle = sqlcipher_dbapi2.connect( - sqlcipher_file, - isolation_level=SQLITE_ISOLATION_LEVEL, - check_same_thread=SQLITE_CHECK_SAME_THREAD) - pragmas.set_crypto_pragmas( - db_handle, key, raw_key, cipher, - kdf_iter, cipher_page_size) - db_handle.cursor().execute( - 'SELECT count(*) FROM sqlite_master') - - # Extra query methods: extensions to the base sqlite implmentation. - - def get_count_from_index(self, index_name, *key_values): - """ - Returns the count for a given combination of index_name - and key values. - - Extension method made from similar methods in u1db version 13.09 - - :param index_name: The index to query - :type index_name: str - :param key_values: values to match. eg, if you have - an index with 3 fields then you would have: - get_from_index(index_name, val1, val2, val3) - :type key_values: tuple - :return: count. - :rtype: int - """ - c = self._db_handle.cursor() - definition = self._get_index_definition(index_name) - - if len(key_values) != len(definition): - raise u1db_errors.InvalidValueForIndex() - tables = ["document_fields d%d" % i for i in range(len(definition))] - novalue_where = ["d.doc_id = d%d.doc_id" - " AND d%d.field_name = ?" - % (i, i) for i in range(len(definition))] - exact_where = [novalue_where[i] - + (" AND d%d.value = ?" 
% (i,)) - for i in range(len(definition))] - args = [] - where = [] - for idx, (field, value) in enumerate(zip(definition, key_values)): - args.append(field) - where.append(exact_where[idx]) - args.append(value) - - tables = ["document_fields d%d" % i for i in range(len(definition))] - statement = ( - "SELECT COUNT(*) FROM document d, %s WHERE %s " % ( - ', '.join(tables), - ' AND '.join(where), - )) - try: - c.execute(statement, tuple(args)) - except sqlcipher_dbapi2.OperationalError, e: - raise sqlcipher_dbapi2.OperationalError( - str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args)) - res = c.fetchall() - return res[0][0] + @property + def replica_uid(self): + return self._get_replica_uid() def close(self): """ - Close db_handle and close syncer. + Close the syncer and syncdb orderly """ - # TODO separate db from syncers -------------- - - if logger is not None: # logger might be none if called from __del__ - logger.debug("Sqlcipher backend: closing") # stop the sync watcher for deferred encryption if self._sync_watcher is not None: self._sync_watcher.stop() @@ -789,12 +742,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): if self._sync_enc_pool is not None: self._sync_enc_pool.close() self._sync_enc_pool = None - # close the actual database - if self._db_handle is not None: - self._db_handle.close() - self._db_handle = None - # --------------------------------------- # close the sync database if self._sync_db is not None: self._sync_db.close() @@ -805,20 +753,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): del self.sync_queue self.sync_queue = None - def __del__(self): - """ - Free resources when deleting or garbage collecting the database. - - This is only here to minimze problems if someone ever forgets to call - the close() method after using the database; you should not rely on - garbage collecting to free up the database resources. 
- """ - self.close() - - @property - def replica_uid(self): - return self._get_replica_uid() - # # Exceptions # @@ -831,4 +765,13 @@ class DatabaseIsNotEncrypted(Exception): pass +def soledad_doc_factory(doc_id=None, rev=None, json='{}', has_conflicts=False, + syncable=True): + """ + Return a default Soledad Document. + Used in the initialization for SQLCipherDatabase + """ + return SoledadDocument(doc_id=doc_id, rev=rev, json=json, + has_conflicts=has_conflicts, syncable=syncable) + sqlite_backend.SQLiteDatabase.register_implementation(SQLCipherDatabase) -- cgit v1.2.3 From 8f4daa13744c049dcc96eb2cb780df1e9ba08738 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 2 Oct 2014 06:17:57 -0500 Subject: Separate soledad interfaces * Separate local storage, syncers and shared_db * Comment out unused need_sync method * Use twisted LoopingCall * Create a threadpool for syncs * Return deferred from sync method * Do not pass crypto to SQLCipherDatabase * Pass replica_uid to u1db_syncer * Rename / reorganize some initialization methods --- client/src/leap/soledad/client/adbapi.py | 28 +- client/src/leap/soledad/client/api.py | 613 ++++++--------------- client/src/leap/soledad/client/examples/use_api.py | 2 +- client/src/leap/soledad/client/interfaces.py | 361 ++++++++++++ client/src/leap/soledad/client/pragmas.py | 13 +- client/src/leap/soledad/client/secrets.py | 15 +- client/src/leap/soledad/client/sqlcipher.py | 326 ++++++----- client/src/leap/soledad/client/sync.py | 7 +- client/src/leap/soledad/client/target.py | 45 +- 9 files changed, 755 insertions(+), 655 deletions(-) create mode 100644 client/src/leap/soledad/client/interfaces.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py index 3b15509b..60d9e195 100644 --- a/client/src/leap/soledad/client/adbapi.py +++ b/client/src/leap/soledad/client/adbapi.py @@ -30,7 +30,7 @@ from u1db.backends import sqlite_backend from twisted.enterprise 
import adbapi from twisted.python import log -from leap.soledad.client.sqlcipher import set_init_pragmas +from leap.soledad.client import sqlcipher as soledad_sqlcipher DEBUG_SQL = os.environ.get("LEAP_DEBUG_SQL") @@ -40,18 +40,15 @@ if DEBUG_SQL: def getConnectionPool(opts, openfun=None, driver="pysqlcipher"): if openfun is None and driver == "pysqlcipher": - openfun = partial(set_init_pragmas, opts=opts) + openfun = partial(soledad_sqlcipher.set_init_pragmas, opts=opts) return U1DBConnectionPool( "%s.dbapi2" % driver, database=opts.path, check_same_thread=False, cp_openfun=openfun) -# XXX work in progress -------------------------------------------- - - -class U1DBSqliteWrapper(sqlite_backend.SQLitePartialExpandDatabase): +class U1DBSQLiteBackend(sqlite_backend.SQLitePartialExpandDatabase): """ - A very simple wrapper around sqlcipher backend. + A very simple wrapper for u1db around sqlcipher backend. Instead of initializing the database on the fly, it just uses an existing connection that is passed to it in the initializer. @@ -64,9 +61,24 @@ class U1DBSqliteWrapper(sqlite_backend.SQLitePartialExpandDatabase): self._factory = u1db.Document +class SoledadSQLCipherWrapper(soledad_sqlcipher.SQLCipherDatabase): + """ + A wrapper for u1db that uses the Soledad-extended sqlcipher backend. + + Instead of initializing the database on the fly, it just uses an existing + connection that is passed to it in the initializer. 
+ """ + def __init__(self, conn): + self._db_handle = conn + self._real_replica_uid = None + self._ensure_schema() + self.set_document_factory(soledad_sqlcipher.soledad_doc_factory) + self._prime_replica_uid() + + class U1DBConnection(adbapi.Connection): - u1db_wrapper = U1DBSqliteWrapper + u1db_wrapper = SoledadSQLCipherWrapper def __init__(self, pool, init_u1db=False): self.init_u1db = init_u1db diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 703b9516..493f6c1d 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -41,6 +41,9 @@ except ImportError: from u1db.remote import http_client from u1db.remote.ssl_match_hostname import match_hostname +from zope.interface import implements + +from twisted.python import log from leap.common.config import get_path_prefix from leap.soledad.common import SHARED_DB_NAME @@ -49,11 +52,11 @@ from leap.soledad.common import soledad_assert_type from leap.soledad.client import adbapi from leap.soledad.client import events as soledad_events +from leap.soledad.client import interfaces as soledad_interfaces from leap.soledad.client.crypto import SoledadCrypto from leap.soledad.client.secrets import SoledadSecrets from leap.soledad.client.shared_db import SoledadSharedDatabase -from leap.soledad.client.target import SoledadSyncTarget -from leap.soledad.client.sqlcipher import SQLCipherOptions +from leap.soledad.client.sqlcipher import SQLCipherOptions, SQLCipherU1DBSync logger = logging.getLogger(name=__name__) @@ -61,17 +64,13 @@ logger = logging.getLogger(name=__name__) # Constants # -SOLEDAD_CERT = None """ Path to the certificate file used to certify the SSL connection between Soledad client and server. """ +SOLEDAD_CERT = None -# -# Soledad: local encrypted storage and remote encrypted sync. -# - class Soledad(object): """ Soledad provides encrypted data storage and sync. 
@@ -104,65 +103,57 @@ class Soledad(object): SOLEDAD_DONE_DATA_SYNC: emitted inside C{sync()} method when it has finished synchronizing with remote replica. """ + implements(soledad_interfaces.ILocalStorage, + soledad_interfaces.ISyncableStorage, + soledad_interfaces.ISharedSecretsStorage) - LOCAL_DATABASE_FILE_NAME = 'soledad.u1db' - """ - The name of the local SQLCipher U1DB database file. - """ - - STORAGE_SECRETS_FILE_NAME = "soledad.json" - """ - The name of the file where the storage secrets will be stored. - """ - - DEFAULT_PREFIX = os.path.join(get_path_prefix(), 'leap', 'soledad') - """ - Prefix for default values for path. - """ + local_db_file_name = 'soledad.u1db' + secrets_file_name = "soledad.json" + default_prefix = os.path.join(get_path_prefix(), 'leap', 'soledad') def __init__(self, uuid, passphrase, secrets_path, local_db_path, server_url, cert_file, - auth_token=None, secret_id=None, defer_encryption=False): + auth_token=None, defer_encryption=False): """ Initialize configuration, cryptographic keys and dbs. :param uuid: User's uuid. :type uuid: str - :param passphrase: The passphrase for locking and unlocking encryption - secrets for local and remote storage. + :param passphrase: + The passphrase for locking and unlocking encryption secrets for + local and remote storage. :type passphrase: unicode - :param secrets_path: Path for storing encrypted key used for - symmetric encryption. + :param secrets_path: + Path for storing encrypted key used for symmetric encryption. :type secrets_path: str :param local_db_path: Path for local encrypted storage db. :type local_db_path: str - :param server_url: URL for Soledad server. This is used either to sync - with the user's remote db and to interact with the - shared recovery database. + :param server_url: + URL for Soledad server. This is used either to sync with the user's + remote db and to interact with the shared recovery database. 
:type server_url: str - :param cert_file: Path to the certificate of the ca used - to validate the SSL certificate used by the remote - soledad server. + :param cert_file: + Path to the certificate of the ca used to validate the SSL + certificate used by the remote soledad server. :type cert_file: str - :param auth_token: Authorization token for accessing remote databases. + :param auth_token: + Authorization token for accessing remote databases. :type auth_token: str - :param secret_id: The id of the storage secret to be used. - :type secret_id: str - - :param defer_encryption: Whether to defer encryption/decryption of - documents, or do it inline while syncing. + :param defer_encryption: + Whether to defer encryption/decryption of documents, or do it + inline while syncing. :type defer_encryption: bool - :raise BootstrapSequenceError: Raised when the secret generation and - storage on server sequence has failed - for some reason. + :raise BootstrapSequenceError: + Raised when the secret generation and storage on server sequence + has failed for some reason. 
""" # store config params self._uuid = uuid @@ -170,30 +161,34 @@ class Soledad(object): self._secrets_path = secrets_path self._local_db_path = local_db_path self._server_url = server_url + self._defer_encryption = defer_encryption + + self.shared_db = None + # configure SSL certificate global SOLEDAD_CERT SOLEDAD_CERT = cert_file - self._set_token(auth_token) - self._defer_encryption = defer_encryption - - self._init_config() - self._init_dirs() # init crypto variables - self._shared_db_instance = None + self._set_token(auth_token) self._crypto = SoledadCrypto(self) - self._secrets = SoledadSecrets( - self._uuid, - self._passphrase, - self._secrets_path, - self._shared_db, - self._crypto, - secret_id=secret_id) - # initiate bootstrap sequence - self._bootstrap() # might raise BootstrapSequenceError() + self._init_config_with_defaults() + self._init_working_dirs() + + # Initialize shared recovery database + self.init_shared_db(server_url, uuid, self._creds) - def _init_config(self): + # The following can raise BootstrapSequenceError, that will be + # propagated upwards. + self._init_secrets() + self._init_u1db_sqlcipher_backend() + self._init_u1db_syncer() + + # + # initialization/destruction methods + # + def _init_config_with_defaults(self): """ Initialize configuration using default values for missing params. """ @@ -202,55 +197,37 @@ class Soledad(object): # initialize secrets_path initialize(self._secrets_path, os.path.join( - self.DEFAULT_PREFIX, self.STORAGE_SECRETS_FILE_NAME)) - + self.default_prefix, self.secrets_file_name)) # initialize local_db_path initialize(self._local_db_path, os.path.join( - self.DEFAULT_PREFIX, self.LOCAL_DATABASE_FILE_NAME)) - + self.default_prefix, self.local_db_file_name)) # initialize server_url soledad_assert(self._server_url is not None, 'Missing URL for Soledad server.') - # - # initialization/destruction methods - # - - def _bootstrap(self): - """ - Bootstrap local Soledad instance. 
- - :raise BootstrapSequenceError: - Raised when the secret generation and storage on server sequence - has failed for some reason. - """ - self._secrets.bootstrap() - self._init_db() - # XXX initialize syncers? - - def _init_dirs(self): + def _init_working_dirs(self): """ Create work directories. :raise OSError: in case file exists and is not a dir. """ - paths = map( - lambda x: os.path.dirname(x), - [self._local_db_path, self._secrets_path]) + paths = map(lambda x: os.path.dirname(x), [ + self._local_db_path, self._secrets_path]) for path in paths: - try: - if not os.path.isdir(path): - logger.info('Creating directory: %s.' % path) - os.makedirs(path) - except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir(path): - pass - else: - raise - - def _init_db(self): + create_path_if_not_exists(path) + + def _init_secrets(self): + self._secrets = SoledadSecrets( + self.uuid, self.passphrase, self.secrets_path, + self._shared_db, self._crypto) + self._secrets.bootstrap() + + def _init_u1db_sqlcipher_backend(self): """ - Initialize the U1DB SQLCipher database for local storage. + Initialize the U1DB SQLCipher database for local storage, by + instantiating a modified twisted adbapi that will maintain a threadpool + with a u1db-sqlcipher connection for each thread, and will return + deferreds for each u1db query. Currently, Soledad uses the default SQLCipher cipher, i.e. 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key, @@ -268,8 +245,17 @@ class Soledad(object): defer_encryption=self._defer_encryption, sync_db_key=sync_db_key, ) + self._soledad_opts = opts self._dbpool = adbapi.getConnectionPool(opts) + def _init_u1db_syncer(self): + self._dbsyncer = SQLCipherU1DBSync( + self._soledad_opts, self._crypto, self._defer_encryption) + + # + # Closing methods + # + def close(self): """ Close underlying U1DB database.
@@ -279,401 +265,164 @@ class Soledad(object): # TODO close syncers >>>>>> - #if hasattr(self, '_db') and isinstance( - #self._db, - #SQLCipherDatabase): - #self._db.close() -# - # XXX stop syncers - # self._db.stop_sync() - - @property - def _shared_db(self): - """ - Return an instance of the shared recovery database object. - - :return: The shared database. - :rtype: SoledadSharedDatabase - """ - if self._shared_db_instance is None: - self._shared_db_instance = SoledadSharedDatabase.open_database( - urlparse.urljoin(self.server_url, SHARED_DB_NAME), - self._uuid, - False, # db should exist at this point. - creds=self._creds) - return self._shared_db_instance - # - # Document storage, retrieval and sync. + # ILocalStorage # def put_doc(self, doc): - # TODO what happens with this warning during the deferred life cycle? - # Isn't it better to defend ourselves from the mutability, to avoid - # nasty surprises? """ - Update a document in the local encrypted database. - ============================== WARNING ============================== This method converts the document's contents to unicode in-place. This means that after calling `put_doc(doc)`, the contents of the document, i.e. `doc.content`, might be different from before the call. ============================== WARNING ============================== - - :param doc: the document to update - :type doc: SoledadDocument - - :return: - a deferred that will fire with the new revision identifier for - the document - :rtype: Deferred """ + # TODO what happens with this warning during the deferred life cycle? + # Isn't it better to defend ourselves from the mutability, to avoid + # nasty surprises? doc.content = self._convert_to_unicode(doc.content) return self._dbpool.put_doc(doc) def delete_doc(self, doc): - """ - Delete a document from the local encrypted database. - - :param doc: the document to delete - :type doc: SoledadDocument - - :return: - a deferred that will fire with ... 
- :rtype: Deferred - """ # XXX what does this do when fired??? return self._dbpool.delete_doc(doc) def get_doc(self, doc_id, include_deleted=False): - """ - Retrieve a document from the local encrypted database. - - :param doc_id: the unique document identifier - :type doc_id: str - :param include_deleted: - if True, deleted documents will be returned with empty content; - otherwise asking for a deleted document will return None - :type include_deleted: bool - - :return: - A deferred that will fire with the document object, containing a - SoledadDocument, or None if it could not be found - :rtype: Deferred - """ return self._dbpool.get_doc(doc_id, include_deleted=include_deleted) def get_docs(self, doc_ids, check_for_conflicts=True, include_deleted=False): - """ - Get the content for many documents. - - :param doc_ids: a list of document identifiers - :type doc_ids: list - :param check_for_conflicts: if set False, then the conflict check will - be skipped, and 'None' will be returned instead of True/False - :type check_for_conflicts: bool - - :return: - A deferred that will fire with an iterable giving the Document - object for each document id in matching doc_ids order. - :rtype: Deferred - """ - return self._dbpool.get_docs( - doc_ids, check_for_conflicts=check_for_conflicts, - include_deleted=include_deleted) + return self._dbpool.get_docs(doc_ids, + check_for_conflicts=check_for_conflicts, + include_deleted=include_deleted) def get_all_docs(self, include_deleted=False): - """ - Get the JSON content for all documents in the database. - - :param include_deleted: If set to True, deleted documents will be - returned with empty content. Otherwise deleted - documents will not be included in the results. - :return: - A deferred that will fire with (generation, [Document]): that is, - the current generation of the database, followed by a list of all - the documents in the database. 
- :rtype: Deferred - """ return self._dbpool.get_all_docs(include_deleted) def create_doc(self, content, doc_id=None): - """ - Create a new document in the local encrypted database. - - :param content: the contents of the new document - :type content: dict - :param doc_id: an optional identifier specifying the document id - :type doc_id: str - - :return: - A deferred tht will fire with the new document (SoledadDocument - instance). - :rtype: Deferred - """ return self._dbpool.create_doc( _convert_to_unicode(content), doc_id=doc_id) def create_doc_from_json(self, json, doc_id=None): - """ - Create a new document. - - You can optionally specify the document identifier, but the document - must not already exist. See 'put_doc' if you want to override an - existing document. - If the database specifies a maximum document size and the document - exceeds it, create will fail and raise a DocumentTooBig exception. - - :param json: The JSON document string - :type json: str - :param doc_id: An optional identifier specifying the document id. - :type doc_id: - :return: - A deferred that will fire with the new document (A SoledadDocument - instance) - :rtype: Deferred - """ return self._dbpool.create_doc_from_json(json, doc_id=doc_id) def create_index(self, index_name, *index_expressions): - """ - Create an named index, which can then be queried for future lookups. - Creating an index which already exists is not an error, and is cheap. - Creating an index which does not match the index_expressions of the - existing index is an error. - Creating an index will block until the expressions have been evaluated - and the index generated. - - :param index_name: A unique name which can be used as a key prefix - :type index_name: str - :param index_expressions: - index expressions defining the index information. - :type index_expressions: dict - - Examples: - - "fieldname", or "fieldname.subfieldname" to index alphabetically - sorted on the contents of a field. 
- - "number(fieldname, width)", "lower(fieldname)" - """ return self._dbpool.create_index(index_name, *index_expressions) def delete_index(self, index_name): - """ - Remove a named index. - - :param index_name: The name of the index we are removing - :type index_name: str - """ return self._dbpool.delete_index(index_name) def list_indexes(self): - """ - List the definitions of all known indexes. - - :return: A list of [('index-name', ['field', 'field2'])] definitions. - :rtype: list - """ return self._dbpool.list_indexes() def get_from_index(self, index_name, *key_values): - """ - Return documents that match the keys supplied. - - You must supply exactly the same number of values as have been defined - in the index. It is possible to do a prefix match by using '*' to - indicate a wildcard match. You can only supply '*' to trailing entries, - (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) - It is also possible to append a '*' to the last supplied value (eg - 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') - - :param index_name: The index to query - :type index_name: str - :param key_values: values to match. eg, if you have - an index with 3 fields then you would have: - get_from_index(index_name, val1, val2, val3) - :type key_values: tuple - :return: List of [Document] - :rtype: list - """ return self._dbpool.get_from_index(index_name, *key_values) def get_count_from_index(self, index_name, *key_values): - """ - Return the count of the documents that match the keys and - values supplied. - - :param index_name: The index to query - :type index_name: str - :param key_values: values to match. eg, if you have - an index with 3 fields then you would have: - get_from_index(index_name, val1, val2, val3) - :type key_values: tuple - :return: count. 
- :rtype: int - """ return self._dbpool.get_count_from_index(index_name, *key_values) def get_range_from_index(self, index_name, start_value, end_value): - """ - Return documents that fall within the specified range. - - Both ends of the range are inclusive. For both start_value and - end_value, one must supply exactly the same number of values as have - been defined in the index, or pass None. In case of a single column - index, a string is accepted as an alternative for a tuple with a single - value. It is possible to do a prefix match by using '*' to indicate - a wildcard match. You can only supply '*' to trailing entries, (eg - 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also - possible to append a '*' to the last supplied value (eg 'val*', '*', - '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') - - :param index_name: The index to query - :type index_name: str - :param start_values: tuples of values that define the lower bound of - the range. eg, if you have an index with 3 fields then you would - have: (val1, val2, val3) - :type start_values: tuple - :param end_values: tuples of values that define the upper bound of the - range. eg, if you have an index with 3 fields then you would have: - (val1, val2, val3) - :type end_values: tuple - :return: A deferred that will fire with a list of [Document] - :rtype: Deferred - """ return self._dbpool.get_range_from_index( index_name, start_value, end_value) def get_index_keys(self, index_name): - """ - Return all keys under which documents are indexed in this index. - - :param index_name: The index to query - :type index_name: str - :return: - A deferred that will fire with a list of tuples of indexed keys. - :rtype: Deferred - """ return self._dbpool.get_index_keys(index_name) def get_doc_conflicts(self, doc_id): - """ - Get the list of conflicts for the given document. 
- - :param doc_id: the document id - :type doc_id: str - - :return: - A deferred that will fire with a list of the document entries that - are conflicted. - :rtype: Deferred - """ return self._dbpool.get_doc_conflicts(doc_id) def resolve_doc(self, doc, conflicted_doc_revs): - """ - Mark a document as no longer conflicted. - - :param doc: a document with the new content to be inserted. - :type doc: SoledadDocument - :param conflicted_doc_revs: - A deferred that will fire with a list of revisions that the new - content supersedes. - :type conflicted_doc_revs: list - """ return self._dbpool.resolve_doc(doc, conflicted_doc_revs) + def _get_local_db_path(self): + return self._local_db_path + + # XXX Do we really need all this private / property dance? + + local_db_path = property( + _get_local_db_path, + doc='The path for the local database replica.') + + def _get_uuid(self): + return self._uuid + + uuid = property(_get_uuid, doc='The user uuid.') + # - # Sync API + # ISyncableStorage # - # TODO have interfaces, and let it implement it. - def sync(self, defer_decryption=True): - """ - Synchronize the local encrypted replica with a remote replica. - This method blocks until a syncing lock is acquired, so there are no - attempts of concurrent syncs from the same client replica. + # ----------------------------------------------------------------- + # TODO this needs work. + # Should review/write tests to check that this: - :param url: the url of the target replica to sync with - :type url: str + # (1) Defer to the syncer pool -- DONE (on dbsyncer) + # (2) Return the deferred + # (3) Add the callback for signaling the event (executed on reactor + # thread) + # (4) Check that the deferred is called with the local gen. - :param defer_decryption: - Whether to defer the decryption process using the intermediate - database. If False, decryption will be done inline. 
- :type defer_decryption: bool + # TODO document that this returns a deferred + # ----------------------------------------------------------------- - :return: - A deferred that will fire with the local generation before the - synchronisation was performed. - :rtype: str - """ - # TODO this needs work. - # Should: - # (1) Defer to the syncer pool - # (2) Return a deferred (the deferToThreadpool can be good) - # (3) Add the callback for signaling the event - # (4) Let the local gen be returned from the thread + def on_sync_done(local_gen): + soledad_events.signal( + soledad_events.SOLEDAD_DONE_DATA_SYNC, self.uuid) + return local_gen + + sync_url = urlparse.urljoin(self.server_url, 'user-%s' % self.uuid) try: - local_gen = self._dbsyncer.sync( - urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), + d = self._dbsyncer.sync( + sync_url, creds=self._creds, autocreate=False, defer_decryption=defer_decryption) - soledad_events.signal( - soledad_events.SOLEDAD_DONE_DATA_SYNC, self._uuid) - return local_gen + + d.addCallbacks(on_sync_done, lambda err: log.err(err)) + return d + + # TODO catch the exception by adding an Errback except Exception as e: logger.error("Soledad exception when syncing: %s" % str(e)) def stop_sync(self): - """ - Stop the current syncing process. - """ self._dbsyncer.stop_sync() - def need_sync(self, url): - """ - Return if local db replica differs from remote url's replica. - - :param url: The remote replica to compare with local replica. - :type url: str - - :return: Whether remote replica and local replica differ. - :rtype: bool - """ - # XXX pass the get_replica_uid ------------------------ - # From where? initialize with that? - replica_uid = self._db._get_replica_uid() - target = SoledadSyncTarget( - url, replica_uid, creds=self._creds, crypto=self._crypto) - - generation = self._db._get_generation() + # FIXME ------------------------------------------------------- + # review if we really need this. 
I think that we can let the sync + # fail silently if nothing is to be synced. + #def need_sync(self, url): + # XXX dispatch this method in the dbpool ................. + #replica_uid = self._dbpool.replica_uid + #target = SoledadSyncTarget( + #url, replica_uid, creds=self._creds, crypto=self._crypto) +# + # XXX does it matter if we get this from the general dbpool or the + # syncer pool? + #generation = self._dbpool.get_generation() +# # XXX better unpack it? - info = target.get_sync_info(replica_uid) - + #info = target.get_sync_info(replica_uid) +# # compare source generation with target's last known source generation - if generation != info[4]: - soledad_events.signal( - soledad_events.SOLEDAD_NEW_DATA_TO_SYNC, self._uuid) - return True - return False + #if generation != info[4]: + #soledad_events.signal( + #soledad_events.SOLEDAD_NEW_DATA_TO_SYNC, self.uuid) + #return True + #return False @property def syncing(self): - """ - Property, True if the syncer is syncing. - """ return self._dbsyncer.syncing def _set_token(self, token): """ Set the authentication token for remote database access.
- Build the credentials dictionary with the following format: + Internally, this builds the credentials dictionary with the following + format: self._{ 'token': { @@ -686,7 +435,7 @@ class Soledad(object): """ self._creds = { 'token': { - 'uuid': self._uuid, + 'uuid': self.uuid, 'token': token, } } @@ -699,25 +448,24 @@ class Soledad(object): token = property(_get_token, _set_token, doc='The authentication Token.') - # - # Setters/getters - # - - def _get_uuid(self): - return self._uuid - - uuid = property(_get_uuid, doc='The user uuid.') + def _get_server_url(self): + return self._server_url - def get_secret_id(self): - return self._secrets.secret_id + server_url = property( + _get_server_url, + doc='The URL of the Soledad server.') - def set_secret_id(self, secret_id): - self._secrets.set_secret_id(secret_id) + # + # ISharedSecretsStorage + # - secret_id = property( - get_secret_id, - set_secret_id, - doc='The active secret id.') + def init_shared_db(self, server_url, uuid, creds): + shared_db_url = urlparse.urljoin(server_url, SHARED_DB_NAME) + self.shared_db = SoledadSharedDatabase.open_database( + shared_db_url, + uuid, + creds=creds, + create=False) # db should exist at this point. 
def _set_secrets_path(self, secrets_path): self._secrets.secrets_path = secrets_path @@ -730,20 +478,6 @@ class Soledad(object): _set_secrets_path, doc='The path for the file containing the encrypted symmetric secret.') - def _get_local_db_path(self): - return self._local_db_path - - local_db_path = property( - _get_local_db_path, - doc='The path for the local database replica.') - - def _get_server_url(self): - return self._server_url - - server_url = property( - _get_server_url, - doc='The URL of the Soledad server.') - @property def storage_secret(self): """ @@ -762,19 +496,7 @@ class Soledad(object): def secrets(self): return self._secrets - @property - def passphrase(self): - return self._secrets.passphrase - def change_passphrase(self, new_passphrase): - """ - Change the passphrase that encrypts the storage secret. - - :param new_passphrase: The new passphrase. - :type new_passphrase: unicode - - :raise NoStorageSecret: Raised if there's no storage secret available. - """ self._secrets.change_passphrase(new_passphrase) @@ -811,6 +533,17 @@ def _convert_to_unicode(content): return content +def create_path_if_not_exists(path): + try: + if not os.path.isdir(path): + logger.info('Creating directory: %s.' 
% path) + os.makedirs(path) + except OSError as exc: + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + # ---------------------------------------------------------------------------- # Monkey patching u1db to be able to provide a custom SSL cert # ---------------------------------------------------------------------------- diff --git a/client/src/leap/soledad/client/examples/use_api.py b/client/src/leap/soledad/client/examples/use_api.py index fd0a100c..4268fe71 100644 --- a/client/src/leap/soledad/client/examples/use_api.py +++ b/client/src/leap/soledad/client/examples/use_api.py @@ -46,7 +46,7 @@ if os.path.isfile(tmpdb): start_time = datetime.datetime.now() opts = SQLCipherOptions(tmpdb, "secret", create=True) -db = sqlcipher.SQLCipherDatabase(None, opts) +db = sqlcipher.SQLCipherDatabase(opts) def allDone(): diff --git a/client/src/leap/soledad/client/interfaces.py b/client/src/leap/soledad/client/interfaces.py new file mode 100644 index 00000000..6bd3f200 --- /dev/null +++ b/client/src/leap/soledad/client/interfaces.py @@ -0,0 +1,361 @@ +# -*- coding: utf-8 -*- +# interfaces.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Interfaces used by the Soledad Client. +""" +from zope.interface import Interface, Attribute + + +class ILocalStorage(Interface): + """ + I implement core methods for the u1db local storage.
+ """ + local_db_path = Attribute( + "The path for the local database replica") + local_db_file_name = Attribute( + "The name of the local SQLCipher U1DB database file") + uuid = Attribute("The user uuid") + default_prefix = Attribute( + "Prefix for default values for path") + + def put_doc(self, doc): + """ + Update a document in the local encrypted database. + + :param doc: the document to update + :type doc: SoledadDocument + + :return: + a deferred that will fire with the new revision identifier for + the document + :rtype: Deferred + """ + + def delete_doc(self, doc): + """ + Delete a document from the local encrypted database. + + :param doc: the document to delete + :type doc: SoledadDocument + + :return: + a deferred that will fire with ... + :rtype: Deferred + """ + + def get_doc(self, doc_id, include_deleted=False): + """ + Retrieve a document from the local encrypted database. + + :param doc_id: the unique document identifier + :type doc_id: str + :param include_deleted: + if True, deleted documents will be returned with empty content; + otherwise asking for a deleted document will return None + :type include_deleted: bool + + :return: + A deferred that will fire with the document object, containing a + SoledadDocument, or None if it could not be found + :rtype: Deferred + """ + + def get_docs(self, doc_ids, check_for_conflicts=True, + include_deleted=False): + """ + Get the content for many documents. + + :param doc_ids: a list of document identifiers + :type doc_ids: list + :param check_for_conflicts: if set False, then the conflict check will + be skipped, and 'None' will be returned instead of True/False + :type check_for_conflicts: bool + + :return: + A deferred that will fire with an iterable giving the Document + object for each document id in matching doc_ids order. + :rtype: Deferred + """ + + def get_all_docs(self, include_deleted=False): + """ + Get the JSON content for all documents in the database. 
+ + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted + documents will not be included in the results. + :return: + A deferred that will fire with (generation, [Document]): that is, + the current generation of the database, followed by a list of all + the documents in the database. + :rtype: Deferred + """ + + def create_doc(self, content, doc_id=None): + """ + Create a new document in the local encrypted database. + + :param content: the contents of the new document + :type content: dict + :param doc_id: an optional identifier specifying the document id + :type doc_id: str + + :return: + A deferred that will fire with the new document (SoledadDocument + instance). + :rtype: Deferred + """ + + def create_doc_from_json(self, json, doc_id=None): + """ + Create a new document. + + You can optionally specify the document identifier, but the document + must not already exist. See 'put_doc' if you want to override an + existing document. + If the database specifies a maximum document size and the document + exceeds it, create will fail and raise a DocumentTooBig exception. + + :param json: The JSON document string + :type json: str + :param doc_id: An optional identifier specifying the document id. + :type doc_id: + :return: + A deferred that will fire with the new document (A SoledadDocument + instance) + :rtype: Deferred + """ + + def create_index(self, index_name, *index_expressions): + """ + Create a named index, which can then be queried for future lookups. + Creating an index which already exists is not an error, and is cheap. + Creating an index which does not match the index_expressions of the + existing index is an error. + Creating an index will block until the expressions have been evaluated + and the index generated. + + :param index_name: A unique name which can be used as a key prefix + :type index_name: str + :param index_expressions: + index expressions defining the index information.
+ :type index_expressions: dict + + Examples: + + "fieldname", or "fieldname.subfieldname" to index alphabetically + sorted on the contents of a field. + + "number(fieldname, width)", "lower(fieldname)" + """ + + def delete_index(self, index_name): + """ + Remove a named index. + + :param index_name: The name of the index we are removing + :type index_name: str + """ + + def list_indexes(self): + """ + List the definitions of all known indexes. + + :return: A list of [('index-name', ['field', 'field2'])] definitions. + :rtype: Deferred + """ + + def get_from_index(self, index_name, *key_values): + """ + Return documents that match the keys supplied. + + You must supply exactly the same number of values as have been defined + in the index. It is possible to do a prefix match by using '*' to + indicate a wildcard match. You can only supply '*' to trailing entries, + (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) + It is also possible to append a '*' to the last supplied value (eg + 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: List of [Document] + :rtype: list + """ + + def get_count_from_index(self, index_name, *key_values): + """ + Return the count of the documents that match the keys and + values supplied. + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: count. + :rtype: int + """ + + def get_range_from_index(self, index_name, start_value, end_value): + """ + Return documents that fall within the specified range. + + Both ends of the range are inclusive. 
For both start_value and + end_value, one must supply exactly the same number of values as have + been defined in the index, or pass None. In case of a single column + index, a string is accepted as an alternative for a tuple with a single + value. It is possible to do a prefix match by using '*' to indicate + a wildcard match. You can only supply '*' to trailing entries, (eg + 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also + possible to append a '*' to the last supplied value (eg 'val*', '*', + '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :type index_name: str + :param start_value: tuples of values that define the lower bound of + the range. eg, if you have an index with 3 fields then you would + have: (val1, val2, val3) + :type start_value: tuple + :param end_value: tuples of values that define the upper bound of the + range. eg, if you have an index with 3 fields then you would have: + (val1, val2, val3) + :type end_value: tuple + :return: A deferred that will fire with a list of [Document] + :rtype: Deferred + """ + + def get_index_keys(self, index_name): + """ + Return all keys under which documents are indexed in this index. + + :param index_name: The index to query + :type index_name: str + :return: + A deferred that will fire with a list of tuples of indexed keys. + :rtype: Deferred + """ + + def get_doc_conflicts(self, doc_id): + """ + Get the list of conflicts for the given document. + + :param doc_id: the document id + :type doc_id: str + + :return: + A deferred that will fire with a list of the document entries that + are conflicted. + :rtype: Deferred + """ + + def resolve_doc(self, doc, conflicted_doc_revs): + """ + Mark a document as no longer conflicted. + + :param doc: a document with the new content to be inserted. + :type doc: SoledadDocument + :param conflicted_doc_revs: + A list of revisions that the new + content supersedes. 
+ :type conflicted_doc_revs: list + """ + + +class ISyncableStorage(Interface): + """ + I implement methods to synchronize with a remote replica. + """ + replica_uid = Attribute("The uid of the local replica") + server_url = Attribute("The URL of the Soledad server.") + syncing = Attribute( + "Property, True if the syncer is syncing.") + token = Attribute("The authentication Token.") + + def sync(self, defer_decryption=True): + """ + Synchronize the local encrypted replica with a remote replica. + + This method blocks until a syncing lock is acquired, so there are no + attempts of concurrent syncs from the same client replica. + + :param url: the url of the target replica to sync with + :type url: str + + :param defer_decryption: + Whether to defer the decryption process using the intermediate + database. If False, decryption will be done inline. + :type defer_decryption: bool + + :return: + A deferred that will fire with the local generation before the + synchronisation was performed. + :rtype: Deferred + """ + + def stop_sync(self): + """ + Stop the current syncing process. + """ + + +class ISharedSecretsStorage(Interface): + """ + I implement methods needed for the Shared Recovery Database. + """ + secrets_path = Attribute( + "Path for storing encrypted key used for symmetric encryption.") + secrets_file_name = Attribute( + "The name of the file where the storage secrets will be stored") + + storage_secret = Attribute("") + remote_storage_secret = Attribute("") + shared_db = Attribute("The shared db object") + + # XXX this is used internally from secrets, so it might be good to preserve + # as a public boundary with other components. + secrets = Attribute("") + + def init_shared_db(self, server_url, uuid, creds): + """ + Initialize the shared recovery database. 
+ + :param server_url: + :type server_url: + :param uuid: + :type uuid: + :param creds: + :type creds: + """ + + def change_passphrase(self, new_passphrase): + """ + Change the passphrase that encrypts the storage secret. + + :param new_passphrase: The new passphrase. + :type new_passphrase: unicode + + :raise NoStorageSecret: Raised if there's no storage secret available. + """ + + # XXX not in use. Uncomment if we ever decide to allow + # multiple secrets. + # secret_id = Attribute("The id of the storage secret to be used") diff --git a/client/src/leap/soledad/client/pragmas.py b/client/src/leap/soledad/client/pragmas.py index 7a13a694..2e9c53a3 100644 --- a/client/src/leap/soledad/client/pragmas.py +++ b/client/src/leap/soledad/client/pragmas.py @@ -43,7 +43,7 @@ def set_crypto_pragmas(db_handle, sqlcipher_opts): def _set_key(db_handle, key, is_raw_key): """ - Set the C{key} for use with the database. + Set the ``key`` for use with the database. The process of creating a new, encrypted database is called 'keying' the database. SQLCipher uses just-in-time key derivation at the point @@ -60,9 +60,9 @@ def _set_key(db_handle, key, is_raw_key): :param key: The key for use with the database. :type key: str - :param is_raw_key: Whether C{key} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the - encyrption key. + :param is_raw_key: + Whether ``key`` is a raw 64-char hex string or a passphrase that should + be hashed to obtain the encryption key. :type is_raw_key: bool """ if is_raw_key: @@ -321,14 +321,11 @@ def set_write_ahead_logging(db_handle): """ logger.debug("SQLCIPHER: SETTING WRITE-AHEAD LOGGING") db_handle.cursor().execute('PRAGMA journal_mode=WAL') + # The optimum value can still use a little bit of tuning, but we favor # small sizes of the WAL file to get fast reads, since we assume that # the writes will be quick enough to not block too much. 
- # TODO - # As a further improvement, we might want to set autocheckpoint to 0 - # here and do the checkpoints manually in a separate thread, to avoid - # any blocks in the main thread (we should run a loopingcall from here) db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50') diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 970ac82f..93f8c25d 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -144,8 +144,7 @@ class SoledadSecrets(object): Keys used to access storage secrets in recovery documents. """ - def __init__(self, uuid, passphrase, secrets_path, shared_db, crypto, - secret_id=None): + def __init__(self, uuid, passphrase, secrets_path, shared_db, crypto): """ Initialize the secrets manager. @@ -161,17 +160,20 @@ class SoledadSecrets(object): :type shared_db: leap.soledad.client.shared_db.SoledadSharedDatabase :param crypto: A soledad crypto object. :type crypto: SoledadCrypto - :param secret_id: The id of the storage secret to be used. - :type secret_id: str """ + # XXX removed since not in use + # We will pick the first secret available. + # param secret_id: The id of the storage secret to be used. + self._uuid = uuid self._passphrase = passphrase self._secrets_path = secrets_path self._shared_db = shared_db self._crypto = crypto - self._secret_id = secret_id self._secrets = {} + self._secret_id = None + def bootstrap(self): """ Bootstrap secrets. 
@@ -247,7 +249,8 @@ class SoledadSecrets(object): try: self._load_secrets() # try to load from disk except IOError as e: - logger.warning('IOError while loading secrets from disk: %s' % str(e)) + logger.warning( + 'IOError while loading secrets from disk: %s' % str(e)) return False return self.storage_secret is not None diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index c9e69c73..a7e9e0fe 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -45,7 +45,6 @@ import logging import multiprocessing import os import threading -# import time --- needed for the win initialization hack import json from hashlib import sha256 @@ -56,7 +55,10 @@ from httplib import CannotSendRequest from pysqlcipher import dbapi2 as sqlcipher_dbapi2 from u1db.backends import sqlite_backend from u1db import errors as u1db_errors -from taskthread import TimerTask + +from twisted.internet.task import LoopingCall +from twisted.internet.threads import deferToThreadPool +from twisted.python.threadpool import ThreadPool from leap.soledad.client import crypto from leap.soledad.client.target import SoledadSyncTarget @@ -64,7 +66,6 @@ from leap.soledad.client.target import PendingReceivedDocsSyncError from leap.soledad.client.sync import SoledadSynchronizer # TODO use adbapi too -from leap.soledad.client.mp_safe_db_TOREMOVE import MPSafeSQLiteDB from leap.soledad.client import pragmas from leap.soledad.common import soledad_assert from leap.soledad.common.document import SoledadDocument @@ -75,16 +76,6 @@ logger = logging.getLogger(__name__) # Monkey-patch u1db.backends.sqlite_backend with pysqlcipher.dbapi2 sqlite_backend.dbapi2 = sqlcipher_dbapi2 -# It seems that, as long as we are not using old sqlite versions, serialized -# mode is enabled by default at compile time. 
So accessing db connections from -# different threads should be safe, as long as no attempt is made to use them -# from multiple threads with no locking. -# See https://sqlite.org/threadsafe.html -# and http://bugs.python.org/issue16509 - -# TODO this no longer needed ------------- -#SQLITE_CHECK_SAME_THREAD = False - def initialize_sqlcipher_db(opts, on_init=None): """ @@ -96,12 +87,17 @@ def initialize_sqlcipher_db(opts, on_init=None): :type on_init: tuple :return: a SQLCipher connection """ - conn = sqlcipher_dbapi2.connect( - opts.path) + # Note: There seemed to be a bug in sqlite 3.5.9 (with python2.6) + # where without re-opening the database on Windows, it + # doesn't see the transaction that was just committed + # Removing from here now, look at the pysqlite implementation if the + # bug shows up in windows. - # XXX not needed -- check - #check_same_thread=SQLITE_CHECK_SAME_THREAD) + if not os.path.isfile(opts.path) and not opts.create: + raise u1db_errors.DatabaseDoesNotExist() + conn = sqlcipher_dbapi2.connect( + opts.path) set_init_pragmas(conn, opts, extra_queries=on_init) return conn @@ -196,10 +192,11 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ defer_encryption = False - # XXX not used afaik: - # _index_storage_value = 'expand referenced encrypted' + # The attribute _index_storage_value will be used as the lookup key. + # Here we extend it with `encrypted` + _index_storage_value = 'expand referenced encrypted' - def __init__(self, soledad_crypto, opts): + def __init__(self, opts): """ Connect to an existing SQLCipher database, creating a new sqlcipher database file if needed. 
@@ -217,18 +214,34 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :param opts: :type opts: SQLCipherOptions """ - # TODO ------ we don't need any soledad crypto in here - # ensure the db is encrypted if the file already exists if os.path.isfile(opts.path): - self.assert_db_is_encrypted(opts) + _assert_db_is_encrypted(opts) # connect to the sqlcipher database self._db_handle = initialize_sqlcipher_db(opts) - self._real_replica_uid = None - self._ensure_schema() + # TODO --------------------------------------------------- + # Everything else in this initialization has to be factored + # out, so it can be used from U1DBSqlcipherWrapper __init__ + # too. + # --------------------------------------------------------- + + self._ensure_schema() self.set_document_factory(soledad_doc_factory) + self._prime_replica_uid() + + def _prime_replica_uid(self): + """ + In the u1db implementation, _replica_uid is a property + that returns the value in _real_replica_uid, and does + a db query if no value found. + Here we prime the replica uid during initialization so + that we don't have to wait for the query afterwards. + """ + self._real_replica_uid = None + self._get_replica_uid() + print "REPLICA UID --->", self._real_replica_uid def _extra_schema_init(self, c): """ @@ -241,7 +254,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :param c: The cursor for querying the database. :type c: dbapi2.cursor """ - print "CALLING EXTRA SCHEMA INIT...." 
c.execute( 'ALTER TABLE document ' 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') @@ -263,7 +275,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(self, doc) - # XXX move to API + # TODO XXX move to API XXX if self.defer_encryption: self.sync_queue.put_nowait(doc) return doc_rev @@ -272,37 +284,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): # SQLCipher API methods # - # TODO this doesn't need to be an instance method - def assert_db_is_encrypted(self, opts): - """ - Assert that the sqlcipher file contains an encrypted database. - - When opening an existing database, PRAGMA key will not immediately - throw an error if the key provided is incorrect. To test that the - database can be successfully opened with the provided key, it is - necessary to perform some operation on the database (i.e. read from - it) and confirm it is success. - - The easiest way to do this is select off the sqlite_master table, - which will attempt to read the first page of the database and will - parse the schema. - - :param opts: - """ - # We try to open an encrypted database with the regular u1db - # backend should raise a DatabaseError exception. - # If the regular backend succeeds, then we need to stop because - # the database was not properly initialized. - try: - sqlite_backend.SQLitePartialExpandDatabase(opts.path) - except sqlcipher_dbapi2.DatabaseError: - # assert that we can access it using SQLCipher with the given - # key - dummy_query = ('SELECT count(*) FROM sqlite_master',) - initialize_sqlcipher_db(opts, on_init=dummy_query) - else: - raise DatabaseIsNotEncrypted() - # Extra query methods: extensions to the base u1db sqlite implmentation. 
def get_count_from_index(self, index_name, *key_values): @@ -420,65 +401,10 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ self.close() - # TODO ---- rescue the fix for the windows case from here... - # @classmethod - # def _open_database(cls, sqlcipher_file, password, document_factory=None, - # crypto=None, raw_key=False, cipher='aes-256-cbc', - # kdf_iter=4000, cipher_page_size=1024, - # defer_encryption=False, sync_db_key=None): - # """ - # Open a SQLCipher database. -# - # :return: The database object. - # :rtype: SQLCipherDatabase - # """ - # cls.defer_encryption = defer_encryption - # if not os.path.isfile(sqlcipher_file): - # raise u1db_errors.DatabaseDoesNotExist() -# - # tries = 2 - # Note: There seems to be a bug in sqlite 3.5.9 (with python2.6) - # where without re-opening the database on Windows, it - # doesn't see the transaction that was just committed - # while True: - # with cls.k_lock: - # db_handle = dbapi2.connect( - # sqlcipher_file, - # check_same_thread=SQLITE_CHECK_SAME_THREAD) -# - # try: - # set cryptographic params -# - # XXX pass only a CryptoOptions object around - #pragmas.set_crypto_pragmas( - #db_handle, password, raw_key, cipher, kdf_iter, - #cipher_page_size) - #c = db_handle.cursor() - # XXX if we use it here, it should be public - #v, err = cls._which_index_storage(c) - #except Exception as exc: - #logger.warning("ERROR OPENING DATABASE!") - #logger.debug("error was: %r" % exc) - #v, err = None, exc - #finally: - #db_handle.close() - #if v is not None: - #break - # possibly another process is initializing it, wait for it to be - # done - #if tries == 0: - #raise err # go for the richest error? 
- #tries -= 1 - #time.sleep(cls.WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL) - #return SQLCipherDatabase._sqlite_registry[v]( - #sqlcipher_file, password, document_factory=document_factory, - #crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, - #cipher_page_size=cipher_page_size, sync_db_key=sync_db_key) - class SQLCipherU1DBSync(object): - _sync_watcher = None + _sync_loop = None _sync_enc_pool = None """ @@ -495,11 +421,10 @@ class SQLCipherU1DBSync(object): encrypting_lock = threading.Lock() """ - Period or recurrence of the periodic encrypting task, in seconds. + Period or recurrence of the Looping Call that will do the encryption to the + syncdb (in seconds). """ - # XXX use LoopingCall. - # Just use fucking deferreds, do not waste time looping. - ENCRYPT_TASK_PERIOD = 1 + ENCRYPT_LOOP_PERIOD = 1 """ A dictionary that hold locks which avoid multiple sync attempts from the @@ -507,39 +432,62 @@ class SQLCipherU1DBSync(object): """ syncing_lock = defaultdict(threading.Lock) - def _init_sync(self, opts, soledad_crypto, defer_encryption=False): + def __init__(self, opts, soledad_crypto, replica_uid, + defer_encryption=False): self._crypto = soledad_crypto - - # TODO ----- have to decide what to do with syncer self._sync_db_key = opts.sync_db_key self._sync_db = None self._sync_db_write_lock = None self._sync_enc_pool = None self.sync_queue = None - if self.defer_encryption: - # initialize sync db - self._init_sync_db() - # initialize syncing queue encryption pool - self._sync_enc_pool = crypto.SyncEncrypterPool( - self._crypto, self._sync_db, self._sync_db_write_lock) - self._sync_watcher = TimerTask(self._encrypt_syncing_docs, - self.ENCRYPT_TASK_PERIOD) - self._sync_watcher.start() - - # TODO move to class attribute? # we store syncers in a dictionary indexed by the target URL. We also # store a hash of the auth info in case auth info expires and we need # to rebuild the syncer for that target. 
The final self._syncers # format is the following:: # # self._syncers = {'': ('', syncer), ...} + self._syncers = {} self._sync_db_write_lock = threading.Lock() self.sync_queue = multiprocessing.Queue() - def _init_sync_db(self, opts): + self._sync_threadpool = None + self._initialize_sync_threadpool() + + if defer_encryption: + self._initialize_sync_db() + + # initialize syncing queue encryption pool + self._sync_enc_pool = crypto.SyncEncrypterPool( + self._crypto, self._sync_db, self._sync_db_write_lock) + + # ------------------------------------------------------------------ + # From the documentation: If f returns a deferred, rescheduling + # will not take place until the deferred has fired. The result + # value is ignored. + + # TODO use this to avoid multiple sync attempts if the sync has not + # finished! + # ------------------------------------------------------------------ + + # XXX this was called sync_watcher --- trace any remnants + self._sync_loop = LoopingCall(self._encrypt_syncing_docs), + self._sync_loop.start(self.ENCRYPT_LOOP_PERIOD) + + def _initialize_sync_threadpool(self): + """ + Initialize a ThreadPool with exactly one thread, that will be used to + run all the network blocking calls for syncing on a separate thread. + + TODO this needs to be ported away from urllib and into twisted async + calls, and then we can ditch this syncing thread and reintegrate into + the main reactor. + """ + self._sync_threadpool = ThreadPool(0, 1) + + def _initialize_sync_db(self, opts): """ Initialize the Symmetrically-Encrypted document to be synced database, and the queue to communicate with subprocess workers. 
@@ -554,29 +502,32 @@ class SQLCipherU1DBSync(object): else: sync_db_path = ":memory:" - # XXX use initialize_sqlcipher_db here too - # TODO pass on_init queries to initialize_sqlcipher_db - self._sync_db = MPSafeSQLiteDB(sync_db_path) - pragmas.set_crypto_pragmas(self._sync_db, opts) + # --------------------------------------------------------- + # TODO use a separate adbapi for this (sqlcipher only, no u1db) + # We could control that it only has 1 or 2 threads. - # create sync tables - self._create_sync_db_tables() + opts.path = sync_db_path - def _create_sync_db_tables(self): + self._sync_db = initialize_sqlcipher_db( + opts, on_init=self._sync_db_extra_init) + # --------------------------------------------------------- + + @property + def _sync_db_extra_init(self): """ - Create tables for the local sync documents db if needed. + Queries for creating tables for the local sync documents db if needed. + They are passed as extra initialization to initialize_sqlcipher_db + + :rtype: tuple of strings """ - # TODO use adbapi --------------------------------- + maybe_create = "CREATE TABLE IF NOT EXISTS %s (%s)" encr = crypto.SyncEncrypterPool decr = crypto.SyncDecrypterPool - sql_encr = ("CREATE TABLE IF NOT EXISTS %s (%s)" % ( + sql_encr_table_query = (maybe_create % ( encr.TABLE_NAME, encr.FIELD_NAMES)) - sql_decr = ("CREATE TABLE IF NOT EXISTS %s (%s)" % ( + sql_decr_table_query = (maybe_create % ( decr.TABLE_NAME, decr.FIELD_NAMES)) - - with self._sync_db_write_lock: - self._sync_db.execute(sql_encr) - self._sync_db.execute(sql_decr) + return (sql_encr_table_query, sql_decr_table_query) def sync(self, url, creds=None, autocreate=True, defer_decryption=True): """ @@ -599,15 +550,24 @@ class SQLCipherU1DBSync(object): database. If False, decryption will be done inline. :type defer_decryption: bool - :return: The local generation before the synchronisation was performed. 
- :rtype: int + :return: + A Deferred, that will fire with the local generation (type `int`) + before the synchronisation was performed. + :rtype: deferred """ + kwargs = {'creds': creds, 'autocreate': autocreate, + 'defer_decryption': defer_decryption} + return deferToThreadPool(self._sync, url, **kwargs) + + def _sync(self, url, creds=None, autocreate=True, defer_decryption=True): res = None + # the following context manager blocks until the syncing lock can be # acquired. - if defer_decryption: - self._init_sync_db() - with self.syncer(url, creds=creds) as syncer: + # TODO review, I think this is no longer needed with a 1-thread + # threadpool. + + with self._syncer(url, creds=creds) as syncer: # XXX could mark the critical section here... try: res = syncer.sync(autocreate=autocreate, @@ -634,7 +594,7 @@ class SQLCipherU1DBSync(object): syncer.stop() @contextmanager - def syncer(self, url, creds=None): + def _syncer(self, url, creds=None): """ Accesor for synchronizer. @@ -643,13 +603,13 @@ class SQLCipherU1DBSync(object): Because of that, this method blocks until the syncing lock can be acquired. """ - with self.syncing_lock[self._get_replica_uid()]: + with self.syncing_lock[self.replica_uid]: syncer = self._get_syncer(url, creds=creds) yield syncer @property def syncing(self): - lock = self.syncing_lock[self._get_replica_uid()] + lock = self.syncing_lock[self.replica_uid] acquired_lock = lock.acquire(False) if acquired_lock is False: return True @@ -679,7 +639,7 @@ class SQLCipherU1DBSync(object): syncer = SoledadSynchronizer( self, SoledadSyncTarget(url, - self._replica_uid, + self.replica_uid, creds=creds, crypto=self._crypto, sync_db=self._sync_db, @@ -701,8 +661,11 @@ class SQLCipherU1DBSync(object): to be encrypted in the sync db. They will be read by the SoledadSyncTarget during the sync_exchange. - Called periodical from the TimerTask self._sync_watcher. + Called periodically from the LoopingCall self._sync_loop. 
""" + # TODO should return a deferred that would firewhen the encryption is + # done. See note on __init__ + lock = self.encrypting_lock # optional wait flag used to avoid blocking if not lock.acquire(False): @@ -720,19 +683,19 @@ class SQLCipherU1DBSync(object): finally: lock.release() - @property - def replica_uid(self): - return self._get_replica_uid() + def get_generation(self): + # FIXME + # XXX this SHOULD BE a callback + return self._get_generation() def close(self): """ Close the syncer and syncdb orderly """ - # stop the sync watcher for deferred encryption - if self._sync_watcher is not None: - self._sync_watcher.stop() - self._sync_watcher.shutdown() - self._sync_watcher = None + # stop the sync loop for deferred encryption + if self._sync_loop is not None: + self._sync_loop.stop() + self._sync_loop = None # close all open syncers for url in self._syncers: _, syncer = self._syncers[url] @@ -753,6 +716,37 @@ class SQLCipherU1DBSync(object): del self.sync_queue self.sync_queue = None + +def _assert_db_is_encrypted(opts): + """ + Assert that the sqlcipher file contains an encrypted database. + + When opening an existing database, PRAGMA key will not immediately + throw an error if the key provided is incorrect. To test that the + database can be successfully opened with the provided key, it is + necessary to perform some operation on the database (i.e. read from + it) and confirm it is success. + + The easiest way to do this is select off the sqlite_master table, + which will attempt to read the first page of the database and will + parse the schema. + + :param opts: + """ + # We try to open an encrypted database with the regular u1db + # backend should raise a DatabaseError exception. + # If the regular backend succeeds, then we need to stop because + # the database was not properly initialized. 
+ try: + sqlite_backend.SQLitePartialExpandDatabase(opts.path) + except sqlcipher_dbapi2.DatabaseError: + # assert that we can access it using SQLCipher with the given + # key + dummy_query = ('SELECT count(*) FROM sqlite_master',) + initialize_sqlcipher_db(opts, on_init=dummy_query) + else: + raise DatabaseIsNotEncrypted() + # # Exceptions # diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index a47afbb6..aa19ddab 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -17,10 +17,9 @@ """ Soledad synchronization utilities. - Extend u1db Synchronizer with the ability to: - * Defer the update of the known replica uid until all the decryption of + * Postpone the update of the known replica uid until all the decryption of the incoming messages has been processed. * Be interrupted and recovered. @@ -48,6 +47,8 @@ class SoledadSynchronizer(Synchronizer): Also modified to allow for interrupting the synchronization process. """ + # TODO can delegate the syncing to the api object, living in the reactor + # thread, and use a simple flag. syncing_lock = Lock() def stop(self): @@ -232,6 +233,8 @@ class SoledadSynchronizer(Synchronizer): # release if something in the syncdb-decrypt goes wrong. we could keep # track of the release date and cleanup unrealistic sync entries after # some time. + + # TODO use cancellable deferreds instead locked = self.syncing_lock.locked() return locked diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 651d3ee5..9b546402 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -14,14 +14,10 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . - - """ A U1DB backend for encrypting data before sending to server and decrypting after receiving. 
""" - - import cStringIO import gzip import logging @@ -34,7 +30,7 @@ from time import sleep from uuid import uuid4 import simplejson as json -from taskthread import TimerTask + from u1db import errors from u1db.remote import utils, http_errors from u1db.remote.http_target import HTTPSyncTarget @@ -42,6 +38,8 @@ from u1db.remote.http_client import _encode_query_parameter, HTTPClientBase from zope.proxy import ProxyBase from zope.proxy import sameProxiedObjects, setProxiedObject +from twisted.internet.task import LoopingCall + from leap.soledad.common.document import SoledadDocument from leap.soledad.client.auth import TokenBasedAuth from leap.soledad.client.crypto import is_symmetrically_encrypted @@ -755,7 +753,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): """ Period of recurrence of the periodic decrypting task, in seconds. """ - DECRYPT_TASK_PERIOD = 0.5 + DECRYPT_LOOP_PERIOD = 0.5 # # Modified HTTPSyncTarget methods. @@ -802,7 +800,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._sync_db_write_lock = None self._decryption_callback = None self._sync_decr_pool = None - self._sync_watcher = None + self._sync_loop = None if sync_db and sync_db_write_lock is not None: self._sync_db = sync_db self._sync_db_write_lock = sync_db_write_lock @@ -828,23 +826,22 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._sync_decr_pool.close() self._sync_decr_pool = None - def _setup_sync_watcher(self): + def _setup_sync_loop(self): """ - Set up the sync watcher for deferred decryption. + Set up the sync loop for deferred decryption. """ - if self._sync_watcher is None: - self._sync_watcher = TimerTask( - self._decrypt_syncing_received_docs, - delay=self.DECRYPT_TASK_PERIOD) + if self._sync_loop is None: + self._sync_loop = LoopingCall( + self._decrypt_syncing_received_docs) + self._sync_loop.start(self.DECRYPT_LOOP_PERIOD) - def _teardown_sync_watcher(self): + def _teardown_sync_loop(self): """ - Tear down the sync watcher. 
+ Tear down the sync loop. """ - if self._sync_watcher is not None: - self._sync_watcher.stop() - self._sync_watcher.shutdown() - self._sync_watcher = None + if self._sync_loop is not None: + self._sync_loop.stop() + self._sync_loop = None def _get_replica_uid(self, url): """ @@ -1131,7 +1128,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): if defer_decryption and self._sync_db is not None: self._sync_exchange_lock.acquire() self._setup_sync_decr_pool() - self._setup_sync_watcher() + self._setup_sync_loop() self._defer_decryption = True else: # fall back @@ -1292,10 +1289,10 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # decrypt docs in case of deferred decryption if defer_decryption: - self._sync_watcher.start() + self._sync_loop.start() while self.clear_to_sync() is False: - sleep(self.DECRYPT_TASK_PERIOD) - self._teardown_sync_watcher() + sleep(self.DECRYPT_LOOP_PERIOD) + self._teardown_sync_loop() self._teardown_sync_decr_pool() self._sync_exchange_lock.release() @@ -1460,7 +1457,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): Decrypt the documents received from remote replica and insert them into the local one. - Called periodically from TimerTask self._sync_watcher. + Called periodically from LoopingCall self._sync_loop. """ if sameProxiedObjects( self._insert_doc_cb.get(self.source_replica_uid), -- cgit v1.2.3 From 1ae8f27c622034dc9524dab4b971bf0828966dd1 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 7 Oct 2014 11:32:23 -0300 Subject: Adapt sqlcipher tests to new api. 
--- client/src/leap/soledad/client/sqlcipher.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index a7e9e0fe..c645bb8d 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -502,9 +502,10 @@ class SQLCipherU1DBSync(object): else: sync_db_path = ":memory:" - # --------------------------------------------------------- - # TODO use a separate adbapi for this (sqlcipher only, no u1db) - # We could control that it only has 1 or 2 threads. + # XXX use initialize_sqlcipher_db here too + # TODO pass on_init queries to initialize_sqlcipher_db + self._sync_db = None#MPSafeSQLiteDB(sync_db_path) + pragmas.set_crypto_pragmas(self._sync_db, opts) opts.path = sync_db_path -- cgit v1.2.3 From 71d0ba384b16e5a1d9cfd4ee2b046ff6957f9b4e Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 9 Oct 2014 01:55:58 +0200 Subject: working sync-threadpool * Completed mapping of async dbpool * Fixed shared db initialization. Stuff To Be Fixed yet: [ ] All inserts have to be done from the sync threadpool. Right now we're reusing the connection from multiple threads in the syncer. I'm assuming the writes are automatically locking the file at the sqlite level, so this shouldn't pose a problem. [ ] Correctly handle the multiprocessing pool, and the callback execution. 
--- client/src/leap/soledad/client/adbapi.py | 44 +++------- client/src/leap/soledad/client/api.py | 72 ++++++++++------- client/src/leap/soledad/client/sqlcipher.py | 121 ++++++++++++++++++++++++++-- 3 files changed, 164 insertions(+), 73 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py index 60d9e195..733fce23 100644 --- a/client/src/leap/soledad/client/adbapi.py +++ b/client/src/leap/soledad/client/adbapi.py @@ -24,11 +24,9 @@ import sys from functools import partial -import u1db -from u1db.backends import sqlite_backend - from twisted.enterprise import adbapi from twisted.python import log +from zope.proxy import ProxyBase, setProxiedObject from leap.soledad.client import sqlcipher as soledad_sqlcipher @@ -46,39 +44,9 @@ def getConnectionPool(opts, openfun=None, driver="pysqlcipher"): check_same_thread=False, cp_openfun=openfun) -class U1DBSQLiteBackend(sqlite_backend.SQLitePartialExpandDatabase): - """ - A very simple wrapper for u1db around sqlcipher backend. - - Instead of initializing the database on the fly, it just uses an existing - connection that is passed to it in the initializer. - """ - - def __init__(self, conn): - self._db_handle = conn - self._real_replica_uid = None - self._ensure_schema() - self._factory = u1db.Document - - -class SoledadSQLCipherWrapper(soledad_sqlcipher.SQLCipherDatabase): - """ - A wrapper for u1db that uses the Soledad-extended sqlcipher backend. - - Instead of initializing the database on the fly, it just uses an existing - connection that is passed to it in the initializer. 
- """ - def __init__(self, conn): - self._db_handle = conn - self._real_replica_uid = None - self._ensure_schema() - self.set_document_factory(soledad_sqlcipher.soledad_doc_factory) - self._prime_replica_uid() - - class U1DBConnection(adbapi.Connection): - u1db_wrapper = SoledadSQLCipherWrapper + u1db_wrapper = soledad_sqlcipher.SoledadSQLCipherWrapper def __init__(self, pool, init_u1db=False): self.init_u1db = init_u1db @@ -120,6 +88,9 @@ class U1DBConnectionPool(adbapi.ConnectionPool): # all u1db connections, hashed by thread-id self.u1dbconnections = {} + # The replica uid, primed by the connections on init. + self.replica_uid = ProxyBase(None) + def runU1DBQuery(self, meth, *args, **kw): meth = "u1db_%s" % meth return self.runInteraction(self._runU1DBQuery, meth, *args, **kw) @@ -133,6 +104,11 @@ class U1DBConnectionPool(adbapi.ConnectionPool): u1db = self.u1dbconnections.get(tid) conn = self.connectionFactory(self, init_u1db=not bool(u1db)) + if self.replica_uid is None: + replica_uid = conn._u1db._real_replica_uid + setProxiedObject(self.replica_uid, replica_uid) + print "GOT REPLICA UID IN DBPOOL", self.replica_uid + if u1db is None: self.u1dbconnections[tid] = conn._u1db else: diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 493f6c1d..ff6257b2 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -158,10 +158,10 @@ class Soledad(object): # store config params self._uuid = uuid self._passphrase = passphrase - self._secrets_path = secrets_path self._local_db_path = local_db_path self._server_url = server_url self._defer_encryption = defer_encryption + self._secrets_path = None self.shared_db = None @@ -176,6 +176,8 @@ class Soledad(object): self._init_config_with_defaults() self._init_working_dirs() + self._secrets_path = secrets_path + # Initialize shared recovery database self.init_shared_db(server_url, uuid, self._creds) @@ -193,13 +195,12 @@ class Soledad(object): 
Initialize configuration using default values for missing params. """ soledad_assert_type(self._passphrase, unicode) - initialize = lambda attr, val: attr is None and setattr(attr, val) + initialize = lambda attr, val: getattr( + self, attr, None) is None and setattr(self, attr, val) - # initialize secrets_path - initialize(self._secrets_path, os.path.join( + initialize("_secrets_path", os.path.join( self.default_prefix, self.secrets_file_name)) - # initialize local_db_path - initialize(self._local_db_path, os.path.join( + initialize("_local_db_path", os.path.join( self.default_prefix, self.local_db_file_name)) # initialize server_url soledad_assert(self._server_url is not None, @@ -218,8 +219,8 @@ class Soledad(object): def _init_secrets(self): self._secrets = SoledadSecrets( - self.uuid, self.passphrase, self.secrets_path, - self._shared_db, self._crypto) + self.uuid, self._passphrase, self._secrets_path, + self.shared_db, self._crypto) self._secrets.bootstrap() def _init_u1db_sqlcipher_backend(self): @@ -249,8 +250,11 @@ class Soledad(object): self._dbpool = adbapi.getConnectionPool(opts) def _init_u1db_syncer(self): + replica_uid = self._dbpool.replica_uid + print "replica UID (syncer init)", replica_uid self._dbsyncer = SQLCipherU1DBSync( - self._soledad_opts, self._crypto, self._defer_encryption) + self._soledad_opts, self._crypto, replica_uid, + self._defer_encryption) # # Closing methods @@ -269,6 +273,9 @@ class Soledad(object): # ILocalStorage # + def _defer(self, meth, *args, **kw): + return self._dbpool.runU1DBQuery(meth, *args, **kw) + def put_doc(self, doc): """ ============================== WARNING ============================== @@ -282,58 +289,59 @@ class Soledad(object): # Isn't it better to defend ourselves from the mutability, to avoid # nasty surprises? doc.content = self._convert_to_unicode(doc.content) - return self._dbpool.put_doc(doc) + return self._defer("put_doc", doc) def delete_doc(self, doc): # XXX what does this do when fired??? 
- return self._dbpool.delete_doc(doc) + return self._defer("delete_doc", doc) def get_doc(self, doc_id, include_deleted=False): - return self._dbpool.get_doc(doc_id, include_deleted=include_deleted) + return self._defer( + "get_doc", doc_id, include_deleted=include_deleted) - def get_docs(self, doc_ids, check_for_conflicts=True, - include_deleted=False): - return self._dbpool.get_docs(doc_ids, - check_for_conflicts=check_for_conflicts, - include_deleted=include_deleted) + def get_docs( + self, doc_ids, check_for_conflicts=True, include_deleted=False): + return self._defer( + "get_docs", doc_ids, check_for_conflicts=check_for_conflicts, + include_deleted=include_deleted) def get_all_docs(self, include_deleted=False): - return self._dbpool.get_all_docs(include_deleted) + return self._defer("get_all_docs", include_deleted) def create_doc(self, content, doc_id=None): - return self._dbpool.create_doc( - _convert_to_unicode(content), doc_id=doc_id) + return self._defer( + "create_doc", _convert_to_unicode(content), doc_id=doc_id) def create_doc_from_json(self, json, doc_id=None): - return self._dbpool.create_doc_from_json(json, doc_id=doc_id) + return self._defer("create_doc_from_json", json, doc_id=doc_id) def create_index(self, index_name, *index_expressions): - return self._dbpool.create_index(index_name, *index_expressions) + return self._defer("create_index", index_name, *index_expressions) def delete_index(self, index_name): - return self._dbpool.delete_index(index_name) + return self._defer("delete_index", index_name) def list_indexes(self): - return self._dbpool.list_indexes() + return self._defer("list_indexes") def get_from_index(self, index_name, *key_values): - return self._dbpool.get_from_index(index_name, *key_values) + return self._defer("get_from_index", index_name, *key_values) def get_count_from_index(self, index_name, *key_values): - return self._dbpool.get_count_from_index(index_name, *key_values) + return self._defer("get_count_from_index", 
index_name, *key_values) def get_range_from_index(self, index_name, start_value, end_value): - return self._dbpool.get_range_from_index( - index_name, start_value, end_value) + return self._defer( + "get_range_from_index", index_name, start_value, end_value) def get_index_keys(self, index_name): - return self._dbpool.get_index_keys(index_name) + return self._defer("get_index_keys", index_name) def get_doc_conflicts(self, doc_id): - return self._dbpool.get_doc_conflicts(doc_id) + return self._defer("get_doc_conflicts", doc_id) def resolve_doc(self, doc, conflicted_doc_revs): - return self._dbpool.resolve_doc(doc, conflicted_doc_revs) + return self._defer("resolve_doc", doc, conflicted_doc_revs) def _get_local_db_path(self): return self._local_db_path @@ -460,6 +468,8 @@ class Soledad(object): # def init_shared_db(self, server_url, uuid, creds): + # XXX should assert that server_url begins with https + # Otherwise u1db target will fail. shared_db_url = urlparse.urljoin(server_url, SHARED_DB_NAME) self.shared_db = SoledadSharedDatabase.open_database( shared_db_url, diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index c645bb8d..c8e14176 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -55,10 +55,14 @@ from httplib import CannotSendRequest from pysqlcipher import dbapi2 as sqlcipher_dbapi2 from u1db.backends import sqlite_backend from u1db import errors as u1db_errors +import u1db + +from twisted.internet import reactor from twisted.internet.task import LoopingCall from twisted.internet.threads import deferToThreadPool from twisted.python.threadpool import ThreadPool +from twisted.python import log from leap.soledad.client import crypto from leap.soledad.client.target import SoledadSyncTarget @@ -77,7 +81,7 @@ logger = logging.getLogger(__name__) sqlite_backend.dbapi2 = sqlcipher_dbapi2 -def initialize_sqlcipher_db(opts, on_init=None): +def 
initialize_sqlcipher_db(opts, on_init=None, check_same_thread=True): """ Initialize a SQLCipher database. @@ -97,7 +101,7 @@ def initialize_sqlcipher_db(opts, on_init=None): raise u1db_errors.DatabaseDoesNotExist() conn = sqlcipher_dbapi2.connect( - opts.path) + opts.path, check_same_thread=check_same_thread) set_init_pragmas(conn, opts, extra_queries=on_init) return conn @@ -241,7 +245,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ self._real_replica_uid = None self._get_replica_uid() - print "REPLICA UID --->", self._real_replica_uid def _extra_schema_init(self, c): """ @@ -402,7 +405,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self.close() -class SQLCipherU1DBSync(object): +class SQLCipherU1DBSync(SQLCipherDatabase): _sync_loop = None _sync_enc_pool = None @@ -435,7 +438,13 @@ class SQLCipherU1DBSync(object): def __init__(self, opts, soledad_crypto, replica_uid, defer_encryption=False): + self._opts = opts + self._path = opts.path self._crypto = soledad_crypto + self.__replica_uid = replica_uid + + print "REPLICA UID (u1dbsync init)", replica_uid + self._sync_db_key = opts.sync_db_key self._sync_db = None self._sync_db_write_lock = None @@ -453,9 +462,17 @@ class SQLCipherU1DBSync(object): self._sync_db_write_lock = threading.Lock() self.sync_queue = multiprocessing.Queue() + self.running = False self._sync_threadpool = None self._initialize_sync_threadpool() + self._reactor = reactor + self._reactor.callWhenRunning(self._start) + + self.ready = False + self._db_handle = None + self._initialize_syncer_main_db() + if defer_encryption: self._initialize_sync_db() @@ -476,6 +493,40 @@ class SQLCipherU1DBSync(object): self._sync_loop = LoopingCall(self._encrypt_syncing_docs), self._sync_loop.start(self.ENCRYPT_LOOP_PERIOD) + self.shutdownID = None + + @property + def _replica_uid(self): + return str(self.__replica_uid) + + def _start(self): + if not self.running: + self._sync_threadpool.start() + 
self.shutdownID = self._reactor.addSystemEventTrigger( + 'during', 'shutdown', self.finalClose) + self.running = True + + def _defer_to_sync_threadpool(self, meth, *args, **kwargs): + return deferToThreadPool( + self._reactor, self._sync_threadpool, meth, *args, **kwargs) + + def _initialize_syncer_main_db(self): + + def init_db(): + + # XXX DEBUG --------------------------------------------- + import thread + print "initializing in thread", thread.get_ident() + # XXX DEBUG --------------------------------------------- + + self._db_handle = initialize_sqlcipher_db( + self._opts, check_same_thread=False) + self._real_replica_uid = None + self._ensure_schema() + self.set_document_factory(soledad_doc_factory) + + return self._defer_to_sync_threadpool(init_db) + def _initialize_sync_threadpool(self): """ Initialize a ThreadPool with exactly one thread, that will be used to @@ -556,9 +607,19 @@ class SQLCipherU1DBSync(object): before the synchronisation was performed. :rtype: deferred """ + if not self.ready: + print "not ready yet..." + # XXX --------------------------------------------------------- + # This might happen because the database has not yet been + # initialized (it's deferred to the theadpool). + # A good strategy might involve to return a deferred that will + # callLater this same function after a timeout (deferLater) + # Might want to keep track of retries and cancel too. + # -------------------------------------------------------------- + print "Syncing to...", url kwargs = {'creds': creds, 'autocreate': autocreate, 'defer_decryption': defer_decryption} - return deferToThreadPool(self._sync, url, **kwargs) + return self._defer_to_sync_threadpool(self._sync, url, **kwargs) def _sync(self, url, creds=None, autocreate=True, defer_decryption=True): res = None @@ -568,9 +629,11 @@ class SQLCipherU1DBSync(object): # TODO review, I think this is no longer needed with a 1-thread # threadpool. 
+ log.msg("in _sync") with self._syncer(url, creds=creds) as syncer: # XXX could mark the critical section here... try: + log.msg('syncer sync...') res = syncer.sync(autocreate=autocreate, defer_decryption=defer_decryption) @@ -590,6 +653,9 @@ class SQLCipherU1DBSync(object): """ Interrupt all ongoing syncs. """ + self._defer_to_sync_threadpool(self._stop_sync) + + def _stop_sync(self): for url in self._syncers: _, syncer = self._syncers[url] syncer.stop() @@ -604,13 +670,13 @@ class SQLCipherU1DBSync(object): Because of that, this method blocks until the syncing lock can be acquired. """ - with self.syncing_lock[self.replica_uid]: + with self.syncing_lock[self._path]: syncer = self._get_syncer(url, creds=creds) yield syncer @property def syncing(self): - lock = self.syncing_lock[self.replica_uid] + lock = self.syncing_lock[self._path] acquired_lock = lock.acquire(False) if acquired_lock is False: return True @@ -640,7 +706,8 @@ class SQLCipherU1DBSync(object): syncer = SoledadSynchronizer( self, SoledadSyncTarget(url, - self.replica_uid, + # XXX is the replica_uid ready? + self._replica_uid, creds=creds, crypto=self._crypto, sync_db=self._sync_db, @@ -689,6 +756,14 @@ class SQLCipherU1DBSync(object): # XXX this SHOULD BE a callback return self._get_generation() + def finalClose(self): + """ + This should only be called by the shutdown trigger. + """ + self.shutdownID = None + self._sync_threadpool.stop() + self.running = False + def close(self): """ Close the syncer and syncdb orderly @@ -718,6 +793,36 @@ class SQLCipherU1DBSync(object): self.sync_queue = None +class U1DBSQLiteBackend(sqlite_backend.SQLitePartialExpandDatabase): + """ + A very simple wrapper for u1db around sqlcipher backend. + + Instead of initializing the database on the fly, it just uses an existing + connection that is passed to it in the initializer. 
+ """ + + def __init__(self, conn): + self._db_handle = conn + self._real_replica_uid = None + self._ensure_schema() + self._factory = u1db.Document + + +class SoledadSQLCipherWrapper(SQLCipherDatabase): + """ + A wrapper for u1db that uses the Soledad-extended sqlcipher backend. + + Instead of initializing the database on the fly, it just uses an existing + connection that is passed to it in the initializer. + """ + def __init__(self, conn): + self._db_handle = conn + self._real_replica_uid = None + self._ensure_schema() + self.set_document_factory(soledad_doc_factory) + self._prime_replica_uid() + + def _assert_db_is_encrypted(opts): """ Assert that the sqlcipher file contains an encrypted database. -- cgit v1.2.3 From 133b72e2546ebabb1384583aec313e544aff69e2 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 14 Oct 2014 18:42:08 +0200 Subject: add soledad sync example --- .../leap/soledad/client/examples/soledad_sync.py | 65 ++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 client/src/leap/soledad/client/examples/soledad_sync.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/examples/soledad_sync.py b/client/src/leap/soledad/client/examples/soledad_sync.py new file mode 100644 index 00000000..6d0f6595 --- /dev/null +++ b/client/src/leap/soledad/client/examples/soledad_sync.py @@ -0,0 +1,65 @@ +from leap.bitmask.config.providerconfig import ProviderConfig +from leap.bitmask.crypto.srpauth import SRPAuth +from leap.soledad.client import Soledad + +import logging +logging.basicConfig(level=logging.DEBUG) + + +# EDIT THIS -------------------------------------------- +user = u"USERNAME" +uuid = u"USERUUID" +_pass = u"USERPASS" +server_url = "https://soledad.server.example.org:2323" +# EDIT THIS -------------------------------------------- + +secrets_path = "/tmp/%s.secrets" % uuid +local_db_path = "/tmp/%s.soledad" % uuid +cert_file = "/tmp/cacert.pem" +provider_config = '/tmp/cdev.json' + + +provider = 
ProviderConfig() +provider.load(provider_config) + +soledad = None + + +def printStuff(r): + print r + + +def printErr(err): + logging.exception(err.value) + + +def init_soledad(_): + token = srpauth.get_token() + print "token", token + + global soledad + soledad = Soledad(uuid, _pass, secrets_path, local_db_path, + server_url, cert_file, + auth_token=token, defer_encryption=False) + + def getall(_): + d = soledad.get_all_docs() + return d + + d1 = soledad.create_doc({"test": 42}) + d1.addCallback(getall) + d1.addCallbacks(printStuff, printErr) + + d2 = soledad.sync() + d2.addCallbacks(printStuff, printErr) + d2.addBoth(lambda r: reactor.stop()) + + +srpauth = SRPAuth(provider) + +d = srpauth.authenticate(user, _pass) +d.addCallbacks(init_soledad, printErr) + + +from twisted.internet import reactor +reactor.run() -- cgit v1.2.3 From d19e0d3b3b7a51d2e51800d41f53899254005661 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 16 Oct 2014 15:19:54 +0200 Subject: add syncable property to shared db --- client/src/leap/soledad/client/api.py | 20 ++++++---- client/src/leap/soledad/client/secrets.py | 59 +++++++++++++++++------------ client/src/leap/soledad/client/shared_db.py | 31 ++++++++++++--- 3 files changed, 73 insertions(+), 37 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index ff6257b2..7886f397 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -113,7 +113,7 @@ class Soledad(object): def __init__(self, uuid, passphrase, secrets_path, local_db_path, server_url, cert_file, - auth_token=None, defer_encryption=False): + auth_token=None, defer_encryption=False, syncable=True): """ Initialize configuration, cryptographic keys and dbs. @@ -151,6 +151,11 @@ class Soledad(object): inline while syncing. 
:type defer_encryption: bool + :param syncable: + If set to ``False``, this database will not attempt to synchronize + with remote replicas (default is ``True``) + :type syncable: bool + :raise BootstrapSequenceError: Raised when the secret generation and storage on server sequence has failed for some reason. @@ -179,13 +184,15 @@ class Soledad(object): self._secrets_path = secrets_path # Initialize shared recovery database - self.init_shared_db(server_url, uuid, self._creds) + self.init_shared_db(server_url, uuid, self._creds, syncable=syncable) # The following can raise BootstrapSequenceError, that will be # propagated upwards. self._init_secrets() self._init_u1db_sqlcipher_backend() - self._init_u1db_syncer() + + if syncable: + self._init_u1db_syncer() # # initialization/destruction methods @@ -467,15 +474,14 @@ class Soledad(object): # ISharedSecretsStorage # - def init_shared_db(self, server_url, uuid, creds): - # XXX should assert that server_url begins with https - # Otherwise u1db target will fail. + def init_shared_db(self, server_url, uuid, creds, syncable=True): shared_db_url = urlparse.urljoin(server_url, SHARED_DB_NAME) self.shared_db = SoledadSharedDatabase.open_database( shared_db_url, uuid, creds=creds, - create=False) # db should exist at this point. + create=False, # db should exist at this point. + syncable=syncable) def _set_secrets_path(self, secrets_path): self._secrets.secrets_path = secrets_path diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 93f8c25d..81ccb114 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -289,9 +289,12 @@ class SoledadSecrets(object): :raises BootstrapSequenceError: Raised when unable to store secrets in shared database. 
""" - doc = self._get_secrets_from_shared_db() + if self._shared_db.syncable: + doc = self._get_secrets_from_shared_db() + else: + doc = None - if doc: + if doc is not None: logger.info( 'Found cryptographic secrets in shared recovery ' 'database.') @@ -308,21 +311,24 @@ class SoledadSecrets(object): 'No cryptographic secrets found, creating new ' ' secrets...') self.set_secret_id(self._gen_secret()) - try: - self._put_secrets_in_shared_db() - except Exception as ex: - # storing generated secret in shared db failed for - # some reason, so we erase the generated secret and - # raise. + + if self._shared_db.syncable: try: - os.unlink(self._secrets_path) - except OSError as e: - if e.errno != errno.ENOENT: # no such file or directory - logger.exception(e) - logger.exception(ex) - raise BootstrapSequenceError( - 'Could not store generated secret in the shared ' - 'database, bailing out...') + self._put_secrets_in_shared_db() + except Exception as ex: + # storing generated secret in shared db failed for + # some reason, so we erase the generated secret and + # raise. + try: + os.unlink(self._secrets_path) + except OSError as e: + if e.errno != errno.ENOENT: + # no such file or directory + logger.exception(e) + logger.exception(ex) + raise BootstrapSequenceError( + 'Could not store generated secret in the shared ' + 'database, bailing out...') # # Shared DB related methods @@ -434,7 +440,8 @@ class SoledadSecrets(object): 'contents.') # include secrets in the secret pool. 
secret_count = 0 - for secret_id, encrypted_secret in data[self.STORAGE_SECRETS_KEY].items(): + secrets = data[self.STORAGE_SECRETS_KEY].items() + for secret_id, encrypted_secret in secrets: if secret_id not in self._secrets: try: self._secrets[secret_id] = \ @@ -664,8 +671,8 @@ class SoledadSecrets(object): self._secrets_path = secrets_path secrets_path = property( - _get_secrets_path, - _set_secrets_path, + _get_secrets_path, + _set_secrets_path, doc='The path for the file containing the encrypted symmetric secret.') @property @@ -689,7 +696,7 @@ class SoledadSecrets(object): Return the secret for remote storage. """ key_start = 0 - key_end = self.REMOTE_STORAGE_SECRET_LENGTH + key_end = self.REMOTE_STORAGE_SECRET_LENGTH return self.storage_secret[key_start:key_end] # @@ -703,8 +710,10 @@ class SoledadSecrets(object): :return: The local storage secret. :rtype: str """ - pwd_start = self.REMOTE_STORAGE_SECRET_LENGTH + self.SALT_LENGTH - pwd_end = self.REMOTE_STORAGE_SECRET_LENGTH + self.LOCAL_STORAGE_SECRET_LENGTH + secret_len = self.REMOTE_STORAGE_SECRET_LENGTH + lsecret_len = self.LOCAL_STORAGE_SECRET_LENGTH + pwd_start = secret_len + self.SALT_LENGTH + pwd_end = secret_len + lsecret_len return self.storage_secret[pwd_start:pwd_end] def _get_local_storage_salt(self): @@ -731,9 +740,9 @@ class SoledadSecrets(object): buflen=32, # we need a key with 256 bits (32 bytes) ) - # - # sync db key - # + # + # sync db key + # def _get_sync_db_salt(self): """ diff --git a/client/src/leap/soledad/client/shared_db.py b/client/src/leap/soledad/client/shared_db.py index 31c4e8e8..7ec71991 100644 --- a/client/src/leap/soledad/client/shared_db.py +++ b/client/src/leap/soledad/client/shared_db.py @@ -26,6 +26,9 @@ from leap.soledad.client.auth import TokenBasedAuth # Soledad shared database # ---------------------------------------------------------------------------- +# TODO could have a hierarchy of soledad exceptions. 
+ + class NoTokenForAuth(Exception): """ No token was found for token-based authentication. @@ -38,6 +41,12 @@ class Unauthorized(Exception): """ +class ImproperlyConfiguredError(Exception): + """ + Wrong parameters in the database configuration. + """ + + class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): """ This is a shared recovery database that enables users to store their @@ -46,6 +55,8 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): # TODO: prevent client from messing with the shared DB. # TODO: define and document API. + syncable = True + # # Token auth methods. # @@ -82,7 +93,7 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): # @staticmethod - def open_database(url, uuid, create, creds=None): + def open_database(url, uuid, create, creds=None, syncable=True): # TODO: users should not be able to create the shared database, so we # have to remove this from here in the future. """ @@ -101,8 +112,13 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): :return: The shared database in the given url. :rtype: SoledadSharedDatabase """ + if syncable and not url.startswith('https://'): + raise ImproperlyConfiguredError( + "Remote soledad server must be an https URI") db = SoledadSharedDatabase(url, uuid, creds=creds) - db.open(create) + db.syncable = syncable + if syncable: + db.open(create) return db @staticmethod @@ -145,9 +161,14 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): :raise HTTPError: Raised if any HTTP error occurs. """ - res, headers = self._request_json('PUT', ['lock', self._uuid], - body={}) - return res['token'], res['timeout'] + # TODO ----- if the shared_db is not syncable, should not + # attempt to resolve. 
+ if self.syncable: + res, headers = self._request_json( + 'PUT', ['lock', self._uuid], body={}) + return res['token'], res['timeout'] + else: + return None, None def unlock(self, token): """ -- cgit v1.2.3 From 092f8b784a260121254a20235fa7aa41b05212e1 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 21 Oct 2014 10:16:43 +0200 Subject: minor naming/documentation fixes after drebs review --- client/src/leap/soledad/client/adbapi.py | 8 ++-- client/src/leap/soledad/client/api.py | 52 ++-------------------- .../src/leap/soledad/client/examples/use_adbapi.py | 4 +- client/src/leap/soledad/client/examples/use_api.py | 4 +- client/src/leap/soledad/client/interfaces.py | 15 ++++--- client/src/leap/soledad/client/shared_db.py | 9 +++- client/src/leap/soledad/client/sqlcipher.py | 35 ++++++++------- 7 files changed, 47 insertions(+), 80 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py index 733fce23..0cdc90eb 100644 --- a/client/src/leap/soledad/client/adbapi.py +++ b/client/src/leap/soledad/client/adbapi.py @@ -86,7 +86,7 @@ class U1DBConnectionPool(adbapi.ConnectionPool): def __init__(self, *args, **kwargs): adbapi.ConnectionPool.__init__(self, *args, **kwargs) # all u1db connections, hashed by thread-id - self.u1dbconnections = {} + self._u1dbconnections = {} # The replica uid, primed by the connections on init. 
self.replica_uid = ProxyBase(None) @@ -101,7 +101,7 @@ class U1DBConnectionPool(adbapi.ConnectionPool): def _runInteraction(self, interaction, *args, **kw): tid = self.threadID() - u1db = self.u1dbconnections.get(tid) + u1db = self._u1dbconnections.get(tid) conn = self.connectionFactory(self, init_u1db=not bool(u1db)) if self.replica_uid is None: @@ -110,7 +110,7 @@ class U1DBConnectionPool(adbapi.ConnectionPool): print "GOT REPLICA UID IN DBPOOL", self.replica_uid if u1db is None: - self.u1dbconnections[tid] = conn._u1db + self._u1dbconnections[tid] = conn._u1db else: conn._u1db = u1db @@ -134,6 +134,6 @@ class U1DBConnectionPool(adbapi.ConnectionPool): self.running = False for conn in self.connections.values(): self._close(conn) - for u1db in self.u1dbconnections.values(): + for u1db in self._u1dbconnections.values(): self._close(u1db) self.connections.clear() diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 7886f397..00884a12 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -105,7 +105,7 @@ class Soledad(object): """ implements(soledad_interfaces.ILocalStorage, soledad_interfaces.ISyncableStorage, - soledad_interfaces.ISharedSecretsStorage) + soledad_interfaces.ISecretsStorage) local_db_file_name = 'soledad.u1db' secrets_file_name = "soledad.json" @@ -292,10 +292,7 @@ class Soledad(object): call. ============================== WARNING ============================== """ - # TODO what happens with this warning during the deferred life cycle? - # Isn't it better to defend ourselves from the mutability, to avoid - # nasty surprises? 
- doc.content = self._convert_to_unicode(doc.content) + doc.content = _convert_to_unicode(doc.content) return self._defer("put_doc", doc) def delete_doc(self, doc): @@ -388,7 +385,7 @@ class Soledad(object): soledad_events.SOLEDAD_DONE_DATA_SYNC, self.uuid) return local_gen - sync_url = urlparse.urljoin(self.server_url, 'user-%s' % self.uuid) + sync_url = urlparse.urljoin(self._server_url, 'user-%s' % self.uuid) try: d = self._dbsyncer.sync( sync_url, @@ -405,29 +402,6 @@ class Soledad(object): def stop_sync(self): self._dbsyncer.stop_sync() - # FIXME ------------------------------------------------------- - # review if we really need this. I think that we can the sync - # fail silently if nothing is to be synced. - #def need_sync(self, url): - # XXX dispatch this method in the dbpool ................. - #replica_uid = self._dbpool.replica_uid - #target = SoledadSyncTarget( - #url, replica_uid, creds=self._creds, crypto=self._crypto) -# - # XXX does it matter if we get this from the general dbpool or the - # syncer pool? - #generation = self._dbpool.get_generation() -# - # XXX better unpack it? - #info = target.get_sync_info(replica_uid) -# - # compare source generation with target's last known source generation - #if generation != info[4]: - #soledad_events.signal( - #soledad_events.SOLEDAD_NEW_DATA_TO_SYNC, self.uuid) - #return True - #return False - @property def syncing(self): return self._dbsyncer.syncing @@ -463,15 +437,8 @@ class Soledad(object): token = property(_get_token, _set_token, doc='The authentication Token.') - def _get_server_url(self): - return self._server_url - - server_url = property( - _get_server_url, - doc='The URL of the Soledad server.') - # - # ISharedSecretsStorage + # ISecretsStorage # def init_shared_db(self, server_url, uuid, creds, syncable=True): @@ -483,17 +450,6 @@ class Soledad(object): create=False, # db should exist at this point. 
syncable=syncable) - def _set_secrets_path(self, secrets_path): - self._secrets.secrets_path = secrets_path - - def _get_secrets_path(self): - return self._secrets.secrets_path - - secrets_path = property( - _get_secrets_path, - _set_secrets_path, - doc='The path for the file containing the encrypted symmetric secret.') - @property def storage_secret(self): """ diff --git a/client/src/leap/soledad/client/examples/use_adbapi.py b/client/src/leap/soledad/client/examples/use_adbapi.py index d3ee8527..d7bd21f2 100644 --- a/client/src/leap/soledad/client/examples/use_adbapi.py +++ b/client/src/leap/soledad/client/examples/use_adbapi.py @@ -88,10 +88,10 @@ def allDone(_): reactor.stop() deferreds = [] +payload = open('manifest.phk').read() for i in range(times): - doc = {"number": i, - "payload": open('manifest.phk').read()} + doc = {"number": i, "payload": payload} d = createDoc(doc) d.addCallbacks(printResult, lambda e: e.printTraceback()) deferreds.append(d) diff --git a/client/src/leap/soledad/client/examples/use_api.py b/client/src/leap/soledad/client/examples/use_api.py index 4268fe71..e2501c98 100644 --- a/client/src/leap/soledad/client/examples/use_api.py +++ b/client/src/leap/soledad/client/examples/use_api.py @@ -52,10 +52,10 @@ db = sqlcipher.SQLCipherDatabase(opts) def allDone(): debug("ALL DONE!") +payload = open('manifest.phk').read() for i in range(times): - doc = {"number": i, - "payload": open('manifest.phk').read()} + doc = {"number": i, "payload": payload} d = db.create_doc(doc) debug(d.doc_id, d.content['number']) diff --git a/client/src/leap/soledad/client/interfaces.py b/client/src/leap/soledad/client/interfaces.py index 6bd3f200..4f7b0779 100644 --- a/client/src/leap/soledad/client/interfaces.py +++ b/client/src/leap/soledad/client/interfaces.py @@ -22,7 +22,8 @@ from zope.interface import Interface, Attribute class ILocalStorage(Interface): """ - I implement core methods for the u1db local storage. 
+ I implement core methods for the u1db local storage of documents and + indexes. """ local_db_path = Attribute( "The path for the local database replica") @@ -285,7 +286,6 @@ class ISyncableStorage(Interface): I implement methods to synchronize with a remote replica. """ replica_uid = Attribute("The uid of the local replica") - server_url = Attribute("The URL of the Soledad server.") syncing = Attribute( "Property, True if the syncer is syncing.") token = Attribute("The authentication Token.") @@ -317,12 +317,11 @@ class ISyncableStorage(Interface): """ -class ISharedSecretsStorage(Interface): +class ISecretsStorage(Interface): """ - I implement methods needed for the Shared Recovery Database. + I implement methods needed for initializing and accessing secrets, that are + synced against the Shared Recovery Database. """ - secrets_path = Attribute( - "Path for storing encrypted key used for symmetric encryption.") secrets_file_name = Attribute( "The name of the file where the storage secrets will be stored") @@ -332,7 +331,9 @@ class ISharedSecretsStorage(Interface): # XXX this used internally from secrets, so it might be good to preserve # as a public boundary with other components. - secrets = Attribute("") + + # We should also probably document its interface. + secrets = Attribute("A SoledadSecrets object containing access to secrets") def init_shared_db(self, server_url, uuid, creds): """ diff --git a/client/src/leap/soledad/client/shared_db.py b/client/src/leap/soledad/client/shared_db.py index 7ec71991..77a7db68 100644 --- a/client/src/leap/soledad/client/shared_db.py +++ b/client/src/leap/soledad/client/shared_db.py @@ -55,6 +55,8 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): # TODO: prevent client from messing with the shared DB. # TODO: define and document API. + # If syncable is False, the database will not attempt to sync against + # a remote replica. Default is True. 
syncable = True # @@ -109,6 +111,11 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): :param token: An authentication token for accessing the shared db. :type token: str + :param syncable: + If syncable is False, the database will not attempt to sync against + a remote replica. + :type syncable: bool + :return: The shared database in the given url. :rtype: SoledadSharedDatabase """ @@ -161,8 +168,6 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): :raise HTTPError: Raised if any HTTP error occurs. """ - # TODO ----- if the shared_db is not syncable, should not - # attempt to resolve. if self.syncable: res, headers = self._request_json( 'PUT', ['lock', self._uuid], body={}) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index c8e14176..323d78f1 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -69,7 +69,6 @@ from leap.soledad.client.target import SoledadSyncTarget from leap.soledad.client.target import PendingReceivedDocsSyncError from leap.soledad.client.sync import SoledadSynchronizer -# TODO use adbapi too from leap.soledad.client import pragmas from leap.soledad.common import soledad_assert from leap.soledad.common.document import SoledadDocument @@ -115,7 +114,7 @@ def set_init_pragmas(conn, opts=None, extra_queries=None): This includes the crypto pragmas, and any other options that must be passed early to sqlcipher db. """ - assert opts is not None + soledad_assert(opts is not None) extra_queries = [] if extra_queries is None else extra_queries with _db_init_lock: # only one execution path should initialize the db @@ -196,8 +195,8 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ defer_encryption = False - # The attribute _index_storage_value will be used as the lookup key. 
- # Here we extend it with `encrypted` + # The attribute _index_storage_value will be used as the lookup key for the + # implementation of the SQLCipher storage backend. _index_storage_value = 'expand referenced encrypted' def __init__(self, opts): @@ -227,7 +226,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): # TODO --------------------------------------------------- # Everything else in this initialization has to be factored - # out, so it can be used from U1DBSqlcipherWrapper __init__ + # out, so it can be used from SoledadSQLCipherWrapper.__init__ # too. # --------------------------------------------------------- @@ -406,6 +405,9 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): class SQLCipherU1DBSync(SQLCipherDatabase): + """ + Soledad syncer implementation. + """ _sync_loop = None _sync_enc_pool = None @@ -454,7 +456,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # we store syncers in a dictionary indexed by the target URL. We also # store a hash of the auth info in case auth info expires and we need # to rebuild the syncer for that target. The final self._syncers - # format is the following:: + # format is the following: # # self._syncers = {'': ('', syncer), ...} @@ -514,10 +516,12 @@ class SQLCipherU1DBSync(SQLCipherDatabase): def init_db(): - # XXX DEBUG --------------------------------------------- - import thread - print "initializing in thread", thread.get_ident() - # XXX DEBUG --------------------------------------------- + # XXX DEBUG ----------------------------------------- + # REMOVE ME when merging. 
+ + #import thread + #print "initializing in thread", thread.get_ident() + # --------------------------------------------------- self._db_handle = initialize_sqlcipher_db( self._opts, check_same_thread=False) @@ -553,11 +557,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): else: sync_db_path = ":memory:" - # XXX use initialize_sqlcipher_db here too - # TODO pass on_init queries to initialize_sqlcipher_db - self._sync_db = None#MPSafeSQLiteDB(sync_db_path) - pragmas.set_crypto_pragmas(self._sync_db, opts) - opts.path = sync_db_path self._sync_db = initialize_sqlcipher_db( @@ -799,6 +798,9 @@ class U1DBSQLiteBackend(sqlite_backend.SQLitePartialExpandDatabase): Instead of initializing the database on the fly, it just uses an existing connection that is passed to it in the initializer. + + It can be used in tests and debug runs to initialize the adbapi with plain + sqlite connections, decoupled from the sqlcipher layer. """ def __init__(self, conn): @@ -814,6 +816,9 @@ class SoledadSQLCipherWrapper(SQLCipherDatabase): Instead of initializing the database on the fly, it just uses an existing connection that is passed to it in the initializer. + + It can be used from adbapi to initialize a soledad database after + getting a regular connection to a sqlcipher database. """ def __init__(self, conn): self._db_handle = conn -- cgit v1.2.3 From d25527ac06563f061aee7771d494522b3ed58b7d Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 18 Nov 2014 14:14:42 -0200 Subject: Save active secret on recovery document. 
--- client/src/leap/soledad/client/secrets.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 81ccb114..b0e54220 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -132,6 +132,7 @@ class SoledadSecrets(object): UUID_KEY = 'uuid' STORAGE_SECRETS_KEY = 'storage_secrets' + ACTIVE_SECRET_KEY = 'active_secret' SECRET_KEY = 'secret' CIPHER_KEY = 'cipher' LENGTH_KEY = 'length' @@ -265,10 +266,13 @@ class SoledadSecrets(object): content = None with open(self._secrets_path, 'r') as f: content = json.loads(f.read()) - _, mac = self._import_recovery_document(content) + _, mac, active_secret = self._import_recovery_document(content) # choose first secret if no secret_id was given if self._secret_id is None: - self.set_secret_id(self._secrets.items()[0][0]) + if active_secret is None: + self.set_secret_id(self._secrets.items()[0][0]) + else: + self.set_secret_id(active_secret) # enlarge secret if needed enlarged = False if len(self._secrets[self._secret_id]) < self.GEN_SECRET_LENGTH: @@ -298,12 +302,15 @@ class SoledadSecrets(object): logger.info( 'Found cryptographic secrets in shared recovery ' 'database.') - _, mac = self._import_recovery_document(doc.content) + _, mac, active_secret = self._import_recovery_document(doc.content) if mac is False: self.put_secrets_in_shared_db() self._store_secrets() # save new secrets in local file if self._secret_id is None: - self.set_secret_id(self._secrets.items()[0][0]) + if active_secret is None: + self.set_secret_id(self._secrets.items()[0][0]) + else: + self.set_secret_id(active_secret) else: # STAGE 3 - there are no secrets in server also, so # generate a secret and store it in remote db. 
@@ -363,6 +370,7 @@ class SoledadSecrets(object): 'secret': '', }, }, + 'active_secret': '', 'kdf': 'scrypt', 'kdf_salt': '', 'kdf_length: , @@ -388,6 +396,7 @@ class SoledadSecrets(object): # create the recovery document data = { self.STORAGE_SECRETS_KEY: encrypted_secrets, + self.ACTIVE_SECRET_KEY: self._secret_id, self.KDF_KEY: self.KDF_SCRYPT, self.KDF_SALT_KEY: binascii.b2a_base64(salt), self.KDF_LENGTH_KEY: len(key), @@ -410,8 +419,9 @@ class SoledadSecrets(object): :param data: The recovery document. :type data: dict - :return: A tuple containing the number of imported secrets and whether - there was MAC informationa available for authenticating. + :return: A tuple containing the number of imported secrets, whether + there was MAC information available for authenticating, and + the secret_id of the last active secret. :rtype: (int, bool) """ soledad_assert(self.STORAGE_SECRETS_KEY in data) @@ -441,6 +451,11 @@ class SoledadSecrets(object): # include secrets in the secret pool. secret_count = 0 secrets = data[self.STORAGE_SECRETS_KEY].items() + active_secret = None + # XXX remove check for existence of key (included for backwards + # compatibility) + if self.ACTIVE_SECRET_KEY in data: + active_secret = data[self.ACTIVE_SECRET_KEY] for secret_id, encrypted_secret in secrets: if secret_id not in self._secrets: try: @@ -450,7 +465,7 @@ class SoledadSecrets(object): except SecretsException as e: logger.error("Failed to decrypt storage secret: %s" % str(e)) - return secret_count, mac + return secret_count, mac, active_secret def _get_secrets_from_shared_db(self): """ -- cgit v1.2.3 From 8b3982ada921af765e7ede7dd3c77ef3fbf075f1 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 18 Nov 2014 14:21:58 -0200 Subject: Standardize export of secrets to avoid miscalculation of MAC. 
--- client/src/leap/soledad/client/secrets.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index b0e54220..af781a26 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -403,7 +403,7 @@ class SoledadSecrets(object): crypto.MAC_METHOD_KEY: crypto.MacMethods.HMAC, crypto.MAC_KEY: hmac.new( key, - json.dumps(encrypted_secrets), + json.dumps(encrypted_secrets, sort_keys=True), sha256).hexdigest(), } return data @@ -440,7 +440,8 @@ class SoledadSecrets(object): buflen=32) mac = hmac.new( key, - json.dumps(data[self.STORAGE_SECRETS_KEY]), + json.dumps( + data[self.STORAGE_SECRETS_KEY], sort_keys=True), sha256).hexdigest() else: raise crypto.UnknownMacMethodError('Unknown MAC method: %s.' % -- cgit v1.2.3 From 4b317c9f5a9033afaa7435e11f761de4bc3095a3 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 18 Nov 2014 15:33:23 -0200 Subject: Fix interruptable sync. --- client/src/leap/soledad/client/target.py | 34 ++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 9b546402..ba61cdff 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -348,7 +348,7 @@ class DocumentSyncerPool(object): self._threads.remove(syncer_thread) self._semaphore_pool.release() - def cancel_threads(self, calling_thread): + def cancel_threads(self): """ Stop all threads in the pool. 
""" @@ -794,6 +794,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._sync_exchange_lock = threading.Lock() self.source_replica_uid = source_replica_uid self._defer_decryption = False + self._syncer_pool = None # deferred decryption attributes self._sync_db = None @@ -952,7 +953,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): def _get_remote_docs(self, url, last_known_generation, last_known_trans_id, headers, return_doc_cb, ensure_callback, sync_id, - syncer_pool, defer_decryption=False): + defer_decryption=False): """ Fetch sync documents from the remote database and insert them in the local database. @@ -1013,7 +1014,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): break # launch a thread to fetch one document from target - t = syncer_pool.new_syncer_thread( + t = self._syncer_pool.new_syncer_thread( idx, number_of_changes, last_callback_lock=last_callback_lock) @@ -1047,6 +1048,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): t.join() if t.success: number_of_changes, _, _ = t.result + else: + raise t.exception first_request = False # make sure all threads finished and we have up-to-date info @@ -1057,6 +1060,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): t.join() if t.success: last_successful_thread = t + else: + raise t.exception # get information about last successful thread if last_successful_thread is not None: @@ -1162,9 +1167,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): logger.debug("Soledad sync send status: %s" % msg) defer_encryption = self._sync_db is not None - syncer_pool = DocumentSyncerPool( + self._syncer_pool = DocumentSyncerPool( self._raw_url, self._raw_creds, url, headers, ensure_callback, - self.stop) + self.stop_syncer) threads = [] last_callback_lock = None sent = 0 @@ -1209,7 +1214,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # ------------------------------------------------------------- # end of symmetric encryption # 
------------------------------------------------------------- - t = syncer_pool.new_syncer_thread( + t = self._syncer_pool.new_syncer_thread( sent + 1, total, last_request_lock=last_request_lock, last_callback_lock=last_callback_lock) @@ -1264,6 +1269,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): if t.success: synced.append((doc.doc_id, doc.rev)) last_successful_thread = t + else: + raise t.exception # delete documents from the sync database if defer_encryption: @@ -1282,10 +1289,10 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): cur_target_gen, cur_target_trans_id = self._get_remote_docs( url, last_known_generation, last_known_trans_id, headers, - return_doc_cb, ensure_callback, sync_id, syncer_pool, + return_doc_cb, ensure_callback, sync_id, defer_decryption=defer_decryption) - syncer_pool.cleanup() + self._syncer_pool.cleanup() # decrypt docs in case of deferred decryption if defer_decryption: @@ -1303,6 +1310,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): cur_target_trans_id = trans_id_after_send self.stop() + self._syncer_pool = None return cur_target_gen, cur_target_trans_id def start(self): @@ -1312,6 +1320,11 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): with self._stop_lock: self._stopped = False + + def stop_syncer(self): + with self._stop_lock: + self._stopped = True + def stop(self): """ Mark current sync session as stopped. @@ -1320,8 +1333,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): enough information to the synchronizer so the sync session can be recovered afterwards. """ - with self._stop_lock: - self._stopped = True + self.stop_syncer() + if self._syncer_pool: + self._syncer_pool.cancel_threads() @property def stopped(self): -- cgit v1.2.3 From df28f2f99248bdff1a1704e9f6afff7e063d30e9 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 25 Nov 2014 15:38:27 -0200 Subject: Several fixes in soledad api. * Allow passing shared_db to Soledad constructor. 
* Close syncers on Soledad close. * Fix docstrings. --- client/src/leap/soledad/client/api.py | 373 +++++++++++++++++++++++++++++++--- 1 file changed, 340 insertions(+), 33 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 00884a12..59cbc4ca 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -46,6 +46,7 @@ from zope.interface import implements from twisted.python import log from leap.common.config import get_path_prefix + from leap.soledad.common import SHARED_DB_NAME from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type @@ -112,7 +113,7 @@ class Soledad(object): default_prefix = os.path.join(get_path_prefix(), 'leap', 'soledad') def __init__(self, uuid, passphrase, secrets_path, local_db_path, - server_url, cert_file, + server_url, cert_file, shared_db=None, auth_token=None, defer_encryption=False, syncable=True): """ Initialize configuration, cryptographic keys and dbs. @@ -142,6 +143,10 @@ class Soledad(object): certificate used by the remote soledad server. :type cert_file: str + :param shared_db: + The shared database. + :type shared_db: HTTPDatabase + :param auth_token: Authorization token for accessing remote databases. :type auth_token: str @@ -157,8 +162,9 @@ class Soledad(object): :type syncable: bool :raise BootstrapSequenceError: - Raised when the secret generation and storage on server sequence - has failed for some reason. + Raised when the secret initialization sequence (i.e. retrieval + from server or generation and storage on server) has failed for + some reason. 
""" # store config params self._uuid = uuid @@ -168,7 +174,7 @@ class Soledad(object): self._defer_encryption = defer_encryption self._secrets_path = None - self.shared_db = None + self.shared_db = shared_db # configure SSL certificate global SOLEDAD_CERT @@ -225,6 +231,9 @@ class Soledad(object): create_path_if_not_exists(path) def _init_secrets(self): + """ + Initialize Soledad secrets. + """ self._secrets = SoledadSecrets( self.uuid, self._passphrase, self._secrets_path, self.shared_db, self._crypto) @@ -232,8 +241,9 @@ class Soledad(object): def _init_u1db_sqlcipher_backend(self): """ - Initialize the U1DB SQLCipher database for local storage, by - instantiating a modified twisted adbapi that will maintain a threadpool + Initialize the U1DB SQLCipher database for local storage. + + Instantiates a modified twisted adbapi that will maintain a threadpool with a u1db-sqclipher connection for each thread, and will return deferreds for each u1db query. @@ -253,14 +263,16 @@ class Soledad(object): defer_encryption=self._defer_encryption, sync_db_key=sync_db_key, ) - self._soledad_opts = opts + self._sqlcipher_opts = opts self._dbpool = adbapi.getConnectionPool(opts) def _init_u1db_syncer(self): + """ + Initialize the U1DB synchronizer. + """ replica_uid = self._dbpool.replica_uid - print "replica UID (syncer init)", replica_uid self._dbsyncer = SQLCipherU1DBSync( - self._soledad_opts, self._crypto, replica_uid, + self._sqlcipher_opts, self._crypto, replica_uid, self._defer_encryption) # @@ -273,99 +285,351 @@ class Soledad(object): """ logger.debug("Closing soledad") self._dbpool.close() - - # TODO close syncers >>>>>> + self._dbsyncer.close() # # ILocalStorage # def _defer(self, meth, *args, **kw): + """ + Defer a method to be run on a U1DB connection pool. + + :param meth: A method to defer to the U1DB connection pool. + :type meth: callable + :return: A deferred. 
+ :rtype: twisted.internet.defer.Deferred + """ return self._dbpool.runU1DBQuery(meth, *args, **kw) def put_doc(self, doc): """ + Update a document. + + If the document currently has conflicts, put will fail. + If the database specifies a maximum document size and the document + exceeds it, put will fail and raise a DocumentTooBig exception. + ============================== WARNING ============================== This method converts the document's contents to unicode in-place. This means that after calling `put_doc(doc)`, the contents of the document, i.e. `doc.content`, might be different from before the call. ============================== WARNING ============================== + + :param doc: A document with new content. + :type doc: leap.soledad.common.document.SoledadDocument + :return: A deferred whose callback will be invoked with the new + revision identifier for the document. The document object will + also be updated. + :rtype: twisted.internet.defer.Deferred """ doc.content = _convert_to_unicode(doc.content) return self._defer("put_doc", doc) def delete_doc(self, doc): - # XXX what does this do when fired??? + """ + Mark a document as deleted. + + Will abort if the current revision doesn't match doc.rev. + This will also set doc.content to None. + + :param doc: A document to be deleted. + :type doc: leap.soledad.common.document.SoledadDocument + :return: A deferred. + :rtype: twisted.internet.defer.Deferred + """ return self._defer("delete_doc", doc) def get_doc(self, doc_id, include_deleted=False): + """ + Get the JSON string for the given document. + + :param doc_id: The unique document identifier + :type doc_id: str + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise asking for a deleted + document will return None. + :type include_deleted: bool + :return: A deferred whose callback will be invoked with a document + object. 
+ :rtype: twisted.internet.defer.Deferred + """ return self._defer( "get_doc", doc_id, include_deleted=include_deleted) def get_docs( self, doc_ids, check_for_conflicts=True, include_deleted=False): + """ + Get the JSON content for many documents. + + :param doc_ids: A list of document identifiers. + :type doc_ids: list + :param check_for_conflicts: If set to False, then the conflict check + will be skipped, and 'None' will be returned instead of True/False. + :type check_for_conflicts: bool + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted documents will not + be included in the results. + :type include_deleted: bool + :return: A deferred whose callback will be invoked with an iterable + giving the document object for each document id in matching + doc_ids order. + :rtype: twisted.internet.defer.Deferred + """ return self._defer( "get_docs", doc_ids, check_for_conflicts=check_for_conflicts, include_deleted=include_deleted) def get_all_docs(self, include_deleted=False): + """ + Get the JSON content for all documents in the database. + + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted documents will not + be included in the results. + :type include_deleted: bool + + :return: A deferred which, when fired, will pass the a tuple + containing (generation, [Document]) to the callback, with the + current generation of the database, followed by a list of all the + documents in the database. + :rtype: twisted.internet.defer.Deferred + """ return self._defer("get_all_docs", include_deleted) def create_doc(self, content, doc_id=None): + """ + Create a new document. + + You can optionally specify the document identifier, but the document + must not already exist. See 'put_doc' if you want to override an + existing document. 
+ If the database specifies a maximum document size and the document + exceeds it, create will fail and raise a DocumentTooBig exception. + + :param content: A Python dictionary. + :type content: dict + :param doc_id: An optional identifier specifying the document id. + :type doc_id: str + :return: A deferred whose callback will be invoked with a document. + :rtype: twisted.internet.defer.Deferred + """ return self._defer( "create_doc", _convert_to_unicode(content), doc_id=doc_id) def create_doc_from_json(self, json, doc_id=None): + """ + Create a new document. + + You can optionally specify the document identifier, but the document + must not already exist. See 'put_doc' if you want to override an + existing document. + If the database specifies a maximum document size and the document + exceeds it, create will fail and raise a DocumentTooBig exception. + + :param json: The JSON document string + :type json: dict + :param doc_id: An optional identifier specifying the document id. + :type doc_id: str + :return: A deferred whose callback will be invoked with a document. + :rtype: twisted.internet.defer.Deferred + """ return self._defer("create_doc_from_json", json, doc_id=doc_id) def create_index(self, index_name, *index_expressions): + """ + Create a named index, which can then be queried for future lookups. + + Creating an index which already exists is not an error, and is cheap. + Creating an index which does not match the index_expressions of the + existing index is an error. + Creating an index will block until the expressions have been evaluated + and the index generated. + + :param index_name: A unique name which can be used as a key prefix + :type index_name: str + :param index_expressions: index expressions defining the index + information. + + Examples: + + "fieldname", or "fieldname.subfieldname" to index alphabetically + sorted on the contents of a field. 
+ + "number(fieldname, width)", "lower(fieldname)" + :type index_expresions: list of str + :return: A deferred. + :rtype: twisted.internet.defer.Deferred + """ return self._defer("create_index", index_name, *index_expressions) def delete_index(self, index_name): + """ + Remove a named index. + + :param index_name: The name of the index we are removing + :type index_name: str + :return: A deferred. + :rtype: twisted.internet.defer.Deferred + """ return self._defer("delete_index", index_name) def list_indexes(self): + """ + List the definitions of all known indexes. + + :return: A deferred whose callback will be invoked with a list of + [('index-name', ['field', 'field2'])] definitions. + :rtype: twisted.internet.defer.Deferred + """ return self._defer("list_indexes") def get_from_index(self, index_name, *key_values): + """ + Return documents that match the keys supplied. + + You must supply exactly the same number of values as have been defined + in the index. It is possible to do a prefix match by using '*' to + indicate a wildcard match. You can only supply '*' to trailing entries, + (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) + It is also possible to append a '*' to the last supplied value (eg + 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: list + :return: A deferred whose callback will be invoked with a list of + [Document]. + :rtype: twisted.internet.defer.Deferred + """ return self._defer("get_from_index", index_name, *key_values) def get_count_from_index(self, index_name, *key_values): + """ + Return the count for a given combination of index_name + and key values. 
+ + Extension method made from similar methods in u1db version 13.09 + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: A deferred whose callback will be invoked with the count. + :rtype: twisted.internet.defer.Deferred + """ return self._defer("get_count_from_index", index_name, *key_values) def get_range_from_index(self, index_name, start_value, end_value): + """ + Return documents that fall within the specified range. + + Both ends of the range are inclusive. For both start_value and + end_value, one must supply exactly the same number of values as have + been defined in the index, or pass None. In case of a single column + index, a string is accepted as an alternative for a tuple with a single + value. It is possible to do a prefix match by using '*' to indicate + a wildcard match. You can only supply '*' to trailing entries, (eg + 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also + possible to append a '*' to the last supplied value (eg 'val*', '*', + '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :type index_name: str + :param start_values: tuples of values that define the lower bound of + the range. eg, if you have an index with 3 fields then you would + have: (val1, val2, val3) + :type start_values: tuple + :param end_values: tuples of values that define the upper bound of the + range. eg, if you have an index with 3 fields then you would have: + (val1, val2, val3) + :type end_values: tuple + :return: A deferred whose callback will be invoked with a list of + [Document]. 
+ :rtype: twisted.internet.defer.Deferred + """ + return self._defer( "get_range_from_index", index_name, start_value, end_value) def get_index_keys(self, index_name): + """ + Return all keys under which documents are indexed in this index. + + :param index_name: The index to query + :type index_name: str + :return: A deferred whose callback will be invoked with a list of + tuples of indexed keys. + :rtype: twisted.internet.defer.Deferred + """ return self._defer("get_index_keys", index_name) def get_doc_conflicts(self, doc_id): + """ + Get the list of conflicts for the given document. + + The order of the conflicts is such that the first entry is the value + that would be returned by "get_doc". + + :param doc_id: The unique document identifier + :type doc_id: str + :return: A deferred whose callback will be invoked with a list of the + Document entries that are conflicted. + :rtype: twisted.internet.defer.Deferred + """ return self._defer("get_doc_conflicts", doc_id) def resolve_doc(self, doc, conflicted_doc_revs): + """ + Mark a document as no longer conflicted. + + We take the list of revisions that the client knows about that it is + superseding. This may be a different list from the actual current + conflicts, in which case only those are removed as conflicted. This + may fail if the conflict list is significantly different from the + supplied information. (sync could have happened in the background from + the time you GET_DOC_CONFLICTS until the point where you RESOLVE) + + :param doc: A Document with the new content to be inserted. + :type doc: SoledadDocument + :param conflicted_doc_revs: A list of revisions that the new content + supersedes. + :type conflicted_doc_revs: list(str) + :return: A deferred. 
+ :rtype: twisted.internet.defer.Deferred + """ return self._defer("resolve_doc", doc, conflicted_doc_revs) - def _get_local_db_path(self): + @property + def local_db_path(self): return self._local_db_path - # XXX Do we really need all this private / property dance? - - local_db_path = property( - _get_local_db_path, - doc='The path for the local database replica.') - - def _get_uuid(self): + @property + def uuid(self): return self._uuid - uuid = property(_get_uuid, doc='The user uuid.') - # # ISyncableStorage # def sync(self, defer_decryption=True): + """ + Synchronize documents with the server replica. + + :param defer_decryption: + Whether to defer decryption of documents, or do it inline while + syncing. + :type defer_decryption: bool + :return: A deferred whose callback will be invoked with the local + generation before the synchronization was performed. + :rtype: twisted.internet.defer.Deferred + """ # ----------------------------------------------------------------- # TODO this needs work. @@ -377,7 +641,6 @@ class Soledad(object): # thread) # (4) Check that the deferred is called with the local gen. - # TODO document that this returns a deferred # ----------------------------------------------------------------- def on_sync_done(local_gen): @@ -404,6 +667,12 @@ class Soledad(object): @property def syncing(self): + """ + Return wether Soledad is currently synchronizing with the server. + + :return: Wether Soledad is currently synchronizing with the server. + :rtype: bool + """ return self._dbsyncer.syncing def _set_token(self, token): @@ -413,10 +682,11 @@ class Soledad(object): Internally, this builds the credentials dictionary with the following format: - self._{ + { 'token': { 'uuid': '' 'token': '' + } } :param token: The authentication token. 
@@ -442,18 +712,38 @@ class Soledad(object): # def init_shared_db(self, server_url, uuid, creds, syncable=True): - shared_db_url = urlparse.urljoin(server_url, SHARED_DB_NAME) - self.shared_db = SoledadSharedDatabase.open_database( - shared_db_url, - uuid, - creds=creds, - create=False, # db should exist at this point. - syncable=syncable) + """ + Initialize the shared database. + + :param server_url: URL of the remote database. + :type server_url: str + :param uuid: The user's unique id. + :type uuid: str + :param creds: A tuple containing the authentication method and + credentials. + :type creds: tuple + :param syncable: + If syncable is False, the database will not attempt to sync against + a remote replica. + :type syncable: bool + """ + # only case this is False is for testing purposes + if self.shared_db is None: + shared_db_url = urlparse.urljoin(server_url, SHARED_DB_NAME) + self.shared_db = SoledadSharedDatabase.open_database( + shared_db_url, + uuid, + creds=creds, + create=False, # db should exist at this point. + syncable=syncable) @property def storage_secret(self): """ - Return the secret used for symmetric encryption. + Return the secret used for local storage encryption. + + :return: The secret used for local storage encryption. + :rtype: str """ return self._secrets.storage_secret @@ -461,20 +751,37 @@ class Soledad(object): def remote_storage_secret(self): """ Return the secret used for encryption of remotely stored data. + + :return: The secret used for remote storage encryption. + :rtype: str """ return self._secrets.remote_storage_secret @property def secrets(self): + """ + Return the secrets object. + + :return: The secrets object. + :rtype: SoledadSecrets + """ return self._secrets def change_passphrase(self, new_passphrase): + """ + Change the passphrase that encrypts the storage secret. + + :param new_passphrase: The new passphrase. + :type new_passphrase: unicode + + :raise NoStorageSecret: Raised if there's no storage secret available. 
+ """ self._secrets.change_passphrase(new_passphrase) def _convert_to_unicode(content): """ - Convert content to unicode (or all the strings in content) + Convert content to unicode (or all the strings in content). NOTE: Even though this method supports any type, it will currently ignore contents of lists, tuple or any other -- cgit v1.2.3 From 5a3dee72a03cc930f3357a8ea2a0d6395fdaaab7 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 25 Nov 2014 15:40:44 -0200 Subject: Several fixes in adbapi interface: * Get replica uid upon U1DBConnectionPool initialization. * Fix docstrings. --- client/src/leap/soledad/client/adbapi.py | 101 +++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 6 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py index 0cdc90eb..9ae2889e 100644 --- a/client/src/leap/soledad/client/adbapi.py +++ b/client/src/leap/soledad/client/adbapi.py @@ -37,6 +37,23 @@ if DEBUG_SQL: def getConnectionPool(opts, openfun=None, driver="pysqlcipher"): + """ + Return a connection pool. + + :param opts: + Options for the SQLCipher connection. + :type opts: SQLCipherOptions + :param openfun: + Callback invoked after every connect() on the underlying DB-API + object. + :type openfun: callable + :param driver: + The connection driver. + :type driver: str + + :return: A U1DB connection pool. + :rtype: U1DBConnectionPool + """ if openfun is None and driver == "pysqlcipher": openfun = partial(soledad_sqlcipher.set_init_pragmas, opts=opts) return U1DBConnectionPool( @@ -45,14 +62,29 @@ def getConnectionPool(opts, openfun=None, driver="pysqlcipher"): class U1DBConnection(adbapi.Connection): + """ + A wrapper for a U1DB connection instance. + """ u1db_wrapper = soledad_sqlcipher.SoledadSQLCipherWrapper + """ + The U1DB wrapper to use. + """ def __init__(self, pool, init_u1db=False): + """ + :param pool: The pool of connections to that owns this connection. 
+ :type pool: adbapi.ConnectionPool + :param init_u1db: Wether the u1db database should be initialized. + :type init_u1db: bool + """ self.init_u1db = init_u1db adbapi.Connection.__init__(self, pool) def reconnect(self): + """ + Reconnect to the U1DB database. + """ if self._connection is not None: self._pool.disconnect(self._connection) self._connection = self._pool.connect() @@ -61,29 +93,51 @@ class U1DBConnection(adbapi.Connection): self._u1db = self.u1db_wrapper(self._connection) def __getattr__(self, name): + """ + Route the requested attribute either to the U1DB wrapper or to the + connection. + + :param name: The name of the attribute. + :type name: str + """ if name.startswith('u1db_'): - meth = re.sub('^u1db_', '', name) - return getattr(self._u1db, meth) + attr = re.sub('^u1db_', '', name) + return getattr(self._u1db, attr) else: return getattr(self._connection, name) - class U1DBTransaction(adbapi.Transaction): + """ + A wrapper for a U1DB 'cursor' object. + """ def __getattr__(self, name): + """ + Route the requested attribute either to the U1DB wrapper of the + connection or to the actual connection cursor. + + :param name: The name of the attribute. + :type name: str + """ if name.startswith('u1db_'): - meth = re.sub('^u1db_', '', name) - return getattr(self._connection._u1db, meth) + attr = re.sub('^u1db_', '', name) + return getattr(self._connection._u1db, attr) else: return getattr(self._cursor, name) class U1DBConnectionPool(adbapi.ConnectionPool): + """ + Represent a pool of connections to an U1DB database. + """ connectionFactory = U1DBConnection transactionFactory = U1DBTransaction def __init__(self, *args, **kwargs): + """ + Initialize the connection pool. + """ adbapi.ConnectionPool.__init__(self, *args, **kwargs) # all u1db connections, hashed by thread-id self._u1dbconnections = {} @@ -91,15 +145,48 @@ class U1DBConnectionPool(adbapi.ConnectionPool): # The replica uid, primed by the connections on init. 
self.replica_uid = ProxyBase(None) + conn = self.connectionFactory(self, init_u1db=True) + replica_uid = conn._u1db._real_replica_uid + setProxiedObject(self.replica_uid, replica_uid) + def runU1DBQuery(self, meth, *args, **kw): + """ + Execute a U1DB query in a thread, using a pooled connection. + + :param meth: The U1DB wrapper method name. + :type meth: str + + :return: a Deferred which will fire the return value of + 'self._runU1DBQuery(Transaction(...), *args, **kw)', or a Failure. + :rtype: twisted.internet.defer.Deferred + """ meth = "u1db_%s" % meth return self.runInteraction(self._runU1DBQuery, meth, *args, **kw) def _runU1DBQuery(self, trans, meth, *args, **kw): + """ + Execute a U1DB query. + + :param trans: An U1DB transaction. + :type trans: adbapi.Transaction + :param meth: the U1DB wrapper method name. + :type meth: str + """ meth = getattr(trans, meth) return meth(*args, **kw) def _runInteraction(self, interaction, *args, **kw): + """ + Interact with the database and return the result. + + :param interaction: + A callable object whose first argument is an + L{adbapi.Transaction}. + :type interaction: callable + :return: a Deferred which will fire the return value of + 'interaction(Transaction(...), *args, **kw)', or a Failure. + :rtype: twisted.internet.defer.Deferred + """ tid = self.threadID() u1db = self._u1dbconnections.get(tid) conn = self.connectionFactory(self, init_u1db=not bool(u1db)) @@ -107,7 +194,6 @@ class U1DBConnectionPool(adbapi.ConnectionPool): if self.replica_uid is None: replica_uid = conn._u1db._real_replica_uid setProxiedObject(self.replica_uid, replica_uid) - print "GOT REPLICA UID IN DBPOOL", self.replica_uid if u1db is None: self._u1dbconnections[tid] = conn._u1db @@ -129,6 +215,9 @@ class U1DBConnectionPool(adbapi.ConnectionPool): raise excType, excValue, excTraceback def finalClose(self): + """ + A final close, only called by the shutdown trigger. 
+ """ self.shutdownID = None self.threadpool.stop() self.running = False -- cgit v1.2.3 From 8f01a07faa6abccfdc6face00e8b6f95b184abdf Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 25 Nov 2014 15:42:29 -0200 Subject: Several fixes in Soledad crypto: * Adapt to removal of the old multiprocessing safe database, by directly querying the sync database. * Fix docstrings. --- client/src/leap/soledad/client/crypto.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index aa8135c0..950576ec 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -530,8 +530,8 @@ class SyncEncryptDecryptPool(object): :param crypto: A SoledadCryto instance to perform the encryption. :type crypto: leap.soledad.crypto.SoledadCrypto - :param sync_db: a database connection handle - :type sync_db: handle + :param sync_db: A database connection handle + :type sync_db: pysqlcipher.dbapi2.Connection :param write_lock: a write lock for controlling concurrent access to the sync_db @@ -909,8 +909,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): if encrypted is not None: sql += " WHERE encrypted = %d" % int(encrypted) sql += " ORDER BY gen ASC" - docs = self._sync_db.select(sql) - return docs + return self._fetchall(sql) def get_insertable_docs_by_gen(self): """ @@ -927,15 +926,12 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): decrypted_docs = self.get_docs_by_generation(encrypted=False) insertable = [] for doc_id, rev, _, gen, trans_id, encrypted in all_docs: - try: - next_doc_id, _, next_content, _, _, _ = decrypted_docs.next() + for next_doc_id, _, next_content, _, _, _ in decrypted_docs: if doc_id == next_doc_id: content = next_content insertable.append((doc_id, rev, content, gen, trans_id)) else: break - except StopIteration: - break return insertable def count_docs_in_sync_db(self, 
encrypted=None): @@ -955,9 +951,9 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): sql = "SELECT COUNT(*) FROM %s" % (self.TABLE_NAME,) if encrypted is not None: sql += " WHERE encrypted = %d" % int(encrypted) - res = self._sync_db.select(sql) - if res is not None: - val = res.next() + res = self._fetchall(sql) + if res: + val = res.pop() return val[0] else: return 0 @@ -1035,4 +1031,10 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): Empty the received docs table of the sync database. """ sql = "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,) - res = self._sync_db.execute(sql) + self._sync_db.execute(sql) + + def _fetchall(self, *args, **kwargs): + with self._sync_db: + c = self._sync_db.cursor() + c.execute(*args, **kwargs) + return c.fetchall() -- cgit v1.2.3 From dac64ed7d4f9749a620dcbfcabd33e46a94da63c Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 25 Nov 2014 15:54:21 -0200 Subject: Several fixes in SoledadSharedDB: * Remove check for HTTPS address. * Remove creation of shared database. * Fix docstrings. --- client/src/leap/soledad/client/api.py | 1 - client/src/leap/soledad/client/shared_db.py | 22 ++++++++-------------- 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 59cbc4ca..998e9148 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -734,7 +734,6 @@ class Soledad(object): shared_db_url, uuid, creds=creds, - create=False, # db should exist at this point. 
syncable=syncable) @property diff --git a/client/src/leap/soledad/client/shared_db.py b/client/src/leap/soledad/client/shared_db.py index 77a7db68..26ddc285 100644 --- a/client/src/leap/soledad/client/shared_db.py +++ b/client/src/leap/soledad/client/shared_db.py @@ -95,9 +95,7 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): # @staticmethod - def open_database(url, uuid, create, creds=None, syncable=True): - # TODO: users should not be able to create the shared database, so we - # have to remove this from here in the future. + def open_database(url, uuid, creds=None, syncable=True): """ Open a Soledad shared database. @@ -105,12 +103,9 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): :type url: str :param uuid: The user's unique id. :type uuid: str - :param create: Should the database be created if it does not already - exist? - :type create: bool - :param token: An authentication token for accessing the shared db. - :type token: str - + :param creds: A tuple containing the authentication method and + credentials. + :type creds: tuple :param syncable: If syncable is False, the database will not attempt to sync against a remote replica. @@ -119,13 +114,12 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): :return: The shared database in the given url. :rtype: SoledadSharedDatabase """ - if syncable and not url.startswith('https://'): - raise ImproperlyConfiguredError( - "Remote soledad server must be an https URI") + # XXX fix below, doesn't work with tests. 
+ #if syncable and not url.startswith('https://'): + # raise ImproperlyConfiguredError( + # "Remote soledad server must be an https URI") db = SoledadSharedDatabase(url, uuid, creds=creds) db.syncable = syncable - if syncable: - db.open(create) return db @staticmethod -- cgit v1.2.3 From 5247c231639fc9fd8c4279190af50afa99783bd6 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 25 Nov 2014 16:11:53 -0200 Subject: Several fixes in SQLCipherDatabase: * Add copy of SQLCipherOptions object to avoid modifying the options object in place when instantiating the sync db. * Add string representation of SQLCipherOptions for easiness of debugging. * Make sync db always "ready". * Fix passing options for sync db initialization. * Fix typ0 that made SQLCipherU1DBSync._sync_loop be a tuple. * Do not defer requests for stopping sync to a thread pool. * Do not make pysqlcipher check if object is using in distinct threads. * Reset the sync loop when stopping the syncer. * Fix docstrings. * Check for _db_handle attribute when closing the database. 
--- client/src/leap/soledad/client/sqlcipher.py | 85 ++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 26 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 323d78f1..91821c25 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -46,6 +46,10 @@ import multiprocessing import os import threading import json +import u1db + +from u1db import errors as u1db_errors +from u1db.backends import sqlite_backend from hashlib import sha256 from contextlib import contextmanager @@ -53,10 +57,6 @@ from collections import defaultdict from httplib import CannotSendRequest from pysqlcipher import dbapi2 as sqlcipher_dbapi2 -from u1db.backends import sqlite_backend -from u1db import errors as u1db_errors -import u1db - from twisted.internet import reactor from twisted.internet.task import LoopingCall @@ -76,6 +76,7 @@ from leap.soledad.common.document import SoledadDocument logger = logging.getLogger(__name__) + # Monkey-patch u1db.backends.sqlite_backend with pysqlcipher.dbapi2 sqlite_backend.dbapi2 = sqlcipher_dbapi2 @@ -88,7 +89,7 @@ def initialize_sqlcipher_db(opts, on_init=None, check_same_thread=True): :type opts: SQLCipherOptions :param on_init: a tuple of queries to be executed on initialization :type on_init: tuple - :return: a SQLCipher connection + :return: pysqlcipher.dbapi2.Connection """ # Note: There seemed to be a bug in sqlite 3.5.9 (with python2.6) # where without re-opening the database on Windows, it @@ -104,6 +105,7 @@ def initialize_sqlcipher_db(opts, on_init=None, check_same_thread=True): set_init_pragmas(conn, opts, extra_queries=on_init) return conn + _db_init_lock = threading.Lock() @@ -146,6 +148,26 @@ class SQLCipherOptions(object): """ A container with options for the initialization of an SQLCipher database. 
""" + + @classmethod + def copy(cls, source, path=None, key=None, create=None, + is_raw_key=None, cipher=None, kdf_iter=None, cipher_page_size=None, + defer_encryption=None, sync_db_key=None): + """ + Return a copy of C{source} with parameters different than None + replaced by new values. + """ + return SQLCipherOptions( + path if path else source.path, + key if key else source.key, + create=create if create else source.create, + is_raw_key=is_raw_key if is_raw_key else source.is_raw_key, + cipher=cipher if cipher else source.cipher, + kdf_iter=kdf_iter if kdf_iter else source.kdf_iter, + cipher_page_size=cipher_page_size if cipher_page_size else source.cipher_page_size, + defer_encryption=defer_encryption if defer_encryption else source.defer_encryption, + sync_db_key=sync_db_key if sync_db_key else source.sync_db_key) + def __init__(self, path, key, create=True, is_raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, defer_encryption=False, sync_db_key=None): @@ -156,9 +178,6 @@ class SQLCipherOptions(object): True/False, should the database be created if it doesn't already exist? :param create: bool - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto :param is_raw_key: Whether ``password`` is a raw 64-char hex string or a passphrase that should be hashed to obtain the encyrption key. @@ -184,11 +203,25 @@ class SQLCipherOptions(object): self.defer_encryption = defer_encryption self.sync_db_key = sync_db_key + def __str__(self): + """ + Return string representation of options, for easy debugging. + + :return: String representation of options. 
+ :rtype: str + """ + attr_names = filter(lambda a: not a.startswith('_'), dir(self)) + attr_str = [] + for a in attr_names: + attr_str.append(a + "=" + str(getattr(self, a))) + name = self.__class__.__name__ + return "%s(%s)" % (name, ', '.join(attr_str)) + + # # The SQLCipher database # - class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ A U1DB implementation that uses SQLCipher as its persistence layer. @@ -212,9 +245,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): *** IMPORTANT *** - :param soledad_crypto: - :type soldead_crypto: - :param opts: + :param opts: options for initialization of the SQLCipher database. :type opts: SQLCipherOptions """ # ensure the db is encrypted if the file already exists @@ -348,7 +379,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): logger.debug("SQLCipher backend: closing") # close the actual database - if self._db_handle is not None: + if getattr(self, '_db_handle', False): self._db_handle.close() self._db_handle = None @@ -445,8 +476,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._crypto = soledad_crypto self.__replica_uid = replica_uid - print "REPLICA UID (u1dbsync init)", replica_uid - self._sync_db_key = opts.sync_db_key self._sync_db = None self._sync_db_write_lock = None @@ -471,28 +500,28 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._reactor = reactor self._reactor.callWhenRunning(self._start) - self.ready = False + self.ready = True self._db_handle = None self._initialize_syncer_main_db() if defer_encryption: - self._initialize_sync_db() + self._initialize_sync_db(opts) # initialize syncing queue encryption pool self._sync_enc_pool = crypto.SyncEncrypterPool( self._crypto, self._sync_db, self._sync_db_write_lock) - # ------------------------------------------------------------------ + # ----------------------------------------------------------------- # From the documentation: If f returns a deferred, rescheduling # will not take 
place until the deferred has fired. The result # value is ignored. # TODO use this to avoid multiple sync attempts if the sync has not # finished! - # ------------------------------------------------------------------ + # ----------------------------------------------------------------- # XXX this was called sync_watcher --- trace any remnants - self._sync_loop = LoopingCall(self._encrypt_syncing_docs), + self._sync_loop = LoopingCall(self._encrypt_syncing_docs) self._sync_loop.start(self.ENCRYPT_LOOP_PERIOD) self.shutdownID = None @@ -522,7 +551,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): #import thread #print "initializing in thread", thread.get_ident() # --------------------------------------------------- - self._db_handle = initialize_sqlcipher_db( self._opts, check_same_thread=False) self._real_replica_uid = None @@ -557,10 +585,14 @@ class SQLCipherU1DBSync(SQLCipherDatabase): else: sync_db_path = ":memory:" - opts.path = sync_db_path - + # we copy incoming options because the opts object might be used + # somewhere else + sync_opts = SQLCipherOptions.copy( + opts, path=sync_db_path, create=True) self._sync_db = initialize_sqlcipher_db( - opts, on_init=self._sync_db_extra_init) + sync_opts, on_init=self._sync_db_extra_init, + check_same_thread=False) + pragmas.set_crypto_pragmas(self._sync_db, opts) # --------------------------------------------------------- @property @@ -615,7 +647,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # callLater this same function after a timeout (deferLater) # Might want to keep track of retries and cancel too. # -------------------------------------------------------------- - print "Syncing to...", url kwargs = {'creds': creds, 'autocreate': autocreate, 'defer_decryption': defer_decryption} return self._defer_to_sync_threadpool(self._sync, url, **kwargs) @@ -629,6 +660,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # threadpool. 
log.msg("in _sync") + self.__url = url with self._syncer(url, creds=creds) as syncer: # XXX could mark the critical section here... try: @@ -652,7 +684,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): """ Interrupt all ongoing syncs. """ - self._defer_to_sync_threadpool(self._stop_sync) + self._stop_sync() def _stop_sync(self): for url in self._syncers: @@ -769,6 +801,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): """ # stop the sync loop for deferred encryption if self._sync_loop is not None: + self._sync_loop.reset() self._sync_loop.stop() self._sync_loop = None # close all open syncers -- cgit v1.2.3 From c24452af4da078eaf15aa0841d8f8482886735f4 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 25 Nov 2014 16:22:15 -0200 Subject: Several fixes in SoledadSyncTarget: * Fix arg passing to syncing failure method. * Do not try to start sync loop which should be already running. * Adapt to removal of old multiprocessing safe db, now accesses the sqlcipher database directly. --- client/src/leap/soledad/client/target.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index ba61cdff..dd61c070 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -188,7 +188,7 @@ class DocumentSyncerThread(threading.Thread): self._doc_syncer.failure_callback( self._idx, self._total, self._exception) - self._failed_method(self) + self._failed_method() # we do not release the callback lock here because we # failed and so we don't want other threads to succeed. 
@@ -1296,7 +1296,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # decrypt docs in case of deferred decryption if defer_decryption: - self._sync_loop.start() while self.clear_to_sync() is False: sleep(self.DECRYPT_LOOP_PERIOD) self._teardown_sync_loop() @@ -1362,11 +1361,11 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): encr = SyncEncrypterPool sql = ("SELECT content FROM %s WHERE doc_id=? and rev=?" % ( encr.TABLE_NAME,)) - res = self._sync_db.select(sql, (doc_id, doc_rev)) - try: - val = res.next() + res = self._fetchall(sql, (doc_id, doc_rev)) + if res: + val = res.pop() return val[0] - except StopIteration: + else: # no doc found return None @@ -1508,3 +1507,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :type token: str """ TokenBasedAuth.set_token_credentials(self, uuid, token) + + def _fetchall(self, *args, **kwargs): + with self._sync_db: + c = self._sync_db.cursor() + c.execute(*args, **kwargs) + return c.fetchall() -- cgit v1.2.3 From d8c457680b79c202d54dcf9ea799a03b5ffc6c03 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 25 Nov 2014 16:25:33 -0200 Subject: Add local replica info to sync debug output. 
--- client/src/leap/soledad/client/sync.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index aa19ddab..1a5e2989 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -115,9 +115,10 @@ class SoledadSynchronizer(Synchronizer): " target generation: %d\n" " target trans id: %s\n" " target my gen: %d\n" - " target my trans_id: %s" + " target my trans_id: %s\n" + " source replica_uid: %s\n" % (self.target_replica_uid, target_gen, target_trans_id, - target_my_gen, target_my_trans_id)) + target_my_gen, target_my_trans_id, self.source._replica_uid)) # make sure we'll have access to target replica uid once it exists if self.target_replica_uid is None: -- cgit v1.2.3 From 0b88ef70ec12d3666a9bfc32481d672cb01cf056 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 17 Dec 2014 14:51:55 -0200 Subject: Do not try to close db syncer if db is not syncable. --- client/src/leap/soledad/client/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 998e9148..81bf1fd9 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -285,7 +285,8 @@ class Soledad(object): """ logger.debug("Closing soledad") self._dbpool.close() - self._dbsyncer.close() + if getattr(self, '_dbsyncer', None): + self._dbsyncer.close() # # ILocalStorage -- cgit v1.2.3 From 9f0e5ac8db4813b1277c3a858cf1d5cb785a4023 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 17 Dec 2014 14:53:08 -0200 Subject: Do not try to unlock shared db if db is not syncable. 
--- client/src/leap/soledad/client/shared_db.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/shared_db.py b/client/src/leap/soledad/client/shared_db.py index 26ddc285..f1a2642e 100644 --- a/client/src/leap/soledad/client/shared_db.py +++ b/client/src/leap/soledad/client/shared_db.py @@ -178,5 +178,6 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): :raise HTTPError: """ - res, headers = self._request_json('DELETE', ['lock', self._uuid], - params={'token': token}) + if self.syncable: + _, _ = self._request_json( + 'DELETE', ['lock', self._uuid], params={'token': token}) -- cgit v1.2.3 From c654d5e777c0d1db75b5f3586bd20ce2ec4edadc Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 5 Jan 2015 10:46:31 -0400 Subject: add raw sqlcipher query method --- client/src/leap/soledad/client/adbapi.py | 1 + client/src/leap/soledad/client/api.py | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py index 9ae2889e..f0b7f182 100644 --- a/client/src/leap/soledad/client/adbapi.py +++ b/client/src/leap/soledad/client/adbapi.py @@ -106,6 +106,7 @@ class U1DBConnection(adbapi.Connection): else: return getattr(self._connection, name) + class U1DBTransaction(adbapi.Transaction): """ A wrapper for a U1DB 'cursor' object. diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 81bf1fd9..88bb4969 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -778,6 +778,16 @@ class Soledad(object): """ self._secrets.change_passphrase(new_passphrase) + # + # Raw SQLCIPHER Queries + # + + def raw_sqlcipher_query(self, *args, **kw): + """ + Run a raw sqlcipher query in the local database. 
+ """ + return self._dbpool.runQuery(*args, **kw) + def _convert_to_unicode(content): """ -- cgit v1.2.3 From 61212438a57d2450db767860c6e09e43d9e53532 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 23 Dec 2014 02:10:47 -0400 Subject: add some benchmarking skeleton --- .../soledad/client/examples/benchmarks/.gitignore | 1 + .../client/examples/benchmarks/get_sample.sh | 3 + .../examples/benchmarks/measure_index_times.py | 177 +++++++++++++++++++++ .../benchmarks/measure_index_times_custom_docid.py | 177 +++++++++++++++++++++ 4 files changed, 358 insertions(+) create mode 100644 client/src/leap/soledad/client/examples/benchmarks/.gitignore create mode 100755 client/src/leap/soledad/client/examples/benchmarks/get_sample.sh create mode 100644 client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py create mode 100644 client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/examples/benchmarks/.gitignore b/client/src/leap/soledad/client/examples/benchmarks/.gitignore new file mode 100644 index 00000000..2211df63 --- /dev/null +++ b/client/src/leap/soledad/client/examples/benchmarks/.gitignore @@ -0,0 +1 @@ +*.txt diff --git a/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh new file mode 100755 index 00000000..1995eee1 --- /dev/null +++ b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh @@ -0,0 +1,3 @@ +#!/bin/sh +mkdir tmp +wget http://www.gutenberg.org/cache/epub/101/pg101.txt -O hacker_crackdown.txt diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py new file mode 100644 index 00000000..7fa1e38f --- /dev/null +++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- +# 
measure_index_times.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Measure u1db retrieval times for different u1db index situations. +""" +from __future__ import print_function +from functools import partial +import datetime +import hashlib +import os +import sys + +import u1db +from twisted.internet import defer, reactor + +from leap.soledad.client import adbapi +from leap.soledad.client.sqlcipher import SQLCipherOptions + + +folder = os.environ.get("TMPDIR", "tmp") +numdocs = int(os.environ.get("DOCS", "1000")) +silent = os.environ.get("SILENT", False) +tmpdb = os.path.join(folder, "test.soledad") + + +sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt") +sample_path = os.path.join(os.curdir, sample_file) + +try: + with open(sample_file) as f: + SAMPLE = f.readlines() +except Exception: + print("[!] Problem opening sample file. Did you download " + "the sample, or correctly set 'SAMPLE' env var?") + sys.exit(1) + +if numdocs > len(SAMPLE): + print("[!] Sorry! 
The requested DOCS number is larger than " + "the num of lines in our sample file") + sys.exit(1) + + +def debug(*args): + if not silent: + print(*args) + +debug("[+] db path:", tmpdb) +debug("[+] num docs", numdocs) + +if os.path.isfile(tmpdb): + debug("[+] Removing existing db file...") + os.remove(tmpdb) + +start_time = datetime.datetime.now() + +opts = SQLCipherOptions(tmpdb, "secret", create=True) +dbpool = adbapi.getConnectionPool(opts) + + +def createDoc(doc): + return dbpool.runU1DBQuery("create_doc", doc) + +db_indexes = { + 'by-chash': ['chash'], + 'by-number': ['number']} + + +def create_indexes(_): + deferreds = [] + for index, definition in db_indexes.items(): + d = dbpool.runU1DBQuery("create_index", index, *definition) + deferreds.append(d) + return defer.gatherResults(deferreds) + + +class TimeWitness(object): + def __init__(self, init_time): + self.init_time = init_time + + def get_time_count(self): + return datetime.datetime.now() - self.init_time + + +def get_from_index(_): + init_time = datetime.datetime.now() + debug("GETTING FROM INDEX...", init_time) + + def printValue(res, time): + print("RESULT->", res) + print("Index Query Took: ", time.get_time_count()) + return res + + d = dbpool.runU1DBQuery( + "get_from_index", "by-chash", + #"1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5") + # XXX this is line 89 from the hacker crackdown... + # Should accept any other optional hash as an enviroment variable. + "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469") + d.addCallback(printValue, TimeWitness(init_time)) + return d + + +def getAllDocs(): + return dbpool.runU1DBQuery("get_all_docs") + + +def errBack(e): + debug("[!] 
ERROR FOUND!!!") + e.printTraceback() + reactor.stop() + + +def countDocs(_): + debug("counting docs...") + d = getAllDocs() + d.addCallbacks(printResult, errBack) + d.addCallbacks(allDone, errBack) + return d + + +def printResult(r, **kwargs): + if kwargs: + debug(*kwargs.values()) + elif isinstance(r, u1db.Document): + debug(r.doc_id, r.content['number']) + else: + len_results = len(r[1]) + debug("GOT %s results" % len(r[1])) + + if len_results == numdocs: + debug("ALL GOOD") + else: + debug("[!] MISSING DOCS!!!!!") + raise ValueError("We didn't expect this result len") + + +def allDone(_): + debug("ALL DONE!") + + #if silent: + end_time = datetime.datetime.now() + print((end_time - start_time).total_seconds()) + reactor.stop() + + +def insert_docs(_): + deferreds = [] + for i in range(numdocs): + payload = SAMPLE[i] + chash = hashlib.sha256(payload).hexdigest() + doc = {"number": i, "payload": payload, 'chash': chash} + d = createDoc(doc) + d.addCallbacks(partial(printResult, i=i, chash=chash, payload=payload), + lambda e: e.printTraceback()) + deferreds.append(d) + return defer.gatherResults(deferreds, consumeErrors=True) + +d = create_indexes(None) +d.addCallback(insert_docs) +d.addCallback(get_from_index) +d.addCallback(countDocs) + +reactor.run() diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py new file mode 100644 index 00000000..c6d76e6b --- /dev/null +++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- +# measure_index_times.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Measure u1db retrieval times for different u1db index situations. +""" +from __future__ import print_function +from functools import partial +import datetime +import hashlib +import os +import sys + +import u1db +from twisted.internet import defer, reactor + +from leap.soledad.client import adbapi +from leap.soledad.client.sqlcipher import SQLCipherOptions + + +folder = os.environ.get("TMPDIR", "tmp") +numdocs = int(os.environ.get("DOCS", "1000")) +silent = os.environ.get("SILENT", False) +tmpdb = os.path.join(folder, "test.soledad") + + +sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt") +sample_path = os.path.join(os.curdir, sample_file) + +try: + with open(sample_file) as f: + SAMPLE = f.readlines() +except Exception: + print("[!] Problem opening sample file. Did you download " + "the sample, or correctly set 'SAMPLE' env var?") + sys.exit(1) + +if numdocs > len(SAMPLE): + print("[!] Sorry! 
The requested DOCS number is larger than " + "the num of lines in our sample file") + sys.exit(1) + + +def debug(*args): + if not silent: + print(*args) + +debug("[+] db path:", tmpdb) +debug("[+] num docs", numdocs) + +if os.path.isfile(tmpdb): + debug("[+] Removing existing db file...") + os.remove(tmpdb) + +start_time = datetime.datetime.now() + +opts = SQLCipherOptions(tmpdb, "secret", create=True) +dbpool = adbapi.getConnectionPool(opts) + + +def createDoc(doc, doc_id): + return dbpool.runU1DBQuery("create_doc", doc, doc_id=doc_id) + +db_indexes = { + 'by-chash': ['chash'], + 'by-number': ['number']} + + +def create_indexes(_): + deferreds = [] + for index, definition in db_indexes.items(): + d = dbpool.runU1DBQuery("create_index", index, *definition) + deferreds.append(d) + return defer.gatherResults(deferreds) + + +class TimeWitness(object): + def __init__(self, init_time): + self.init_time = init_time + + def get_time_count(self): + return datetime.datetime.now() - self.init_time + + +def get_from_index(_): + init_time = datetime.datetime.now() + debug("GETTING FROM INDEX...", init_time) + + def printValue(res, time): + print("RESULT->", res) + print("Index Query Took: ", time.get_time_count()) + return res + + d = dbpool.runU1DBQuery( + "get_doc", + #"1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5") + # XXX this is line 89 from the hacker crackdown... + # Should accept any other optional hash as an environment variable. + "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469") + d.addCallback(printValue, TimeWitness(init_time)) + return d + + +def getAllDocs(): + return dbpool.runU1DBQuery("get_all_docs") + + +def errBack(e): + debug("[!] 
ERROR FOUND!!!") + e.printTraceback() + reactor.stop() + + +def countDocs(_): + debug("counting docs...") + d = getAllDocs() + d.addCallbacks(printResult, errBack) + d.addCallbacks(allDone, errBack) + return d + + +def printResult(r, **kwargs): + if kwargs: + debug(*kwargs.values()) + elif isinstance(r, u1db.Document): + debug(r.doc_id, r.content['number']) + else: + len_results = len(r[1]) + debug("GOT %s results" % len(r[1])) + + if len_results == numdocs: + debug("ALL GOOD") + else: + debug("[!] MISSING DOCS!!!!!") + raise ValueError("We didn't expect this result len") + + +def allDone(_): + debug("ALL DONE!") + + #if silent: + end_time = datetime.datetime.now() + print((end_time - start_time).total_seconds()) + reactor.stop() + + +def insert_docs(_): + deferreds = [] + for i in range(numdocs): + payload = SAMPLE[i] + chash = hashlib.sha256(payload).hexdigest() + doc = {"number": i, "payload": payload, 'chash': chash} + d = createDoc(doc, doc_id=chash) + d.addCallbacks(partial(printResult, i=i, chash=chash, payload=payload), + lambda e: e.printTraceback()) + deferreds.append(d) + return defer.gatherResults(deferreds, consumeErrors=True) + +d = create_indexes(None) +d.addCallback(insert_docs) +d.addCallback(get_from_index) +d.addCallback(countDocs) + +reactor.run() -- cgit v1.2.3 From 14f34b1f64a667bf4a146e8579f95c5d308a1f77 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 8 Jan 2015 18:57:38 -0200 Subject: Retry on SQLCipher timeout (#6625). --- client/src/leap/soledad/client/adbapi.py | 45 ++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py index f0b7f182..7ad10db5 100644 --- a/client/src/leap/soledad/client/adbapi.py +++ b/client/src/leap/soledad/client/adbapi.py @@ -21,20 +21,37 @@ It uses twisted.enterprise.adbapi. 
import re import os import sys +import logging from functools import partial +from threading import BoundedSemaphore from twisted.enterprise import adbapi from twisted.python import log from zope.proxy import ProxyBase, setProxiedObject +from pysqlcipher.dbapi2 import OperationalError from leap.soledad.client import sqlcipher as soledad_sqlcipher +logger = logging.getLogger(name=__name__) + + DEBUG_SQL = os.environ.get("LEAP_DEBUG_SQL") if DEBUG_SQL: log.startLogging(sys.stdout) +""" +How long the SQLCipher connection should wait for the lock to go away until +raising an exception. +""" +SQLCIPHER_CONNECTION_TIMEOUT = 10 + +""" +How many times a SQLCipher query should be retried in case of timeout. +""" +SQLCIPHER_MAX_RETRIES = 10 + def getConnectionPool(opts, openfun=None, driver="pysqlcipher"): """ @@ -58,7 +75,8 @@ def getConnectionPool(opts, openfun=None, driver="pysqlcipher"): openfun = partial(soledad_sqlcipher.set_init_pragmas, opts=opts) return U1DBConnectionPool( "%s.dbapi2" % driver, database=opts.path, - check_same_thread=False, cp_openfun=openfun) + check_same_thread=False, cp_openfun=openfun, + timeout=SQLCIPHER_CONNECTION_TIMEOUT) class U1DBConnection(adbapi.Connection): @@ -154,6 +172,10 @@ class U1DBConnectionPool(adbapi.ConnectionPool): """ Execute a U1DB query in a thread, using a pooled connection. + Concurrent threads trying to update the same database may timeout + because of other threads holding the database lock. Because of this, + we will retry SQLCIPHER_MAX_RETRIES times and fail after that. + :param meth: The U1DB wrapper method name. 
:type meth: str @@ -162,7 +184,26 @@ class U1DBConnectionPool(adbapi.ConnectionPool): :rtype: twisted.internet.defer.Deferred """ meth = "u1db_%s" % meth - return self.runInteraction(self._runU1DBQuery, meth, *args, **kw) + semaphore = BoundedSemaphore(SQLCIPHER_MAX_RETRIES - 1) + + def _run_interaction(): + return self.runInteraction( + self._runU1DBQuery, meth, *args, **kw) + + def _errback(failure): + failure.trap(OperationalError) + if failure.getErrorMessage() == "database is locked": + should_retry = semaphore.acquire(False) + if should_retry: + logger.warning( + "Database operation timed out while waiting for " + "lock, trying again...") + return _run_interaction() + return failure + + d = _run_interaction() + d.addErrback(_errback) + return d def _runU1DBQuery(self, trans, meth, *args, **kw): """ -- cgit v1.2.3 From 41b34cc0d8bd6c2ae22547bc02845e68cab12c3b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 20 Feb 2015 16:01:57 -0400 Subject: cutoff for encoding detection --- client/src/leap/soledad/client/api.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 88bb4969..b8409cbe 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -416,6 +416,10 @@ class Soledad(object): :return: A deferred whose callback will be invoked with a document. :rtype: twisted.internet.defer.Deferred """ + # TODO we probably should pass an optional "encoding" parameter to + # create_doc (and probably to put_doc too). There are cases (mail + # payloads for example) in which we already have the encoding in the + # headers, so we don't need to guess it. return self._defer( "create_doc", _convert_to_unicode(content), doc_id=doc_id) @@ -803,12 +807,17 @@ def _convert_to_unicode(content): :rtype: object """ + # Chardet doesn't guess very well with some smallish payloads. 
+ # This parameter might need some empirical tweaking. + CUTOFF_CONFIDENCE = 0.90 + if isinstance(content, unicode): return content elif isinstance(content, str): + encoding = "utf-8" result = chardet.detect(content) - default = "utf-8" - encoding = result["encoding"] or default + if result["confidence"] > CUTOFF_CONFIDENCE: + encoding = result["encoding"] try: content = content.decode(encoding) except UnicodeError as e: -- cgit v1.2.3 From 74dec41c1d99ae8d4a4a79a7cb0d5c3c9f40cbae Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 19 Mar 2015 10:57:54 -0300 Subject: [fix] add explicit dependency on leap.common In the past, we wanted dependency on leap.common to be optional, but now because of the explicit use of the config path prefix and signaling, we want to enforce dependency on leap.common. --- client/src/leap/soledad/client/events.py | 67 +++++++++++++++----------------- 1 file changed, 32 insertions(+), 35 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/events.py b/client/src/leap/soledad/client/events.py index c4c09ac5..88e28674 100644 --- a/client/src/leap/soledad/client/events.py +++ b/client/src/leap/soledad/client/events.py @@ -21,38 +21,35 @@ Signaling functions. """ -SOLEDAD_CREATING_KEYS = 'Creating keys...' -SOLEDAD_DONE_CREATING_KEYS = 'Done creating keys.' -SOLEDAD_DOWNLOADING_KEYS = 'Downloading keys...' -SOLEDAD_DONE_DOWNLOADING_KEYS = 'Done downloading keys.' -SOLEDAD_UPLOADING_KEYS = 'Uploading keys...' -SOLEDAD_DONE_UPLOADING_KEYS = 'Done uploading keys.' -SOLEDAD_NEW_DATA_TO_SYNC = 'New data available.' -SOLEDAD_DONE_DATA_SYNC = 'Done data sync.' -SOLEDAD_SYNC_SEND_STATUS = 'Sync: sent one document.' -SOLEDAD_SYNC_RECEIVE_STATUS = 'Sync: received one document.' - -# we want to use leap.common.events to emits signals, if it is available. 
-try: - from leap.common import events - from leap.common.events import signal - SOLEDAD_CREATING_KEYS = events.proto.SOLEDAD_CREATING_KEYS - SOLEDAD_DONE_CREATING_KEYS = events.proto.SOLEDAD_DONE_CREATING_KEYS - SOLEDAD_DOWNLOADING_KEYS = events.proto.SOLEDAD_DOWNLOADING_KEYS - SOLEDAD_DONE_DOWNLOADING_KEYS = \ - events.proto.SOLEDAD_DONE_DOWNLOADING_KEYS - SOLEDAD_UPLOADING_KEYS = events.proto.SOLEDAD_UPLOADING_KEYS - SOLEDAD_DONE_UPLOADING_KEYS = \ - events.proto.SOLEDAD_DONE_UPLOADING_KEYS - SOLEDAD_NEW_DATA_TO_SYNC = events.proto.SOLEDAD_NEW_DATA_TO_SYNC - SOLEDAD_DONE_DATA_SYNC = events.proto.SOLEDAD_DONE_DATA_SYNC - SOLEDAD_SYNC_SEND_STATUS = events.proto.SOLEDAD_SYNC_SEND_STATUS - SOLEDAD_SYNC_RECEIVE_STATUS = events.proto.SOLEDAD_SYNC_RECEIVE_STATUS - -except ImportError: - # we define a fake signaling function and fake signal constants that will - # allow for logging signaling attempts in case leap.common.events is not - # available. - - def signal(signal, content=""): - logger.info("Would signal: %s - %s." 
% (str(signal), content)) +from leap.common import events +from leap.common.events import signal + + +SOLEDAD_CREATING_KEYS = events.proto.SOLEDAD_CREATING_KEYS +SOLEDAD_DONE_CREATING_KEYS = events.proto.SOLEDAD_DONE_CREATING_KEYS +SOLEDAD_DOWNLOADING_KEYS = events.proto.SOLEDAD_DOWNLOADING_KEYS +SOLEDAD_DONE_DOWNLOADING_KEYS = \ + events.proto.SOLEDAD_DONE_DOWNLOADING_KEYS +SOLEDAD_UPLOADING_KEYS = events.proto.SOLEDAD_UPLOADING_KEYS +SOLEDAD_DONE_UPLOADING_KEYS = \ + events.proto.SOLEDAD_DONE_UPLOADING_KEYS +SOLEDAD_NEW_DATA_TO_SYNC = events.proto.SOLEDAD_NEW_DATA_TO_SYNC +SOLEDAD_DONE_DATA_SYNC = events.proto.SOLEDAD_DONE_DATA_SYNC +SOLEDAD_SYNC_SEND_STATUS = events.proto.SOLEDAD_SYNC_SEND_STATUS +SOLEDAD_SYNC_RECEIVE_STATUS = events.proto.SOLEDAD_SYNC_RECEIVE_STATUS + + +__all__ = [ + "events", + "signal", + "SOLEDAD_CREATING_KEYS", + "SOLEDAD_DONE_CREATING_KEYS", + "SOLEDAD_DOWNLOADING_KEYS", + "SOLEDAD_DONE_DOWNLOADING_KEYS", + "SOLEDAD_UPLOADING_KEYS", + "SOLEDAD_DONE_UPLOADING_KEYS", + "SOLEDAD_NEW_DATA_TO_SYNC", + "SOLEDAD_DONE_DATA_SYNC", + "SOLEDAD_SYNC_SEND_STATUS", + "SOLEDAD_SYNC_RECEIVE_STATUS", +] -- cgit v1.2.3 From 147b5793fd9a3c2fd67a716d64f8cb2ed1496e39 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 25 Mar 2015 17:30:23 -0300 Subject: [bug] fail gracefully when sync fails With new soledad async api, we need to catch errors using errbacks instead of catching exceptions explicitly. This commit fixed the api sync() call to intercept sync failures, log them, and do not propagate them down the callback chain. 
--- client/src/leap/soledad/client/api.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index b8409cbe..35b44ac8 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -44,6 +44,7 @@ from u1db.remote.ssl_match_hostname import match_hostname from zope.interface import implements from twisted.python import log +from twisted.internet import defer from leap.common.config import get_path_prefix @@ -654,18 +655,20 @@ class Soledad(object): return local_gen sync_url = urlparse.urljoin(self._server_url, 'user-%s' % self.uuid) - try: - d = self._dbsyncer.sync( - sync_url, - creds=self._creds, autocreate=False, - defer_decryption=defer_decryption) - - d.addCallbacks(on_sync_done, lambda err: log.err(err)) - return d - - # TODO catch the exception by adding an Errback - except Exception as e: - logger.error("Soledad exception when syncing: %s" % str(e)) + d = self._dbsyncer.sync( + sync_url, + creds=self._creds, autocreate=False, + defer_decryption=defer_decryption) + + # prevent sync failures from crashing the app by adding an errback + # that logs the failure and does not propagate it down the callback + # chain + def _errback(failure): + log.err(failure) + logger.error("Soledad exception when syncing: %s" % str(failure)) + + d.addCallbacks(on_sync_done, _errback) + return d def stop_sync(self): self._dbsyncer.stop_sync() -- cgit v1.2.3 From c7996bf4bfe489a7c0341a27bb2bcf49245fbca7 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 8 Apr 2015 13:26:15 -0300 Subject: [feat] ensure sync fails will raise an exception This commit makes 2 changes that allow sync failures to raise exceptions that can be caught by the api: 1. Remove try/except statements in sync.py level that would prevent an exception to be caught by the soledad client api. 2. 
Ensure that if an asynchronous decrypting process fails the exception will be re-raised to eventually reach the api. Related: #6757. --- client/src/leap/soledad/client/api.py | 1 - client/src/leap/soledad/client/crypto.py | 69 +++++++++++++++----------------- client/src/leap/soledad/client/target.py | 12 +++--- 3 files changed, 39 insertions(+), 43 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 35b44ac8..b2cabe08 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -44,7 +44,6 @@ from u1db.remote.ssl_match_hostname import match_hostname from zope.interface import implements from twisted.python import log -from twisted.internet import defer from leap.common.config import get_path_prefix diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 950576ec..107bf7f1 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -725,6 +725,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): self._insert_doc_cb = kwargs.pop("insert_doc_cb") SyncEncryptDecryptPool.__init__(self, *args, **kwargs) self.source_replica_uid = None + self._async_results = [] def set_source_replica_uid(self, source_replica_uid): """ @@ -850,33 +851,20 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): # not encrypted payload return - try: - content = json.loads(content) - except TypeError: - logger.warning("Wrong type while decoding json: %s" - % repr(content)) - return - + content = json.loads(content) key = self._crypto.doc_passphrase(doc_id) secret = self._crypto.secret args = doc_id, rev, content, gen, trans_id, key, secret - try: - if workers: - # Ouch. This is sent to the workers asynchronously, so - # we have no way of logging errors. We'd have to inspect - # lingering results by querying successful / get() over them... - # Or move the heck out of it to twisted. 
- res = self._pool.apply_async( - decrypt_doc_task, args, - callback=self.decrypt_doc_cb) - else: - # decrypt inline - res = decrypt_doc_task(*args) - self.decrypt_doc_cb(res) - - except Exception as exc: - logger.exception(exc) + if workers: + # save the async result object so we can inspect it for failures + self._async_results.append(self._pool.apply_async( + decrypt_doc_task, args, + callback=self.decrypt_doc_cb)) + else: + # decrypt inline + res = decrypt_doc_task(*args) + self.decrypt_doc_cb(res) def decrypt_doc_cb(self, result): """ @@ -1010,21 +998,16 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): insert_fun = self._insert_doc_cb[self.source_replica_uid] logger.debug("Sync decrypter pool: inserting doc in local db: " "%s:%s %s" % (doc_id, doc_rev, gen)) - try: - # convert deleted documents to avoid error on document creation - if content == 'null': - content = None - doc = SoledadDocument(doc_id, doc_rev, content) - gen = int(gen) - insert_fun(doc, gen, trans_id) - except Exception as exc: - logger.error("Sync decrypter pool: error while inserting " - "decrypted doc into local db.") - logger.exception(exc) - else: - # If no errors found, remove it from the received database. - self.delete_received_doc(doc_id, doc_rev) + # convert deleted documents to avoid error on document creation + if content == 'null': + content = None + doc = SoledadDocument(doc_id, doc_rev, content) + gen = int(gen) + insert_fun(doc, gen, trans_id) + + # If no errors found, remove it from the received database. + self.delete_received_doc(doc_id, doc_rev) def empty(self): """ @@ -1038,3 +1021,15 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): c = self._sync_db.cursor() c.execute(*args, **kwargs) return c.fetchall() + + def raise_in_case_of_failed_async_calls(self): + """ + Re-raise any exception raised by an async call. + + :raise Exception: Raised if an async call has raised an exception. 
+ """ + for res in self._async_results: + if res.ready(): + if not res.successful(): + # re-raise the exception raised by the remote call + res.get() diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index dd61c070..986bd991 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -1296,7 +1296,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # decrypt docs in case of deferred decryption if defer_decryption: - while self.clear_to_sync() is False: + while not self.clear_to_sync(): sleep(self.DECRYPT_LOOP_PERIOD) self._teardown_sync_loop() self._teardown_sync_decr_pool() @@ -1435,13 +1435,14 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): def clear_to_sync(self): """ - Return True if sync can proceed (ie, the received db table is empty). + Return whether sync can proceed (ie, the received db table is empty). + + :return: Whether sync can proceed. :rtype: bool """ - if self._sync_decr_pool is not None: + if self._sync_decr_pool: return self._sync_decr_pool.count_docs_in_sync_db() == 0 - else: - return True + return True def set_decryption_callback(self, cb): """ @@ -1478,6 +1479,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): return decrypter = self._sync_decr_pool + decrypter.raise_in_case_of_failed_async_calls() decrypter.decrypt_received_docs() decrypter.process_decrypted() -- cgit v1.2.3 From 512d744da3e6836020feb5a71d949c5dad23db58 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 27 Apr 2015 15:08:10 -0300 Subject: [bug] log traceback on sync failures on client Conversion of Twisted failures to string that rely on __str__ or __repr__ might not return all the information we would like to have, especially on sync failures. This commit asks for a detailed traceback of such failures and logs them both in Twisted and client logs. 
--- client/src/leap/soledad/client/api.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index b2cabe08..ce026cdf 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -39,12 +39,11 @@ try: except ImportError: import chardet +from StringIO import StringIO from u1db.remote import http_client from u1db.remote.ssl_match_hostname import match_hostname from zope.interface import implements -from twisted.python import log - from leap.common.config import get_path_prefix from leap.soledad.common import SHARED_DB_NAME @@ -663,8 +662,10 @@ class Soledad(object): # that logs the failure and does not propagate it down the callback # chain def _errback(failure): - log.err(failure) - logger.error("Soledad exception when syncing: %s" % str(failure)) + s = StringIO() + failure.printDetailedTraceback(file=s) + msg = "Soledad exception when syncing!\n" + s.getvalue() + logger.error(msg) d.addCallbacks(on_sync_done, _errback) return d -- cgit v1.2.3 From c28663169e4bbd724c5f8b55610ee6e2fd008e1b Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 27 Apr 2015 15:15:56 -0300 Subject: [refactor] rename sync callbacks on client api --- client/src/leap/soledad/client/api.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index ce026cdf..0f29503f 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -647,27 +647,27 @@ class Soledad(object): # ----------------------------------------------------------------- - def on_sync_done(local_gen): - soledad_events.signal( - soledad_events.SOLEDAD_DONE_DATA_SYNC, self.uuid) - return local_gen - sync_url = urlparse.urljoin(self._server_url, 'user-%s' % self.uuid) d = self._dbsyncer.sync( 
sync_url, creds=self._creds, autocreate=False, defer_decryption=defer_decryption) + def _sync_callback(local_gen): + soledad_events.signal( + soledad_events.SOLEDAD_DONE_DATA_SYNC, self.uuid) + return local_gen + # prevent sync failures from crashing the app by adding an errback # that logs the failure and does not propagate it down the callback # chain - def _errback(failure): + def _sync_errback(failure): s = StringIO() failure.printDetailedTraceback(file=s) msg = "Soledad exception when syncing!\n" + s.getvalue() logger.error(msg) - d.addCallbacks(on_sync_done, _errback) + d.addCallbacks(_sync_callback, _sync_errback) return d def stop_sync(self): -- cgit v1.2.3 From 0ac9b6a612f2e8ff138008bb7add93bdd6a71b9d Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 27 Apr 2015 15:27:37 -0300 Subject: [bug] remove old code for sync deferred db init The database initialization on the client sync module is deferred to another thread. As there is only one thread in the thread pool, this should not be a problem for now, as operations will actually be queued in that thread. There was some old code left from when we had to explicitly wait for the db to be initialized before using it. This commit removes that old code and introduces some documentation so we remember to deal with deferred db init if we ever change the number of threads in the thread pool. 
--- client/src/leap/soledad/client/sqlcipher.py | 42 +++++++++++------------------ 1 file changed, 16 insertions(+), 26 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 91821c25..db3cb5cb 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -500,9 +500,8 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._reactor = reactor self._reactor.callWhenRunning(self._start) - self.ready = True self._db_handle = None - self._initialize_syncer_main_db() + self._initialize_main_db() if defer_encryption: self._initialize_sync_db(opts) @@ -541,23 +540,16 @@ class SQLCipherU1DBSync(SQLCipherDatabase): return deferToThreadPool( self._reactor, self._sync_threadpool, meth, *args, **kwargs) - def _initialize_syncer_main_db(self): + def _initialize_main_db(self): - def init_db(): - - # XXX DEBUG ----------------------------------------- - # REMOVE ME when merging. - - #import thread - #print "initializing in thread", thread.get_ident() - # --------------------------------------------------- + def _init_db(): self._db_handle = initialize_sqlcipher_db( self._opts, check_same_thread=False) self._real_replica_uid = None self._ensure_schema() self.set_document_factory(soledad_doc_factory) - return self._defer_to_sync_threadpool(init_db) + return self._defer_to_sync_threadpool(_init_db) def _initialize_sync_threadpool(self): """ @@ -568,6 +560,9 @@ class SQLCipherU1DBSync(SQLCipherDatabase): calls, and then we can ditch this syncing thread and reintegrate into the main reactor. """ + # XXX if the number of threads in this thread pool is ever changed, we + # should make sure that no operations on the database should occur + # before the database has been initialized. 
+ self._sync_threadpool = ThreadPool(0, 1) def _initialize_sync_db(self, opts): @@ -616,9 +611,13 @@ class SQLCipherU1DBSync(SQLCipherDatabase): """ Synchronize documents with remote replica exposed at url. - There can be at most one instance syncing the same database replica at - the same time, so this method will block until the syncing lock can be - acquired. + This method defers a sync to a 1-threaded threadpool. The main + database initialization was deferred to that thread during this + object's initialization. As there's currently only one thread in that + threadpool, the db init was queued before this method was called, so + we don't need to actually wait for the db to be ready. If this ever + changes, we should add a thread-safe condition to ensure the db is + ready before using it. :param url: The url of the target replica to sync with. :type url: str @@ -636,17 +635,8 @@ class SQLCipherU1DBSync(SQLCipherDatabase): :return: A Deferred, that will fire with the local generation (type `int`) before the synchronisation was performed. - :rtype: deferred - """ - if not self.ready: - print "not ready yet..." - # XXX --------------------------------------------------------- - # This might happen because the database has not yet been - # initialized (it's deferred to the theadpool). - # A good strategy might involve to return a deferred that will - # callLater this same function after a timeout (deferLater) - # Might want to keep track of retries and cancel too. 
- # -------------------------------------------------------------- + :rtype: Deferred + """ kwargs = {'creds': creds, 'autocreate': autocreate, 'defer_decryption': defer_decryption} return self._defer_to_sync_threadpool(self._sync, url, **kwargs) -- cgit v1.2.3 From 68f89f1dc3de213783b32dbfa3ff6d1e294c51a8 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 27 Apr 2015 15:31:36 -0300 Subject: [doc] fix doc on why to re-raise sync exceptions --- client/src/leap/soledad/client/sync.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 1a5e2989..d3f106da 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -81,10 +81,13 @@ class SoledadSynchronizer(Synchronizer): return self._sync(autocreate=autocreate, defer_decryption=defer_decryption) except Exception: - # re-raising the exceptions to let syqlcipher.sync catch them - # (and re-create the syncer instance if needed) + # we want this exception to reach either SQLCipherU1DBSync.sync or + # the Soledad api object itself, so it is properly handled and/or + # logged... raise finally: + # ... but we also want to release the syncing lock so this + # Synchronizer may be reused later. self.release_syncing_lock() def _sync(self, autocreate=False, defer_decryption=True): -- cgit v1.2.3 From 3a3f2d8ca5b0ae2adb5007577f2d828677ff64e0 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 30 Apr 2015 11:24:21 -0300 Subject: [bug] always initialize sync db Both deferred encryption and decryption rely on a special sync db. Previous to this fix, the sync db was only initialized if a syncer was configured with deferred encryption capabilities. This was a problem when the syncer was not configured like so, but the actual sync method was initiated configured to do deferred decryption. 
This commit fixes this by always initializing the sync db, so we have the option of doing all combinations of deferred encryption and decryption. --- client/src/leap/soledad/client/sqlcipher.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index db3cb5cb..ec7946b7 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -503,8 +503,12 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._db_handle = None self._initialize_main_db() + # the sync_db is used both for deferred encryption and decryption, so + # we want to initialize it anyway to allow for all combinations of + # deferred encryption and decryption configurations. + self._initialize_sync_db(opts) + if defer_encryption: - self._initialize_sync_db(opts) # initialize syncing queue encryption pool self._sync_enc_pool = crypto.SyncEncrypterPool( -- cgit v1.2.3 From 6ab3fe57764c2e5f2a5688d377fe46a51437f0be Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 30 Apr 2015 11:47:10 -0300 Subject: [bug] fix log messages when fetching documents We always got a log message saying "canceling sync threads" in the end of the sync process, even when there was no error during the sync. This commit changes that in a way that we only have that log when the sync was actually cancelled because of an error. 
--- client/src/leap/soledad/client/target.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 986bd991..d59923b2 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -300,7 +300,7 @@ class DocumentSyncerPool(object): # we rely on DocumentSyncerThread.run() to release the lock using # self.release_syncer so we can launch a new thread. t = DocumentSyncerThread( - doc_syncer, self.release_syncer, self.cancel_threads, + doc_syncer, self.release_syncer, self.stop_threads, idx, total, last_request_lock=last_request_lock, last_callback_lock=last_callback_lock) @@ -348,17 +348,21 @@ class DocumentSyncerPool(object): self._threads.remove(syncer_thread) self._semaphore_pool.release() - def cancel_threads(self): + def stop_threads(self, fail=True): """ Stop all threads in the pool. + + :param fail: Whether we are stopping because of a failure. 
+ :type fail: bool """ # stop sync self._stop_method() stopped = [] # stop all threads - logger.warning("Soledad sync: cancelling sync threads...") with self._pool_access_lock: - self._failures = True + if fail: + self._failures = True + logger.error("sync failed: cancelling sync threads...") while self._threads: t = self._threads.pop(0) t.stop() @@ -377,7 +381,8 @@ class DocumentSyncerPool(object): self._semaphore_pool.release() except ValueError: break - logger.warning("Soledad sync: cancelled sync threads.") + if fail: + logger.error("Soledad sync: cancelled sync threads.") def cleanup(self): """ @@ -1020,7 +1025,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # bail out if any thread failed if t is None: - self.stop() + self.stop(fail=True) break t.doc_syncer.set_request_method( @@ -1220,7 +1225,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # bail out if any thread failed if t is None: - self.stop() + self.stop(fail=True) break # set the request method @@ -1308,7 +1313,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): cur_target_gen = gen_after_send cur_target_trans_id = trans_id_after_send - self.stop() + self.stop(fail=False) self._syncer_pool = None return cur_target_gen, cur_target_trans_id @@ -1324,17 +1329,20 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): with self._stop_lock: self._stopped = True - def stop(self): + def stop(self, fail=False): """ Mark current sync session as stopped. This will eventually interrupt the sync_exchange() method and return enough information to the synchronizer so the sync session can be recovered afterwards. + + :param fail: Whether we are stopping because of a failure. 
+ :type fail: bool """ self.stop_syncer() if self._syncer_pool: - self._syncer_pool.cancel_threads() + self._syncer_pool.stop_threads(fail=fail) @property def stopped(self): -- cgit v1.2.3 From b75bedb065cfbbb2993659d867ef554ff70596ae Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 4 May 2015 13:26:57 -0300 Subject: [bug] fix log messages for secrets in storage --- client/src/leap/soledad/client/secrets.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index af781a26..96f7e906 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -246,22 +246,26 @@ class SoledadSecrets(object): :return: Whether there's a storage secret for symmetric encryption. :rtype: bool """ - if self._secret_id is None or self._secret_id not in self._secrets: + logger.info("Checking if there's a secret in local storage...") + if (self._secret_id is None or self._secret_id not in self._secrets) \ + and os.path.isfile(self._secrets_path): try: self._load_secrets() # try to load from disk except IOError as e: logger.warning( 'IOError while loading secrets from disk: %s' % str(e)) - return False - return self.storage_secret is not None + + if self.storage_secret is not None: + logger.info("Found a secret in local storage.") + return True + + logger.info("Could not find a secret in local storage.") + return False def _load_secrets(self): """ Load storage secrets from local file. """ - # does the file exist in disk? 
- if not os.path.isfile(self._secrets_path): - raise IOError('File does not exist: %s' % self._secrets_path) # read storage secrets from file content = None with open(self._secrets_path, 'r') as f: -- cgit v1.2.3 From 3a7ddacd06fd57afb10cc3d7083c2aa196c9328f Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 4 May 2015 13:04:56 -0300 Subject: [feature] use async adbapi for async decryption Since we started implementing twisted api in soledad, some pieces are missing. Accessing the sqlcipher database directly with the twisted adbapi facilities is one of them. The async encryption/decryption was touching the database directly, and this was causing some difficulties like having different threads accessing the same database. This commit implements the twisted adbapi stuff for the asynchronous encryption/decryption facilities. Next steps would be use async adbapi for async encryption and use async adbapi for all sqlcipher access. --- client/src/leap/soledad/client/adbapi.py | 3 +- client/src/leap/soledad/client/crypto.py | 190 ++++++++++++++++------------ client/src/leap/soledad/client/pragmas.py | 43 +++++++ client/src/leap/soledad/client/sqlcipher.py | 130 ++++++++++--------- client/src/leap/soledad/client/target.py | 71 ++--------- 5 files changed, 232 insertions(+), 205 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py index 7ad10db5..5b882bbe 100644 --- a/client/src/leap/soledad/client/adbapi.py +++ b/client/src/leap/soledad/client/adbapi.py @@ -32,6 +32,7 @@ from zope.proxy import ProxyBase, setProxiedObject from pysqlcipher.dbapi2 import OperationalError from leap.soledad.client import sqlcipher as soledad_sqlcipher +from leap.soledad.client.pragmas import set_init_pragmas logger = logging.getLogger(name=__name__) @@ -72,7 +73,7 @@ def getConnectionPool(opts, openfun=None, driver="pysqlcipher"): :rtype: U1DBConnectionPool """ if openfun is None and driver == "pysqlcipher": - 
openfun = partial(soledad_sqlcipher.set_init_pragmas, opts=opts) + openfun = partial(set_init_pragmas, opts=opts) return U1DBConnectionPool( "%s.dbapi2" % driver, database=opts.path, check_same_thread=False, cp_openfun=openfun, diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 107bf7f1..dd40b198 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -25,11 +25,15 @@ import json import logging import multiprocessing import threading +import time from pycryptopp.cipher.aes import AES from pycryptopp.cipher.xsalsa20 import XSalsa20 from zope.proxy import sameProxiedObjects +from twisted.internet import defer +from twisted.internet.threads import deferToThread + from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type from leap.soledad.common import crypto @@ -227,7 +231,7 @@ class SoledadCrypto(object): # def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, - mac_method, secret): + mac_method, secret): """ Calculate a MAC for C{doc} using C{ciphertext}. @@ -378,7 +382,7 @@ def decrypt_doc(crypto, doc): def _verify_doc_mac(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, - enc_iv, mac_method, secret, doc_mac): + enc_iv, mac_method, secret, doc_mac): """ Verify that C{doc_mac} is a correct MAC for the given document. @@ -523,7 +527,7 @@ class SyncEncryptDecryptPool(object): """ WORKERS = multiprocessing.cpu_count() - def __init__(self, crypto, sync_db, write_lock): + def __init__(self, crypto, sync_db): """ Initialize the pool of encryption-workers. @@ -540,7 +544,6 @@ class SyncEncryptDecryptPool(object): self._pool = multiprocessing.Pool(self.WORKERS) self._crypto = crypto self._sync_db = sync_db - self._sync_db_write_lock = write_lock def close(self): """ @@ -592,7 +595,7 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): # TODO implement throttling to reduce cpu usage?? 
WORKERS = multiprocessing.cpu_count() TABLE_NAME = "docs_tosync" - FIELD_NAMES = "doc_id, rev, content" + FIELD_NAMES = "doc_id PRIMARY KEY, rev, content" def encrypt_doc(self, doc, workers=True): """ @@ -633,8 +636,9 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): :type result: tuple(str, str, str) """ doc_id, doc_rev, content = result - self.insert_encrypted_local_doc(doc_id, doc_rev, content) + return self.insert_encrypted_local_doc(doc_id, doc_rev, content) + @defer.inlineCallbacks def insert_encrypted_local_doc(self, doc_id, doc_rev, content): """ Insert the contents of the encrypted doc into the local sync @@ -652,13 +656,9 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): # FIXME --- callback should complete immediately since otherwise the # thread which handles the results will get blocked # Right now we're blocking the dispatcher with the writes to sqlite. - sql_del = "DELETE FROM '%s' WHERE doc_id=?" % (self.TABLE_NAME,) - sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?)" % (self.TABLE_NAME,) - - con = self._sync_db - with self._sync_db_write_lock: - con.execute(sql_del, (doc_id, )) - con.execute(sql_ins, (doc_id, doc_rev, content)) + query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?)" \ + % (self.TABLE_NAME,) + yield self._sync_db.runQuery(query, (doc_id, doc_rev, content)) def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret): @@ -704,9 +704,12 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ # TODO implement throttling to reduce cpu usage?? TABLE_NAME = "docs_received" - FIELD_NAMES = "doc_id, rev, content, gen, trans_id, encrypted" + FIELD_NAMES = "doc_id PRIMARY_KEY, rev, content, gen, trans_id, encrypted" - write_encrypted_lock = threading.Lock() + """ + Period of recurrence of the periodic decrypting task, in seconds. 
+ """ + DECRYPT_LOOP_PERIOD = 0.5 def __init__(self, *args, **kwargs): """ @@ -723,19 +726,16 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type last_known_generation: int """ self._insert_doc_cb = kwargs.pop("insert_doc_cb") + self.source_replica_uid = kwargs.pop("source_replica_uid") SyncEncryptDecryptPool.__init__(self, *args, **kwargs) - self.source_replica_uid = None self._async_results = [] - def set_source_replica_uid(self, source_replica_uid): - """ - Set the source replica uid for this decrypter pool instance. - - :param source_replica_uid: The uid of the source replica. - :type source_replica_uid: str - """ - self.source_replica_uid = source_replica_uid + self._stopped = threading.Event() + self._deferred_loop = deferToThread(self._decrypt_and_process_docs) + self._deferred_loop.addCallback( + lambda _: logger.debug("Finished decryptor thread.")) + @defer.inlineCallbacks def insert_encrypted_received_doc(self, doc_id, doc_rev, content, gen, trans_id): """ @@ -754,17 +754,13 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type trans_id: str """ docstr = json.dumps(content) - sql_del = "DELETE FROM '%s' WHERE doc_id=?" % (self.TABLE_NAME,) - sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( + query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( self.TABLE_NAME,) + yield self._sync_db.runQuery( + query, + (doc_id, doc_rev, docstr, gen, trans_id, 1)) - con = self._sync_db - with self._sync_db_write_lock: - con.execute(sql_del, (doc_id, )) - con.execute( - sql_ins, - (doc_id, doc_rev, docstr, gen, trans_id, 1)) - + @defer.inlineCallbacks def insert_received_doc(self, doc_id, doc_rev, content, gen, trans_id): """ Insert a document that is not symmetrically encrypted. @@ -784,17 +780,13 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ if not isinstance(content, str): content = json.dumps(content) - sql_del = "DELETE FROM '%s' WHERE doc_id=?" 
% ( - self.TABLE_NAME,) - sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( + query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( self.TABLE_NAME,) - con = self._sync_db - with self._sync_db_write_lock: - con.execute(sql_del, (doc_id,)) - con.execute( - sql_ins, - (doc_id, doc_rev, content, gen, trans_id, 0)) + yield self._sync_db.runQuery( + query, + (doc_id, doc_rev, content, gen, trans_id, 0)) + @defer.inlineCallbacks def delete_received_doc(self, doc_id, doc_rev): """ Delete a received doc after it was inserted into the local db. @@ -806,12 +798,10 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ sql_del = "DELETE FROM '%s' WHERE doc_id=? AND rev=?" % ( self.TABLE_NAME,) - con = self._sync_db - with self._sync_db_write_lock: - con.execute(sql_del, (doc_id, doc_rev)) + yield self._sync_db.runQuery(sql_del, (doc_id, doc_rev)) - def decrypt_doc(self, doc_id, rev, content, gen, trans_id, - source_replica_uid, workers=True): + def _decrypt_doc(self, doc_id, rev, content, gen, trans_id, + source_replica_uid, workers=True): """ Symmetrically decrypt a document. @@ -860,16 +850,16 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): # save the async result object so we can inspect it for failures self._async_results.append(self._pool.apply_async( decrypt_doc_task, args, - callback=self.decrypt_doc_cb)) + callback=self._decrypt_doc_cb)) else: # decrypt inline res = decrypt_doc_task(*args) - self.decrypt_doc_cb(res) + self._decrypt_doc_cb(res) - def decrypt_doc_cb(self, result): + def _decrypt_doc_cb(self, result): """ Store the decryption result in the sync db from where it will later be - picked by process_decrypted. + picked by _process_decrypted. :param result: A tuple containing the doc id, revision and encrypted content. 
@@ -878,7 +868,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): doc_id, rev, content, gen, trans_id = result logger.debug("Sync decrypter pool: decrypted doc %s: %s %s %s" % (doc_id, rev, gen, trans_id)) - self.insert_received_doc(doc_id, rev, content, gen, trans_id) + return self.insert_received_doc(doc_id, rev, content, gen, trans_id) def get_docs_by_generation(self, encrypted=None): """ @@ -899,6 +889,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): sql += " ORDER BY gen ASC" return self._fetchall(sql) + @defer.inlineCallbacks def get_insertable_docs_by_gen(self): """ Return a list of non-encrypted documents ready to be inserted. @@ -910,8 +901,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): # docs, then some document might have been decrypted between these two # calls, and if it is just the right doc then it might not be caught # by the next loop. - all_docs = self.get_docs_by_generation() - decrypted_docs = self.get_docs_by_generation(encrypted=False) + all_docs = yield self.get_docs_by_generation() + decrypted_docs = yield self.get_docs_by_generation(encrypted=False) insertable = [] for doc_id, rev, _, gen, trans_id, encrypted in all_docs: for next_doc_id, _, next_content, _, _, _ in decrypted_docs: @@ -920,9 +911,10 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): insertable.append((doc_id, rev, content, gen, trans_id)) else: break - return insertable + defer.returnValue(insertable) - def count_docs_in_sync_db(self, encrypted=None): + @defer.inlineCallbacks + def _count_docs_in_sync_db(self, encrypted=None): """ Count how many documents we have in the table for received docs. @@ -933,31 +925,30 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :return: The count of documents. 
:rtype: int """ - if self._sync_db is None: - logger.warning("cannot return count with null sync_db") - return - sql = "SELECT COUNT(*) FROM %s" % (self.TABLE_NAME,) + query = "SELECT COUNT(*) FROM %s" % (self.TABLE_NAME,) if encrypted is not None: - sql += " WHERE encrypted = %d" % int(encrypted) - res = self._fetchall(sql) + query += " WHERE encrypted = %d" % int(encrypted) + res = yield self._sync_db.runQuery(query) if res: val = res.pop() - return val[0] + defer.returnValue(val[0]) else: - return 0 + defer.returnValue(0) - def decrypt_received_docs(self): + @defer.inlineCallbacks + def _decrypt_received_docs(self): """ Get all the encrypted documents from the sync database and dispatch a decrypt worker to decrypt each one of them. """ - docs_by_generation = self.get_docs_by_generation(encrypted=True) - for doc_id, rev, content, gen, trans_id, _ \ - in filter(None, docs_by_generation): - self.decrypt_doc( + self._raise_in_case_of_failed_async_calls() + docs_by_generation = yield self.get_docs_by_generation(encrypted=True) + for doc_id, rev, content, gen, trans_id, _ in docs_by_generation: + self._decrypt_doc( doc_id, rev, content, gen, trans_id, self.source_replica_uid) - def process_decrypted(self): + @defer.inlineCallbacks + def _process_decrypted(self): """ Process the already decrypted documents, and insert as many documents as can be taken from the expected order without finding a gap. @@ -968,12 +959,11 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): # Acquire the lock to avoid processing while we're still # getting data from the syncing stream, to avoid InvalidGeneration # problems. 
- with self.write_encrypted_lock: - for doc_fields in self.get_insertable_docs_by_gen(): - self.insert_decrypted_local_doc(*doc_fields) - remaining = self.count_docs_in_sync_db() - return remaining == 0 + insertable = yield self.get_insertable_docs_by_gen() + for doc_fields in insertable: + yield self.insert_decrypted_local_doc(*doc_fields) + @defer.inlineCallbacks def insert_decrypted_local_doc(self, doc_id, doc_rev, content, gen, trans_id): """ @@ -1007,22 +997,22 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): insert_fun(doc, gen, trans_id) # If no errors found, remove it from the received database. - self.delete_received_doc(doc_id, doc_rev) + yield self.delete_received_doc(doc_id, doc_rev) + @defer.inlineCallbacks def empty(self): """ Empty the received docs table of the sync database. """ sql = "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,) - self._sync_db.execute(sql) + yield self._sync_db.runQuery(sql) + @defer.inlineCallbacks def _fetchall(self, *args, **kwargs): - with self._sync_db: - c = self._sync_db.cursor() - c.execute(*args, **kwargs) - return c.fetchall() + results = yield self._sync_db.runQuery(*args, **kwargs) + defer.returnValue(results) - def raise_in_case_of_failed_async_calls(self): + def _raise_in_case_of_failed_async_calls(self): """ Re-raise any exception raised by an async call. @@ -1033,3 +1023,39 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): if not res.successful(): # re-raise the exception raised by the remote call res.get() + + def _stop_decr_loop(self): + """ + """ + self._stopped.set() + + def close(self): + """ + """ + self._stop_decr_loop() + SyncEncryptDecryptPool.close(self) + + def _decrypt_and_process_docs(self): + """ + Decrypt the documents received from remote replica and insert them + into the local one. + + Called periodically from LoopingCall self._sync_loop. 
+ """ + while not self._stopped.is_set(): + if sameProxiedObjects( + self._insert_doc_cb.get(self.source_replica_uid), + None): + continue + self._decrypt_received_docs() + self._process_decrypted() + time.sleep(self.DECRYPT_LOOP_PERIOD) + + def wait(self): + while not self.clear_to_sync(): + time.sleep(self.DECRYPT_LOOP_PERIOD) + + @defer.inlineCallbacks + def clear_to_sync(self): + count = yield self._count_docs_in_sync_db() + defer.returnValue(count == 0) diff --git a/client/src/leap/soledad/client/pragmas.py b/client/src/leap/soledad/client/pragmas.py index 2e9c53a3..55397d10 100644 --- a/client/src/leap/soledad/client/pragmas.py +++ b/client/src/leap/soledad/client/pragmas.py @@ -19,10 +19,53 @@ Different pragmas used in the initialization of the SQLCipher database. """ import logging import string +import threading +import os + +from leap.soledad.common import soledad_assert + logger = logging.getLogger(__name__) +_db_init_lock = threading.Lock() + + +def set_init_pragmas(conn, opts=None, extra_queries=None): + """ + Set the initialization pragmas. + + This includes the crypto pragmas, and any other options that must + be passed early to sqlcipher db. 
+ """ + soledad_assert(opts is not None) + extra_queries = [] if extra_queries is None else extra_queries + with _db_init_lock: + # only one execution path should initialize the db + _set_init_pragmas(conn, opts, extra_queries) + + +def _set_init_pragmas(conn, opts, extra_queries): + + sync_off = os.environ.get('LEAP_SQLITE_NOSYNC') + memstore = os.environ.get('LEAP_SQLITE_MEMSTORE') + nowal = os.environ.get('LEAP_SQLITE_NOWAL') + + set_crypto_pragmas(conn, opts) + + if not nowal: + set_write_ahead_logging(conn) + if sync_off: + set_synchronous_off(conn) + else: + set_synchronous_normal(conn) + if memstore: + set_mem_temp_store(conn) + + for query in extra_queries: + conn.cursor().execute(query) + + def set_crypto_pragmas(db_handle, sqlcipher_opts): """ Set cryptographic params (key, cipher, KDF number of iterations and diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index ec7946b7..4f7ecd1b 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -55,6 +55,7 @@ from hashlib import sha256 from contextlib import contextmanager from collections import defaultdict from httplib import CannotSendRequest +from functools import partial from pysqlcipher import dbapi2 as sqlcipher_dbapi2 @@ -63,6 +64,7 @@ from twisted.internet.task import LoopingCall from twisted.internet.threads import deferToThreadPool from twisted.python.threadpool import ThreadPool from twisted.python import log +from twisted.enterprise import adbapi from leap.soledad.client import crypto from leap.soledad.client.target import SoledadSyncTarget @@ -102,46 +104,14 @@ def initialize_sqlcipher_db(opts, on_init=None, check_same_thread=True): conn = sqlcipher_dbapi2.connect( opts.path, check_same_thread=check_same_thread) - set_init_pragmas(conn, opts, extra_queries=on_init) + pragmas.set_init_pragmas(conn, opts, extra_queries=on_init) return conn -_db_init_lock = threading.Lock() - - -def 
set_init_pragmas(conn, opts=None, extra_queries=None): - """ - Set the initialization pragmas. - - This includes the crypto pragmas, and any other options that must - be passed early to sqlcipher db. - """ - soledad_assert(opts is not None) - extra_queries = [] if extra_queries is None else extra_queries - with _db_init_lock: - # only one execution path should initialize the db - _set_init_pragmas(conn, opts, extra_queries) - - -def _set_init_pragmas(conn, opts, extra_queries): - - sync_off = os.environ.get('LEAP_SQLITE_NOSYNC') - memstore = os.environ.get('LEAP_SQLITE_MEMSTORE') - nowal = os.environ.get('LEAP_SQLITE_NOWAL') - - pragmas.set_crypto_pragmas(conn, opts) - - if not nowal: - pragmas.set_write_ahead_logging(conn) - if sync_off: - pragmas.set_synchronous_off(conn) - else: - pragmas.set_synchronous_normal(conn) - if memstore: - pragmas.set_mem_temp_store(conn) - - for query in extra_queries: - conn.cursor().execute(query) +def initialize_sqlcipher_adbapi_db(opts, extra_queries=None): + from leap.soledad.client import sqlcipher_adbapi + return sqlcipher_adbapi.getConnectionPool( + opts, extra_queries=extra_queries) class SQLCipherOptions(object): @@ -151,22 +121,32 @@ class SQLCipherOptions(object): @classmethod def copy(cls, source, path=None, key=None, create=None, - is_raw_key=None, cipher=None, kdf_iter=None, cipher_page_size=None, - defer_encryption=None, sync_db_key=None): + is_raw_key=None, cipher=None, kdf_iter=None, + cipher_page_size=None, defer_encryption=None, sync_db_key=None): """ Return a copy of C{source} with parameters different than None replaced by new values. 
""" - return SQLCipherOptions( - path if path else source.path, - key if key else source.key, - create=create if create else source.create, - is_raw_key=is_raw_key if is_raw_key else source.is_raw_key, - cipher=cipher if cipher else source.cipher, - kdf_iter=kdf_iter if kdf_iter else source.kdf_iter, - cipher_page_size=cipher_page_size if cipher_page_size else source.cipher_page_size, - defer_encryption=defer_encryption if defer_encryption else source.defer_encryption, - sync_db_key=sync_db_key if sync_db_key else source.sync_db_key) + local_vars = locals() + args = [] + kwargs = {} + + for name in ["path", "key"]: + val = local_vars[name] + if val is not None: + args.append(val) + else: + args.append(getattr(source, name)) + + for name in ["create", "is_raw_key", "cipher", "kdf_iter", + "cipher_page_size", "defer_encryption", "sync_db_key"]: + val = local_vars[name] + if val is not None: + kwargs[name] = val + else: + kwargs[name] = getattr(source, name) + + return SQLCipherOptions(*args, **kwargs) def __init__(self, path, key, create=True, is_raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, @@ -478,7 +458,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._sync_db_key = opts.sync_db_key self._sync_db = None - self._sync_db_write_lock = None self._sync_enc_pool = None self.sync_queue = None @@ -490,7 +469,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # self._syncers = {'': ('', syncer), ...} self._syncers = {} - self._sync_db_write_lock = threading.Lock() self.sync_queue = multiprocessing.Queue() self.running = False @@ -512,7 +490,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # initialize syncing queue encryption pool self._sync_enc_pool = crypto.SyncEncrypterPool( - self._crypto, self._sync_db, self._sync_db_write_lock) + self._crypto, self._sync_db) # ----------------------------------------------------------------- # From the documentation: If f returns a deferred, rescheduling @@ -588,11 +566,8 @@ class 
SQLCipherU1DBSync(SQLCipherDatabase): # somewhere else sync_opts = SQLCipherOptions.copy( opts, path=sync_db_path, create=True) - self._sync_db = initialize_sqlcipher_db( - sync_opts, on_init=self._sync_db_extra_init, - check_same_thread=False) - pragmas.set_crypto_pragmas(self._sync_db, opts) - # --------------------------------------------------------- + self._sync_db = getConnectionPool( + sync_opts, extra_queries=self._sync_db_extra_init) @property def _sync_db_extra_init(self): @@ -727,7 +702,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): h = sha256(json.dumps([url, creds])).hexdigest() cur_h, syncer = self._syncers.get(url, (None, None)) if syncer is None or h != cur_h: - wlock = self._sync_db_write_lock syncer = SoledadSynchronizer( self, SoledadSyncTarget(url, @@ -735,8 +709,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._replica_uid, creds=creds, crypto=self._crypto, - sync_db=self._sync_db, - sync_db_write_lock=wlock)) + sync_db=self._sync_db)) self._syncers[url] = (h, syncer) # in order to reuse the same synchronizer multiple times we have to # reset its state (i.e. 
the number of documents received from target @@ -907,3 +880,40 @@ def soledad_doc_factory(doc_id=None, rev=None, json='{}', has_conflicts=False, has_conflicts=has_conflicts, syncable=syncable) sqlite_backend.SQLiteDatabase.register_implementation(SQLCipherDatabase) + + +# +# twisted.enterprise.adbapi SQLCipher implementation +# + +SQLCIPHER_CONNECTION_TIMEOUT = 10 + + +def getConnectionPool(opts, extra_queries=None): + openfun = partial( + pragmas.set_init_pragmas, + opts=opts, + extra_queries=extra_queries) + return SQLCipherConnectionPool( + database=opts.path, + check_same_thread=False, + cp_openfun=openfun, + timeout=SQLCIPHER_CONNECTION_TIMEOUT) + + +class SQLCipherConnection(adbapi.Connection): + pass + + +class SQLCipherTransaction(adbapi.Transaction): + pass + + +class SQLCipherConnectionPool(adbapi.ConnectionPool): + + connectionFactory = SQLCipherConnection + transactionFactory = SQLCipherTransaction + + def __init__(self, *args, **kwargs): + adbapi.ConnectionPool.__init__( + self, "pysqlcipher.dbapi2", *args, **kwargs) diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index d59923b2..06cef1ee 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -36,9 +36,8 @@ from u1db.remote import utils, http_errors from u1db.remote.http_target import HTTPSyncTarget from u1db.remote.http_client import _encode_query_parameter, HTTPClientBase from zope.proxy import ProxyBase -from zope.proxy import sameProxiedObjects, setProxiedObject +from zope.proxy import setProxiedObject -from twisted.internet.task import LoopingCall from leap.soledad.common.document import SoledadDocument from leap.soledad.client.auth import TokenBasedAuth @@ -755,17 +754,12 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # passed to sync_exchange _insert_doc_cb = defaultdict(lambda: ProxyBase(None)) - """ - Period of recurrence of the periodic decrypting task, in seconds. 
- """ - DECRYPT_LOOP_PERIOD = 0.5 - # # Modified HTTPSyncTarget methods. # def __init__(self, url, source_replica_uid=None, creds=None, crypto=None, - sync_db=None, sync_db_write_lock=None): + sync_db=None): """ Initialize the SoledadSyncTarget. @@ -786,9 +780,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): instead of retreiving it from the dedicated database. :type sync_db: Sqlite handler - :param sync_db_write_lock: a write lock for controlling concurrent - access to the sync_db - :type sync_db_write_lock: threading.Lock """ HTTPSyncTarget.__init__(self, url, creds) self._raw_url = url @@ -802,14 +793,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._syncer_pool = None # deferred decryption attributes - self._sync_db = None - self._sync_db_write_lock = None + self._sync_db = sync_db self._decryption_callback = None self._sync_decr_pool = None - self._sync_loop = None - if sync_db and sync_db_write_lock is not None: - self._sync_db = sync_db - self._sync_db_write_lock = sync_db_write_lock def _setup_sync_decr_pool(self): """ @@ -818,11 +804,10 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): if self._sync_decr_pool is None: # initialize syncing queue decryption pool self._sync_decr_pool = SyncDecrypterPool( - self._crypto, self._sync_db, - self._sync_db_write_lock, - insert_doc_cb=self._insert_doc_cb) - self._sync_decr_pool.set_source_replica_uid( - self.source_replica_uid) + self._crypto, + self._sync_db, + insert_doc_cb=self._insert_doc_cb, + source_replica_uid=self.source_replica_uid) def _teardown_sync_decr_pool(self): """ @@ -832,23 +817,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._sync_decr_pool.close() self._sync_decr_pool = None - def _setup_sync_loop(self): - """ - Set up the sync loop for deferred decryption. 
- """ - if self._sync_loop is None: - self._sync_loop = LoopingCall( - self._decrypt_syncing_received_docs) - self._sync_loop.start(self.DECRYPT_LOOP_PERIOD) - - def _teardown_sync_loop(self): - """ - Tear down the sync loop. - """ - if self._sync_loop is not None: - self._sync_loop.stop() - self._sync_loop = None - def _get_replica_uid(self, url): """ Return replica uid from the url, or None. @@ -1138,7 +1106,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): if defer_decryption and self._sync_db is not None: self._sync_exchange_lock.acquire() self._setup_sync_decr_pool() - self._setup_sync_loop() self._defer_decryption = True else: # fall back @@ -1301,9 +1268,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # decrypt docs in case of deferred decryption if defer_decryption: - while not self.clear_to_sync(): - sleep(self.DECRYPT_LOOP_PERIOD) - self._teardown_sync_loop() + self._sync_decr_pool.wait() self._teardown_sync_decr_pool() self._sync_exchange_lock.release() @@ -1324,7 +1289,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): with self._stop_lock: self._stopped = False - def stop_syncer(self): with self._stop_lock: self._stopped = True @@ -1449,7 +1413,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :rtype: bool """ if self._sync_decr_pool: - return self._sync_decr_pool.count_docs_in_sync_db() == 0 + return self._sync_decr_pool.clear_to_sync() return True def set_decryption_callback(self, cb): @@ -1474,23 +1438,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): """ return self._sync_db is not None - def _decrypt_syncing_received_docs(self): - """ - Decrypt the documents received from remote replica and insert them - into the local one. - - Called periodically from LoopingCall self._sync_loop. 
- """ - if sameProxiedObjects( - self._insert_doc_cb.get(self.source_replica_uid), - None): - return - - decrypter = self._sync_decr_pool - decrypter.raise_in_case_of_failed_async_calls() - decrypter.decrypt_received_docs() - decrypter.process_decrypted() - def _sign_request(self, method, url_query, params): """ Return an authorization header to be included in the HTTP request. -- cgit v1.2.3 From eae4468d99029006cc36a021e82350a0f62f7006 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 7 May 2015 14:49:40 -0300 Subject: [bug] fix order of insertion of decrypted docs This commit actually does some different things: * When doing asynchronous decryption of incoming documents in soledad client during a sync, there was the possibility that a document corresponding to a newer generation would be decrypted and inserted in the local database before a document corresponding to an older generation. When this happened, the metadata about the target database (i.e. its locally-known generation) would be first updated to the newer generation, and then an attempt to insert a document corresponding to an older generation would cause the infamous InvalidGeneration error. To fix that we use the sync-index information that is contained in the sync stream to correctly find the insertable docs to be inserted in the local database, thus avoiding the problem described above. * Refactor the sync encrypt/decrypt pool to its own file. * Fix the use of twisted adbapi with multiprocessing. Closes: #6757. 
--- client/src/leap/soledad/client/crypto.py | 552 ---------------------- client/src/leap/soledad/client/encdecpool.py | 673 +++++++++++++++++++++++++++ client/src/leap/soledad/client/sqlcipher.py | 8 +- client/src/leap/soledad/client/target.py | 64 +-- 4 files changed, 700 insertions(+), 597 deletions(-) create mode 100644 client/src/leap/soledad/client/encdecpool.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index dd40b198..bdbaa8e0 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -23,21 +23,13 @@ import hmac import hashlib import json import logging -import multiprocessing -import threading -import time from pycryptopp.cipher.aes import AES from pycryptopp.cipher.xsalsa20 import XSalsa20 -from zope.proxy import sameProxiedObjects - -from twisted.internet import defer -from twisted.internet.threads import deferToThread from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type from leap.soledad.common import crypto -from leap.soledad.common.document import SoledadDocument logger = logging.getLogger(__name__) @@ -515,547 +507,3 @@ def is_symmetrically_encrypted(doc): == crypto.EncryptionSchemes.SYMKEY: return True return False - - -# -# Encrypt/decrypt pools of workers -# - -class SyncEncryptDecryptPool(object): - """ - Base class for encrypter/decrypter pools. - """ - WORKERS = multiprocessing.cpu_count() - - def __init__(self, crypto, sync_db): - """ - Initialize the pool of encryption-workers. - - :param crypto: A SoledadCryto instance to perform the encryption. 
- :type crypto: leap.soledad.crypto.SoledadCrypto - - :param sync_db: A database connection handle - :type sync_db: pysqlcipher.dbapi2.Connection - - :param write_lock: a write lock for controlling concurrent access - to the sync_db - :type write_lock: threading.Lock - """ - self._pool = multiprocessing.Pool(self.WORKERS) - self._crypto = crypto - self._sync_db = sync_db - - def close(self): - """ - Cleanly close the pool of workers. - """ - logger.debug("Closing %s" % (self.__class__.__name__,)) - self._pool.close() - try: - self._pool.join() - except Exception: - pass - - def terminate(self): - """ - Terminate the pool of workers. - """ - logger.debug("Terminating %s" % (self.__class__.__name__,)) - self._pool.terminate() - - -def encrypt_doc_task(doc_id, doc_rev, content, key, secret): - """ - Encrypt the content of the given document. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The serialized content of the document. - :type content: str - :param key: The encryption key. - :type key: str - :param secret: The Soledad storage secret (used for MAC auth). - :type secret: str - - :return: A tuple containing the doc id, revision and encrypted content. - :rtype: tuple(str, str, str) - """ - encrypted_content = encrypt_docstr( - content, doc_id, doc_rev, key, secret) - return doc_id, doc_rev, encrypted_content - - -class SyncEncrypterPool(SyncEncryptDecryptPool): - """ - Pool of workers that spawn subprocesses to execute the symmetric encryption - of documents to be synced. - """ - # TODO implement throttling to reduce cpu usage?? - WORKERS = multiprocessing.cpu_count() - TABLE_NAME = "docs_tosync" - FIELD_NAMES = "doc_id PRIMARY KEY, rev, content" - - def encrypt_doc(self, doc, workers=True): - """ - Symmetrically encrypt a document. - - :param doc: The document with contents to be encrypted. 
- :type doc: SoledadDocument - - :param workers: Whether to defer the decryption to the multiprocess - pool of workers. Useful for debugging purposes. - :type workers: bool - """ - soledad_assert(self._crypto is not None, "need a crypto object") - docstr = doc.get_json() - key = self._crypto.doc_passphrase(doc.doc_id) - secret = self._crypto.secret - args = doc.doc_id, doc.rev, docstr, key, secret - - try: - if workers: - res = self._pool.apply_async( - encrypt_doc_task, args, - callback=self.encrypt_doc_cb) - else: - # encrypt inline - res = encrypt_doc_task(*args) - self.encrypt_doc_cb(res) - - except Exception as exc: - logger.exception(exc) - - def encrypt_doc_cb(self, result): - """ - Insert results of encryption routine into the local sync database. - - :param result: A tuple containing the doc id, revision and encrypted - content. - :type result: tuple(str, str, str) - """ - doc_id, doc_rev, content = result - return self.insert_encrypted_local_doc(doc_id, doc_rev, content) - - @defer.inlineCallbacks - def insert_encrypted_local_doc(self, doc_id, doc_rev, content): - """ - Insert the contents of the encrypted doc into the local sync - database. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The serialized content of the document. - :type content: str - :param content: The encrypted document. - :type content: str - """ - # FIXME --- callback should complete immediately since otherwise the - # thread which handles the results will get blocked - # Right now we're blocking the dispatcher with the writes to sqlite. - query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?)" \ - % (self.TABLE_NAME,) - yield self._sync_db.runQuery(query, (doc_id, doc_rev, content)) - - -def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret): - """ - Decrypt the content of the given document. - - :param doc_id: The document id. 
- :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The encrypted content of the document. - :type content: str - :param gen: The generation corresponding to the modification of that - document. - :type gen: int - :param trans_id: The transaction id corresponding to the modification of - that document. - :type trans_id: str - :param key: The encryption key. - :type key: str - :param secret: The Soledad storage secret (used for MAC auth). - :type secret: str - - :return: A tuple containing the doc id, revision and encrypted content. - :rtype: tuple(str, str, str) - """ - decrypted_content = decrypt_doc_dict( - content, doc_id, doc_rev, key, secret) - return doc_id, doc_rev, decrypted_content, gen, trans_id - - -class SyncDecrypterPool(SyncEncryptDecryptPool): - """ - Pool of workers that spawn subprocesses to execute the symmetric decryption - of documents that were received. - - The decryption of the received documents is done in two steps: - - 1. All the encrypted docs are collected, together with their generation - and transaction-id - 2. The docs are enqueued for decryption. When completed, they are - inserted following the generation order. - """ - # TODO implement throttling to reduce cpu usage?? - TABLE_NAME = "docs_received" - FIELD_NAMES = "doc_id PRIMARY_KEY, rev, content, gen, trans_id, encrypted" - - """ - Period of recurrence of the periodic decrypting task, in seconds. - """ - DECRYPT_LOOP_PERIOD = 0.5 - - def __init__(self, *args, **kwargs): - """ - Initialize the decrypter pool, and setup a dict for putting the - results of the decrypted docs until they are picked by the insert - routine that gets them in order. - - :param insert_doc_cb: A callback for inserting received documents from - target. If not overriden, this will call u1db - insert_doc_from_target in synchronizer, which - implements the TAKE OTHER semantics. 
- :type insert_doc_cb: function - :param last_known_generation: Target's last known generation. - :type last_known_generation: int - """ - self._insert_doc_cb = kwargs.pop("insert_doc_cb") - self.source_replica_uid = kwargs.pop("source_replica_uid") - SyncEncryptDecryptPool.__init__(self, *args, **kwargs) - self._async_results = [] - - self._stopped = threading.Event() - self._deferred_loop = deferToThread(self._decrypt_and_process_docs) - self._deferred_loop.addCallback( - lambda _: logger.debug("Finished decryptor thread.")) - - @defer.inlineCallbacks - def insert_encrypted_received_doc(self, doc_id, doc_rev, content, - gen, trans_id): - """ - Insert a received message with encrypted content, to be decrypted later - on. - - :param doc_id: The Document ID. - :type doc_id: str - :param doc_rev: The Document Revision - :param doc_rev: str - :param content: the Content of the document - :type content: str - :param gen: the Document Generation - :type gen: int - :param trans_id: Transaction ID - :type trans_id: str - """ - docstr = json.dumps(content) - query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( - self.TABLE_NAME,) - yield self._sync_db.runQuery( - query, - (doc_id, doc_rev, docstr, gen, trans_id, 1)) - - @defer.inlineCallbacks - def insert_received_doc(self, doc_id, doc_rev, content, gen, trans_id): - """ - Insert a document that is not symmetrically encrypted. - We store it in the staging area (the decrypted_docs dictionary) to be - picked up in order as the preceding documents are decrypted. - - :param doc_id: The Document ID. 
- :type doc_id: str - :param doc_rev: The Document Revision - :param doc_rev: str - :param content: the Content of the document - :type content: str - :param gen: the Document Generation - :type gen: int - :param trans_id: Transaction ID - :type trans_id: str - """ - if not isinstance(content, str): - content = json.dumps(content) - query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?)" % ( - self.TABLE_NAME,) - yield self._sync_db.runQuery( - query, - (doc_id, doc_rev, content, gen, trans_id, 0)) - - @defer.inlineCallbacks - def delete_received_doc(self, doc_id, doc_rev): - """ - Delete a received doc after it was inserted into the local db. - - :param doc_id: Document ID. - :type doc_id: str - :param doc_rev: Document revision. - :type doc_rev: str - """ - sql_del = "DELETE FROM '%s' WHERE doc_id=? AND rev=?" % ( - self.TABLE_NAME,) - yield self._sync_db.runQuery(sql_del, (doc_id, doc_rev)) - - def _decrypt_doc(self, doc_id, rev, content, gen, trans_id, - source_replica_uid, workers=True): - """ - Symmetrically decrypt a document. - - :param doc_id: The ID for the document with contents to be encrypted. - :type doc: str - :param rev: The revision of the document. - :type rev: str - :param content: The serialized content of the document. - :type content: str - :param gen: The generation corresponding to the modification of that - document. - :type gen: int - :param trans_id: The transaction id corresponding to the modification - of that document. - :type trans_id: str - :param source_replica_uid: - :type source_replica_uid: str - - :param workers: Whether to defer the decryption to the multiprocess - pool of workers. Useful for debugging purposes. - :type workers: bool - """ - self.source_replica_uid = source_replica_uid - - # insert_doc_cb is a proxy object that gets updated with the right - # insert function only when the sync_target invokes the sync_exchange - # method. so, if we don't still have a non-empty callback, we refuse - # to proceed. 
- if sameProxiedObjects(self._insert_doc_cb.get(source_replica_uid), - None): - logger.debug("Sync decrypter pool: no insert_doc_cb() yet.") - return - - soledad_assert(self._crypto is not None, "need a crypto object") - - if len(content) == 0: - # not encrypted payload - return - - content = json.loads(content) - key = self._crypto.doc_passphrase(doc_id) - secret = self._crypto.secret - args = doc_id, rev, content, gen, trans_id, key, secret - - if workers: - # save the async result object so we can inspect it for failures - self._async_results.append(self._pool.apply_async( - decrypt_doc_task, args, - callback=self._decrypt_doc_cb)) - else: - # decrypt inline - res = decrypt_doc_task(*args) - self._decrypt_doc_cb(res) - - def _decrypt_doc_cb(self, result): - """ - Store the decryption result in the sync db from where it will later be - picked by _process_decrypted. - - :param result: A tuple containing the doc id, revision and encrypted - content. - :type result: tuple(str, str, str) - """ - doc_id, rev, content, gen, trans_id = result - logger.debug("Sync decrypter pool: decrypted doc %s: %s %s %s" - % (doc_id, rev, gen, trans_id)) - return self.insert_received_doc(doc_id, rev, content, gen, trans_id) - - def get_docs_by_generation(self, encrypted=None): - """ - Get all documents in the received table from the sync db, - ordered by generation. - - :param encrypted: If not None, only return documents with encrypted - field equal to given parameter. - :type encrypted: bool or None - - :return: list of doc_id, rev, generation, gen, trans_id - :rtype: list - """ - sql = "SELECT doc_id, rev, content, gen, trans_id, encrypted FROM %s" \ - % self.TABLE_NAME - if encrypted is not None: - sql += " WHERE encrypted = %d" % int(encrypted) - sql += " ORDER BY gen ASC" - return self._fetchall(sql) - - @defer.inlineCallbacks - def get_insertable_docs_by_gen(self): - """ - Return a list of non-encrypted documents ready to be inserted. 
- """ - # here, we compare the list of all available docs with the list of - # decrypted docs and find the longest common prefix between these two - # lists. Note that the order of lists fetch matters: if instead we - # first fetch the list of decrypted docs and then the list of all - # docs, then some document might have been decrypted between these two - # calls, and if it is just the right doc then it might not be caught - # by the next loop. - all_docs = yield self.get_docs_by_generation() - decrypted_docs = yield self.get_docs_by_generation(encrypted=False) - insertable = [] - for doc_id, rev, _, gen, trans_id, encrypted in all_docs: - for next_doc_id, _, next_content, _, _, _ in decrypted_docs: - if doc_id == next_doc_id: - content = next_content - insertable.append((doc_id, rev, content, gen, trans_id)) - else: - break - defer.returnValue(insertable) - - @defer.inlineCallbacks - def _count_docs_in_sync_db(self, encrypted=None): - """ - Count how many documents we have in the table for received docs. - - :param encrypted: If not None, return count of documents with - encrypted field equal to given parameter. - :type encrypted: bool or None - - :return: The count of documents. - :rtype: int - """ - query = "SELECT COUNT(*) FROM %s" % (self.TABLE_NAME,) - if encrypted is not None: - query += " WHERE encrypted = %d" % int(encrypted) - res = yield self._sync_db.runQuery(query) - if res: - val = res.pop() - defer.returnValue(val[0]) - else: - defer.returnValue(0) - - @defer.inlineCallbacks - def _decrypt_received_docs(self): - """ - Get all the encrypted documents from the sync database and dispatch a - decrypt worker to decrypt each one of them. 
- """ - self._raise_in_case_of_failed_async_calls() - docs_by_generation = yield self.get_docs_by_generation(encrypted=True) - for doc_id, rev, content, gen, trans_id, _ in docs_by_generation: - self._decrypt_doc( - doc_id, rev, content, gen, trans_id, self.source_replica_uid) - - @defer.inlineCallbacks - def _process_decrypted(self): - """ - Process the already decrypted documents, and insert as many documents - as can be taken from the expected order without finding a gap. - - :return: Whether we have processed all the pending docs. - :rtype: bool - """ - # Acquire the lock to avoid processing while we're still - # getting data from the syncing stream, to avoid InvalidGeneration - # problems. - insertable = yield self.get_insertable_docs_by_gen() - for doc_fields in insertable: - yield self.insert_decrypted_local_doc(*doc_fields) - - @defer.inlineCallbacks - def insert_decrypted_local_doc(self, doc_id, doc_rev, content, - gen, trans_id): - """ - Insert the decrypted document into the local sqlcipher database. - Makes use of the passed callback `return_doc_cb` passed to the caller - by u1db sync. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The serialized content of the document. - :type content: str - :param gen: The generation corresponding to the modification of that - document. - :type gen: int - :param trans_id: The transaction id corresponding to the modification - of that document. 
- :type trans_id: str - """ - # could pass source_replica in params for callback chain - insert_fun = self._insert_doc_cb[self.source_replica_uid] - logger.debug("Sync decrypter pool: inserting doc in local db: " - "%s:%s %s" % (doc_id, doc_rev, gen)) - - # convert deleted documents to avoid error on document creation - if content == 'null': - content = None - doc = SoledadDocument(doc_id, doc_rev, content) - gen = int(gen) - insert_fun(doc, gen, trans_id) - - # If no errors found, remove it from the received database. - yield self.delete_received_doc(doc_id, doc_rev) - - @defer.inlineCallbacks - def empty(self): - """ - Empty the received docs table of the sync database. - """ - sql = "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,) - yield self._sync_db.runQuery(sql) - - @defer.inlineCallbacks - def _fetchall(self, *args, **kwargs): - results = yield self._sync_db.runQuery(*args, **kwargs) - defer.returnValue(results) - - def _raise_in_case_of_failed_async_calls(self): - """ - Re-raise any exception raised by an async call. - - :raise Exception: Raised if an async call has raised an exception. - """ - for res in self._async_results: - if res.ready(): - if not res.successful(): - # re-raise the exception raised by the remote call - res.get() - - def _stop_decr_loop(self): - """ - """ - self._stopped.set() - - def close(self): - """ - """ - self._stop_decr_loop() - SyncEncryptDecryptPool.close(self) - - def _decrypt_and_process_docs(self): - """ - Decrypt the documents received from remote replica and insert them - into the local one. - - Called periodically from LoopingCall self._sync_loop. 
- """ - while not self._stopped.is_set(): - if sameProxiedObjects( - self._insert_doc_cb.get(self.source_replica_uid), - None): - continue - self._decrypt_received_docs() - self._process_decrypted() - time.sleep(self.DECRYPT_LOOP_PERIOD) - - def wait(self): - while not self.clear_to_sync(): - time.sleep(self.DECRYPT_LOOP_PERIOD) - - @defer.inlineCallbacks - def clear_to_sync(self): - count = yield self._count_docs_in_sync_db() - defer.returnValue(count == 0) diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py new file mode 100644 index 00000000..0466ec5d --- /dev/null +++ b/client/src/leap/soledad/client/encdecpool.py @@ -0,0 +1,673 @@ +# -*- coding: utf-8 -*- +# encdecpool.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +A pool of encryption/decryption concurrent and parallel workers for using +during synchronization. 
+""" + + +import multiprocessing +import threading +import time +import json +import logging + +from zope.proxy import sameProxiedObjects + +from twisted.internet import defer +from twisted.internet.threads import deferToThread + +from leap.soledad.common.document import SoledadDocument +from leap.soledad.common import soledad_assert + +from leap.soledad.client.crypto import encrypt_docstr +from leap.soledad.client.crypto import decrypt_doc_dict + + +logger = logging.getLogger(__name__) + + +# +# Encrypt/decrypt pools of workers +# + +class SyncEncryptDecryptPool(object): + """ + Base class for encrypter/decrypter pools. + """ + WORKERS = multiprocessing.cpu_count() + + def __init__(self, crypto, sync_db): + """ + Initialize the pool of encryption-workers. + + :param crypto: A SoledadCryto instance to perform the encryption. + :type crypto: leap.soledad.crypto.SoledadCrypto + + :param sync_db: A database connection handle + :type sync_db: pysqlcipher.dbapi2.Connection + """ + self._pool = multiprocessing.Pool(self.WORKERS) + self._crypto = crypto + self._sync_db = sync_db + + def close(self): + """ + Cleanly close the pool of workers. + """ + logger.debug("Closing %s" % (self.__class__.__name__,)) + self._pool.close() + try: + self._pool.join() + except Exception: + pass + + def terminate(self): + """ + Terminate the pool of workers. + """ + logger.debug("Terminating %s" % (self.__class__.__name__,)) + self._pool.terminate() + + +def encrypt_doc_task(doc_id, doc_rev, content, key, secret): + """ + Encrypt the content of the given document. + + :param doc_id: The document id. + :type doc_id: str + :param doc_rev: The document revision. + :type doc_rev: str + :param content: The serialized content of the document. + :type content: str + :param key: The encryption key. + :type key: str + :param secret: The Soledad storage secret (used for MAC auth). + :type secret: str + + :return: A tuple containing the doc id, revision and encrypted content. 
+ :rtype: tuple(str, str, str) + """ + encrypted_content = encrypt_docstr( + content, doc_id, doc_rev, key, secret) + return doc_id, doc_rev, encrypted_content + + +class SyncEncrypterPool(SyncEncryptDecryptPool): + """ + Pool of workers that spawn subprocesses to execute the symmetric encryption + of documents to be synced. + """ + # TODO implement throttling to reduce cpu usage?? + WORKERS = multiprocessing.cpu_count() + TABLE_NAME = "docs_tosync" + FIELD_NAMES = "doc_id PRIMARY KEY, rev, content" + + def encrypt_doc(self, doc, workers=True): + """ + Symmetrically encrypt a document. + + :param doc: The document with contents to be encrypted. + :type doc: SoledadDocument + + :param workers: Whether to defer the decryption to the multiprocess + pool of workers. Useful for debugging purposes. + :type workers: bool + """ + soledad_assert(self._crypto is not None, "need a crypto object") + docstr = doc.get_json() + key = self._crypto.doc_passphrase(doc.doc_id) + secret = self._crypto.secret + args = doc.doc_id, doc.rev, docstr, key, secret + + try: + if workers: + res = self._pool.apply_async( + encrypt_doc_task, args, + callback=self.encrypt_doc_cb) + else: + # encrypt inline + res = encrypt_doc_task(*args) + self.encrypt_doc_cb(res) + + except Exception as exc: + logger.exception(exc) + + def encrypt_doc_cb(self, result): + """ + Insert results of encryption routine into the local sync database. + + :param result: A tuple containing the doc id, revision and encrypted + content. + :type result: tuple(str, str, str) + """ + doc_id, doc_rev, content = result + return self.insert_encrypted_local_doc(doc_id, doc_rev, content) + + def insert_encrypted_local_doc(self, doc_id, doc_rev, content): + """ + Insert the contents of the encrypted doc into the local sync + database. + + :param doc_id: The document id. + :type doc_id: str + :param doc_rev: The document revision. + :type doc_rev: str + :param content: The serialized content of the document. 
+ :type content: str + """ + query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?)" \ + % (self.TABLE_NAME,) + return self._sync_db.runOperation(query, (doc_id, doc_rev, content)) + + +def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret, + idx): + """ + Decrypt the content of the given document. + + :param doc_id: The document id. + :type doc_id: str + :param doc_rev: The document revision. + :type doc_rev: str + :param content: The encrypted content of the document. + :type content: str + :param gen: The generation corresponding to the modification of that + document. + :type gen: int + :param trans_id: The transaction id corresponding to the modification of + that document. + :type trans_id: str + :param key: The encryption key. + :type key: str + :param secret: The Soledad storage secret (used for MAC auth). + :type secret: str + :param idx: The index of this document in the current sync process. + :type idx: int + + :return: A tuple containing the doc id, revision and encrypted content. + :rtype: tuple(str, str, str) + """ + decrypted_content = decrypt_doc_dict(content, doc_id, doc_rev, key, secret) + return doc_id, doc_rev, decrypted_content, gen, trans_id, idx + + +class SyncDecrypterPool(SyncEncryptDecryptPool): + """ + Pool of workers that spawn subprocesses to execute the symmetric decryption + of documents that were received. + + The decryption of the received documents is done in two steps: + + 1. Encrypted documents are stored in the sync db by the actual soledad + sync loop. + 2. The soledad sync loop tells us how many documents we should expect + to process. + 3. We start a decrypt-and-process loop: + + a. Encrypted documents are fetched. + b. Encrypted documents are decrypted. + c. 
The longest possible list of decrypted documents is inserted + in the soledad db (this depends on which documents have already + arrived and which documents have already been decrypted, because + the order of insertion in the local soledad db matters). + d. Processed documents are deleted from the database. + + 4. When we have processed as many documents as we should, the loop + finishes. + """ + # TODO implement throttling to reduce cpu usage?? + TABLE_NAME = "docs_received" + FIELD_NAMES = "doc_id PRIMARY KEY, rev, content, gen, " \ + "trans_id, encrypted, idx" + + """ + Period of recurrence of the periodic decrypting task, in seconds. + """ + DECRYPT_LOOP_PERIOD = 0.5 + + def __init__(self, *args, **kwargs): + """ + Initialize the decrypter pool, and setup a dict for putting the + results of the decrypted docs until they are picked by the insert + routine that gets them in order. + + :param insert_doc_cb: A callback for inserting received documents from + target. If not overridden, this will call u1db + insert_doc_from_target in synchronizer, which + implements the TAKE OTHER semantics. + :type insert_doc_cb: function + :param source_replica_uid: The source replica uid, used to find the + correct callback for inserting documents.
+ :type source_replica_uid: str + """ + self._insert_doc_cb = kwargs.pop("insert_doc_cb") + self.source_replica_uid = kwargs.pop("source_replica_uid") + SyncEncryptDecryptPool.__init__(self, *args, **kwargs) + + self._last_inserted_idx = 0 + self._docs_to_process = None + self._processed_docs = 0 + + self._async_results = [] + self._exception = None + self._finished = threading.Event() + + # clear the database before starting the sync + self._empty_db = threading.Event() + d = self._empty() + d.addCallback(lambda _: self._empty_db.set()) + + # start the decryption loop + self._deferred_loop = deferToThread(self._decrypt_and_process_docs) + self._deferred_loop.addCallback( + lambda _: logger.debug("Finished decryptor thread.")) + + def set_docs_to_process(self, docs_to_process): + """ + Set the number of documents we expect to process. + + This should be called during the sync exchange process as soon + as we know how many documents are arriving from the server. + + :param docs_to_process: The number of documents to process. + :type docs_to_process: int + """ + self._docs_to_process = docs_to_process + + def insert_encrypted_received_doc( + self, doc_id, doc_rev, content, gen, trans_id, idx): + """ + Insert a received message with encrypted content, to be decrypted later + on. + + :param doc_id: The Document ID. + :type doc_id: str + :param doc_rev: The Document Revision + :type doc_rev: str + :param content: the Content of the document + :type content: str + :param gen: the Document Generation + :type gen: int + :param trans_id: Transaction ID + :type trans_id: str + :param idx: The index of this document in the current sync process. + :type idx: int + + :return: A deferred that will fire when the operation in the database + has finished.
+ :rtype: twisted.internet.defer.Deferred + """ + docstr = json.dumps(content) + query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \ + % self.TABLE_NAME + return self._sync_db.runOperation( + query, (doc_id, doc_rev, docstr, gen, trans_id, 1, idx)) + + def insert_received_doc( + self, doc_id, doc_rev, content, gen, trans_id, idx): + """ + Insert a document that is not symmetrically encrypted. + We store it in the staging area (the decrypted_docs dictionary) to be + picked up in order as the preceding documents are decrypted. + + :param doc_id: The Document ID. + :type doc_id: str + :param doc_rev: The Document Revision + :param doc_rev: str + :param content: the Content of the document + :type content: str + :param gen: the Document Generation + :type gen: int + :param trans_id: Transaction ID + :type trans_id: str + :param idx: The index of this document in the current sync process. + :type idx: int + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + if not isinstance(content, str): + content = json.dumps(content) + query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \ + % self.TABLE_NAME + return self._sync_db.runOperation( + query, (doc_id, doc_rev, content, gen, trans_id, 0, idx)) + + def _delete_received_doc(self, doc_id): + """ + Delete a received doc after it was inserted into the local db. + + :param doc_id: Document ID. + :type doc_id: str + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + query = "DELETE FROM '%s' WHERE doc_id=?" \ + % self.TABLE_NAME + return self._sync_db.runOperation(query, (doc_id,)) + + def _decrypt_doc(self, doc_id, rev, content, gen, trans_id, idx, + workers=True): + """ + Symmetrically decrypt a document and store in the sync db. + + :param doc_id: The ID for the document with contents to be encrypted. 
+ :type doc_id: str + :param rev: The revision of the document. + :type rev: str + :param content: The serialized content of the document. + :type content: str + :param gen: The generation corresponding to the modification of that + document. + :type gen: int + :param trans_id: The transaction id corresponding to the modification + of that document. + :type trans_id: str + :param idx: The index of this document in the current sync process. + :type idx: int + :param workers: Whether to defer the decryption to the multiprocess + pool of workers. Useful for debugging purposes. + :type workers: bool + + :return: A deferred that will fire after the document has been + decrypted and inserted in the sync db. + :rtype: twisted.internet.defer.Deferred + """ + # insert_doc_cb is a proxy object that gets updated with the right + # insert function only when the sync_target invokes the sync_exchange + # method. so, if we don't still have a non-empty callback, we refuse + # to proceed. + if sameProxiedObjects( + self._insert_doc_cb.get(self.source_replica_uid), + None): + logger.debug("Sync decrypter pool: no insert_doc_cb() yet.") + return + + soledad_assert(self._crypto is not None, "need a crypto object") + + content = json.loads(content) + key = self._crypto.doc_passphrase(doc_id) + secret = self._crypto.secret + args = doc_id, rev, content, gen, trans_id, key, secret, idx + + if workers: + # when using multiprocessing, we need to wait for all parallel + # processing to finish before continuing with the + # decrypt-and-process loop. We do this by using an extra deferred + # that will be fired by the multiprocessing callback when it has + # finished processing.
+ d1 = defer.Deferred() + + def _multiprocessing_callback(result): + d2 = self._decrypt_doc_cb(result) + d2.addCallback(lambda defres: d1.callback(defres)) + + # save the async result object so we can inspect it for failures + self._async_results.append( + self._pool.apply_async( + decrypt_doc_task, args, + callback=_multiprocessing_callback)) + + return d1 + else: + # decrypt inline + res = decrypt_doc_task(*args) + return self._decrypt_doc_cb(res) + + def _decrypt_doc_cb(self, result): + """ + Store the decryption result in the sync db from where it will later be + picked by _process_decrypted. + + :param result: A tuple containing the document's id, revision, + content, generation, transaction id and sync index. + :type result: tuple(str, str, str, int, str, int) + + :return: A deferred that will fire after the document has been + inserted in the sync db. + :rtype: twisted.internet.defer.Deferred + """ + doc_id, rev, content, gen, trans_id, idx = result + logger.debug("Sync decrypter pool: decrypted doc %s: %s %s %s" + % (doc_id, rev, gen, trans_id)) + return self.insert_received_doc( + doc_id, rev, content, gen, trans_id, idx) + + def _get_docs(self, encrypted=None, order_by='idx', order='ASC'): + """ + Get documents from the received docs table in the sync db. + + :param encrypted: If not None, only return documents with encrypted + field equal to given parameter. + :type encrypted: bool or None + :param order_by: The name of the field to order results. + :type order_by: str + :param order: Whether the order should be ASC or DESC. + :type order: str + + :return: A deferred that will fire with the results of the database + query. 
+ :rtype: twisted.internet.defer.Deferred + """ + query = "SELECT doc_id, rev, content, gen, trans_id, encrypted, " \ + "idx FROM %s" % self.TABLE_NAME + if encrypted is not None: + query += " WHERE encrypted = %d" % int(encrypted) + query += " ORDER BY %s %s" % (order_by, order) + return self._sync_db.runQuery(query) + + @defer.inlineCallbacks + def _get_insertable_docs(self): + """ + Return a list of non-encrypted documents ready to be inserted. + + :return: A deferred that will fire with the list of insertable + documents. + :rtype: twisted.internet.defer.Deferred + """ + # here, we fetch the list of decrypted documents and compare with the + # index of the last succesfully processed document. + decrypted_docs = yield self._get_docs(encrypted=False) + insertable = [] + last_idx = self._last_inserted_idx + for doc_id, rev, content, gen, trans_id, encrypted, idx in \ + decrypted_docs: + # XXX for some reason, a document might not have been deleted from + # the database. This is a bug. In this point, already + # processed documents should have been removed from the sync + # database and we should not have to skip them here. We need + # to find out why this is happening, fix, and remove the + # skipping below. + if (idx < last_idx + 1): + continue + if (idx != last_idx + 1): + break + insertable.append((doc_id, rev, content, gen, trans_id, idx)) + last_idx += 1 + defer.returnValue(insertable) + + def _decrypt_received_docs(self): + """ + Get all the encrypted documents from the sync database and dispatch a + decrypt worker to decrypt each one of them. + + :return: A deferred that will fire after all documents have been + decrypted and inserted back in the sync db. 
+ :rtype: twisted.internet.defer.Deferred + """ + + def _callback(received_docs): + deferreds = [] + for doc_id, rev, content, gen, trans_id, _, idx in received_docs: + deferreds.append( + self._decrypt_doc( + doc_id, rev, content, gen, trans_id, idx)) + return defer.gatherResults(deferreds) + + d = self._get_docs(encrypted=True) + d.addCallback(_callback) + return d + + def _process_decrypted(self): + """ + Fetch as many decrypted documents as can be taken from the expected + order and insert them in the database. + + :return: A deferred that will fire with the list of inserted + documents. + :rtype: twisted.internet.defer.Deferred + """ + + def _callback(insertable): + for doc_fields in insertable: + self._insert_decrypted_local_doc(*doc_fields) + return insertable + + d = self._get_insertable_docs() + d.addCallback(_callback) + return d + + def _delete_processed_docs(self, inserted): + """ + Delete from the sync db documents that have been processed. + + :param inserted: List of documents inserted in the previous process + step. + :type inserted: list + + :return: A list of deferreds that will fire when each operation in the + database has finished. + :rtype: twisted.internet.defer.DeferredList + """ + deferreds = [] + for doc_id, doc_rev, _, _, _, _ in inserted: + deferreds.append( + self._delete_received_doc(doc_id)) + if not deferreds: + return defer.succeed(None) + return defer.gatherResults(deferreds) + + def _insert_decrypted_local_doc(self, doc_id, doc_rev, content, + gen, trans_id, idx): + """ + Insert the decrypted document into the local sqlcipher database. + Makes use of the passed callback `return_doc_cb` passed to the caller + by u1db sync. + + :param doc_id: The document id. + :type doc_id: str + :param doc_rev: The document revision. + :type doc_rev: str + :param content: The serialized content of the document. + :type content: str + :param gen: The generation corresponding to the modification of that + document. 
+ :type gen: int + :param trans_id: The transaction id corresponding to the modification + of that document. + :type trans_id: str + """ + # could pass source_replica in params for callback chain + insert_fun = self._insert_doc_cb[self.source_replica_uid] + logger.debug("Sync decrypter pool: inserting doc in local db: " + "%s:%s %s" % (doc_id, doc_rev, gen)) + + # convert deleted documents to avoid error on document creation + if content == 'null': + content = None + doc = SoledadDocument(doc_id, doc_rev, content) + gen = int(gen) + insert_fun(doc, gen, trans_id) + + # store info about processed docs + self._last_inserted_idx = idx + self._processed_docs += 1 + + def _empty(self): + """ + Empty the received docs table of the sync database. + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + query = "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,) + return self._sync_db.runOperation(query) + + def _raise_if_async_fails(self): + """ + Raise any exception raised by a multiprocessing async decryption + call. + + :raise Exception: Raised if an async call has raised an exception. + """ + for res in self._async_results: + if res.ready(): + if not res.successful(): + # re-raise the exception raised by the remote call + res.get() + + def _decrypt_and_process_docs(self): + """ + Decrypt the documents received from remote replica and insert them + into the local one. + + This method runs in its own thread, so sleeping will not interfere + with the main thread. + """ + try: + # wait for database to be emptied + self._empty_db.wait() + # wait until we know how many documents we need to process + while self._docs_to_process is None: + time.sleep(self.DECRYPT_LOOP_PERIOD) + # because all database operations are asynchronous, we use an event to + # make sure we don't start the next loop before the current one has + # finished. 
+ event = threading.Event() + # loop until we have processes as many docs as the number of changes + while self._processed_docs < self._docs_to_process: + if sameProxiedObjects( + self._insert_doc_cb.get(self.source_replica_uid), + None): + continue + event.clear() + d = self._decrypt_received_docs() + d.addCallback(lambda _: self._raise_if_async_fails()) + d.addCallback(lambda _: self._process_decrypted()) + d.addCallback(self._delete_processed_docs) + d.addCallback(lambda _: event.set()) + event.wait() + # sleep a bit to give time for some decryption work + time.sleep(self.DECRYPT_LOOP_PERIOD) + except Exception as e: + self._exception = e + self._finished.set() + + def wait(self): + """ + Wait for the decrypt-and-process loop to finish. + """ + self._finished.wait() + if self._exception: + raise self._exception diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 4f7ecd1b..d3b3d01b 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -66,7 +66,7 @@ from twisted.python.threadpool import ThreadPool from twisted.python import log from twisted.enterprise import adbapi -from leap.soledad.client import crypto +from leap.soledad.client import encdecpool from leap.soledad.client.target import SoledadSyncTarget from leap.soledad.client.target import PendingReceivedDocsSyncError from leap.soledad.client.sync import SoledadSynchronizer @@ -489,7 +489,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): if defer_encryption: # initialize syncing queue encryption pool - self._sync_enc_pool = crypto.SyncEncrypterPool( + self._sync_enc_pool = encdecpool.SyncEncrypterPool( self._crypto, self._sync_db) # ----------------------------------------------------------------- @@ -578,8 +578,8 @@ class SQLCipherU1DBSync(SQLCipherDatabase): :rtype: tuple of strings """ maybe_create = "CREATE TABLE IF NOT EXISTS %s (%s)" - encr = crypto.SyncEncrypterPool - decr = 
crypto.SyncDecrypterPool + encr = encdecpool.SyncEncrypterPool + decr = encdecpool.SyncDecrypterPool sql_encr_table_query = (maybe_create % ( encr.TABLE_NAME, encr.FIELD_NAMES)) sql_decr_table_query = (maybe_create % ( diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 06cef1ee..17ce718f 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -43,7 +43,8 @@ from leap.soledad.common.document import SoledadDocument from leap.soledad.client.auth import TokenBasedAuth from leap.soledad.client.crypto import is_symmetrically_encrypted from leap.soledad.client.crypto import encrypt_doc, decrypt_doc -from leap.soledad.client.crypto import SyncEncrypterPool, SyncDecrypterPool +from leap.soledad.client.encdecpool import SyncEncrypterPool +from leap.soledad.client.encdecpool import SyncDecrypterPool from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS from leap.soledad.client.events import signal @@ -787,9 +788,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._crypto = crypto self._stopped = True self._stop_lock = threading.Lock() - self._sync_exchange_lock = threading.Lock() self.source_replica_uid = source_replica_uid - self._defer_decryption = False self._syncer_pool = None # deferred decryption attributes @@ -813,9 +812,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): """ Tear down the SyncDecrypterPool. 
""" - if self._sync_decr_pool is not None: - self._sync_decr_pool.close() - self._sync_decr_pool = None + self._sync_decr_pool.close() + self._sync_decr_pool = None def _get_replica_uid(self, url): """ @@ -903,7 +901,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): doc = SoledadDocument(doc_id, rev, content) if is_symmetrically_encrypted(doc): if self._queue_for_decrypt: - self._save_encrypted_received_doc( + self._enqueue_encrypted_received_doc( doc, gen, trans_id, idx, total) else: # defer_decryption is False or no-sync-db fallback @@ -913,7 +911,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # not symmetrically encrypted doc, insert it directly # or save it in the decrypted stage. if self._queue_for_decrypt: - self._save_received_doc(doc, gen, trans_id, idx, total) + self._enqueue_received_doc(doc, gen, trans_id, idx, total) else: self._return_doc_cb(doc, gen, trans_id) # ------------------------------------------------------------- @@ -996,6 +994,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self.stop(fail=True) break + if defer_decryption: + self._setup_sync_decr_pool() + t.doc_syncer.set_request_method( 'get', idx, sync_id, last_known_generation, last_known_trans_id) @@ -1021,6 +1022,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): t.join() if t.success: number_of_changes, _, _ = t.result + if defer_decryption and number_of_changes: + self._sync_decr_pool.set_docs_to_process( + number_of_changes) else: raise t.exception first_request = False @@ -1053,6 +1057,11 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): new_generation = doc_data['gen'] new_transaction_id = doc_data['trans_id'] + # decrypt docs in case of deferred decryption + if defer_decryption: + self._sync_decr_pool.wait() + self._teardown_sync_decr_pool() + return new_generation, new_transaction_id def sync_exchange(self, docs_by_generations, @@ -1103,14 +1112,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): """ 
self._ensure_callback = ensure_callback - if defer_decryption and self._sync_db is not None: - self._sync_exchange_lock.acquire() - self._setup_sync_decr_pool() - self._defer_decryption = True - else: - # fall back - defer_decryption = False - self.start() if sync_id is None: @@ -1120,10 +1121,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): setProxiedObject(self._insert_doc_cb[source_replica_uid], return_doc_cb) - # empty the database before starting a new sync - if defer_decryption is True and not self.clear_to_sync(): - self._sync_decr_pool.empty() - self._ensure_connection() if self._trace_hook: # for tests self._trace_hook('sync_exchange') @@ -1257,6 +1254,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): trans_id_after_send = response_dict['new_transaction_id'] # get docs from target + if self._sync_db is None: + defer_decryption = False if self.stopped is False: cur_target_gen, cur_target_trans_id = self._get_remote_docs( url, @@ -1266,12 +1265,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._syncer_pool.cleanup() - # decrypt docs in case of deferred decryption - if defer_decryption: - self._sync_decr_pool.wait() - self._teardown_sync_decr_pool() - self._sync_exchange_lock.release() - # update gen and trans id info in case we just sent and did not # receive docs. if gen_after_send is not None and gen_after_send > cur_target_gen: @@ -1357,7 +1350,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): encr.TABLE_NAME,)) self._sync_db.execute(sql, (doc_id, doc_rev)) - def _save_encrypted_received_doc(self, doc, gen, trans_id, idx, total): + def _enqueue_encrypted_received_doc(self, doc, gen, trans_id, idx, total): """ Save a symmetrically encrypted incoming document into the received docs table in the sync db. A decryption task will pick it up @@ -1378,9 +1371,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): "Enqueueing doc for decryption: %d/%d." 
% (idx + 1, total)) self._sync_decr_pool.insert_encrypted_received_doc( - doc.doc_id, doc.rev, doc.content, gen, trans_id) + doc.doc_id, doc.rev, doc.content, gen, trans_id, idx + 1) - def _save_received_doc(self, doc, gen, trans_id, idx, total): + def _enqueue_received_doc(self, doc, gen, trans_id, idx, total): """ Save any incoming document into the received docs table in the sync db. @@ -1399,23 +1392,12 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): "Enqueueing doc, no decryption needed: %d/%d." % (idx + 1, total)) self._sync_decr_pool.insert_received_doc( - doc.doc_id, doc.rev, doc.content, gen, trans_id) + doc.doc_id, doc.rev, doc.content, gen, trans_id, idx + 1) # # Symmetric decryption of syncing docs # - def clear_to_sync(self): - """ - Return whether sync can proceed (ie, the received db table is empty). - - :return: Whether sync can proceed. - :rtype: bool - """ - if self._sync_decr_pool: - return self._sync_decr_pool.clear_to_sync() - return True - def set_decryption_callback(self, cb): """ Set callback to be called when the decryption finishes. -- cgit v1.2.3 From 67f17cd30d01696ab24407b907bb55ae0fddacad Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 12 May 2015 10:33:23 -0400 Subject: [bug] remove illegal CR from auth header The b64 encoding of the auth token was introducing an illegal character (\n), which was breaking the authentication step since an exception was being raised - when that multi-line header was attempted to be built. this commit fixes that bug. 
- Resolves: #6959 --- client/src/leap/soledad/client/auth.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/auth.py b/client/src/leap/soledad/client/auth.py index 72ab0008..6dfabeb4 100644 --- a/client/src/leap/soledad/client/auth.py +++ b/client/src/leap/soledad/client/auth.py @@ -14,15 +14,13 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . - - """ Methods for token-based authentication. These methods have to be included in all classes that extend HTTPClient so they can do token-based auth requests to the Soledad server. """ - +import base64 from u1db import errors @@ -49,7 +47,7 @@ class TokenBasedAuth(object): Return an authorization header to be included in the HTTP request, in the form: - [('Authorization', 'Token ')] :param method: The HTTP method. :type method: str @@ -64,7 +62,8 @@ class TokenBasedAuth(object): if 'token' in self._creds: uuid, token = self._creds['token'] auth = '%s:%s' % (uuid, token) - return [('Authorization', 'Token %s' % auth.encode('base64')[:-1])] + b64_token = base64.b64encode(auth) + return [('Authorization', 'Token %s' % b64_token)] else: raise errors.UnknownAuthMethod( 'Wrong credentials: %s' % self._creds) -- cgit v1.2.3 From d1c39d389737ee844f42c5ed3dfc97c8ddf24250 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 12 May 2015 18:07:27 -0300 Subject: [bug] remove unused pending documents exception When we started implementing the sync db, one of the ideas was to reuse the data in the database in the case of a sync interruption. We don't do that now and thus the pending documents exception is unneeded. This commit removes that exception from the code. 
--- client/src/leap/soledad/client/sqlcipher.py | 6 ------ client/src/leap/soledad/client/target.py | 4 ---- 2 files changed, 10 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index d3b3d01b..39d5dd0e 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -68,7 +68,6 @@ from twisted.enterprise import adbapi from leap.soledad.client import encdecpool from leap.soledad.client.target import SoledadSyncTarget -from leap.soledad.client.target import PendingReceivedDocsSyncError from leap.soledad.client.sync import SoledadSynchronizer from leap.soledad.client import pragmas @@ -636,17 +635,12 @@ class SQLCipherU1DBSync(SQLCipherDatabase): log.msg('syncer sync...') res = syncer.sync(autocreate=autocreate, defer_decryption=defer_decryption) - - except PendingReceivedDocsSyncError: - logger.warning("Local sync db is not clear, skipping sync...") - return except CannotSendRequest: logger.warning("Connection with sync target couldn't be " "established. 
Resetting connection...") # closing the connection it will be recreated in the next try syncer.sync_target.close() return - return res def stop_sync(self): diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 17ce718f..f2415218 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -71,10 +71,6 @@ def _gunzip(data): return data -class PendingReceivedDocsSyncError(Exception): - pass - - class DocumentSyncerThread(threading.Thread): """ A thread that knowns how to either send or receive a document during the -- cgit v1.2.3 From 94cbe24f6c6cd54e14d8d1b14e617c2d52c427fd Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 13 May 2015 10:31:47 -0300 Subject: [feature] use twisted adbapi for async encryption The access to the sync db was modified to use twisted.enterprise.adbapi, but only the asynchronous decryption of incoming documents during sync was adapted. This commit modifies the asynchornous encryption of documents to also use the adbapi for accessing the sync db. --- client/src/leap/soledad/client/encdecpool.py | 175 ++++++++++++++++++--- client/src/leap/soledad/client/sqlcipher.py | 64 +------- client/src/leap/soledad/client/sync.py | 4 - client/src/leap/soledad/client/target.py | 217 +++++++++++++++------------ 4 files changed, 275 insertions(+), 185 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index 0466ec5d..0c1f92ea 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -85,6 +85,36 @@ class SyncEncryptDecryptPool(object): logger.debug("Terminating %s" % (self.__class__.__name__,)) self._pool.terminate() + def _runOperation(self, query, *args): + """ + Run an operation on the sync db. + + :param query: The query to be executed. + :type query: str + :param args: A list of query arguments. 
+ :type args: list + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + return self._sync_db.runOperation(query, *args) + + def _runQuery(self, query, *args): + """ + Run a query on the sync db. + + :param query: The query to be executed. + :type query: str + :param args: A list of query arguments. + :type args: list + + :return: A deferred that will fire with the results of the database + query. + :rtype: twisted.internet.defer.Deferred + """ + return self._sync_db.runQuery(query, *args) + def encrypt_doc_task(doc_id, doc_rev, content, key, secret): """ @@ -119,7 +149,50 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): TABLE_NAME = "docs_tosync" FIELD_NAMES = "doc_id PRIMARY KEY, rev, content" - def encrypt_doc(self, doc, workers=True): + ENCRYPT_LOOP_PERIOD = 0.5 + + def __init__(self, *args, **kwargs): + """ + Initialize the sync encrypter pool. + """ + SyncEncryptDecryptPool.__init__(self, *args, **kwargs) + + self._stopped = False + self._sync_queue = multiprocessing.Queue() + + # start the encryption loop + self._deferred_loop = deferToThread(self._encrypt_docs_loop) + self._deferred_loop.addCallback( + lambda _: logger.debug("Finished encrypter thread.")) + + def enqueue_doc_for_encryption(self, doc): + """ + Enqueue a document for encryption. + + :param doc: The document to be encrypted. + :type doc: SoledadDocument + """ + try: + self.sync_queue.put_nowait(doc) + except multiprocessing.Queue.Full: + # do not asynchronously encrypt this file if the queue is full + pass + + def _encrypt_docs_loop(self): + """ + Process the syncing queue and send the documents there to be encrypted + in the sync db. They will be read by the SoledadSyncTarget during the + sync_exchange. 
+ """ + logger.debug("Starting encrypter thread.") + while not self._stopped: + try: + doc = self._sync_queue.get(True, self.ENCRYPT_LOOP_PERIOD) + self._encrypt_doc(doc) + except multiprocessing.Queue.Empty: + pass + + def _encrypt_doc(self, doc, workers=True): """ Symmetrically encrypt a document. @@ -136,20 +209,20 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): secret = self._crypto.secret args = doc.doc_id, doc.rev, docstr, key, secret - try: - if workers: - res = self._pool.apply_async( - encrypt_doc_task, args, - callback=self.encrypt_doc_cb) - else: - # encrypt inline + if workers: + # encrypt asynchronously + self._pool.apply_async( + encrypt_doc_task, args, + callback=self._encrypt_doc_cb) + else: + # encrypt inline + try: res = encrypt_doc_task(*args) - self.encrypt_doc_cb(res) + self._encrypt_doc_cb(res) + except Exception as exc: + logger.exception(exc) - except Exception as exc: - logger.exception(exc) - - def encrypt_doc_cb(self, result): + def _encrypt_doc_cb(self, result): """ Insert results of encryption routine into the local sync database. @@ -158,9 +231,9 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): :type result: tuple(str, str, str) """ doc_id, doc_rev, content = result - return self.insert_encrypted_local_doc(doc_id, doc_rev, content) + return self._insert_encrypted_local_doc(doc_id, doc_rev, content) - def insert_encrypted_local_doc(self, doc_id, doc_rev, content): + def _insert_encrypted_local_doc(self, doc_id, doc_rev, content): """ Insert the contents of the encrypted doc into the local sync database. @@ -174,7 +247,58 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): """ query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?)" \ % (self.TABLE_NAME,) - return self._sync_db.runOperation(query, (doc_id, doc_rev, content)) + return self._runOperation(query, (doc_id, doc_rev, content)) + + @defer.inlineCallbacks + def get_encrypted_doc(self, doc_id, doc_rev): + """ + Get an encrypted document from the sync db. 
+ + :param doc_id: The id of the document. + :type doc_id: str + :param doc_rev: The revision of the document. + :type doc_rev: str + + :return: A deferred that will fire with the encrypted content of the + document or None if the document was not found in the sync + db. + :rtype: twisted.internet.defer.Deferred + """ + logger.debug("Trying to get encrypted doc from sync db: %s" % doc_id) + query = "SELECT content FROM %s WHERE doc_id=? and rev=?" \ + % self.TABLE_NAME + result = yield self._runQuery(query, (doc_id, doc_rev)) + if result: + val = result.pop() + defer.returnValue(val[0]) + defer.returnValue(None) + + def delete_encrypted_doc(self, doc_id, doc_rev): + """ + Delete an encrypted document from the sync db. + + :param doc_id: The id of the document. + :type doc_id: str + :param doc_rev: The revision of the document. + :type doc_rev: str + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + query = "DELETE FROM %s WHERE doc_id=? and rev=?" \ + % self.TABLE_NAME + self._runOperation(query, (doc_id, doc_rev)) + + def close(self): + """ + Close the encrypter pool. 
+ """ + self._stopped = True + self._sync_queue.close() + q = self._sync_queue + del q + self._sync_queue = None def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret, @@ -275,9 +399,10 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): d.addCallback(lambda _: self._empty_db.set()) # start the decryption loop - self._deferred_loop = deferToThread(self._decrypt_and_process_docs) + self._deferred_loop = deferToThread( + self._decrypt_and_process_docs_loop) self._deferred_loop.addCallback( - lambda _: logger.debug("Finished decryptor thread.")) + lambda _: logger.debug("Finished decrypter thread.")) def set_docs_to_process(self, docs_to_process): """ @@ -317,7 +442,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): docstr = json.dumps(content) query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \ % self.TABLE_NAME - return self._sync_db.runOperation( + return self._runOperation( query, (doc_id, doc_rev, docstr, gen, trans_id, 1, idx)) def insert_received_doc( @@ -348,7 +473,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): content = json.dumps(content) query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \ % self.TABLE_NAME - return self._sync_db.runOperation( + return self._runOperation( query, (doc_id, doc_rev, content, gen, trans_id, 0, idx)) def _delete_received_doc(self, doc_id): @@ -364,7 +489,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ query = "DELETE FROM '%s' WHERE doc_id=?" 
\ % self.TABLE_NAME - return self._sync_db.runOperation(query, (doc_id,)) + return self._runOperation(query, (doc_id,)) def _decrypt_doc(self, doc_id, rev, content, gen, trans_id, idx, workers=True): @@ -474,7 +599,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): if encrypted is not None: query += " WHERE encrypted = %d" % int(encrypted) query += " ORDER BY %s %s" % (order_by, order) - return self._sync_db.runQuery(query) + return self._runQuery(query) @defer.inlineCallbacks def _get_insertable_docs(self): @@ -612,7 +737,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :rtype: twisted.internet.defer.Deferred """ query = "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,) - return self._sync_db.runOperation(query) + return self._runOperation(query) def _raise_if_async_fails(self): """ @@ -627,7 +752,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): # re-raise the exception raised by the remote call res.get() - def _decrypt_and_process_docs(self): + def _decrypt_and_process_docs_loop(self): """ Decrypt the documents received from remote replica and insert them into the local one. @@ -668,6 +793,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ Wait for the decrypt-and-process loop to finish. """ + logger.debug("Waiting for asynchronous decryption of incoming documents...") self._finished.wait() + logger.debug("Asynchronous decryption of incoming documents finished.") if self._exception: raise self._exception diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 39d5dd0e..16241621 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -42,7 +42,6 @@ SQLCipher 1.1 databases, we do not implement them as all SQLCipher databases handled by Soledad should be created by SQLCipher >= 2.0. 
""" import logging -import multiprocessing import os import threading import json @@ -286,10 +285,9 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :rtype: str """ doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(self, doc) - - # TODO XXX move to API XXX if self.defer_encryption: - self.sync_queue.put_nowait(doc) + # TODO move to api? + self._sync_enc_pool.enqueue_doc_for_encryption(doc) return doc_rev # @@ -428,13 +426,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): """ LOCAL_SYMMETRIC_SYNC_FILE_NAME = 'sync.u1db' - """ - A dictionary that hold locks which avoid multiple sync attempts from the - same database replica. - """ - # XXX We do not need the lock here now. Remove. - encrypting_lock = threading.Lock() - """ Period or recurrence of the Looping Call that will do the encryption to the syncdb (in seconds). @@ -458,7 +449,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._sync_db_key = opts.sync_db_key self._sync_db = None self._sync_enc_pool = None - self.sync_queue = None # we store syncers in a dictionary indexed by the target URL. We also # store a hash of the auth info in case auth info expires and we need @@ -468,7 +458,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # self._syncers = {'': ('', syncer), ...} self._syncers = {} - self.sync_queue = multiprocessing.Queue() self.running = False self._sync_threadpool = None @@ -486,24 +475,10 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._initialize_sync_db(opts) if defer_encryption: - # initialize syncing queue encryption pool self._sync_enc_pool = encdecpool.SyncEncrypterPool( self._crypto, self._sync_db) - # ----------------------------------------------------------------- - # From the documentation: If f returns a deferred, rescheduling - # will not take place until the deferred has fired. The result - # value is ignored. - - # TODO use this to avoid multiple sync attempts if the sync has not - # finished! 
- # ----------------------------------------------------------------- - - # XXX this was called sync_watcher --- trace any remnants - self._sync_loop = LoopingCall(self._encrypt_syncing_docs) - self._sync_loop.start(self.ENCRYPT_LOOP_PERIOD) - self.shutdownID = None @property @@ -703,7 +678,8 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._replica_uid, creds=creds, crypto=self._crypto, - sync_db=self._sync_db)) + sync_db=self._sync_db, + sync_enc_pool=self._sync_enc_pool)) self._syncers[url] = (h, syncer) # in order to reuse the same synchronizer multiple times we have to # reset its state (i.e. the number of documents received from target @@ -715,33 +691,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # Symmetric encryption of syncing docs # - def _encrypt_syncing_docs(self): - """ - Process the syncing queue and send the documents there - to be encrypted in the sync db. They will be read by the - SoledadSyncTarget during the sync_exchange. - - Called periodically from the LoopingCall self._sync_loop. - """ - # TODO should return a deferred that would firewhen the encryption is - # done. 
See note on __init__ - - lock = self.encrypting_lock - # optional wait flag used to avoid blocking - if not lock.acquire(False): - return - else: - queue = self.sync_queue - try: - while not queue.empty(): - doc = queue.get_nowait() - self._sync_enc_pool.encrypt_doc(doc) - - except Exception as exc: - logger.error("Error while encrypting docs to sync") - logger.exception(exc) - finally: - lock.release() def get_generation(self): # FIXME @@ -779,11 +728,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): if self._sync_db is not None: self._sync_db.close() self._sync_db = None - # close the sync queue - if self.sync_queue is not None: - self.sync_queue.close() - del self.sync_queue - self.sync_queue = None class U1DBSQLiteBackend(sqlite_backend.SQLitePartialExpandDatabase): diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index d3f106da..d4ca4258 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -195,10 +195,6 @@ class SoledadSynchronizer(Synchronizer): "my_gen": my_gen } self._syncing_info = info - if defer_decryption and not sync_target.has_syncdb(): - logger.debug("Sync target has no valid sync db, " - "aborting defer_decryption") - defer_decryption = False self.complete_sync() except Exception as e: logger.error("Soledad sync error: %s" % str(e)) diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index f2415218..667aab15 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -28,6 +28,7 @@ import threading from collections import defaultdict from time import sleep from uuid import uuid4 +from functools import partial import simplejson as json @@ -38,12 +39,12 @@ from u1db.remote.http_client import _encode_query_parameter, HTTPClientBase from zope.proxy import ProxyBase from zope.proxy import setProxiedObject +from twisted.internet import defer from leap.soledad.common.document 
import SoledadDocument from leap.soledad.client.auth import TokenBasedAuth from leap.soledad.client.crypto import is_symmetrically_encrypted from leap.soledad.client.crypto import encrypt_doc, decrypt_doc -from leap.soledad.client.encdecpool import SyncEncrypterPool from leap.soledad.client.encdecpool import SyncDecrypterPool from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS @@ -111,6 +112,7 @@ class DocumentSyncerThread(threading.Thread): self._exception = None self._result = None self._success = False + self.started = threading.Event() # a lock so we can signal when we're finished self._request_lock = threading.Lock() self._request_lock.acquire() @@ -128,6 +130,8 @@ class DocumentSyncerThread(threading.Thread): finish before actually performing the request. It also traps any exception and register any failure with the request. """ + self.started.set() + with self._stop_lock: if self._stopped is None: self._stopped = False @@ -756,7 +760,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # def __init__(self, url, source_replica_uid=None, creds=None, crypto=None, - sync_db=None): + sync_db=None, sync_enc_pool=None): """ Initialize the SoledadSyncTarget. @@ -787,8 +791,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self.source_replica_uid = source_replica_uid self._syncer_pool = None - # deferred decryption attributes + # asynchronous encryption/decryption attributes self._sync_db = sync_db + self._sync_enc_pool = sync_enc_pool self._decryption_callback = None self._sync_decr_pool = None @@ -796,7 +801,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): """ Set up the SyncDecrypterPool for deferred decryption. 
""" - if self._sync_decr_pool is None: + if self._sync_decr_pool is None and self._sync_db is not None: # initialize syncing queue decryption pool self._sync_decr_pool = SyncDecrypterPool( self._crypto, @@ -1018,7 +1023,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): t.join() if t.success: number_of_changes, _, _ = t.result - if defer_decryption and number_of_changes: + if defer_decryption: self._sync_decr_pool.set_docs_to_process( number_of_changes) else: @@ -1060,6 +1065,14 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): return new_generation, new_transaction_id + @property + def _defer_encryption(self): + return self._sync_enc_pool is not None + + @property + def _defer_decryption(self): + return self._sync_decr_pool is not None + def sync_exchange(self, docs_by_generations, source_replica_uid, last_known_generation, last_known_trans_id, return_doc_cb, @@ -1126,17 +1139,19 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): cur_target_gen = last_known_generation cur_target_trans_id = last_known_trans_id - # send docs + # ------------------------------------------------------------------- + # start of send documents to target + # ------------------------------------------------------------------- msg = "%d/%d" % (0, len(docs_by_generations)) signal(SOLEDAD_SYNC_SEND_STATUS, msg) logger.debug("Soledad sync send status: %s" % msg) - defer_encryption = self._sync_db is not None self._syncer_pool = DocumentSyncerPool( self._raw_url, self._raw_creds, url, headers, ensure_callback, self.stop_syncer) threads = [] last_callback_lock = None + sent = 0 total = len(docs_by_generations) @@ -1156,66 +1171,78 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # ------------------------------------------------------------- # symmetric encryption of document's contents # ------------------------------------------------------------- - doc_json = doc.get_json() - if not doc.is_tombstone(): - if not defer_encryption: - # fallback case, for 
tests - doc_json = encrypt_doc(self._crypto, doc) - else: - try: - doc_json = self.get_encrypted_doc_from_db( - doc.doc_id, doc.rev) - except Exception as exc: - logger.error("Error while getting " - "encrypted doc from db") - logger.exception(exc) - continue + + # the following var will hold a deferred because we may try to + # fetch the encrypted document from the sync db + d = None + + if doc.is_tombstone(): + d = defer.succeed(None) + elif not self._defer_encryption: + # fallback case, for tests + d = defer.succeed(encrypt_doc(self._crypto, doc)) + else: + + def _maybe_encrypt_doc_inline(doc_json): if doc_json is None: - # Not marked as tombstone, but we got nothing - # from the sync db. As it is not encrypted yet, we - # force inline encryption. + # the document is not marked as tombstone, but we got + # nothing from the sync db. As it is not encrypted + # yet, we force inline encryption. # TODO: implement a queue to deal with these cases. - doc_json = encrypt_doc(self._crypto, doc) + return encrypt_doc(self._crypto, doc) + return doc_json + + d = self.get_encrypted_doc_from_db(doc.doc_id, doc.rev) + d.addCallback(_maybe_encrypt_doc_inline) # ------------------------------------------------------------- # end of symmetric encryption # ------------------------------------------------------------- + t = self._syncer_pool.new_syncer_thread( sent + 1, total, last_request_lock=last_request_lock, last_callback_lock=last_callback_lock) - # bail out if any thread failed + # bail out if creation of any thread failed if t is None: self.stop(fail=True) break - # set the request method - t.doc_syncer.set_request_method( - 'put', sync_id, cur_target_gen, cur_target_trans_id, - id=doc.doc_id, rev=doc.rev, content=doc_json, gen=gen, - trans_id=trans_id, number_of_docs=number_of_docs, - doc_idx=sent + 1) - # set the success calback + # the following callback will be called when the document's + # encrypted content is available, either because it was found on + # the sync db 
or because it has been encrypted inline. - def _success_callback(idx, total, response): - _success_msg = "Soledad sync send status: %d/%d" \ - % (idx, total) - signal(SOLEDAD_SYNC_SEND_STATUS, _success_msg) - logger.debug(_success_msg) + def _configure_and_start_thread(t, doc_json): + # set the request method + t.doc_syncer.set_request_method( + 'put', sync_id, cur_target_gen, cur_target_trans_id, + id=doc.doc_id, rev=doc.rev, content=doc_json, gen=gen, + trans_id=trans_id, number_of_docs=number_of_docs, + doc_idx=sent + 1) + # set the success calback - t.doc_syncer.set_success_callback(_success_callback) + def _success_callback(idx, total, response): + _success_msg = "Soledad sync send status: %d/%d" \ + % (idx, total) + signal(SOLEDAD_SYNC_SEND_STATUS, _success_msg) + logger.debug(_success_msg) - # set the failure callback - def _failure_callback(idx, total, exception): - _failure_msg = "Soledad sync: error while sending document " \ - "%d/%d: %s" % (idx, total, exception) - logger.warning("%s" % _failure_msg) - logger.warning("Soledad sync: failing gracefully, will " - "recover on next sync.") + t.doc_syncer.set_success_callback(_success_callback) - t.doc_syncer.set_failure_callback(_failure_callback) + # set the failure callback + def _failure_callback(idx, total, exception): + _failure_msg = "Soledad sync: error while sending document " \ + "%d/%d: %s" % (idx, total, exception) + logger.warning("%s" % _failure_msg) + logger.warning("Soledad sync: failing gracefully, will " + "recover on next sync.") + + t.doc_syncer.set_failure_callback(_failure_callback) + + # save thread and append + t.start() + + d.addCallback(partial(_configure_and_start_thread, t)) - # save thread and append - t.start() threads.append((t, doc)) # update lock references so they can be used in next call to @@ -1230,6 +1257,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): while threads: # check if there are failures t, doc = threads.pop(0) + t.started.wait() t.join() if 
t.success: synced.append((doc.doc_id, doc.rev)) @@ -1238,8 +1266,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): raise t.exception # delete documents from the sync database - if defer_encryption: - self.delete_encrypted_docs_from_db(synced) + if self._defer_encryption: + self._delete_encrypted_docs_from_db(synced) # get target gen and trans_id after docs gen_after_send = None @@ -1248,16 +1276,23 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): response_dict = json.loads(last_successful_thread.response[0])[0] gen_after_send = response_dict['new_generation'] trans_id_after_send = response_dict['new_transaction_id'] - - # get docs from target - if self._sync_db is None: - defer_decryption = False + # ------------------------------------------------------------------- + # end of send documents to target + # ------------------------------------------------------------------- + + # ------------------------------------------------------------------- + # start of fetch documents from target + # ------------------------------------------------------------------- + defer_decryption = defer_decryption and self._defer_decryption if self.stopped is False: cur_target_gen, cur_target_trans_id = self._get_remote_docs( url, last_known_generation, last_known_trans_id, headers, return_doc_cb, ensure_callback, sync_id, defer_decryption=defer_decryption) + # ------------------------------------------------------------------- + # end of fetch documents from target + # ------------------------------------------------------------------- self._syncer_pool.cleanup() @@ -1308,6 +1343,10 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): with self._stop_lock: return self._stopped is True + # + # Symmetric encryption of syncing docs + # + def get_encrypted_doc_from_db(self, doc_id, doc_rev): """ Retrieve encrypted document from the database of encrypted docs for @@ -1318,33 +1357,31 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :param doc_rev: 
The document revision :type doc_rev: str + + :return: A deferred which is fired with the document's encrypted + content or None if the document was not found on the sync db. + :rtype: twisted.internet.defer.Deferred """ - encr = SyncEncrypterPool - sql = ("SELECT content FROM %s WHERE doc_id=? and rev=?" % ( - encr.TABLE_NAME,)) - res = self._fetchall(sql, (doc_id, doc_rev)) - if res: - val = res.pop() - return val[0] - else: - # no doc found - return None + logger.debug("Looking for encrypted document on sync db: %s" % doc_id) + return self._sync_enc_pool.get_encrypted_doc(doc_id, doc_rev) - def delete_encrypted_docs_from_db(self, docs_ids): + def _delete_encrypted_docs_from_db(self, docs): """ Delete several encrypted documents from the database of symmetrically encrypted docs to sync. - :param docs_ids: an iterable with (doc_id, doc_rev) for all documents - to be deleted. - :type docs_ids: any iterable of tuples of str + :param docs: an iterable with (doc_id, doc_rev) for all documents + to be deleted. + :type docs: any iterable of tuples of str """ - if docs_ids: - encr = SyncEncrypterPool - for doc_id, doc_rev in docs_ids: - sql = ("DELETE FROM %s WHERE doc_id=? and rev=?" % ( - encr.TABLE_NAME,)) - self._sync_db.execute(sql, (doc_id, doc_rev)) + for doc_id, doc_rev in docs: + logger.debug("Removing encrypted document on sync db: %s" + % doc_id) + return self._sync_enc_pool.delete_encrypted_doc(doc_id, doc_rev) + + # + # Symmetric decryption of syncing docs + # def _enqueue_encrypted_received_doc(self, doc, gen, trans_id, idx, total): """ @@ -1357,15 +1394,14 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :param gen: The generation. :type gen: str :param trans_id: Transacion id. - :type gen: str + :param idx: The index count of the current operation. :type idx: int :param total: The total number of operations. :type total: int """ - logger.debug( - "Enqueueing doc for decryption: %d/%d." 
- % (idx + 1, total)) + logger.debug("Enqueueing doc for decryption: %d/%d." + % (idx + 1, total)) self._sync_decr_pool.insert_encrypted_received_doc( doc.doc_id, doc.rev, doc.content, gen, trans_id, idx + 1) @@ -1384,16 +1420,11 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :param total: The total number of operations. :type total: int """ - logger.debug( - "Enqueueing doc, no decryption needed: %d/%d." - % (idx + 1, total)) + logger.debug("Enqueueing doc, no decryption needed: %d/%d." + % (idx + 1, total)) self._sync_decr_pool.insert_received_doc( doc.doc_id, doc.rev, doc.content, gen, trans_id, idx + 1) - # - # Symmetric decryption of syncing docs - # - def set_decryption_callback(self, cb): """ Set callback to be called when the decryption finishes. @@ -1410,11 +1441,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): """ return self._decryption_callback is not None - def has_syncdb(self): - """ - Return True if we have an initialized syncdb. - """ - return self._sync_db is not None + # + # Authentication methods + # def _sign_request(self, method, url_query, params): """ @@ -1442,9 +1471,3 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :type token: str """ TokenBasedAuth.set_token_credentials(self, uuid, token) - - def _fetchall(self, *args, **kwargs): - with self._sync_db: - c = self._sync_db.cursor() - c.execute(*args, **kwargs) - return c.fetchall() -- cgit v1.2.3 From e62dafeba8f08c1f7588e37cf9cd3fb28e79a020 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 19 May 2015 18:46:53 -0300 Subject: [feature] use twisted.web.client in client sync This change uses twisted deferreds for the whole syncing process and paves the way to implementing other transport schemes. It removes a lot of threaded code that used locks and was very difficult to maintain, and lets twisted do the dirty work. Furthermore, all blocking network i/o is now handled asynchronously by twisted.
This commit removes the possibility of interrupting a sync, and we should reimplement it using cancellable deferreds if we need it. --- client/src/leap/soledad/client/encdecpool.py | 11 +- client/src/leap/soledad/client/http_target.py | 570 ++++++++++ client/src/leap/soledad/client/sqlcipher.py | 46 +- client/src/leap/soledad/client/sync.py | 83 +- client/src/leap/soledad/client/target.py | 1473 ------------------------- 5 files changed, 634 insertions(+), 1549 deletions(-) create mode 100644 client/src/leap/soledad/client/http_target.py delete mode 100644 client/src/leap/soledad/client/target.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index 0c1f92ea..7c21c30e 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -789,12 +789,5 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): self._exception = e self._finished.set() - def wait(self): - """ - Wait for the decrypt-and-process loop to finish. - """ - logger.debug("Waiting for asynchronous decryption of incoming documents...") - self._finished.wait() - logger.debug("Asynchronous decryption of incoming documents finished.") - if self._exception: - raise self._exception + def has_finished(self): + return self._finished.is_set() diff --git a/client/src/leap/soledad/client/http_target.py b/client/src/leap/soledad/client/http_target.py new file mode 100644 index 00000000..041180e6 --- /dev/null +++ b/client/src/leap/soledad/client/http_target.py @@ -0,0 +1,570 @@ +# -*- coding: utf-8 -*- +# target.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +A U1DB backend for encrypting data before sending to server and decrypting +after receiving. +""" + +import json +import base64 +import logging + +from zope.proxy import setProxiedObject +from zope.proxy import ProxyBase +from uuid import uuid4 +from functools import partial +from collections import defaultdict + +from twisted.internet import defer +from twisted.internet import reactor +from twisted.web.client import getPage + +from u1db import errors +from u1db import SyncTarget +from u1db.remote import utils + +from leap.soledad.common.document import SoledadDocument + +from leap.soledad.client.crypto import is_symmetrically_encrypted +from leap.soledad.client.crypto import encrypt_doc +from leap.soledad.client.crypto import decrypt_doc +from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS +from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS +from leap.soledad.client.events import signal +from leap.soledad.client.encdecpool import SyncDecrypterPool + + +logger = logging.getLogger(__name__) + + +class SoledadHTTPSyncTarget(SyncTarget): + """ + A SyncTarget that encrypts data before sending and decrypts data after + receiving. + + Normally encryption will have been written to the sync database upon + document modification. The sync database is also used to write temporarily + the parsed documents that the remote send us, before being decrypted and + written to the main database. 
+ """ + + # will later keep a reference to the insert-doc callback + # passed to sync_exchange + _insert_doc_cb = defaultdict(lambda: ProxyBase(None)) + + def __init__(self, url, source_replica_uid, creds, crypto, + sync_db=None, sync_enc_pool=None): + """ + Initialize the sync target. + + :param url: The server sync url. + :type url: str + :param source_replica_uid: The source replica uid which we use when + deferring decryption. + :type source_replica_uid: str + :param url: The url of the target replica to sync with. + :type url: str + :param creds: A dictionary containing the uuid and token. + :type creds: creds + :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt + document contents when syncing. + :type crypto: soledad.crypto.SoledadCrypto + :param sync_db: Optional. handler for the db with the symmetric + encryption of the syncing documents. If + None, encryption will be done in-place, + instead of retreiving it from the dedicated + database. + :type sync_db: Sqlite handler + """ + if url.endswith("/"): + url = url[:-1] + self._url = str(url) + "/sync-from/" + source_replica_uid + self.source_replica_uid = source_replica_uid + self._auth_header = None + self.set_creds(creds) + self._crypto = crypto + self._sync_db = sync_db + self._sync_enc_pool = sync_enc_pool + # asynchronous encryption/decryption attributes + self._decryption_callback = None + self._sync_decr_pool = None + + def set_creds(self, creds): + """ + Update credentials. + + :param creds: A dictionary containing the uuid and token. 
+ :type creds: dict + """ + uuid = creds['token']['uuid'] + token = creds['token']['token'] + auth = '%s:%s' % (uuid, token) + b64_token = base64.b64encode(auth) + self._auth_header = {'Authorization': 'Token %s' % b64_token} + + @property + def _defer_encryption(self): + return self._sync_enc_pool is not None + + # + # SyncTarget API + # + + @defer.inlineCallbacks + def get_sync_info(self, source_replica_uid): + """ + Return information about known state of remote database. + + Return the replica_uid and the current database generation of the + remote database, and its last-seen database generation for the client + replica. + + :param source_replica_uid: The client-size replica uid. + :type source_replica_uid: str + + :return: A deferred which fires with (target_replica_uid, + target_replica_generation, target_trans_id, + source_replica_last_known_generation, + source_replica_last_known_transaction_id) + :rtype: twisted.internet.defer.Deferred + """ + raw = yield getPage(self._url, headers=self._auth_header) + res = json.loads(raw) + defer.returnValue([ + res['target_replica_uid'], + res['target_replica_generation'], + res['target_replica_transaction_id'], + res['source_replica_generation'], + res['source_transaction_id'] + ]) + + def record_sync_info( + self, source_replica_uid, source_replica_generation, + source_replica_transaction_id): + """ + Record tip information for another replica. + + After sync_exchange has been processed, the caller will have + received new content from this replica. This call allows the + source replica instigating the sync to inform us what their + generation became after applying the documents we returned. + + This is used to allow future sync operations to not need to repeat data + that we just talked about. It also means that if this is called at the + wrong time, there can be database records that will never be + synchronized. + + :param source_replica_uid: The identifier for the source replica. 
+ :type source_replica_uid: str + :param source_replica_generation: The database generation for the + source replica. + :type source_replica_generation: int + :param source_replica_transaction_id: The transaction id associated + with the source replica + generation. + :type source_replica_transaction_id: str + + :return: A deferred which fires with the result of the query. + :rtype: twisted.internet.defer.Deferred + """ + data = json.dumps({ + 'generation': source_replica_generation, + 'transaction_id': source_replica_transaction_id + }) + headers = self._auth_header.copy() + headers.update({'content-type': 'application/json'}) + return getPage( + self._url, + method='PUT', + headers=headers, + postdata=data) + + @defer.inlineCallbacks + def sync_exchange(self, docs_by_generation, source_replica_uid, + last_known_generation, last_known_trans_id, + return_doc_cb, ensure_callback=None, + defer_decryption=True, sync_id=None): + """ + Find out which documents the remote database does not know about, + encrypt and send them. After that, receive documents from the remote + database. + + :param docs_by_generations: A list of (doc_id, generation, trans_id) + of local documents that were changed since + the last local generation the remote + replica knows about. + :type docs_by_generations: list of tuples + + :param source_replica_uid: The uid of the source replica. + :type source_replica_uid: str + + :param last_known_generation: Target's last known generation. + :type last_known_generation: int + + :param last_known_trans_id: Target's last known transaction id. + :type last_known_trans_id: str + + :param return_doc_cb: A callback for inserting received documents from + target. If not overriden, this will call u1db + insert_doc_from_target in synchronizer, which + implements the TAKE OTHER semantics. + :type return_doc_cb: function + + :param ensure_callback: A callback that ensures we know the target + replica uid if the target replica was just + created. 
+ :type ensure_callback: function + + :param defer_decryption: Whether to defer the decryption process using + the intermediate database. If False, + decryption will be done inline. + :type defer_decryption: bool + + :return: A deferred which fires with the new generation and + transaction id of the target replica. + :rtype: twisted.internet.defer.Deferred + """ + + self._ensure_callback = ensure_callback + + if sync_id is None: + sync_id = str(uuid4()) + self.source_replica_uid = source_replica_uid + + # let the decrypter pool access the passed callback to insert docs + setProxiedObject(self._insert_doc_cb[source_replica_uid], + return_doc_cb) + + gen_after_send, trans_id_after_send = yield self._send_docs( + docs_by_generation, + last_known_generation, + last_known_trans_id, + sync_id) + + cur_target_gen, cur_target_trans_id = yield self._receive_docs( + last_known_generation, last_known_trans_id, + return_doc_cb, ensure_callback, sync_id, + defer_decryption=defer_decryption) + + # update gen and trans id info in case we just sent and did not + # receive docs. 
+ if gen_after_send is not None and gen_after_send > cur_target_gen: + cur_target_gen = gen_after_send + cur_target_trans_id = trans_id_after_send + + defer.returnValue([cur_target_gen, cur_target_trans_id]) + + # + # methods to send docs + # + + def _prepare(self, comma, entries, **dic): + entry = comma + '\r\n' + json.dumps(dic) + entries.append(entry) + return len(entry) + + @defer.inlineCallbacks + def _send_docs(self, docs_by_generation, last_known_generation, + last_known_trans_id, sync_id): + + if not docs_by_generation: + defer.returnValue([None, None]) + + headers = self._auth_header.copy() + headers.update({'content-type': 'application/x-soledad-sync-put'}) + # add remote replica metadata to the request + first_entries = ['['] + self._prepare( + '', first_entries, + last_known_generation=last_known_generation, + last_known_trans_id=last_known_trans_id, + sync_id=sync_id, + ensure=self._ensure_callback is not None) + idx = 0 + total = len(docs_by_generation) + for doc, gen, trans_id in docs_by_generation: + idx += 1 + result = yield self._send_one_doc( + headers, first_entries, doc, + gen, trans_id, total, idx) + if self._defer_encryption: + self._sync_enc_pool.delete_encrypted_doc( + doc.doc_id, doc.rev) + signal(SOLEDAD_SYNC_SEND_STATUS, + "Soledad sync send status: %d/%d" + % (idx, total)) + response_dict = json.loads(result)[0] + gen_after_send = response_dict['new_generation'] + trans_id_after_send = response_dict['new_transaction_id'] + defer.returnValue([gen_after_send, trans_id_after_send]) + + @defer.inlineCallbacks + def _send_one_doc(self, headers, first_entries, doc, gen, trans_id, + number_of_docs, doc_idx): + entries = first_entries[:] + # add the document to the request + content = yield self._encrypt_doc(doc) + self._prepare( + ',', entries, + id=doc.doc_id, rev=doc.rev, content=content, gen=gen, + trans_id=trans_id, number_of_docs=number_of_docs, + doc_idx=doc_idx) + entries.append('\r\n]') + data = ''.join(entries) + result = yield 
getPage( + self._url, + method='POST', + headers=headers, + postdata=data) + defer.returnValue(result) + + def _encrypt_doc(self, doc): + d = None + if doc.is_tombstone(): + d = defer.succeed(None) + elif not self._defer_encryption: + # fallback case, for tests + d = defer.succeed(encrypt_doc(self._crypto, doc)) + else: + + def _maybe_encrypt_doc_inline(doc_json): + if doc_json is None: + # the document is not marked as tombstone, but we got + # nothing from the sync db. As it is not encrypted + # yet, we force inline encryption. + return encrypt_doc(self._crypto, doc) + return doc_json + + d = self._sync_enc_pool.get_encrypted_doc(doc.doc_id, doc.rev) + d.addCallback(_maybe_encrypt_doc_inline) + return d + + # + # methods to receive doc + # + + @defer.inlineCallbacks + def _receive_docs(self, last_known_generation, last_known_trans_id, + return_doc_cb, ensure_callback, sync_id, + defer_decryption): + # we keep a reference to the callback in case we defer the decryption + self._return_doc_cb = return_doc_cb + self._queue_for_decrypt = defer_decryption \ + and self._sync_db is not None + + new_generation = last_known_generation + new_transaction_id = last_known_trans_id + + if self._queue_for_decrypt: + logger.debug( + "Soledad sync: will queue received docs for decrypting.") + + if defer_decryption: + self._setup_sync_decr_pool() + + headers = self._auth_header.copy() + headers.update({'content-type': 'application/x-soledad-sync-get'}) + + # maybe get one doc + d = self._receive_one_doc( + headers, last_known_generation, last_known_trans_id, + sync_id, 0) + d.addCallback(partial(self._insert_received_doc, 1, 1)) + number_of_changes, ngen, ntrans = yield d + + if defer_decryption: + self._sync_decr_pool.set_docs_to_process( + number_of_changes) + idx = 1 + + # maybe get more documents + deferreds = [] + while idx < number_of_changes: + d = self._receive_one_doc( + headers, last_known_generation, + last_known_trans_id, sync_id, idx) + d.addCallback( + partial( + 
self._insert_received_doc, + idx + 1, + number_of_changes)) + deferreds.append(d) + idx += 1 + results = yield defer.gatherResults(deferreds) + + # get genration and transaction id of target after insertions + if deferreds: + _, new_generation, new_transaction_id = results.pop() + + # get current target gen and trans id in case no documents were + def _shutdown_and_finish(res): + self._sync_decr_pool.close() + return new_generation, new_transaction_id + + d = defer.Deferred() + d.addCallback(_shutdown_and_finish) + + def _wait_or_finish(): + if not self._sync_decr_pool.has_finished(): + reactor.callLater( + SyncDecrypterPool.DECRYPT_LOOP_PERIOD, + _wait_or_finish) + else: + d.callback(None) + + # decrypt docs in case of deferred decryption + if defer_decryption: + _wait_or_finish() + else: + d.callback(None) + + new_generation, new_transaction_id = yield d + defer.returnValue([new_generation, new_transaction_id]) + + def _receive_one_doc(self, headers, last_known_generation, + last_known_trans_id, sync_id, received): + entries = ['['] + # add remote replica metadata to the request + self._prepare( + '', entries, + last_known_generation=last_known_generation, + last_known_trans_id=last_known_trans_id, + sync_id=sync_id, + ensure=self._ensure_callback is not None) + # inform server of how many documents have already been received + self._prepare( + ',', entries, received=received) + entries.append('\r\n]') + # send headers + return getPage( + self._url, + method='POST', + headers=headers, + postdata=''.join(entries)) + + def _insert_received_doc(self, idx, total, response): + """ + Insert a received document into the local replica. + + :param idx: The index count of the current operation. + :type idx: int + :param total: The total number of operations. + :type total: int + :param response: The body and headers of the response. 
+ :type response: tuple(str, dict) + """ + new_generation, new_transaction_id, number_of_changes, doc_id, \ + rev, content, gen, trans_id = \ + self._parse_received_doc_response(response) + if doc_id is not None: + print doc_id + # decrypt incoming document and insert into local database + # ------------------------------------------------------------- + # symmetric decryption of document's contents + # ------------------------------------------------------------- + # If arriving content was symmetrically encrypted, we decrypt it. + # We do it inline if defer_decryption flag is False or no sync_db + # was defined, otherwise we defer it writing it to the received + # docs table. + doc = SoledadDocument(doc_id, rev, content) + if is_symmetrically_encrypted(doc): + if self._queue_for_decrypt: + self._sync_decr_pool.insert_encrypted_received_doc( + doc.doc_id, doc.rev, doc.content, gen, trans_id, + idx) + else: + # defer_decryption is False or no-sync-db fallback + doc.set_json(decrypt_doc(self._crypto, doc)) + self._return_doc_cb(doc, gen, trans_id) + else: + # not symmetrically encrypted doc, insert it directly + # or save it in the decrypted stage. + if self._queue_for_decrypt: + self._sync_decr_pool.insert_received_doc( + doc.doc_id, doc.rev, doc.content, gen, trans_id, + idx) + else: + self._return_doc_cb(doc, gen, trans_id) + # ------------------------------------------------------------- + # end of symmetric decryption + # ------------------------------------------------------------- + msg = "%d/%d" % (idx, total) + signal(SOLEDAD_SYNC_RECEIVE_STATUS, msg) + logger.debug("Soledad sync receive status: %s" % msg) + return number_of_changes, new_generation, new_transaction_id + + def _parse_received_doc_response(self, response): + """ + Parse the response from the server containing the received document. + + :param response: The body and headers of the response. 
+ :type response: tuple(str, dict) + + :return: (new_gen, new_trans_id, number_of_changes, doc_id, rev, + content, gen, trans_id) + :rtype: tuple + """ + # decode incoming stream + parts = response.splitlines() + if not parts or parts[0] != '[' or parts[-1] != ']': + raise errors.BrokenSyncStream + data = parts[1:-1] + # decode metadata + line, comma = utils.check_and_strip_comma(data[0]) + metadata = None + try: + metadata = json.loads(line) + new_generation = metadata['new_generation'] + new_transaction_id = metadata['new_transaction_id'] + number_of_changes = metadata['number_of_changes'] + except (json.JSONDecodeError, KeyError): + raise errors.BrokenSyncStream + # make sure we have replica_uid from fresh new dbs + if self._ensure_callback and 'replica_uid' in metadata: + self._ensure_callback(metadata['replica_uid']) + # parse incoming document info + doc_id = None + rev = None + content = None + gen = None + trans_id = None + if number_of_changes > 0: + try: + entry = json.loads(data[1]) + doc_id = entry['id'] + rev = entry['rev'] + content = entry['content'] + gen = entry['gen'] + trans_id = entry['trans_id'] + except (IndexError, KeyError): + raise errors.BrokenSyncStream + return new_generation, new_transaction_id, number_of_changes, \ + doc_id, rev, content, gen, trans_id + + def _setup_sync_decr_pool(self): + """ + Set up the SyncDecrypterPool for deferred decryption. 
+ """ + if self._sync_decr_pool is None and self._sync_db is not None: + # initialize syncing queue decryption pool + self._sync_decr_pool = SyncDecrypterPool( + self._crypto, + self._sync_db, + insert_doc_cb=self._insert_doc_cb, + source_replica_uid=self.source_replica_uid) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 16241621..53afbda8 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -53,20 +53,17 @@ from u1db.backends import sqlite_backend from hashlib import sha256 from contextlib import contextmanager from collections import defaultdict -from httplib import CannotSendRequest from functools import partial from pysqlcipher import dbapi2 as sqlcipher_dbapi2 from twisted.internet import reactor -from twisted.internet.task import LoopingCall from twisted.internet.threads import deferToThreadPool from twisted.python.threadpool import ThreadPool -from twisted.python import log from twisted.enterprise import adbapi from leap.soledad.client import encdecpool -from leap.soledad.client.target import SoledadSyncTarget +from leap.soledad.client.http_target import SoledadHTTPSyncTarget from leap.soledad.client.sync import SoledadSynchronizer from leap.soledad.client import pragmas @@ -590,33 +587,13 @@ class SQLCipherU1DBSync(SQLCipherDatabase): before the synchronisation was performed. :rtype: Deferred """ - kwargs = {'creds': creds, 'autocreate': autocreate, - 'defer_decryption': defer_decryption} - return self._defer_to_sync_threadpool(self._sync, url, **kwargs) - - def _sync(self, url, creds=None, autocreate=True, defer_decryption=True): - res = None - # the following context manager blocks until the syncing lock can be # acquired. - # TODO review, I think this is no longer needed with a 1-thread - # threadpool. - - log.msg("in _sync") - self.__url = url with self._syncer(url, creds=creds) as syncer: # XXX could mark the critical section here... 
- try: - log.msg('syncer sync...') - res = syncer.sync(autocreate=autocreate, - defer_decryption=defer_decryption) - except CannotSendRequest: - logger.warning("Connection with sync target couldn't be " - "established. Resetting connection...") - # closing the connection it will be recreated in the next try - syncer.sync_target.close() - return - return res + return syncer.sync( + autocreate=autocreate, + defer_decryption=defer_decryption) def stop_sync(self): """ @@ -673,13 +650,14 @@ class SQLCipherU1DBSync(SQLCipherDatabase): if syncer is None or h != cur_h: syncer = SoledadSynchronizer( self, - SoledadSyncTarget(url, - # XXX is the replica_uid ready? - self._replica_uid, - creds=creds, - crypto=self._crypto, - sync_db=self._sync_db, - sync_enc_pool=self._sync_enc_pool)) + SoledadHTTPSyncTarget( + url, + # XXX is the replica_uid ready? + self._replica_uid, + creds=creds, + crypto=self._crypto, + sync_db=self._sync_db, + sync_enc_pool=self._sync_enc_pool)) self._syncers[url] = (h, syncer) # in order to reuse the same synchronizer multiple times we have to # reset its state (i.e. the number of documents received from target diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index d4ca4258..f8f74ce7 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -25,9 +25,10 @@ Extend u1db Synchronizer with the ability to: * Be interrupted and recovered. """ import logging -import traceback from threading import Lock +from twisted.internet import defer + from u1db import errors from u1db.sync import Synchronizer @@ -90,6 +91,7 @@ class SoledadSynchronizer(Synchronizer): # Synchronizer may be reused later. self.release_syncing_lock() + @defer.inlineCallbacks def _sync(self, autocreate=False, defer_decryption=True): """ Helper function, called from the main `sync` method. 
@@ -102,7 +104,7 @@ class SoledadSynchronizer(Synchronizer): ensure_callback = None try: (self.target_replica_uid, target_gen, target_trans_id, - target_my_gen, target_my_trans_id) = \ + target_my_gen, target_my_trans_id) = yield \ sync_target.get_sync_info(self.source._replica_uid) except errors.DatabaseDoesNotExist: if not autocreate: @@ -151,15 +153,15 @@ class SoledadSynchronizer(Synchronizer): self.target_replica_uid) logger.debug( "Soledad source sync info:\n" - " source target gen: %d\n" - " source target trans_id: %s" + " last target gen known to source: %d\n" + " last target trans_id known to source: %s" % (target_last_known_gen, target_last_known_trans_id)) # validate transaction ids if not changes and target_last_known_gen == target_gen: if target_trans_id != target_last_known_trans_id: raise errors.InvalidTransactionId - return my_gen + defer.returnValue(my_gen) # prepare to send all the changed docs changed_doc_ids = [doc_id for doc_id, _, _ in changes] @@ -177,33 +179,26 @@ class SoledadSynchronizer(Synchronizer): # # The sync_exchange method may be interrupted, in which case it will # return a tuple of Nones. 
- try: - new_gen, new_trans_id = sync_target.sync_exchange( - docs_by_generation, self.source._replica_uid, - target_last_known_gen, target_last_known_trans_id, - self._insert_doc_from_target, ensure_callback=ensure_callback, - defer_decryption=defer_decryption) - logger.debug( - "Soledad source sync info after sync exchange:\n" - " source target gen: %d\n" - " source target trans_id: %s" - % (new_gen, new_trans_id)) - info = { - "target_replica_uid": self.target_replica_uid, - "new_gen": new_gen, - "new_trans_id": new_trans_id, - "my_gen": my_gen - } - self._syncing_info = info - self.complete_sync() - except Exception as e: - logger.error("Soledad sync error: %s" % str(e)) - logger.error(traceback.format_exc()) - sync_target.stop() - finally: - sync_target.close() - - return my_gen + new_gen, new_trans_id = yield sync_target.sync_exchange( + docs_by_generation, self.source._replica_uid, + target_last_known_gen, target_last_known_trans_id, + self._insert_doc_from_target, ensure_callback=ensure_callback, + defer_decryption=defer_decryption) + logger.debug( + "Soledad source sync info after sync exchange:\n" + " source known target gen: %d\n" + " source known target trans_id: %s" + % (new_gen, new_trans_id)) + info = { + "target_replica_uid": self.target_replica_uid, + "new_gen": new_gen, + "new_trans_id": new_trans_id, + "my_gen": my_gen + } + self._syncing_info = info + yield self.complete_sync() + + defer.returnValue(my_gen) def complete_sync(self): """ @@ -211,6 +206,9 @@ class SoledadSynchronizer(Synchronizer): (a) record last known generation and transaction uid for the remote replica, and (b) make target aware of our current reached generation. + + :return: A deferred which will fire when the sync has been completed. 
+ :rtype: twisted.internet.defer.Deferred """ logger.debug("Completing deferred last step in SYNC...") @@ -221,7 +219,26 @@ class SoledadSynchronizer(Synchronizer): info["target_replica_uid"], info["new_gen"], info["new_trans_id"]) # if gapless record current reached generation with target - self._record_sync_info_with_the_target(info["my_gen"]) + return self._record_sync_info_with_the_target(info["my_gen"]) + + def _record_sync_info_with_the_target(self, start_generation): + """ + Store local replica metadata in server. + + :param start_generation: The local generation when the sync was + started. + :type start_generation: int + + :return: A deferred which will fire when the operation has been + completed. + :rtype: twisted.internet.defer.Deferred + """ + cur_gen, trans_id = self.source._get_generation_info() + if (cur_gen == start_generation + self.num_inserted + and self.num_inserted > 0): + return self.sync_target.record_sync_info( + self.source._replica_uid, cur_gen, trans_id) + return defer.succeed(None) @property def syncing(self): diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py deleted file mode 100644 index 667aab15..00000000 --- a/client/src/leap/soledad/client/target.py +++ /dev/null @@ -1,1473 +0,0 @@ -# -*- coding: utf-8 -*- -# target.py -# Copyright (C) 2013, 2014 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
-""" -A U1DB backend for encrypting data before sending to server and decrypting -after receiving. -""" -import cStringIO -import gzip -import logging -import re -import urllib -import threading - -from collections import defaultdict -from time import sleep -from uuid import uuid4 -from functools import partial - -import simplejson as json - -from u1db import errors -from u1db.remote import utils, http_errors -from u1db.remote.http_target import HTTPSyncTarget -from u1db.remote.http_client import _encode_query_parameter, HTTPClientBase -from zope.proxy import ProxyBase -from zope.proxy import setProxiedObject - -from twisted.internet import defer - -from leap.soledad.common.document import SoledadDocument -from leap.soledad.client.auth import TokenBasedAuth -from leap.soledad.client.crypto import is_symmetrically_encrypted -from leap.soledad.client.crypto import encrypt_doc, decrypt_doc -from leap.soledad.client.encdecpool import SyncDecrypterPool -from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS -from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS -from leap.soledad.client.events import signal - - -logger = logging.getLogger(__name__) - - -def _gunzip(data): - """ - Uncompress data that is gzipped. - - :param data: gzipped data - :type data: basestring - """ - buffer = cStringIO.StringIO() - buffer.write(data) - buffer.seek(0) - try: - data = gzip.GzipFile(mode='r', fileobj=buffer).read() - except Exception: - logger.warning("Error while decrypting gzipped data") - buffer.close() - return data - - -class DocumentSyncerThread(threading.Thread): - """ - A thread that knowns how to either send or receive a document during the - sync process. - """ - - def __init__(self, doc_syncer, release_method, failed_method, - idx, total, last_request_lock=None, last_callback_lock=None): - """ - Initialize a new syncer thread. - - :param doc_syncer: A document syncer. 
- :type doc_syncer: HTTPDocumentSyncer - :param release_method: A method to be called when finished running. - :type release_method: callable(DocumentSyncerThread) - :param failed_method: A method to be called when we failed. - :type failed_method: callable(DocumentSyncerThread) - :param idx: The index count of the current operation. - :type idx: int - :param total: The total number of operations. - :type total: int - :param last_request_lock: A lock to wait for before actually performing - the request. - :type last_request_lock: threading.Lock - :param last_callback_lock: A lock to wait for before actually running - the success callback. - :type last_callback_lock: threading.Lock - """ - threading.Thread.__init__(self) - self._doc_syncer = doc_syncer - self._release_method = release_method - self._failed_method = failed_method - self._idx = idx - self._total = total - self._last_request_lock = last_request_lock - self._last_callback_lock = last_callback_lock - self._response = None - self._exception = None - self._result = None - self._success = False - self.started = threading.Event() - # a lock so we can signal when we're finished - self._request_lock = threading.Lock() - self._request_lock.acquire() - self._callback_lock = threading.Lock() - self._callback_lock.acquire() - # make thread interruptable - self._stopped = None - self._stop_lock = threading.Lock() - - def run(self): - """ - Run the HTTP request and store results. - - This method will block and wait for an eventual previous operation to - finish before actually performing the request. It also traps any - exception and register any failure with the request. 
- """ - self.started.set() - - with self._stop_lock: - if self._stopped is None: - self._stopped = False - else: - return - - # eventually wait for the previous thread to finish - if self._last_request_lock is not None: - self._last_request_lock.acquire() - - # bail out in case we've been interrupted - if self.stopped is True: - return - - try: - self._response = self._doc_syncer.do_request() - self._request_lock.release() - - # run success callback - if self._doc_syncer.success_callback is not None: - - # eventually wait for callback lock release - if self._last_callback_lock is not None: - self._last_callback_lock.acquire() - - # bail out in case we've been interrupted - if self._stopped is True: - return - - self._result = self._doc_syncer.success_callback( - self._idx, self._total, self._response) - self._success = True - doc_syncer = self._doc_syncer - self._release_method(self, doc_syncer) - self._doc_syncer = None - # let next thread executed its callback - self._callback_lock.release() - - # trap any exception and signal failure - except Exception as e: - self._exception = e - self._success = False - # run failure callback - if self._doc_syncer.failure_callback is not None: - - # eventually wait for callback lock release - if self._last_callback_lock is not None: - self._last_callback_lock.acquire() - - # bail out in case we've been interrupted - if self.stopped is True: - return - - self._doc_syncer.failure_callback( - self._idx, self._total, self._exception) - - self._failed_method() - # we do not release the callback lock here because we - # failed and so we don't want other threads to succeed. 
- - @property - def doc_syncer(self): - return self._doc_syncer - - @property - def response(self): - return self._response - - @property - def exception(self): - return self._exception - - @property - def callback_lock(self): - return self._callback_lock - - @property - def request_lock(self): - return self._request_lock - - @property - def success(self): - return self._success - - def stop(self): - with self._stop_lock: - self._stopped = True - - @property - def stopped(self): - with self._stop_lock: - return self._stopped - - @property - def result(self): - return self._result - - -class DocumentSyncerPool(object): - """ - A pool of reusable document syncers. - """ - - POOL_SIZE = 10 - """ - The maximum amount of syncer threads running at the same time. - """ - - def __init__(self, raw_url, raw_creds, query_string, headers, - ensure_callback, stop_method): - """ - Initialize the document syncer pool. - - :param raw_url: The complete raw URL for the HTTP request. - :type raw_url: str - :param raw_creds: The credentials for the HTTP request. - :type raw_creds: dict - :param query_string: The query string for the HTTP request. - :type query_string: str - :param headers: The headers for the HTTP request. - :type headers: dict - :param ensure_callback: A callback to ensure we have the correct - target_replica_uid, if it was just created. - :type ensure_callback: callable - - """ - # save syncer params - self._raw_url = raw_url - self._raw_creds = raw_creds - self._query_string = query_string - self._headers = headers - self._ensure_callback = ensure_callback - self._stop_method = stop_method - # pool attributes - self._failures = False - self._semaphore_pool = threading.BoundedSemaphore( - DocumentSyncerPool.POOL_SIZE) - self._pool_access_lock = threading.Lock() - self._doc_syncers = [] - self._threads = [] - - def new_syncer_thread(self, idx, total, last_request_lock=None, - last_callback_lock=None): - """ - Yield a new document syncer thread. 
- - :param idx: The index count of the current operation. - :type idx: int - :param total: The total number of operations. - :type total: int - :param last_request_lock: A lock to wait for before actually performing - the request. - :type last_request_lock: threading.Lock - :param last_callback_lock: A lock to wait for before actually running - the success callback. - :type last_callback_lock: threading.Lock - """ - t = None - # wait for available threads - self._semaphore_pool.acquire() - with self._pool_access_lock: - if self._failures is True: - return None - # get a syncer - doc_syncer = self._get_syncer() - # we rely on DocumentSyncerThread.run() to release the lock using - # self.release_syncer so we can launch a new thread. - t = DocumentSyncerThread( - doc_syncer, self.release_syncer, self.stop_threads, - idx, total, - last_request_lock=last_request_lock, - last_callback_lock=last_callback_lock) - self._threads.append(t) - return t - - def _failed(self): - with self._pool_access_lock: - self._failures = True - - @property - def failures(self): - return self._failures - - def _get_syncer(self): - """ - Get a document syncer from the pool. - - This method will create a new syncer whenever there is no syncer - available in the pool. - - :return: A syncer. - :rtype: HTTPDocumentSyncer - """ - syncer = None - # get an available syncer or create a new one - try: - syncer = self._doc_syncers.pop() - except IndexError: - syncer = HTTPDocumentSyncer( - self._raw_url, self._raw_creds, self._query_string, - self._headers, self._ensure_callback) - return syncer - - def release_syncer(self, syncer_thread, doc_syncer): - """ - Return a syncer to the pool after use and check for any failures. - - :param syncer: The syncer to be returned to the pool. 
- :type syncer: HTTPDocumentSyncer - """ - with self._pool_access_lock: - self._doc_syncers.append(doc_syncer) - if syncer_thread.success is True: - self._threads.remove(syncer_thread) - self._semaphore_pool.release() - - def stop_threads(self, fail=True): - """ - Stop all threads in the pool. - - :param fail: Whether we are stopping because of a failure. - :type fail: bool - """ - # stop sync - self._stop_method() - stopped = [] - # stop all threads - with self._pool_access_lock: - if fail: - self._failures = True - logger.error("sync failed: cancelling sync threads...") - while self._threads: - t = self._threads.pop(0) - t.stop() - self._doc_syncers.append(t.doc_syncer) - stopped.append(t) - # release locks and join - while stopped: - t = stopped.pop(0) - t.request_lock.acquire(False) # just in case - t.request_lock.release() - t.callback_lock.acquire(False) # just in case - t.callback_lock.release() - # release any blocking semaphores - for i in xrange(DocumentSyncerPool.POOL_SIZE): - try: - self._semaphore_pool.release() - except ValueError: - break - if fail: - logger.error("Soledad sync: cancelled sync threads.") - - def cleanup(self): - """ - Close and remove any syncers from the pool. - """ - with self._pool_access_lock: - while self._doc_syncers: - syncer = self._doc_syncers.pop() - syncer.close() - del syncer - - -class HTTPDocumentSyncer(HTTPClientBase, TokenBasedAuth): - - def __init__(self, raw_url, creds, query_string, headers, ensure_callback): - """ - Initialize the client. - - :param raw_url: The raw URL of the target HTTP server. - :type raw_url: str - :param creds: Authentication credentials. - :type creds: dict - :param query_string: The query string for the HTTP request. - :type query_string: str - :param headers: The headers for the HTTP request. - :type headers: dict - :param ensure_callback: A callback to ensure we have the correct - target_replica_uid, if it was just created. 
- :type ensure_callback: callable - """ - HTTPClientBase.__init__(self, raw_url, creds=creds) - # info needed to perform the request - self._query_string = query_string - self._headers = headers - self._ensure_callback = ensure_callback - # the actual request method - self._request_method = None - self._success_callback = None - self._failure_callback = None - - def _reset(self): - """ - Reset this document syncer so we can reuse it. - """ - self._request_method = None - self._success_callback = None - self._failure_callback = None - self._request_method = None - - def set_request_method(self, method, *args, **kwargs): - """ - Set the actual method to perform the request. - - :param method: Either 'get' or 'put'. - :type method: str - :param args: Arguments for the request method. - :type args: list - :param kwargs: Keyworded arguments for the request method. - :type kwargs: dict - """ - self._reset() - # resolve request method - if method is 'get': - self._request_method = self._get_doc - elif method is 'put': - self._request_method = self._put_doc - else: - raise Exception - # store request method args - self._args = args - self._kwargs = kwargs - - def set_success_callback(self, callback): - self._success_callback = callback - - def set_failure_callback(self, callback): - self._failure_callback = callback - - @property - def success_callback(self): - return self._success_callback - - @property - def failure_callback(self): - return self._failure_callback - - def do_request(self): - """ - Actually perform the request. - - :return: The body and headers of the response. - :rtype: tuple - """ - self._ensure_connection() - args = self._args - kwargs = self._kwargs - return self._request_method(*args, **kwargs) - - def _request(self, method, url_parts, params=None, body=None, - content_type=None): - """ - Perform an HTTP request. - - :param method: The HTTP request method. - :type method: str - :param url_parts: A list representing the request path. 
- :type url_parts: list - :param params: Parameters for the URL query string. - :type params: dict - :param body: The body of the request. - :type body: str - :param content-type: The content-type of the request. - :type content-type: str - - :return: The body and headers of the response. - :rtype: tuple - - :raise errors.Unavailable: Raised after a number of unsuccesful - request attempts. - :raise Exception: Raised for any other exception ocurring during the - request. - """ - - self._ensure_connection() - unquoted_url = url_query = self._url.path - if url_parts: - if not url_query.endswith('/'): - url_query += '/' - unquoted_url = url_query - url_query += '/'.join(urllib.quote(part, safe='') - for part in url_parts) - # oauth performs its own quoting - unquoted_url += '/'.join(url_parts) - encoded_params = {} - if params: - for key, value in params.items(): - key = unicode(key).encode('utf-8') - encoded_params[key] = _encode_query_parameter(value) - url_query += ('?' + urllib.urlencode(encoded_params)) - if body is not None and not isinstance(body, basestring): - body = json.dumps(body) - content_type = 'application/json' - headers = {} - if content_type: - headers['content-type'] = content_type - - # Patched: We would like to receive gzip pretty please - # ---------------------------------------------------- - headers['accept-encoding'] = "gzip" - # ---------------------------------------------------- - - headers.update( - self._sign_request(method, unquoted_url, encoded_params)) - - for delay in self._delays: - try: - self._conn.request(method, url_query, body, headers) - return self._response() - except errors.Unavailable, e: - sleep(delay) - raise e - - def _response(self): - """ - Return the response of the (possibly gzipped) HTTP request. - - :return: The body and headers of the response. 
- :rtype: tuple - """ - resp = self._conn.getresponse() - body = resp.read() - headers = dict(resp.getheaders()) - - # Patched: We would like to decode gzip - # ---------------------------------------------------- - encoding = headers.get('content-encoding', '') - if "gzip" in encoding: - body = _gunzip(body) - # ---------------------------------------------------- - - if resp.status in (200, 201): - return body, headers - elif resp.status in http_errors.ERROR_STATUSES: - try: - respdic = json.loads(body) - except ValueError: - pass - else: - self._error(respdic) - # special case - if resp.status == 503: - raise errors.Unavailable(body, headers) - raise errors.HTTPError(resp.status, body, headers) - - def _prepare(self, comma, entries, **dic): - """ - Prepare an entry to be sent through a syncing POST request. - - :param comma: A string to be prepended to the current entry. - :type comma: str - :param entries: A list of entries accumulated to be sent on the - request. - :type entries: list - :param dic: The data to be included in this entry. - :type dic: dict - - :return: The size of the prepared entry. - :rtype: int - """ - entry = comma + '\r\n' + json.dumps(dic) - entries.append(entry) - return len(entry) - - def _init_post_request(self, action, content_length): - """ - Initiate a syncing POST request. - - :param url: The syncing URL. - :type url: str - :param action: The syncing action, either 'get' or 'receive'. - :type action: str - :param headers: The initial headers to be sent on this request. - :type headers: dict - :param content_length: The content-length of the request. 
- :type content_length: int - """ - self._conn.putrequest('POST', self._query_string) - self._conn.putheader( - 'content-type', 'application/x-soledad-sync-%s' % action) - for header_name, header_value in self._headers: - self._conn.putheader(header_name, header_value) - self._conn.putheader('accept-encoding', 'gzip') - self._conn.putheader('content-length', str(content_length)) - self._conn.endheaders() - - def _get_doc(self, received, sync_id, last_known_generation, - last_known_trans_id): - """ - Get a sync document from server by means of a POST request. - - :param received: The number of documents already received in the - current sync session. - :type received: int - :param sync_id: The id for the current sync session. - :type sync_id: str - :param last_known_generation: Target's last known generation. - :type last_known_generation: int - :param last_known_trans_id: Target's last known transaction id. - :type last_known_trans_id: str - - :return: The body and headers of the response. - :rtype: tuple - """ - entries = ['['] - size = 1 - # add remote replica metadata to the request - size += self._prepare( - '', entries, - last_known_generation=last_known_generation, - last_known_trans_id=last_known_trans_id, - sync_id=sync_id, - ensure=self._ensure_callback is not None) - # inform server of how many documents have already been received - size += self._prepare( - ',', entries, received=received) - entries.append('\r\n]') - size += len(entries[-1]) - # send headers - self._init_post_request('get', size) - # get document - for entry in entries: - self._conn.send(entry) - return self._response() - - def _put_doc(self, sync_id, last_known_generation, last_known_trans_id, - id, rev, content, gen, trans_id, number_of_docs, doc_idx): - """ - Put a sync document on server by means of a POST request. - - :param sync_id: The id for the current sync session. - :type sync_id: str - :param last_known_generation: Target's last known generation. 
- :type last_known_generation: int - :param last_known_trans_id: Target's last known transaction id. - :type last_known_trans_id: str - :param id: The document id. - :type id: str - :param rev: The document revision. - :type rev: str - :param content: The serialized document content. - :type content: str - :param gen: The generation of the modification of the document. - :type gen: int - :param trans_id: The transaction id of the modification of the - document. - :type trans_id: str - :param number_of_docs: The total amount of documents sent on this sync - session. - :type number_of_docs: int - :param doc_idx: The index of the current document being sent. - :type doc_idx: int - - :return: The body and headers of the response. - :rtype: tuple - """ - # prepare to send the document - entries = ['['] - size = 1 - # add remote replica metadata to the request - size += self._prepare( - '', entries, - last_known_generation=last_known_generation, - last_known_trans_id=last_known_trans_id, - sync_id=sync_id, - ensure=self._ensure_callback is not None) - # add the document to the request - size += self._prepare( - ',', entries, - id=id, rev=rev, content=content, gen=gen, trans_id=trans_id, - number_of_docs=number_of_docs, doc_idx=doc_idx) - entries.append('\r\n]') - size += len(entries[-1]) - # send headers - self._init_post_request('put', size) - # send document - for entry in entries: - self._conn.send(entry) - return self._response() - - def _sign_request(self, method, url_query, params): - """ - Return an authorization header to be included in the HTTP request. - - :param method: The HTTP method. - :type method: str - :param url_query: The URL query string. - :type url_query: str - :param params: A list with encoded query parameters. - :type param: list - - :return: The Authorization header. 
- :rtype: list of tuple - """ - return TokenBasedAuth._sign_request(self, method, url_query, params) - - def set_token_credentials(self, uuid, token): - """ - Store given credentials so we can sign the request later. - - :param uuid: The user's uuid. - :type uuid: str - :param token: The authentication token. - :type token: str - """ - TokenBasedAuth.set_token_credentials(self, uuid, token) - - -class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): - """ - A SyncTarget that encrypts data before sending and decrypts data after - receiving. - - Normally encryption will have been written to the sync database upon - document modification. The sync database is also used to write temporarily - the parsed documents that the remote send us, before being decrypted and - written to the main database. - """ - - # will later keep a reference to the insert-doc callback - # passed to sync_exchange - _insert_doc_cb = defaultdict(lambda: ProxyBase(None)) - - # - # Modified HTTPSyncTarget methods. - # - - def __init__(self, url, source_replica_uid=None, creds=None, crypto=None, - sync_db=None, sync_enc_pool=None): - """ - Initialize the SoledadSyncTarget. - - :param source_replica_uid: The source replica uid which we use when - deferring decryption. - :type source_replica_uid: str - :param url: The url of the target replica to sync with. - :type url: str - :param creds: Optional dictionary giving credentials. - to authorize the operation with the server. - :type creds: dict - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto - :param sync_db: Optional. handler for the db with the symmetric - encryption of the syncing documents. If - None, encryption will be done in-place, - instead of retreiving it from the dedicated - database. 
- :type sync_db: Sqlite handler - """ - HTTPSyncTarget.__init__(self, url, creds) - self._raw_url = url - self._raw_creds = creds - self._crypto = crypto - self._stopped = True - self._stop_lock = threading.Lock() - self.source_replica_uid = source_replica_uid - self._syncer_pool = None - - # asynchronous encryption/decryption attributes - self._sync_db = sync_db - self._sync_enc_pool = sync_enc_pool - self._decryption_callback = None - self._sync_decr_pool = None - - def _setup_sync_decr_pool(self): - """ - Set up the SyncDecrypterPool for deferred decryption. - """ - if self._sync_decr_pool is None and self._sync_db is not None: - # initialize syncing queue decryption pool - self._sync_decr_pool = SyncDecrypterPool( - self._crypto, - self._sync_db, - insert_doc_cb=self._insert_doc_cb, - source_replica_uid=self.source_replica_uid) - - def _teardown_sync_decr_pool(self): - """ - Tear down the SyncDecrypterPool. - """ - self._sync_decr_pool.close() - self._sync_decr_pool = None - - def _get_replica_uid(self, url): - """ - Return replica uid from the url, or None. - - :param url: the replica url - :type url: str - """ - replica_uid_match = re.findall("user-([0-9a-fA-F]+)", url) - return replica_uid_match[0] if len(replica_uid_match) > 0 else None - - @staticmethod - def connect(url, source_replica_uid=None, crypto=None): - return SoledadSyncTarget( - url, source_replica_uid=source_replica_uid, crypto=crypto) - - def _parse_received_doc_response(self, response): - """ - Parse the response from the server containing the received document. - - :param response: The body and headers of the response. 
- :type response: tuple(str, dict) - """ - data, _ = response - # decode incoming stream - parts = data.splitlines() - if not parts or parts[0] != '[' or parts[-1] != ']': - raise errors.BrokenSyncStream - data = parts[1:-1] - # decode metadata - line, comma = utils.check_and_strip_comma(data[0]) - metadata = None - try: - metadata = json.loads(line) - new_generation = metadata['new_generation'] - new_transaction_id = metadata['new_transaction_id'] - number_of_changes = metadata['number_of_changes'] - except (json.JSONDecodeError, KeyError): - raise errors.BrokenSyncStream - # make sure we have replica_uid from fresh new dbs - if self._ensure_callback and 'replica_uid' in metadata: - self._ensure_callback(metadata['replica_uid']) - # parse incoming document info - doc_id = None - rev = None - content = None - gen = None - trans_id = None - if number_of_changes > 0: - try: - entry = json.loads(data[1]) - doc_id = entry['id'] - rev = entry['rev'] - content = entry['content'] - gen = entry['gen'] - trans_id = entry['trans_id'] - except (IndexError, KeyError): - raise errors.BrokenSyncStream - return new_generation, new_transaction_id, number_of_changes, \ - doc_id, rev, content, gen, trans_id - - def _insert_received_doc(self, idx, total, response): - """ - Insert a received document into the local replica. - - :param idx: The index count of the current operation. - :type idx: int - :param total: The total number of operations. - :type total: int - :param response: The body and headers of the response. 
- :type response: tuple(str, dict) - """ - new_generation, new_transaction_id, number_of_changes, doc_id, \ - rev, content, gen, trans_id = \ - self._parse_received_doc_response(response) - if doc_id is not None: - # decrypt incoming document and insert into local database - # ------------------------------------------------------------- - # symmetric decryption of document's contents - # ------------------------------------------------------------- - # If arriving content was symmetrically encrypted, we decrypt it. - # We do it inline if defer_decryption flag is False or no sync_db - # was defined, otherwise we defer it writing it to the received - # docs table. - doc = SoledadDocument(doc_id, rev, content) - if is_symmetrically_encrypted(doc): - if self._queue_for_decrypt: - self._enqueue_encrypted_received_doc( - doc, gen, trans_id, idx, total) - else: - # defer_decryption is False or no-sync-db fallback - doc.set_json(decrypt_doc(self._crypto, doc)) - self._return_doc_cb(doc, gen, trans_id) - else: - # not symmetrically encrypted doc, insert it directly - # or save it in the decrypted stage. - if self._queue_for_decrypt: - self._enqueue_received_doc(doc, gen, trans_id, idx, total) - else: - self._return_doc_cb(doc, gen, trans_id) - # ------------------------------------------------------------- - # end of symmetric decryption - # ------------------------------------------------------------- - msg = "%d/%d" % (idx + 1, total) - signal(SOLEDAD_SYNC_RECEIVE_STATUS, msg) - logger.debug("Soledad sync receive status: %s" % msg) - return number_of_changes, new_generation, new_transaction_id - - def _get_remote_docs(self, url, last_known_generation, last_known_trans_id, - headers, return_doc_cb, ensure_callback, sync_id, - defer_decryption=False): - """ - Fetch sync documents from the remote database and insert them in the - local database. 
- - If an incoming document's encryption scheme is equal to - EncryptionSchemes.SYMKEY, then this method will decrypt it with - Soledad's symmetric key. - - :param url: The syncing URL. - :type url: str - :param last_known_generation: Target's last known generation. - :type last_known_generation: int - :param last_known_trans_id: Target's last known transaction id. - :type last_known_trans_id: str - :param headers: The headers of the HTTP request. - :type headers: dict - :param return_doc_cb: A callback to insert docs from target. - :type return_doc_cb: callable - :param ensure_callback: A callback to ensure we have the correct - target_replica_uid, if it was just created. - :type ensure_callback: callable - :param sync_id: The id for the current sync session. - :type sync_id: str - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. - :type defer_decryption: bool - - :raise BrokenSyncStream: If `data` is malformed. - - :return: A dictionary representing the first line of the response got - from remote replica. 
- :rtype: dict - """ - # we keep a reference to the callback in case we defer the decryption - self._return_doc_cb = return_doc_cb - self._queue_for_decrypt = defer_decryption \ - and self._sync_db is not None - - new_generation = last_known_generation - new_transaction_id = last_known_trans_id - - if self._queue_for_decrypt: - logger.debug( - "Soledad sync: will queue received docs for decrypting.") - - idx = 0 - number_of_changes = 1 - - first_request = True - last_callback_lock = None - threads = [] - - # get incoming documents - while idx < number_of_changes: - # bail out if sync process was interrupted - if self.stopped is True: - break - - # launch a thread to fetch one document from target - t = self._syncer_pool.new_syncer_thread( - idx, number_of_changes, - last_callback_lock=last_callback_lock) - - # bail out if any thread failed - if t is None: - self.stop(fail=True) - break - - if defer_decryption: - self._setup_sync_decr_pool() - - t.doc_syncer.set_request_method( - 'get', idx, sync_id, last_known_generation, - last_known_trans_id) - t.doc_syncer.set_success_callback(self._insert_received_doc) - - def _failure_callback(idx, total, exception): - _failure_msg = "Soledad sync: error while getting document " \ - "%d/%d: %s" \ - % (idx + 1, total, exception) - logger.warning("%s" % _failure_msg) - logger.warning("Soledad sync: failing gracefully, will " - "recover on next sync.") - - t.doc_syncer.set_failure_callback(_failure_callback) - threads.append(t) - t.start() - last_callback_lock = t.callback_lock - idx += 1 - - # if this is the first request, wait to update the number of - # changes - if first_request is True: - t.join() - if t.success: - number_of_changes, _, _ = t.result - if defer_decryption: - self._sync_decr_pool.set_docs_to_process( - number_of_changes) - else: - raise t.exception - first_request = False - - # make sure all threads finished and we have up-to-date info - last_successful_thread = None - while threads: - # check if there are 
failures - t = threads.pop(0) - t.join() - if t.success: - last_successful_thread = t - else: - raise t.exception - - # get information about last successful thread - if last_successful_thread is not None: - body, _ = last_successful_thread.response - parsed_body = json.loads(body) - # get current target gen and trans id in case no documents were - # transferred - if len(parsed_body) == 1: - metadata = parsed_body[0] - new_generation = metadata['new_generation'] - new_transaction_id = metadata['new_transaction_id'] - # get current target gen and trans id from last transferred - # document - else: - doc_data = parsed_body[1] - new_generation = doc_data['gen'] - new_transaction_id = doc_data['trans_id'] - - # decrypt docs in case of deferred decryption - if defer_decryption: - self._sync_decr_pool.wait() - self._teardown_sync_decr_pool() - - return new_generation, new_transaction_id - - @property - def _defer_encryption(self): - return self._sync_enc_pool is not None - - @property - def _defer_decryption(self): - return self._sync_decr_pool is not None - - def sync_exchange(self, docs_by_generations, - source_replica_uid, last_known_generation, - last_known_trans_id, return_doc_cb, - ensure_callback=None, defer_decryption=True, - sync_id=None): - """ - Find out which documents the remote database does not know about, - encrypt and send them. - - This does the same as the parent's method but encrypts content before - syncing. - - :param docs_by_generations: A list of (doc_id, generation, trans_id) - of local documents that were changed since - the last local generation the remote - replica knows about. - :type docs_by_generations: list of tuples - - :param source_replica_uid: The uid of the source replica. - :type source_replica_uid: str - - :param last_known_generation: Target's last known generation. - :type last_known_generation: int - - :param last_known_trans_id: Target's last known transaction id. 
- :type last_known_trans_id: str - - :param return_doc_cb: A callback for inserting received documents from - target. If not overriden, this will call u1db - insert_doc_from_target in synchronizer, which - implements the TAKE OTHER semantics. - :type return_doc_cb: function - - :param ensure_callback: A callback that ensures we know the target - replica uid if the target replica was just - created. - :type ensure_callback: function - - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. - :type defer_decryption: bool - - :return: The new generation and transaction id of the target replica. - :rtype: tuple - """ - self._ensure_callback = ensure_callback - - self.start() - - if sync_id is None: - sync_id = str(uuid4()) - self.source_replica_uid = source_replica_uid - # let the decrypter pool access the passed callback to insert docs - setProxiedObject(self._insert_doc_cb[source_replica_uid], - return_doc_cb) - - self._ensure_connection() - if self._trace_hook: # for tests - self._trace_hook('sync_exchange') - url = '%s/sync-from/%s' % (self._url.path, source_replica_uid) - headers = self._sign_request('POST', url, {}) - - cur_target_gen = last_known_generation - cur_target_trans_id = last_known_trans_id - - # ------------------------------------------------------------------- - # start of send documents to target - # ------------------------------------------------------------------- - msg = "%d/%d" % (0, len(docs_by_generations)) - signal(SOLEDAD_SYNC_SEND_STATUS, msg) - logger.debug("Soledad sync send status: %s" % msg) - - self._syncer_pool = DocumentSyncerPool( - self._raw_url, self._raw_creds, url, headers, ensure_callback, - self.stop_syncer) - threads = [] - last_callback_lock = None - - sent = 0 - total = len(docs_by_generations) - - synced = [] - number_of_docs = len(docs_by_generations) - - last_request_lock = None - for doc, gen, trans_id in docs_by_generations: - # 
allow for interrupting the sync process - if self.stopped is True: - break - - # skip non-syncable docs - if isinstance(doc, SoledadDocument) and not doc.syncable: - continue - - # ------------------------------------------------------------- - # symmetric encryption of document's contents - # ------------------------------------------------------------- - - # the following var will hold a deferred because we may try to - # fetch the encrypted document from the sync db - d = None - - if doc.is_tombstone(): - d = defer.succeed(None) - elif not self._defer_encryption: - # fallback case, for tests - d = defer.succeed(encrypt_doc(self._crypto, doc)) - else: - - def _maybe_encrypt_doc_inline(doc_json): - if doc_json is None: - # the document is not marked as tombstone, but we got - # nothing from the sync db. As it is not encrypted - # yet, we force inline encryption. - # TODO: implement a queue to deal with these cases. - return encrypt_doc(self._crypto, doc) - return doc_json - - d = self.get_encrypted_doc_from_db(doc.doc_id, doc.rev) - d.addCallback(_maybe_encrypt_doc_inline) - # ------------------------------------------------------------- - # end of symmetric encryption - # ------------------------------------------------------------- - - t = self._syncer_pool.new_syncer_thread( - sent + 1, total, last_request_lock=last_request_lock, - last_callback_lock=last_callback_lock) - - # bail out if creation of any thread failed - if t is None: - self.stop(fail=True) - break - - # the following callback will be called when the document's - # encrypted content is available, either because it was found on - # the sync db or because it has been encrypted inline. 
- - def _configure_and_start_thread(t, doc_json): - # set the request method - t.doc_syncer.set_request_method( - 'put', sync_id, cur_target_gen, cur_target_trans_id, - id=doc.doc_id, rev=doc.rev, content=doc_json, gen=gen, - trans_id=trans_id, number_of_docs=number_of_docs, - doc_idx=sent + 1) - # set the success calback - - def _success_callback(idx, total, response): - _success_msg = "Soledad sync send status: %d/%d" \ - % (idx, total) - signal(SOLEDAD_SYNC_SEND_STATUS, _success_msg) - logger.debug(_success_msg) - - t.doc_syncer.set_success_callback(_success_callback) - - # set the failure callback - def _failure_callback(idx, total, exception): - _failure_msg = "Soledad sync: error while sending document " \ - "%d/%d: %s" % (idx, total, exception) - logger.warning("%s" % _failure_msg) - logger.warning("Soledad sync: failing gracefully, will " - "recover on next sync.") - - t.doc_syncer.set_failure_callback(_failure_callback) - - # save thread and append - t.start() - - d.addCallback(partial(_configure_and_start_thread, t)) - - threads.append((t, doc)) - - # update lock references so they can be used in next call to - # syncer_pool.new_syncer_thread() above - last_callback_lock = t.callback_lock - last_request_lock = t.request_lock - - sent += 1 - - # make sure all threads finished and we have up-to-date info - last_successful_thread = None - while threads: - # check if there are failures - t, doc = threads.pop(0) - t.started.wait() - t.join() - if t.success: - synced.append((doc.doc_id, doc.rev)) - last_successful_thread = t - else: - raise t.exception - - # delete documents from the sync database - if self._defer_encryption: - self._delete_encrypted_docs_from_db(synced) - - # get target gen and trans_id after docs - gen_after_send = None - trans_id_after_send = None - if last_successful_thread is not None: - response_dict = json.loads(last_successful_thread.response[0])[0] - gen_after_send = response_dict['new_generation'] - trans_id_after_send = 
response_dict['new_transaction_id'] - # ------------------------------------------------------------------- - # end of send documents to target - # ------------------------------------------------------------------- - - # ------------------------------------------------------------------- - # start of fetch documents from target - # ------------------------------------------------------------------- - defer_decryption = defer_decryption and self._defer_decryption - if self.stopped is False: - cur_target_gen, cur_target_trans_id = self._get_remote_docs( - url, - last_known_generation, last_known_trans_id, headers, - return_doc_cb, ensure_callback, sync_id, - defer_decryption=defer_decryption) - # ------------------------------------------------------------------- - # end of fetch documents from target - # ------------------------------------------------------------------- - - self._syncer_pool.cleanup() - - # update gen and trans id info in case we just sent and did not - # receive docs. - if gen_after_send is not None and gen_after_send > cur_target_gen: - cur_target_gen = gen_after_send - cur_target_trans_id = trans_id_after_send - - self.stop(fail=False) - self._syncer_pool = None - return cur_target_gen, cur_target_trans_id - - def start(self): - """ - Mark current sync session as running. - """ - with self._stop_lock: - self._stopped = False - - def stop_syncer(self): - with self._stop_lock: - self._stopped = True - - def stop(self, fail=False): - """ - Mark current sync session as stopped. - - This will eventually interrupt the sync_exchange() method and return - enough information to the synchronizer so the sync session can be - recovered afterwards. - - :param fail: Whether we are stopping because of a failure. - :type fail: bool - """ - self.stop_syncer() - if self._syncer_pool: - self._syncer_pool.stop_threads(fail=fail) - - @property - def stopped(self): - """ - Return whether this sync session is stopped. 
- - :return: Whether this sync session is stopped. - :rtype: bool - """ - with self._stop_lock: - return self._stopped is True - - # - # Symmetric encryption of syncing docs - # - - def get_encrypted_doc_from_db(self, doc_id, doc_rev): - """ - Retrieve encrypted document from the database of encrypted docs for - sync. - - :param doc_id: The Document id. - :type doc_id: str - - :param doc_rev: The document revision - :type doc_rev: str - - :return: A deferred which is fired with the document's encrypted - content or None if the document was not found on the sync db. - :rtype: twisted.internet.defer.Deferred - """ - logger.debug("Looking for encrypted document on sync db: %s" % doc_id) - return self._sync_enc_pool.get_encrypted_doc(doc_id, doc_rev) - - def _delete_encrypted_docs_from_db(self, docs): - """ - Delete several encrypted documents from the database of symmetrically - encrypted docs to sync. - - :param docs: an iterable with (doc_id, doc_rev) for all documents - to be deleted. - :type docs: any iterable of tuples of str - """ - for doc_id, doc_rev in docs: - logger.debug("Removing encrypted document on sync db: %s" - % doc_id) - return self._sync_enc_pool.delete_encrypted_doc(doc_id, doc_rev) - - # - # Symmetric decryption of syncing docs - # - - def _enqueue_encrypted_received_doc(self, doc, gen, trans_id, idx, total): - """ - Save a symmetrically encrypted incoming document into the received - docs table in the sync db. A decryption task will pick it up - from here in turn. - - :param doc: The document to save. - :type doc: SoledadDocument - :param gen: The generation. - :type gen: str - :param trans_id: Transacion id. - - :param idx: The index count of the current operation. - :type idx: int - :param total: The total number of operations. - :type total: int - """ - logger.debug("Enqueueing doc for decryption: %d/%d." 
- % (idx + 1, total)) - self._sync_decr_pool.insert_encrypted_received_doc( - doc.doc_id, doc.rev, doc.content, gen, trans_id, idx + 1) - - def _enqueue_received_doc(self, doc, gen, trans_id, idx, total): - """ - Save any incoming document into the received docs table in the sync db. - - :param doc: The document to save. - :type doc: SoledadDocument - :param gen: The generation. - :type gen: str - :param trans_id: Transacion id. - :type gen: str - :param idx: The index count of the current operation. - :type idx: int - :param total: The total number of operations. - :type total: int - """ - logger.debug("Enqueueing doc, no decryption needed: %d/%d." - % (idx + 1, total)) - self._sync_decr_pool.insert_received_doc( - doc.doc_id, doc.rev, doc.content, gen, trans_id, idx + 1) - - def set_decryption_callback(self, cb): - """ - Set callback to be called when the decryption finishes. - - :param cb: The callback to be set. - :type cb: callable - """ - self._decryption_callback = cb - - def has_decryption_callback(self): - """ - Return True if there is a decryption callback set. - :rtype: bool - """ - return self._decryption_callback is not None - - # - # Authentication methods - # - - def _sign_request(self, method, url_query, params): - """ - Return an authorization header to be included in the HTTP request. - - :param method: The HTTP method. - :type method: str - :param url_query: The URL query string. - :type url_query: str - :param params: A list with encoded query parameters. - :type param: list - - :return: The Authorization header. - :rtype: list of tuple - """ - return TokenBasedAuth._sign_request(self, method, url_query, params) - - def set_token_credentials(self, uuid, token): - """ - Store given credentials so we can sign the request later. - - :param uuid: The user's uuid. - :type uuid: str - :param token: The authentication token. 
- :type token: str - """ - TokenBasedAuth.set_token_credentials(self, uuid, token) -- cgit v1.2.3 From 5376b0eb9ff906fc755b18b39c87ffdc36849d1c Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 19 May 2015 19:04:26 -0300 Subject: [refactor] cleanup sync, remove unused stuff This commit does the following: * Remove the autocreate parameter from the sync() method. * Remove the syncing lock from the sync module because it did the same job as the lock in the sqlcipher module. * Remove the close/stop methods from sync module as they don't make sense after we started to use twisted in client-side sync. --- client/src/leap/soledad/client/api.py | 5 +- client/src/leap/soledad/client/sqlcipher.py | 25 +------- client/src/leap/soledad/client/sync.py | 96 +++-------------------------- 3 files changed, 10 insertions(+), 116 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 0f29503f..7b45dd7f 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -650,7 +650,7 @@ class Soledad(object): sync_url = urlparse.urljoin(self._server_url, 'user-%s' % self.uuid) d = self._dbsyncer.sync( sync_url, - creds=self._creds, autocreate=False, + creds=self._creds, defer_decryption=defer_decryption) def _sync_callback(local_gen): @@ -670,9 +670,6 @@ class Soledad(object): d.addCallbacks(_sync_callback, _sync_errback) return d - def stop_sync(self): - self._dbsyncer.stop_sync() - @property def syncing(self): """ diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 53afbda8..7fde9a7c 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -414,7 +414,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): Soledad syncer implementation. 
""" - _sync_loop = None _sync_enc_pool = None """ @@ -557,7 +556,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): decr.TABLE_NAME, decr.FIELD_NAMES)) return (sql_encr_table_query, sql_decr_table_query) - def sync(self, url, creds=None, autocreate=True, defer_decryption=True): + def sync(self, url, creds=None, defer_decryption=True): """ Synchronize documents with remote replica exposed at url. @@ -575,8 +574,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): optional dictionary giving credentials. to authorize the operation with the server. :type creds: dict - :param autocreate: Ask the target to create the db if non-existent. - :type autocreate: bool :param defer_decryption: Whether to defer the decryption process using the intermediate database. If False, decryption will be done inline. @@ -591,20 +588,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # acquired. with self._syncer(url, creds=creds) as syncer: # XXX could mark the critical section here... - return syncer.sync( - autocreate=autocreate, - defer_decryption=defer_decryption) - - def stop_sync(self): - """ - Interrupt all ongoing syncs. - """ - self._stop_sync() - - def _stop_sync(self): - for url in self._syncers: - _, syncer = self._syncers[url] - syncer.stop() + return syncer.sync(defer_decryption=defer_decryption) @contextmanager def _syncer(self, url, creds=None): @@ -687,11 +671,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): """ Close the syncer and syncdb orderly """ - # stop the sync loop for deferred encryption - if self._sync_loop is not None: - self._sync_loop.reset() - self._sync_loop.stop() - self._sync_loop = None # close all open syncers for url in self._syncers: _, syncer = self._syncers[url] diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index f8f74ce7..53172f31 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -16,16 +16,8 @@ # along with this program. If not, see . 
""" Soledad synchronization utilities. - -Extend u1db Synchronizer with the ability to: - - * Postpone the update of the known replica uid until all the decryption of - the incoming messages has been processed. - - * Be interrupted and recovered. """ import logging -from threading import Lock from twisted.internet import defer @@ -48,17 +40,8 @@ class SoledadSynchronizer(Synchronizer): Also modified to allow for interrupting the synchronization process. """ - # TODO can delegate the syncing to the api object, living in the reactor - # thread, and use a simple flag. - syncing_lock = Lock() - - def stop(self): - """ - Stop the current sync in progress. - """ - self.sync_target.stop() - - def sync(self, autocreate=False, defer_decryption=True): + @defer.inlineCallbacks + def sync(self, defer_decryption=True): """ Synchronize documents between source and target. @@ -70,49 +53,22 @@ class SoledadSynchronizer(Synchronizer): This is done to allow the ongoing parallel decryption of the incoming docs to proceed without `InvalidGeneration` conflicts. - :param autocreate: Whether the target replica should be created or not. - :type autocreate: bool :param defer_decryption: Whether to defer the decryption process using the intermediate database. If False, decryption will be done inline. :type defer_decryption: bool - """ - self.syncing_lock.acquire() - try: - return self._sync(autocreate=autocreate, - defer_decryption=defer_decryption) - except Exception: - # we want this exception to reach either SQLCipherU1DBSync.sync or - # the Solead api object itself, so it is poperly handled and/or - # logged... - raise - finally: - # ... but we also want to release the syncing lock so this - # Synchronizer may be reused later. - self.release_syncing_lock() - @defer.inlineCallbacks - def _sync(self, autocreate=False, defer_decryption=True): - """ - Helper function, called from the main `sync` method. - See `sync` docstring. 
+ :return: A deferred which will fire after the sync has finished. + :rtype: twisted.internet.defer.Deferred """ sync_target = self.sync_target # get target identifier, its current generation, # and its last-seen database generation for this source ensure_callback = None - try: - (self.target_replica_uid, target_gen, target_trans_id, - target_my_gen, target_my_trans_id) = yield \ - sync_target.get_sync_info(self.source._replica_uid) - except errors.DatabaseDoesNotExist: - if not autocreate: - raise - # will try to ask sync_exchange() to create the db - self.target_replica_uid = None - target_gen, target_trans_id = (0, '') - target_my_gen, target_my_trans_id = (0, '') + (self.target_replica_uid, target_gen, target_trans_id, + target_my_gen, target_my_trans_id) = yield \ + sync_target.get_sync_info(self.source._replica_uid) logger.debug( "Soledad target sync info:\n" @@ -176,9 +132,6 @@ class SoledadSynchronizer(Synchronizer): # exchange documents and try to insert the returned ones with # the target, return target synced-up-to gen. - # - # The sync_exchange method may be interrupted, in which case it will - # return a tuple of Nones. new_gen, new_trans_id = yield sync_target.sync_exchange( docs_by_generation, self.source._replica_uid, target_last_known_gen, target_last_known_trans_id, @@ -239,38 +192,3 @@ class SoledadSynchronizer(Synchronizer): return self.sync_target.record_sync_info( self.source._replica_uid, cur_gen, trans_id) return defer.succeed(None) - - @property - def syncing(self): - """ - Return True if a sync is ongoing, False otherwise. - :rtype: bool - """ - # XXX FIXME we need some mechanism for timeout: should cleanup and - # release if something in the syncdb-decrypt goes wrong. we could keep - # track of the release date and cleanup unrealistic sync entries after - # some time. 
- - # TODO use cancellable deferreds instead - locked = self.syncing_lock.locked() - return locked - - def release_syncing_lock(self): - """ - Release syncing lock if it's locked. - """ - if self.syncing_lock.locked(): - self.syncing_lock.release() - - def close(self): - """ - Close sync target pool of workers. - """ - self.release_syncing_lock() - self.sync_target.close() - - def __del__(self): - """ - Cleanup: release lock. - """ - self.release_syncing_lock() -- cgit v1.2.3 From 0c23b1c767d98b5a63bb4b94d56b1fe69ce71c43 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 19 May 2015 18:53:02 -0300 Subject: [bug] ensure sync failures are not ignored --- client/src/leap/soledad/client/api.py | 1 + 1 file changed, 1 insertion(+) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 7b45dd7f..cd06fba1 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -666,6 +666,7 @@ class Soledad(object): failure.printDetailedTraceback(file=s) msg = "Soledad exception when syncing!\n" + s.getvalue() logger.error(msg) + return failure d.addCallbacks(_sync_callback, _sync_errback) return d -- cgit v1.2.3 From ec55459fa697f5d8676e16e5fee8a0c0f75c8c2c Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 20 May 2015 10:57:25 -0300 Subject: [bug] wrap unauth errors as invalid token errors --- client/src/leap/soledad/client/api.py | 3 -- client/src/leap/soledad/client/http_target.py | 40 ++++++++++++++++++--------- 2 files changed, 27 insertions(+), 16 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index cd06fba1..ffd95f6c 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -658,9 +658,6 @@ class Soledad(object): soledad_events.SOLEDAD_DONE_DATA_SYNC, self.uuid) return local_gen - # prevent sync failures from crashing the app by adding an errback - 
# that logs the failure and does not propagate it down the callback - # chain def _sync_errback(failure): s = StringIO() failure.printDetailedTraceback(file=s) diff --git a/client/src/leap/soledad/client/http_target.py b/client/src/leap/soledad/client/http_target.py index 041180e6..5f18e4a9 100644 --- a/client/src/leap/soledad/client/http_target.py +++ b/client/src/leap/soledad/client/http_target.py @@ -34,12 +34,14 @@ from collections import defaultdict from twisted.internet import defer from twisted.internet import reactor from twisted.web.client import getPage +from twisted.web.error import Error from u1db import errors from u1db import SyncTarget from u1db.remote import utils from leap.soledad.common.document import SoledadDocument +from leap.soledad.common.errors import InvalidAuthTokenError from leap.soledad.client.crypto import is_symmetrically_encrypted from leap.soledad.client.crypto import encrypt_doc @@ -53,6 +55,19 @@ from leap.soledad.client.encdecpool import SyncDecrypterPool logger = logging.getLogger(__name__) +def _unauth_to_invalid_token_error(failure): + failure.trap(Error) + if failure.getErrorMessage() == "401 Unauthorized": + raise InvalidAuthTokenError + return failure + + +def getSoledadPage(*args, **kwargs): + d = getPage(*args, **kwargs) + d.addErrback(_unauth_to_invalid_token_error) + return d + + class SoledadHTTPSyncTarget(SyncTarget): """ A SyncTarget that encrypts data before sending and decrypts data after @@ -69,7 +84,7 @@ class SoledadHTTPSyncTarget(SyncTarget): _insert_doc_cb = defaultdict(lambda: ProxyBase(None)) def __init__(self, url, source_replica_uid, creds, crypto, - sync_db=None, sync_enc_pool=None): + sync_db=None, sync_enc_pool=None): """ Initialize the sync target. 
@@ -124,7 +139,7 @@ class SoledadHTTPSyncTarget(SyncTarget): # # SyncTarget API - # + # @defer.inlineCallbacks def get_sync_info(self, source_replica_uid): @@ -144,7 +159,7 @@ class SoledadHTTPSyncTarget(SyncTarget): source_replica_last_known_transaction_id) :rtype: twisted.internet.defer.Deferred """ - raw = yield getPage(self._url, headers=self._auth_header) + raw = yield getSoledadPage(self._url, headers=self._auth_header) res = json.loads(raw) defer.returnValue([ res['target_replica_uid'], @@ -189,7 +204,7 @@ class SoledadHTTPSyncTarget(SyncTarget): }) headers = self._auth_header.copy() headers.update({'content-type': 'application/json'}) - return getPage( + return getSoledadPage( self._url, method='PUT', headers=headers, @@ -281,7 +296,7 @@ class SoledadHTTPSyncTarget(SyncTarget): @defer.inlineCallbacks def _send_docs(self, docs_by_generation, last_known_generation, - last_known_trans_id, sync_id): + last_known_trans_id, sync_id): if not docs_by_generation: defer.returnValue([None, None]) @@ -307,8 +322,8 @@ class SoledadHTTPSyncTarget(SyncTarget): self._sync_enc_pool.delete_encrypted_doc( doc.doc_id, doc.rev) signal(SOLEDAD_SYNC_SEND_STATUS, - "Soledad sync send status: %d/%d" - % (idx, total)) + "Soledad sync send status: %d/%d" + % (idx, total)) response_dict = json.loads(result)[0] gen_after_send = response_dict['new_generation'] trans_id_after_send = response_dict['new_transaction_id'] @@ -316,7 +331,7 @@ class SoledadHTTPSyncTarget(SyncTarget): @defer.inlineCallbacks def _send_one_doc(self, headers, first_entries, doc, gen, trans_id, - number_of_docs, doc_idx): + number_of_docs, doc_idx): entries = first_entries[:] # add the document to the request content = yield self._encrypt_doc(doc) @@ -327,7 +342,7 @@ class SoledadHTTPSyncTarget(SyncTarget): doc_idx=doc_idx) entries.append('\r\n]') data = ''.join(entries) - result = yield getPage( + result = yield getSoledadPage( self._url, method='POST', headers=headers, @@ -354,7 +369,7 @@ class 
SoledadHTTPSyncTarget(SyncTarget): d = self._sync_enc_pool.get_encrypted_doc(doc.doc_id, doc.rev) d.addCallback(_maybe_encrypt_doc_inline) return d - + # # methods to receive doc # @@ -438,7 +453,7 @@ class SoledadHTTPSyncTarget(SyncTarget): defer.returnValue([new_generation, new_transaction_id]) def _receive_one_doc(self, headers, last_known_generation, - last_known_trans_id, sync_id, received): + last_known_trans_id, sync_id, received): entries = ['['] # add remote replica metadata to the request self._prepare( @@ -452,7 +467,7 @@ class SoledadHTTPSyncTarget(SyncTarget): ',', entries, received=received) entries.append('\r\n]') # send headers - return getPage( + return getSoledadPage( self._url, method='POST', headers=headers, @@ -473,7 +488,6 @@ class SoledadHTTPSyncTarget(SyncTarget): rev, content, gen, trans_id = \ self._parse_received_doc_response(response) if doc_id is not None: - print doc_id # decrypt incoming document and insert into local database # ------------------------------------------------------------- # symmetric decryption of document's contents -- cgit v1.2.3 From d59ac3b5ce713787cd7a46e181f2381de3a8fde2 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 20 May 2015 10:58:16 -0300 Subject: [feature] ensure reactor stops on client db script --- client/src/leap/soledad/client/encdecpool.py | 9 +++++---- client/src/leap/soledad/client/sqlcipher.py | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index 7c21c30e..02eeb590 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -766,11 +766,12 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): # wait until we know how many documents we need to process while self._docs_to_process is None: time.sleep(self.DECRYPT_LOOP_PERIOD) - # because all database operations are asynchronous, we use an event to - # make 
sure we don't start the next loop before the current one has - # finished. + # because all database operations are asynchronous, we use an + # event to make sure we don't start the next loop before the + # current one has finished. event = threading.Event() - # loop until we have processes as many docs as the number of changes + # loop until we have processes as many docs as the number of + # changes while self._processed_docs < self._docs_to_process: if sameProxiedObjects( self._insert_doc_cb.get(self.source_replica_uid), diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 7fde9a7c..96732325 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -653,7 +653,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # Symmetric encryption of syncing docs # - def get_generation(self): # FIXME # XXX this SHOULD BE a callback -- cgit v1.2.3 From ce161f9623a1dea6eda9fc2350c60073dbcdce06 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 20 May 2015 17:28:27 -0300 Subject: [bug] ensure async decryption failures are logged We have to make sure any failures in asynchronous decryption code is grabbed and properly transmitted up the deferred chain so it can be logged. This commit adds errbacks in the decryption pool that grab any failure and a check on the http target the failure if that is the case. 
--- client/src/leap/soledad/client/encdecpool.py | 89 +++++++++++++++++---------- client/src/leap/soledad/client/http_target.py | 51 +++++++++++---- 2 files changed, 97 insertions(+), 43 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index 02eeb590..d9f3d28c 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -32,6 +32,7 @@ from zope.proxy import sameProxiedObjects from twisted.internet import defer from twisted.internet.threads import deferToThread +from twisted.python.failure import Failure from leap.soledad.common.document import SoledadDocument from leap.soledad.common import soledad_assert @@ -390,7 +391,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): self._processed_docs = 0 self._async_results = [] - self._exception = None + self._failure = None self._finished = threading.Event() # clear the database before starting the sync @@ -399,10 +400,26 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): d.addCallback(lambda _: self._empty_db.set()) # start the decryption loop + def _maybe_store_failure_and_finish(result): + if isinstance(result, Failure): + self._set_failure(result) + self._finished.set() + logger.debug("Finished decrypter thread.") + self._deferred_loop = deferToThread( self._decrypt_and_process_docs_loop) - self._deferred_loop.addCallback( - lambda _: logger.debug("Finished decrypter thread.")) + self._deferred_loop.addBoth( + _maybe_store_failure_and_finish) + + @property + def failure(self): + return self._failure + + def _set_failure(self, failure): + self._failure = failure + + def succeeded(self): + return self._failure is None def set_docs_to_process(self, docs_to_process): """ @@ -760,35 +777,43 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): This method runs in its own thread, so sleeping will not interfere with the main thread. 
""" - try: - # wait for database to be emptied - self._empty_db.wait() - # wait until we know how many documents we need to process - while self._docs_to_process is None: - time.sleep(self.DECRYPT_LOOP_PERIOD) - # because all database operations are asynchronous, we use an - # event to make sure we don't start the next loop before the - # current one has finished. - event = threading.Event() - # loop until we have processes as many docs as the number of - # changes - while self._processed_docs < self._docs_to_process: - if sameProxiedObjects( - self._insert_doc_cb.get(self.source_replica_uid), - None): - continue - event.clear() - d = self._decrypt_received_docs() - d.addCallback(lambda _: self._raise_if_async_fails()) - d.addCallback(lambda _: self._process_decrypted()) - d.addCallback(self._delete_processed_docs) - d.addCallback(lambda _: event.set()) - event.wait() - # sleep a bit to give time for some decryption work - time.sleep(self.DECRYPT_LOOP_PERIOD) - except Exception as e: - self._exception = e - self._finished.set() + # wait for database to be emptied + self._empty_db.wait() + + # wait until we know how many documents we need to process + while self._docs_to_process is None: + time.sleep(self.DECRYPT_LOOP_PERIOD) + + # because all database operations are asynchronous, we use an + # event to make sure we don't start the next loop before the + # current one has finished. 
+ event = threading.Event() + + # loop until we have processes as many docs as the number of + # changes + while self._processed_docs < self._docs_to_process: + + if sameProxiedObjects( + self._insert_doc_cb.get(self.source_replica_uid), + None): + continue + + event.clear() + + d = self._decrypt_received_docs() + d.addCallback(lambda _: self._raise_if_async_fails()) + d.addCallback(lambda _: self._process_decrypted()) + d.addCallback(lambda r: self._delete_processed_docs(r)) + d.addErrback(self._set_failure) # grab failure and continue + d.addCallback(lambda _: event.set()) + + event.wait() + + if not self.succeeded(): + break + + # sleep a bit to give time for some decryption work + time.sleep(self.DECRYPT_LOOP_PERIOD) def has_finished(self): return self._finished.is_set() diff --git a/client/src/leap/soledad/client/http_target.py b/client/src/leap/soledad/client/http_target.py index 5f18e4a9..bf397cfe 100644 --- a/client/src/leap/soledad/client/http_target.py +++ b/client/src/leap/soledad/client/http_target.py @@ -396,7 +396,14 @@ class SoledadHTTPSyncTarget(SyncTarget): headers = self._auth_header.copy() headers.update({'content-type': 'application/x-soledad-sync-get'}) - # maybe get one doc + #--------------------------------------------------------------------- + # maybe receive the first document + #--------------------------------------------------------------------- + + # we fetch the first document before fetching the rest because we need + # to know the total number of documents to be received, and this + # information comes as metadata to each request. 
+ d = self._receive_one_doc( headers, last_known_generation, last_known_trans_id, sync_id, 0) @@ -406,28 +413,48 @@ class SoledadHTTPSyncTarget(SyncTarget): if defer_decryption: self._sync_decr_pool.set_docs_to_process( number_of_changes) - idx = 1 - # maybe get more documents + #--------------------------------------------------------------------- + # maybe receive the rest of the documents + #--------------------------------------------------------------------- + + # launch many asynchronous fetches and inserts of received documents + # in the temporary sync db. Will wait for all results before + # continuing. + + received = 1 deferreds = [] - while idx < number_of_changes: + while received < number_of_changes: d = self._receive_one_doc( headers, last_known_generation, - last_known_trans_id, sync_id, idx) + last_known_trans_id, sync_id, received) d.addCallback( partial( self._insert_received_doc, - idx + 1, + received + 1, # the index of the current received doc number_of_changes)) deferreds.append(d) - idx += 1 + received += 1 results = yield defer.gatherResults(deferreds) - # get genration and transaction id of target after insertions + # get generation and transaction id of target after insertions if deferreds: _, new_generation, new_transaction_id = results.pop() - # get current target gen and trans id in case no documents were + #--------------------------------------------------------------------- + # wait for async decryption to finish + #--------------------------------------------------------------------- + + # below we do a trick so we can wait for the SyncDecrypterPool to + # finish its work before finally returning the new generation and + # transaction id of the remote replica. To achieve that, we create a + # Deferred that will return the results of the sync and, if we are + # decrypting asynchronously, we use reactor.callLater() to + # periodically poll the decrypter and check if it has finished its + # work. 
When it has finished, we either call the callback or errback + # of that deferred. In case we are not asynchronously decrypting, we + # just fire the deferred. + def _shutdown_and_finish(res): self._sync_decr_pool.close() return new_generation, new_transaction_id @@ -441,9 +468,11 @@ class SoledadHTTPSyncTarget(SyncTarget): SyncDecrypterPool.DECRYPT_LOOP_PERIOD, _wait_or_finish) else: - d.callback(None) + if self._sync_decr_pool.succeeded(): + d.callback(None) + else: + d.errback(self._sync_decr_pool.failure) - # decrypt docs in case of deferred decryption if defer_decryption: _wait_or_finish() else: -- cgit v1.2.3 From 33fa691e1df4d64d10313d5d192b3c064aafadb7 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 20 May 2015 18:19:28 -0300 Subject: [refactor] remove unneeded proxy for insert_doc_cb When we initialized the async decrypter pool in the target's init method we needed a proxy to ensure we could update the insert doc callback with the correct method later on. Now we initialize the decrypter only when we need it, so we don't need this proxy anymore. This commit removes the unneeded proxy. --- client/src/leap/soledad/client/encdecpool.py | 22 ++----------------- client/src/leap/soledad/client/http_target.py | 31 ++++++++++----------------- 2 files changed, 13 insertions(+), 40 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index d9f3d28c..2f58d06c 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -28,8 +28,6 @@ import time import json import logging -from zope.proxy import sameProxiedObjects - from twisted.internet import defer from twisted.internet.threads import deferToThread from twisted.python.failure import Failure @@ -535,16 +533,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): decrypted and inserted in the sync db. 
:rtype: twisted.internet.defer.Deferred """ - # insert_doc_cb is a proxy object that gets updated with the right - # insert function only when the sync_target invokes the sync_exchange - # method. so, if we don't still have a non-empty callback, we refuse - # to proceed. - if sameProxiedObjects( - self._insert_doc_cb.get(self.source_replica_uid), - None): - logger.debug("Sync decrypter pool: no insert_doc_cb() yet.") - return - soledad_assert(self._crypto is not None, "need a crypto object") content = json.loads(content) @@ -713,7 +701,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): gen, trans_id, idx): """ Insert the decrypted document into the local sqlcipher database. - Makes use of the passed callback `return_doc_cb` passed to the caller + Makes use of the passed callback `insert_doc_cb` passed to the caller by u1db sync. :param doc_id: The document id. @@ -730,7 +718,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type trans_id: str """ # could pass source_replica in params for callback chain - insert_fun = self._insert_doc_cb[self.source_replica_uid] logger.debug("Sync decrypter pool: inserting doc in local db: " "%s:%s %s" % (doc_id, doc_rev, gen)) @@ -739,7 +726,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): content = None doc = SoledadDocument(doc_id, doc_rev, content) gen = int(gen) - insert_fun(doc, gen, trans_id) + self._insert_doc_cb(doc, gen, trans_id) # store info about processed docs self._last_inserted_idx = idx @@ -793,11 +780,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): # changes while self._processed_docs < self._docs_to_process: - if sameProxiedObjects( - self._insert_doc_cb.get(self.source_replica_uid), - None): - continue - event.clear() d = self._decrypt_received_docs() diff --git a/client/src/leap/soledad/client/http_target.py b/client/src/leap/soledad/client/http_target.py index bf397cfe..bf563b34 100644 --- a/client/src/leap/soledad/client/http_target.py +++ b/client/src/leap/soledad/client/http_target.py @@ 
-25,11 +25,8 @@ import json import base64 import logging -from zope.proxy import setProxiedObject -from zope.proxy import ProxyBase from uuid import uuid4 from functools import partial -from collections import defaultdict from twisted.internet import defer from twisted.internet import reactor @@ -79,10 +76,6 @@ class SoledadHTTPSyncTarget(SyncTarget): written to the main database. """ - # will later keep a reference to the insert-doc callback - # passed to sync_exchange - _insert_doc_cb = defaultdict(lambda: ProxyBase(None)) - def __init__(self, url, source_replica_uid, creds, crypto, sync_db=None, sync_enc_pool=None): """ @@ -116,6 +109,7 @@ class SoledadHTTPSyncTarget(SyncTarget): self._crypto = crypto self._sync_db = sync_db self._sync_enc_pool = sync_enc_pool + self._insert_doc_cb = None # asynchronous encryption/decryption attributes self._decryption_callback = None self._sync_decr_pool = None @@ -213,7 +207,7 @@ class SoledadHTTPSyncTarget(SyncTarget): @defer.inlineCallbacks def sync_exchange(self, docs_by_generation, source_replica_uid, last_known_generation, last_known_trans_id, - return_doc_cb, ensure_callback=None, + insert_doc_cb, ensure_callback=None, defer_decryption=True, sync_id=None): """ Find out which documents the remote database does not know about, @@ -235,11 +229,11 @@ class SoledadHTTPSyncTarget(SyncTarget): :param last_known_trans_id: Target's last known transaction id. :type last_known_trans_id: str - :param return_doc_cb: A callback for inserting received documents from + :param insert_doc_cb: A callback for inserting received documents from target. If not overriden, this will call u1db insert_doc_from_target in synchronizer, which implements the TAKE OTHER semantics. 
- :type return_doc_cb: function + :type insert_doc_cb: function :param ensure_callback: A callback that ensures we know the target replica uid if the target replica was just @@ -262,9 +256,8 @@ class SoledadHTTPSyncTarget(SyncTarget): sync_id = str(uuid4()) self.source_replica_uid = source_replica_uid - # let the decrypter pool access the passed callback to insert docs - setProxiedObject(self._insert_doc_cb[source_replica_uid], - return_doc_cb) + # save a reference to the callback so we can use it after decrypting + self._insert_doc_cb = insert_doc_cb gen_after_send, trans_id_after_send = yield self._send_docs( docs_by_generation, @@ -274,7 +267,7 @@ class SoledadHTTPSyncTarget(SyncTarget): cur_target_gen, cur_target_trans_id = yield self._receive_docs( last_known_generation, last_known_trans_id, - return_doc_cb, ensure_callback, sync_id, + ensure_callback, sync_id, defer_decryption=defer_decryption) # update gen and trans id info in case we just sent and did not @@ -376,10 +369,8 @@ class SoledadHTTPSyncTarget(SyncTarget): @defer.inlineCallbacks def _receive_docs(self, last_known_generation, last_known_trans_id, - return_doc_cb, ensure_callback, sync_id, - defer_decryption): - # we keep a reference to the callback in case we defer the decryption - self._return_doc_cb = return_doc_cb + ensure_callback, sync_id, defer_decryption): + self._queue_for_decrypt = defer_decryption \ and self._sync_db is not None @@ -534,7 +525,7 @@ class SoledadHTTPSyncTarget(SyncTarget): else: # defer_decryption is False or no-sync-db fallback doc.set_json(decrypt_doc(self._crypto, doc)) - self._return_doc_cb(doc, gen, trans_id) + self._insert_doc_cb(doc, gen, trans_id) else: # not symmetrically encrypted doc, insert it directly # or save it in the decrypted stage. 
@@ -543,7 +534,7 @@ class SoledadHTTPSyncTarget(SyncTarget): doc.doc_id, doc.rev, doc.content, gen, trans_id, idx) else: - self._return_doc_cb(doc, gen, trans_id) + self._insert_doc_cb(doc, gen, trans_id) # ------------------------------------------------------------- # end of symmetric decryption # ------------------------------------------------------------- -- cgit v1.2.3 From 478dd0eba5129e2e68c85b7b93561bf9f9de2f19 Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 22 May 2015 16:59:58 -0300 Subject: [refactor] remove inline enc/dec from client pool The whole idea of the encrypter/decrypter pool is to be able to use multiple cores to allow parallel encryption/decryption. Previous to this commit, the encryptor/decryptor pools could be configured to not use workers and instead do encryption/decryption inline. That was meant for testing purposes and defeated the purpose of the pools. This commit removes the possibility of inline encrypting/decrypting when using the pools. It also refactors the enc/dec pool code so any failures while using the pool are correctly grabbed and raised to the top of the sync deferred chain. --- client/src/leap/soledad/client/encdecpool.py | 220 ++++++++++---------------- client/src/leap/soledad/client/http_target.py | 5 +- 2 files changed, 84 insertions(+), 141 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index 2f58d06c..c0a05d38 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -23,14 +23,12 @@ during synchronization. 
import multiprocessing -import threading -import time import json import logging +from twisted.internet import reactor from twisted.internet import defer from twisted.internet.threads import deferToThread -from twisted.python.failure import Failure from leap.soledad.common.document import SoledadDocument from leap.soledad.common import soledad_assert @@ -50,6 +48,8 @@ class SyncEncryptDecryptPool(object): """ Base class for encrypter/decrypter pools. """ + + # TODO implement throttling to reduce cpu usage?? WORKERS = multiprocessing.cpu_count() def __init__(self, crypto, sync_db): @@ -62,9 +62,9 @@ class SyncEncryptDecryptPool(object): :param sync_db: A database connection handle :type sync_db: pysqlcipher.dbapi2.Connection """ - self._pool = multiprocessing.Pool(self.WORKERS) self._crypto = crypto self._sync_db = sync_db + self._pool = multiprocessing.Pool(self.WORKERS) def close(self): """ @@ -143,8 +143,6 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): Pool of workers that spawn subprocesses to execute the symmetric encryption of documents to be synced. """ - # TODO implement throttling to reduce cpu usage?? - WORKERS = multiprocessing.cpu_count() TABLE_NAME = "docs_tosync" FIELD_NAMES = "doc_id PRIMARY KEY, rev, content" @@ -191,7 +189,7 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): except multiprocessing.Queue.Empty: pass - def _encrypt_doc(self, doc, workers=True): + def _encrypt_doc(self, doc): """ Symmetrically encrypt a document. 
@@ -207,19 +205,10 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): key = self._crypto.doc_passphrase(doc.doc_id) secret = self._crypto.secret args = doc.doc_id, doc.rev, docstr, key, secret - - if workers: - # encrypt asynchronously - self._pool.apply_async( - encrypt_doc_task, args, - callback=self._encrypt_doc_cb) - else: - # encrypt inline - try: - res = encrypt_doc_task(*args) - self._encrypt_doc_cb(res) - except Exception as exc: - logger.exception(exc) + # encrypt asynchronously + self._pool.apply_async( + encrypt_doc_task, args, + callback=self._encrypt_doc_cb) def _encrypt_doc_cb(self, result): """ @@ -390,24 +379,22 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): self._async_results = [] self._failure = None - self._finished = threading.Event() + self._finished = False - # clear the database before starting the sync - self._empty_db = threading.Event() - d = self._empty() - d.addCallback(lambda _: self._empty_db.set()) + # XXX we want to empty the database before starting, but this is an + # asynchronous call, so we have to somehow make sure that it is + # executed before any other call to the database, without + # blocking. 
+ self._empty() - # start the decryption loop - def _maybe_store_failure_and_finish(result): - if isinstance(result, Failure): - self._set_failure(result) - self._finished.set() - logger.debug("Finished decrypter thread.") + def _launch_decrypt_and_process(self): + d = self._decrypt_and_process_docs() + d.addErrback(lambda f: self._set_failure(f)) - self._deferred_loop = deferToThread( - self._decrypt_and_process_docs_loop) - self._deferred_loop.addBoth( - _maybe_store_failure_and_finish) + def _schedule_decrypt_and_process(self): + reactor.callLater( + self.DECRYPT_LOOP_PERIOD, + self._launch_decrypt_and_process) @property def failure(self): @@ -415,11 +402,12 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): def _set_failure(self, failure): self._failure = failure + self._finished = True - def succeeded(self): - return self._failure is None + def failed(self): + return bool(self._failure) - def set_docs_to_process(self, docs_to_process): + def start(self, docs_to_process): """ Set the number of documents we expect to process. @@ -430,6 +418,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type docs_to_process: int """ self._docs_to_process = docs_to_process + self._schedule_decrypt_and_process() def insert_encrypted_received_doc( self, doc_id, doc_rev, content, gen, trans_id, idx): @@ -506,10 +495,10 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): % self.TABLE_NAME return self._runOperation(query, (doc_id,)) - def _decrypt_doc(self, doc_id, rev, content, gen, trans_id, idx, - workers=True): + def _async_decrypt_doc(self, doc_id, rev, content, gen, trans_id, idx): """ - Symmetrically decrypt a document and store in the sync db. + Dispatch an asynchronous document decrypting routine and save the + result object. :param doc_id: The ID for the document with contents to be encrypted. :type doc: str @@ -525,9 +514,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type trans_id: str :param idx: The index of this document in the current sync process. 
:type idx: int - :param workers: Whether to defer the decryption to the multiprocess - pool of workers. Useful for debugging purposes. - :type workers: bool :return: A deferred that will fire after the document hasa been decrypted and inserted in the sync db. @@ -539,35 +525,15 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): key = self._crypto.doc_passphrase(doc_id) secret = self._crypto.secret args = doc_id, rev, content, gen, trans_id, key, secret, idx - - if workers: - # when using multiprocessing, we need to wait for all parallel - # processing to finish before continuing with the - # decrypt-and-process loop. We do this by using an extra deferred - # that will be fired by the multiprocessing callback when it has - # finished processing. - d1 = defer.Deferred() - - def _multiprocessing_callback(result): - d2 = self._decrypt_doc_cb(result) - d2.addCallback(lambda defres: d1.callback(defres)) - - # save the async result object so we can inspect it for failures - self._async_results.append( - self._pool.apply_async( - decrypt_doc_task, args, - callback=_multiprocessing_callback)) - - return d1 - else: - # decrypt inline - res = decrypt_doc_task(*args) - return self._decrypt_doc_cb(res) + # decrypt asynchronously + self._async_results.append( + self._pool.apply_async( + decrypt_doc_task, args)) def _decrypt_doc_cb(self, result): """ Store the decryption result in the sync db from where it will later be - picked by _process_decrypted. + picked by _process_decrypted_docs. :param result: A tuple containing the document's id, revision, content, generation, transaction id and sync index. @@ -636,7 +602,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): last_idx += 1 defer.returnValue(insertable) - def _decrypt_received_docs(self): + @defer.inlineCallbacks + def _async_decrypt_received_docs(self): """ Get all the encrypted documents from the sync database and dispatch a decrypt worker to decrypt each one of them. 
@@ -645,37 +612,25 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): decrypted and inserted back in the sync db. :rtype: twisted.internet.defer.Deferred """ + docs = yield self._get_docs(encrypted=True) + for doc_id, rev, content, gen, trans_id, _, idx in docs: + self._async_decrypt_doc( + doc_id, rev, content, gen, trans_id, idx) - def _callback(received_docs): - deferreds = [] - for doc_id, rev, content, gen, trans_id, _, idx in received_docs: - deferreds.append( - self._decrypt_doc( - doc_id, rev, content, gen, trans_id, idx)) - return defer.gatherResults(deferreds) - - d = self._get_docs(encrypted=True) - d.addCallback(_callback) - return d - - def _process_decrypted(self): + @defer.inlineCallbacks + def _process_decrypted_docs(self): """ Fetch as many decrypted documents as can be taken from the expected - order and insert them in the database. + order and insert them in the local replica. :return: A deferred that will fire with the list of inserted documents. :rtype: twisted.internet.defer.Deferred """ - - def _callback(insertable): - for doc_fields in insertable: - self._insert_decrypted_local_doc(*doc_fields) - return insertable - - d = self._get_insertable_docs() - d.addCallback(_callback) - return d + insertable = yield self._get_insertable_docs() + for doc_fields in insertable: + self._insert_decrypted_local_doc(*doc_fields) + defer.returnValue(insertable) def _delete_processed_docs(self, inserted): """ @@ -700,8 +655,9 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): def _insert_decrypted_local_doc(self, doc_id, doc_rev, content, gen, trans_id, idx): """ - Insert the decrypted document into the local sqlcipher database. - Makes use of the passed callback `insert_doc_cb` passed to the caller + Insert the decrypted document into the local replica. + + Make use of the passed callback `insert_doc_cb` passed to the caller by u1db sync. :param doc_id: The document id. 
@@ -743,59 +699,47 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): query = "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,) return self._runOperation(query) - def _raise_if_async_fails(self): + def _collect_async_decryption_results(self): """ - Raise any exception raised by a multiprocessing async decryption - call. + Collect the results of the asynchronous doc decryptions and re-raise + any exception raised by a multiprocessing async decryption call. :raise Exception: Raised if an async call has raised an exception. """ - for res in self._async_results: + async_results = self._async_results[:] + for res in async_results: if res.ready(): - if not res.successful(): - # re-raise the exception raised by the remote call - res.get() + self._decrypt_doc_cb(res.get()) # might raise an exception! + self._async_results.remove(res) - def _decrypt_and_process_docs_loop(self): + @defer.inlineCallbacks + def _decrypt_and_process_docs(self): """ Decrypt the documents received from remote replica and insert them into the local one. - This method runs in its own thread, so sleeping will not interfere - with the main thread. - """ - # wait for database to be emptied - self._empty_db.wait() - - # wait until we know how many documents we need to process - while self._docs_to_process is None: - time.sleep(self.DECRYPT_LOOP_PERIOD) - - # because all database operations are asynchronous, we use an - # event to make sure we don't start the next loop before the - # current one has finished. - event = threading.Event() - - # loop until we have processes as many docs as the number of - # changes - while self._processed_docs < self._docs_to_process: + This method implicitelly returns a defferred (see the decorator + above). It should only be called by _launch_decrypt_and_process(). + because this way any exceptions raised here will be stored by the + errback attached to the deferred returned. 
- event.clear() - - d = self._decrypt_received_docs() - d.addCallback(lambda _: self._raise_if_async_fails()) - d.addCallback(lambda _: self._process_decrypted()) - d.addCallback(lambda r: self._delete_processed_docs(r)) - d.addErrback(self._set_failure) # grab failure and continue - d.addCallback(lambda _: event.set()) - - event.wait() - - if not self.succeeded(): - break - - # sleep a bit to give time for some decryption work - time.sleep(self.DECRYPT_LOOP_PERIOD) + :return: A deferred which will fire after all decrypt, process and + delete operations have been executed. + :rtype: twisted.internet.defer.Deferred + """ + if not self.failed(): + if self._processed_docs < self._docs_to_process: + yield self._async_decrypt_received_docs() + yield self._collect_async_decryption_results() + docs = yield self._process_decrypted_docs() + yield self._delete_processed_docs(docs) + # recurse + self._schedule_decrypt_and_process() + else: + self._finished = True def has_finished(self): - return self._finished.is_set() + """ + Return whether the decrypter has finished its work. 
+ """ + return self._finished diff --git a/client/src/leap/soledad/client/http_target.py b/client/src/leap/soledad/client/http_target.py index bf563b34..75af9cf7 100644 --- a/client/src/leap/soledad/client/http_target.py +++ b/client/src/leap/soledad/client/http_target.py @@ -402,8 +402,7 @@ class SoledadHTTPSyncTarget(SyncTarget): number_of_changes, ngen, ntrans = yield d if defer_decryption: - self._sync_decr_pool.set_docs_to_process( - number_of_changes) + self._sync_decr_pool.start(number_of_changes) #--------------------------------------------------------------------- # maybe receive the rest of the documents @@ -459,7 +458,7 @@ class SoledadHTTPSyncTarget(SyncTarget): SyncDecrypterPool.DECRYPT_LOOP_PERIOD, _wait_or_finish) else: - if self._sync_decr_pool.succeeded(): + if not self._sync_decr_pool.failed(): d.callback(None) else: d.errback(self._sync_decr_pool.failure) -- cgit v1.2.3 From 31757168f6ad243ec83ba52b2e022298ba08f8d1 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 25 May 2015 11:46:24 -0300 Subject: [feature] add pool of http/https connections Instead of opening one TCP connection for each HTTP request, we want to reuse connections. Also, we need to be able to verify SSL certificates. This commit implements both features in the twisted http client sync. 
--- client/src/leap/soledad/client/api.py | 4 +- client/src/leap/soledad/client/http_client.py | 194 ++++++++++++++++++++++++++ client/src/leap/soledad/client/http_target.py | 53 ++++--- client/src/leap/soledad/client/sqlcipher.py | 13 +- 4 files changed, 230 insertions(+), 34 deletions(-) create mode 100644 client/src/leap/soledad/client/http_client.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index ffd95f6c..91e0a4a0 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -272,7 +272,8 @@ class Soledad(object): replica_uid = self._dbpool.replica_uid self._dbsyncer = SQLCipherU1DBSync( self._sqlcipher_opts, self._crypto, replica_uid, - self._defer_encryption) + SOLEDAD_CERT, + defer_encryption=self._defer_encryption) # # Closing methods @@ -630,6 +631,7 @@ class Soledad(object): Whether to defer decryption of documents, or do it inline while syncing. :type defer_decryption: bool + :return: A deferred whose callback will be invoked with the local generation before the synchronization was performed. :rtype: twisted.internet.defer.Deferred diff --git a/client/src/leap/soledad/client/http_client.py b/client/src/leap/soledad/client/http_client.py new file mode 100644 index 00000000..b08d199e --- /dev/null +++ b/client/src/leap/soledad/client/http_client.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- +# http_client.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +Twisted HTTP/HTTPS client. +""" + +import os + +from zope.interface import implements + +from OpenSSL.crypto import load_certificate +from OpenSSL.crypto import FILETYPE_PEM + +from twisted.internet import reactor +from twisted.internet.ssl import ClientContextFactory +from twisted.internet.ssl import CertificateOptions +from twisted.internet.defer import succeed + +from twisted.web.client import Agent +from twisted.web.client import HTTPConnectionPool +from twisted.web.client import readBody +from twisted.web.http_headers import Headers +from twisted.web.error import Error +from twisted.web.iweb import IBodyProducer + + +from leap.soledad.common.errors import InvalidAuthTokenError + + +# +# Setup a pool of connections +# + +_pool = HTTPConnectionPool(reactor, persistent=True) +_pool.maxPersistentPerHost = 10 +_agent = None + +# if we ever want to trust the system's CAs, we should use an agent like this: +# from twisted.web.client import BrowserLikePolicyForHTTPS +# _agent = Agent(reactor, BrowserLikePolicyForHTTPS(), pool=_pool) + + +# +# SSL/TLS certificate configuration +# + +def configure_certificate(cert_file): + """ + Configure an agent that verifies server certificates against a CA cert + file. + + :param cert_file: The path to the certificate file. + :type cert_file: str + """ + global _agent + cert = _load_cert(cert_file) + _agent = Agent( + reactor, + SoledadClientContextFactory(cert), + pool=_pool) + + +def _load_cert(cert_file): + """ + Load a X509 certificate from a file. + + :param cert_file: The path to the certificate file. + :type cert_file: str + + :return: The X509 certificate. 
+ :rtype: OpenSSL.crypto.X509 + """ + if os.path.exists(cert_file): + with open(cert_file) as f: + data = f.read() + return load_certificate(FILETYPE_PEM, data) + + +class SoledadClientContextFactory(ClientContextFactory): + """ + A context factory that will verify the server's certificate against a + given CA certificate. + """ + + def __init__(self, cacert): + """ + Initialize the context factory. + + :param cacert: The CA certificate. + :type cacert: OpenSSL.crypto.X509 + """ + self._cacert = cacert + + def getContext(self, hostname, port): + opts = CertificateOptions(verify=True, caCerts=[self._cacert]) + return opts.getContext() + + +# +# HTTP request facilities +# + +def _unauth_to_invalid_token_error(failure): + """ + An errback to translate unauthorized errors to our own invalid token + class. + + :param failure: The original failure. + :type failure: twisted.python.failure.Failure + + :return: Either the original failure or an invalid auth token error. + :rtype: twisted.python.failure.Failure + """ + failure.trap(Error) + if failure.getErrorMessage() == "401 Unauthorized": + raise InvalidAuthTokenError + return failure + + +class StringBodyProducer(object): + """ + A producer that writes the body of a request to a consumer. + """ + + implements(IBodyProducer) + + def __init__(self, body): + """ + Initialize the string produer. + + :param body: The body of the request. + :type body: str + """ + self.body = body + self.length = len(body) + + def startProducing(self, consumer): + """ + Write the body to the consumer. + + :param consumer: Any IConsumer provider. + :type consumer: twisted.internet.interfaces.IConsumer + + :return: A successful deferred. + :rtype: twisted.internet.defer.Deferred + """ + consumer.write(self.body) + return succeed(None) + + def pauseProducing(self): + pass + + def stopProducing(self): + pass + + +def httpRequest(url, method='GET', body=None, headers={}): + """ + Perform an HTTP request. + + :param url: The URL for the request. 
+ :type url: str + :param method: The HTTP method of the request. + :type method: str + :param body: The body of the request, if any. + :type body: str + :param headers: The headers of the request. + :type headers: dict + + :return: A deferred that fires with the body of the request. + :rtype: twisted.internet.defer.Deferred + """ + if body: + body = StringBodyProducer(body) + d = _agent.request( + method, url, headers=Headers(headers), bodyProducer=body) + d.addCallbacks(readBody, _unauth_to_invalid_token_error) + return d diff --git a/client/src/leap/soledad/client/http_target.py b/client/src/leap/soledad/client/http_target.py index 75af9cf7..dc6c0e0a 100644 --- a/client/src/leap/soledad/client/http_target.py +++ b/client/src/leap/soledad/client/http_target.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# target.py +# http_target.py # Copyright (C) 2015 LEAP # # This program is free software: you can redistribute it and/or modify @@ -21,6 +21,7 @@ A U1DB backend for encrypting data before sending to server and decrypting after receiving. 
""" + import json import base64 import logging @@ -30,15 +31,12 @@ from functools import partial from twisted.internet import defer from twisted.internet import reactor -from twisted.web.client import getPage -from twisted.web.error import Error from u1db import errors from u1db import SyncTarget from u1db.remote import utils from leap.soledad.common.document import SoledadDocument -from leap.soledad.common.errors import InvalidAuthTokenError from leap.soledad.client.crypto import is_symmetrically_encrypted from leap.soledad.client.crypto import encrypt_doc @@ -47,24 +45,13 @@ from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS from leap.soledad.client.events import signal from leap.soledad.client.encdecpool import SyncDecrypterPool +from leap.soledad.client.http_client import httpRequest +from leap.soledad.client.http_client import configure_certificate logger = logging.getLogger(__name__) -def _unauth_to_invalid_token_error(failure): - failure.trap(Error) - if failure.getErrorMessage() == "401 Unauthorized": - raise InvalidAuthTokenError - return failure - - -def getSoledadPage(*args, **kwargs): - d = getPage(*args, **kwargs) - d.addErrback(_unauth_to_invalid_token_error) - return d - - class SoledadHTTPSyncTarget(SyncTarget): """ A SyncTarget that encrypts data before sending and decrypts data after @@ -76,7 +63,7 @@ class SoledadHTTPSyncTarget(SyncTarget): written to the main database. """ - def __init__(self, url, source_replica_uid, creds, crypto, + def __init__(self, url, source_replica_uid, creds, crypto, cert_file, sync_db=None, sync_enc_pool=None): """ Initialize the sync target. @@ -93,12 +80,19 @@ class SoledadHTTPSyncTarget(SyncTarget): :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt document contents when syncing. 
:type crypto: soledad.crypto.SoledadCrypto + :param cert_file: Path to the certificate of the ca used to validate + the SSL certificate used by the remote soledad + server. + :type cert_file: str :param sync_db: Optional. handler for the db with the symmetric encryption of the syncing documents. If None, encryption will be done in-place, instead of retreiving it from the dedicated database. :type sync_db: Sqlite handler + :param verify_ssl: Whether we should perform SSL server certificate + verification. + :type verify_ssl: bool """ if url.endswith("/"): url = url[:-1] @@ -113,6 +107,7 @@ class SoledadHTTPSyncTarget(SyncTarget): # asynchronous encryption/decryption attributes self._decryption_callback = None self._sync_decr_pool = None + configure_certificate(cert_file) def set_creds(self, creds): """ @@ -125,7 +120,7 @@ class SoledadHTTPSyncTarget(SyncTarget): token = creds['token']['token'] auth = '%s:%s' % (uuid, token) b64_token = base64.b64encode(auth) - self._auth_header = {'Authorization': 'Token %s' % b64_token} + self._auth_header = {'Authorization': ['Token %s' % b64_token]} @property def _defer_encryption(self): @@ -153,7 +148,7 @@ class SoledadHTTPSyncTarget(SyncTarget): source_replica_last_known_transaction_id) :rtype: twisted.internet.defer.Deferred """ - raw = yield getSoledadPage(self._url, headers=self._auth_header) + raw = yield httpRequest(self._url, headers=self._auth_header) res = json.loads(raw) defer.returnValue([ res['target_replica_uid'], @@ -197,12 +192,12 @@ class SoledadHTTPSyncTarget(SyncTarget): 'transaction_id': source_replica_transaction_id }) headers = self._auth_header.copy() - headers.update({'content-type': 'application/json'}) - return getSoledadPage( + headers.update({'content-type': ['application/json']}) + return httpRequest( self._url, method='PUT', headers=headers, - postdata=data) + body=data) @defer.inlineCallbacks def sync_exchange(self, docs_by_generation, source_replica_uid, @@ -295,7 +290,7 @@ class 
SoledadHTTPSyncTarget(SyncTarget): defer.returnValue([None, None]) headers = self._auth_header.copy() - headers.update({'content-type': 'application/x-soledad-sync-put'}) + headers.update({'content-type': ['application/x-soledad-sync-put']}) # add remote replica metadata to the request first_entries = ['['] self._prepare( @@ -335,11 +330,11 @@ class SoledadHTTPSyncTarget(SyncTarget): doc_idx=doc_idx) entries.append('\r\n]') data = ''.join(entries) - result = yield getSoledadPage( + result = yield httpRequest( self._url, method='POST', headers=headers, - postdata=data) + body=data) defer.returnValue(result) def _encrypt_doc(self, doc): @@ -385,7 +380,7 @@ class SoledadHTTPSyncTarget(SyncTarget): self._setup_sync_decr_pool() headers = self._auth_header.copy() - headers.update({'content-type': 'application/x-soledad-sync-get'}) + headers.update({'content-type': ['application/x-soledad-sync-get']}) #--------------------------------------------------------------------- # maybe receive the first document @@ -486,11 +481,11 @@ class SoledadHTTPSyncTarget(SyncTarget): ',', entries, received=received) entries.append('\r\n]') # send headers - return getSoledadPage( + return httpRequest( self._url, method='POST', headers=headers, - postdata=''.join(entries)) + body=''.join(entries)) def _insert_received_doc(self, idx, total, response): """ diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 96732325..ed9e95dc 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -434,13 +434,14 @@ class SQLCipherU1DBSync(SQLCipherDatabase): """ syncing_lock = defaultdict(threading.Lock) - def __init__(self, opts, soledad_crypto, replica_uid, + def __init__(self, opts, soledad_crypto, replica_uid, cert_file, defer_encryption=False): self._opts = opts self._path = opts.path self._crypto = soledad_crypto self.__replica_uid = replica_uid + self._cert_file = cert_file self._sync_db_key 
= opts.sync_db_key self._sync_db = None @@ -570,9 +571,8 @@ class SQLCipherU1DBSync(SQLCipherDatabase): :param url: The url of the target replica to sync with. :type url: str - :param creds: - optional dictionary giving credentials. - to authorize the operation with the server. + :param creds: optional dictionary giving credentials to authorize the + operation with the server. :type creds: dict :param defer_decryption: Whether to defer the decryption process using the intermediate @@ -599,6 +599,10 @@ class SQLCipherU1DBSync(SQLCipherDatabase): one instance synchronizing the same database replica at the same time. Because of that, this method blocks until the syncing lock can be acquired. + + :param creds: optional dictionary giving credentials to authorize the + operation with the server. + :type creds: dict """ with self.syncing_lock[self._path]: syncer = self._get_syncer(url, creds=creds) @@ -640,6 +644,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._replica_uid, creds=creds, crypto=self._crypto, + cert_file=self._cert_file, sync_db=self._sync_db, sync_enc_pool=self._sync_enc_pool)) self._syncers[url] = (h, syncer) -- cgit v1.2.3 From 3e6e51649bb6206125f20ac6773f6744ec8bf175 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 25 May 2015 13:47:57 -0300 Subject: [bug] remove client syncer call to close method --- client/src/leap/soledad/client/sqlcipher.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index ed9e95dc..8e7d39c2 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -677,9 +677,8 @@ class SQLCipherU1DBSync(SQLCipherDatabase): """ # close all open syncers for url in self._syncers: - _, syncer = self._syncers[url] - syncer.close() - self._syncers = [] + del self._syncers[url] + # stop the encryption pool if self._sync_enc_pool is not None: 
self._sync_enc_pool.close() -- cgit v1.2.3 From 5feb66707d84d6644158b5c9b848628a4814610f Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 25 May 2015 20:14:26 -0300 Subject: [bug] Empty comes from Queue When handling this exception Python got lost because the import was incorrect. Queue.Empty comes from Queue, not from multiprocessing.Queue --- client/src/leap/soledad/client/encdecpool.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index c0a05d38..d9a72b25 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -23,6 +23,7 @@ during synchronization. import multiprocessing +import Queue import json import logging @@ -186,7 +187,7 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): try: doc = self._sync_queue.get(True, self.ENCRYPT_LOOP_PERIOD) self._encrypt_doc(doc) - except multiprocessing.Queue.Empty: + except Queue.Empty: pass def _encrypt_doc(self, doc): -- cgit v1.2.3 From 6d4953457726ec7830e48fb899e2e8c17b1ec995 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 25 May 2015 20:16:30 -0300 Subject: [bug] dictionary can't be modified during iteration I tested that code and this can't happen. We need to iterate keys and then ask 'del'. 
The previous method raised: RuntimeError: dictionary changed size during iteration --- client/src/leap/soledad/client/sqlcipher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 8e7d39c2..b2025130 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -676,7 +676,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): Close the syncer and syncdb orderly """ # close all open syncers - for url in self._syncers: + for url in self._syncers.keys(): del self._syncers[url] # stop the encryption pool -- cgit v1.2.3 From 91674a40edb19cd241c76b27ad998bb5df404570 Mon Sep 17 00:00:00 2001 From: Ruben Pollan Date: Tue, 26 May 2015 22:04:52 +0200 Subject: [refactor] move the twisted http code to leap.common --- client/src/leap/soledad/client/http_client.py | 194 -------------------------- client/src/leap/soledad/client/http_target.py | 38 ++++- 2 files changed, 31 insertions(+), 201 deletions(-) delete mode 100644 client/src/leap/soledad/client/http_client.py (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/http_client.py b/client/src/leap/soledad/client/http_client.py deleted file mode 100644 index b08d199e..00000000 --- a/client/src/leap/soledad/client/http_client.py +++ /dev/null @@ -1,194 +0,0 @@ -# -*- coding: utf-8 -*- -# http_client.py -# Copyright (C) 2015 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -""" -Twisted HTTP/HTTPS client. -""" - -import os - -from zope.interface import implements - -from OpenSSL.crypto import load_certificate -from OpenSSL.crypto import FILETYPE_PEM - -from twisted.internet import reactor -from twisted.internet.ssl import ClientContextFactory -from twisted.internet.ssl import CertificateOptions -from twisted.internet.defer import succeed - -from twisted.web.client import Agent -from twisted.web.client import HTTPConnectionPool -from twisted.web.client import readBody -from twisted.web.http_headers import Headers -from twisted.web.error import Error -from twisted.web.iweb import IBodyProducer - - -from leap.soledad.common.errors import InvalidAuthTokenError - - -# -# Setup a pool of connections -# - -_pool = HTTPConnectionPool(reactor, persistent=True) -_pool.maxPersistentPerHost = 10 -_agent = None - -# if we ever want to trust the system's CAs, we should use an agent like this: -# from twisted.web.client import BrowserLikePolicyForHTTPS -# _agent = Agent(reactor, BrowserLikePolicyForHTTPS(), pool=_pool) - - -# -# SSL/TLS certificate configuration -# - -def configure_certificate(cert_file): - """ - Configure an agent that verifies server certificates against a CA cert - file. - - :param cert_file: The path to the certificate file. - :type cert_file: str - """ - global _agent - cert = _load_cert(cert_file) - _agent = Agent( - reactor, - SoledadClientContextFactory(cert), - pool=_pool) - - -def _load_cert(cert_file): - """ - Load a X509 certificate from a file. - - :param cert_file: The path to the certificate file. - :type cert_file: str - - :return: The X509 certificate. 
- :rtype: OpenSSL.crypto.X509 - """ - if os.path.exists(cert_file): - with open(cert_file) as f: - data = f.read() - return load_certificate(FILETYPE_PEM, data) - - -class SoledadClientContextFactory(ClientContextFactory): - """ - A context factory that will verify the server's certificate against a - given CA certificate. - """ - - def __init__(self, cacert): - """ - Initialize the context factory. - - :param cacert: The CA certificate. - :type cacert: OpenSSL.crypto.X509 - """ - self._cacert = cacert - - def getContext(self, hostname, port): - opts = CertificateOptions(verify=True, caCerts=[self._cacert]) - return opts.getContext() - - -# -# HTTP request facilities -# - -def _unauth_to_invalid_token_error(failure): - """ - An errback to translate unauthorized errors to our own invalid token - class. - - :param failure: The original failure. - :type failure: twisted.python.failure.Failure - - :return: Either the original failure or an invalid auth token error. - :rtype: twisted.python.failure.Failure - """ - failure.trap(Error) - if failure.getErrorMessage() == "401 Unauthorized": - raise InvalidAuthTokenError - return failure - - -class StringBodyProducer(object): - """ - A producer that writes the body of a request to a consumer. - """ - - implements(IBodyProducer) - - def __init__(self, body): - """ - Initialize the string produer. - - :param body: The body of the request. - :type body: str - """ - self.body = body - self.length = len(body) - - def startProducing(self, consumer): - """ - Write the body to the consumer. - - :param consumer: Any IConsumer provider. - :type consumer: twisted.internet.interfaces.IConsumer - - :return: A successful deferred. - :rtype: twisted.internet.defer.Deferred - """ - consumer.write(self.body) - return succeed(None) - - def pauseProducing(self): - pass - - def stopProducing(self): - pass - - -def httpRequest(url, method='GET', body=None, headers={}): - """ - Perform an HTTP request. - - :param url: The URL for the request. 
- :type url: str - :param method: The HTTP method of the request. - :type method: str - :param body: The body of the request, if any. - :type body: str - :param headers: The headers of the request. - :type headers: dict - - :return: A deferred that fires with the body of the request. - :rtype: twisted.internet.defer.Deferred - """ - if body: - body = StringBodyProducer(body) - d = _agent.request( - method, url, headers=Headers(headers), bodyProducer=body) - d.addCallbacks(readBody, _unauth_to_invalid_token_error) - return d diff --git a/client/src/leap/soledad/client/http_target.py b/client/src/leap/soledad/client/http_target.py index dc6c0e0a..5eef2df3 100644 --- a/client/src/leap/soledad/client/http_target.py +++ b/client/src/leap/soledad/client/http_target.py @@ -31,12 +31,16 @@ from functools import partial from twisted.internet import defer from twisted.internet import reactor +from twisted.web.error import Error from u1db import errors from u1db import SyncTarget from u1db.remote import utils +from leap.common.http import HTTPClient + from leap.soledad.common.document import SoledadDocument +from leap.soledad.common.errors import InvalidAuthTokenError from leap.soledad.client.crypto import is_symmetrically_encrypted from leap.soledad.client.crypto import encrypt_doc @@ -45,8 +49,6 @@ from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS from leap.soledad.client.events import signal from leap.soledad.client.encdecpool import SyncDecrypterPool -from leap.soledad.client.http_client import httpRequest -from leap.soledad.client.http_client import configure_certificate logger = logging.getLogger(__name__) @@ -107,7 +109,7 @@ class SoledadHTTPSyncTarget(SyncTarget): # asynchronous encryption/decryption attributes self._decryption_callback = None self._sync_decr_pool = None - configure_certificate(cert_file) + self._http = HTTPClient(cert_file) def set_creds(self, creds): """ @@ -148,7 
+150,7 @@ class SoledadHTTPSyncTarget(SyncTarget): source_replica_last_known_transaction_id) :rtype: twisted.internet.defer.Deferred """ - raw = yield httpRequest(self._url, headers=self._auth_header) + raw = yield self._http_request(self._url, headers=self._auth_header) res = json.loads(raw) defer.returnValue([ res['target_replica_uid'], @@ -193,7 +195,7 @@ class SoledadHTTPSyncTarget(SyncTarget): }) headers = self._auth_header.copy() headers.update({'content-type': ['application/json']}) - return httpRequest( + return self._http_request( self._url, method='PUT', headers=headers, @@ -330,7 +332,7 @@ class SoledadHTTPSyncTarget(SyncTarget): doc_idx=doc_idx) entries.append('\r\n]') data = ''.join(entries) - result = yield httpRequest( + result = yield self._http_request( self._url, method='POST', headers=headers, @@ -481,7 +483,7 @@ class SoledadHTTPSyncTarget(SyncTarget): ',', entries, received=received) entries.append('\r\n]') # send headers - return httpRequest( + return self._http_request( self._url, method='POST', headers=headers, @@ -596,3 +598,25 @@ class SoledadHTTPSyncTarget(SyncTarget): self._sync_db, insert_doc_cb=self._insert_doc_cb, source_replica_uid=self.source_replica_uid) + + def _http_request(self, url, method='GET', body=None, headers={}): + d = self._http.request(url, method, body, headers) + d.addErrback(_unauth_to_invalid_token_error) + return d + + +def _unauth_to_invalid_token_error(failure): + """ + An errback to translate unauthorized errors to our own invalid token + class. + + :param failure: The original failure. + :type failure: twisted.python.failure.Failure + + :return: Either the original failure or an invalid auth token error. 
+ :rtype: twisted.python.failure.Failure + """ + failure.trap(Error) + if failure.getErrorMessage() == "401 Unauthorized": + raise InvalidAuthTokenError + return failure -- cgit v1.2.3 From 7d4ab674a167d48686f61310ff4ff6a62a545e67 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 6 May 2015 17:42:26 -0300 Subject: [feat] adapt to new events api on common - Related: #6359 --- client/src/leap/soledad/client/api.py | 2 +- client/src/leap/soledad/client/events.py | 29 +++++++++++++-------------- client/src/leap/soledad/client/http_target.py | 6 +++--- client/src/leap/soledad/client/secrets.py | 12 +++++------ 4 files changed, 24 insertions(+), 25 deletions(-) (limited to 'client/src/leap') diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 91e0a4a0..76d6acc3 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -656,7 +656,7 @@ class Soledad(object): defer_decryption=defer_decryption) def _sync_callback(local_gen): - soledad_events.signal( + soledad_events.emit( soledad_events.SOLEDAD_DONE_DATA_SYNC, self.uuid) return local_gen diff --git a/client/src/leap/soledad/client/events.py b/client/src/leap/soledad/client/events.py index 88e28674..b1379521 100644 --- a/client/src/leap/soledad/client/events.py +++ b/client/src/leap/soledad/client/events.py @@ -20,28 +20,27 @@ Signaling functions. 
""" +from leap.common.events import emit +from leap.common.events import catalog -from leap.common import events -from leap.common.events import signal - -SOLEDAD_CREATING_KEYS = events.proto.SOLEDAD_CREATING_KEYS -SOLEDAD_DONE_CREATING_KEYS = events.proto.SOLEDAD_DONE_CREATING_KEYS -SOLEDAD_DOWNLOADING_KEYS = events.proto.SOLEDAD_DOWNLOADING_KEYS +SOLEDAD_CREATING_KEYS = catalog.SOLEDAD_CREATING_KEYS +SOLEDAD_DONE_CREATING_KEYS = catalog.SOLEDAD_DONE_CREATING_KEYS +SOLEDAD_DOWNLOADING_KEYS = catalog.SOLEDAD_DOWNLOADING_KEYS SOLEDAD_DONE_DOWNLOADING_KEYS = \ - events.proto.SOLEDAD_DONE_DOWNLOADING_KEYS -SOLEDAD_UPLOADING_KEYS = events.proto.SOLEDAD_UPLOADING_KEYS + catalog.SOLEDAD_DONE_DOWNLOADING_KEYS +SOLEDAD_UPLOADING_KEYS = catalog.SOLEDAD_UPLOADING_KEYS SOLEDAD_DONE_UPLOADING_KEYS = \ - events.proto.SOLEDAD_DONE_UPLOADING_KEYS -SOLEDAD_NEW_DATA_TO_SYNC = events.proto.SOLEDAD_NEW_DATA_TO_SYNC -SOLEDAD_DONE_DATA_SYNC = events.proto.SOLEDAD_DONE_DATA_SYNC -SOLEDAD_SYNC_SEND_STATUS = events.proto.SOLEDAD_SYNC_SEND_STATUS -SOLEDAD_SYNC_RECEIVE_STATUS = events.proto.SOLEDAD_SYNC_RECEIVE_STATUS + catalog.SOLEDAD_DONE_UPLOADING_KEYS +SOLEDAD_NEW_DATA_TO_SYNC = catalog.SOLEDAD_NEW_DATA_TO_SYNC +SOLEDAD_DONE_DATA_SYNC = catalog.SOLEDAD_DONE_DATA_SYNC +SOLEDAD_SYNC_SEND_STATUS = catalog.SOLEDAD_SYNC_SEND_STATUS +SOLEDAD_SYNC_RECEIVE_STATUS = catalog.SOLEDAD_SYNC_RECEIVE_STATUS __all__ = [ - "events", - "signal", + "catalog", + "emit", "SOLEDAD_CREATING_KEYS", "SOLEDAD_DONE_CREATING_KEYS", "SOLEDAD_DOWNLOADING_KEYS", diff --git a/client/src/leap/soledad/client/http_target.py b/client/src/leap/soledad/client/http_target.py index 5eef2df3..30590ae1 100644 --- a/client/src/leap/soledad/client/http_target.py +++ b/client/src/leap/soledad/client/http_target.py @@ -47,7 +47,7 @@ from leap.soledad.client.crypto import encrypt_doc from leap.soledad.client.crypto import decrypt_doc from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS from leap.soledad.client.events 
import SOLEDAD_SYNC_RECEIVE_STATUS -from leap.soledad.client.events import signal +from leap.soledad.client.events import emit from leap.soledad.client.encdecpool import SyncDecrypterPool @@ -311,7 +311,7 @@ class SoledadHTTPSyncTarget(SyncTarget): if self._defer_encryption: self._sync_enc_pool.delete_encrypted_doc( doc.doc_id, doc.rev) - signal(SOLEDAD_SYNC_SEND_STATUS, + emit(SOLEDAD_SYNC_SEND_STATUS, "Soledad sync send status: %d/%d" % (idx, total)) response_dict = json.loads(result)[0] @@ -535,7 +535,7 @@ class SoledadHTTPSyncTarget(SyncTarget): # end of symmetric decryption # ------------------------------------------------------------- msg = "%d/%d" % (idx, total) - signal(SOLEDAD_SYNC_RECEIVE_STATUS, msg) + emit(SOLEDAD_SYNC_RECEIVE_STATUS, msg) logger.debug("Soledad sync receive status: %s" % msg) return number_of_changes, new_generation, new_transaction_id diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 96f7e906..e89e21aa 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -480,13 +480,13 @@ class SoledadSecrets(object): :return: a document with encrypted key material in its contents :rtype: document.SoledadDocument """ - events.signal(events.SOLEDAD_DOWNLOADING_KEYS, self._uuid) + events.emit(events.SOLEDAD_DOWNLOADING_KEYS, self._uuid) db = self._shared_db if not db: logger.warning('No shared db found') return doc = db.get_doc(self._shared_db_doc_id()) - events.signal(events.SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid) + events.emit(events.SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid) return doc def _put_secrets_in_shared_db(self): @@ -509,13 +509,13 @@ class SoledadSecrets(object): # fill doc with encrypted secrets doc.content = self._export_recovery_document() # upload secrets to server - events.signal(events.SOLEDAD_UPLOADING_KEYS, self._uuid) + events.emit(events.SOLEDAD_UPLOADING_KEYS, self._uuid) db = self._shared_db if not db: 
logger.warning('No shared db found') return db.put_doc(doc) - events.signal(events.SOLEDAD_DONE_UPLOADING_KEYS, self._uuid) + events.emit(events.SOLEDAD_DONE_UPLOADING_KEYS, self._uuid) # # Management of secret for symmetric encryption. @@ -635,13 +635,13 @@ class SoledadSecrets(object): :return: The id of the generated secret. :rtype: str """ - events.signal(events.SOLEDAD_CREATING_KEYS, self._uuid) + events.emit(events.SOLEDAD_CREATING_KEYS, self._uuid) # generate random secret secret = os.urandom(self.GEN_SECRET_LENGTH) secret_id = sha256(secret).hexdigest() self._secrets[secret_id] = secret self._store_secrets() - events.signal(events.SOLEDAD_DONE_CREATING_KEYS, self._uuid) + events.emit(events.SOLEDAD_DONE_CREATING_KEYS, self._uuid) return secret_id def _store_secrets(self): -- cgit v1.2.3