From 4fde2537564ee298b967184bfdbe48cb963a8bd6 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 16 Sep 2016 04:12:10 -0300 Subject: [feature] revert sync download into streaming (server) Instead of concurrent download, we are going to download a stream. This commit modifies server to support it. --- server/src/leap/soledad/server/sync.py | 45 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 3f5c4aba..a0324a27 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -21,6 +21,7 @@ from leap.soledad.common.l2db import sync, Document from leap.soledad.common.l2db.remote import http_app from leap.soledad.server.caching import get_cache_for from leap.soledad.server.state import ServerSyncState +from itertools import izip MAX_REQUEST_SIZE = 200 # in Mb @@ -78,38 +79,38 @@ class SyncExchange(sync.SyncExchange): self._trace('after whats_changed') seen_ids = self._sync_state.seen_ids() # changed docs that weren't superseded by or converged with - changes_to_return = [ + self.changes_to_return = [ (doc_id, gen, trans_id) for (doc_id, gen, trans_id) in changes # there was a subsequent update if doc_id not in seen_ids or seen_ids.get(doc_id) < gen] self._sync_state.put_changes_to_return( - new_gen, new_trans_id, changes_to_return) - number_of_changes = len(changes_to_return) - # query server for stored changes - _, _, next_change_to_return = \ - self._sync_state.next_change_to_return(received) + new_gen, new_trans_id, self.changes_to_return) + number_of_changes = len(self.changes_to_return) self.new_gen = new_gen self.new_trans_id = new_trans_id - # and append one change - self.change_to_return = next_change_to_return return self.new_gen, number_of_changes - def return_one_doc(self, return_doc_cb): - """ - Return one changed document and its last change generation to the - source syncing replica by 
invoking the callback return_doc_cb. + def return_docs(self, return_doc_cb): + """Return the changed documents and their last change generation + repeatedly invoking the callback return_doc_cb. - This is called once for each document to be transferred from target to - source. + The final step of a sync exchange. - :param return_doc_cb: is a callback used to return the documents with - their last change generation to the target - replica. - :type return_doc_cb: callable(doc, gen, trans_id) + :param: return_doc_cb(doc, gen, trans_id): is a callback + used to return the documents with their last change generation + to the target replica. + :return: None """ - if self.change_to_return is not None: - changed_doc_id, gen, trans_id = self.change_to_return - doc = self._db.get_doc(changed_doc_id, include_deleted=True) + changes_to_return = self.changes_to_return + # return docs, including conflicts + changed_doc_ids = [doc_id for doc_id, _, _ in changes_to_return] + docs = self._db.get_docs( + changed_doc_ids, check_for_conflicts=False, include_deleted=True) + + docs_by_gen = izip( + docs, (gen for _, gen, _ in changes_to_return), + (trans_id for _, _, trans_id in changes_to_return)) + for doc, gen, trans_id in docs_by_gen: return_doc_cb(doc, gen, trans_id) def batched_insert_from_source(self, entries, sync_id): @@ -264,7 +265,7 @@ class SyncResource(http_app.SyncResource): if self.replica_uid is not None: header['replica_uid'] = self.replica_uid self.responder.stream_entry(header) - self.sync_exch.return_one_doc(send_doc) + self.sync_exch.return_docs(send_doc) self.responder.end_stream() self.responder.finish_response() -- cgit v1.2.3 From 81f97ec532a13dc57bf23a44dab3d44d12cc2ba4 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 16 Sep 2016 04:13:26 -0300 Subject: [feature] get attachments as generator runs Instead of getting the attachments as the generator runs, get_docs will now get as needed. 
Also, deepcopy solves a memory issue where we were feeding the couchdb lib view with blobs while modifying it unintentionally. --- common/src/leap/soledad/common/couch/__init__.py | 32 ++++++++++++++++-------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 0f4102db..d751747d 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -20,6 +20,7 @@ import json +import copy import re import uuid import binascii @@ -337,14 +338,22 @@ class CouchDatabase(object): in matching doc_ids order. :rtype: iterable """ - params = {'include_docs': 'true', 'attachments': 'true'} + params = {'include_docs': 'true', 'attachments': 'false'} if doc_ids is not None: params['keys'] = doc_ids view = self._database.view("_all_docs", **params) for row in view.rows: - result = row['doc'] + result = copy.deepcopy(row['doc']) + attachment_file_names = result['_attachments'].keys() + result['_attachments'] = {} + for file_name in attachment_file_names: + result['_attachments'][file_name] = { + 'data': json.load( + self._database.get_attachment(result, file_name)) + } doc = self.__parse_doc_from_couch( - result, result['_id'], check_for_conflicts=check_for_conflicts) + result, result['_id'], + check_for_conflicts=check_for_conflicts, decode=False) # filter out non-u1db or deleted documents if not doc or (not include_deleted and doc.is_tombstone()): continue @@ -408,7 +417,7 @@ class CouchDatabase(object): self.batch_docs.clear() return rev - def __parse_doc_from_couch(self, result, doc_id, + def __parse_doc_from_couch(self, result, doc_id, decode=True, check_for_conflicts=False): # restrict to u1db documents if 'u1db_rev' not in result: @@ -418,19 +427,22 @@ class CouchDatabase(object): if '_attachments' not in result \ or 'u1db_content' not in result['_attachments']: doc.make_tombstone() - else: + elif decode: 
doc.content = json.loads( binascii.a2b_base64( result['_attachments']['u1db_content']['data'])) + else: + doc.content = result['_attachments']['u1db_content']['data'] # determine if there are conflicts if check_for_conflicts \ and '_attachments' in result \ and 'u1db_conflicts' in result['_attachments']: - doc.set_conflicts( - self._build_conflicts( - doc.doc_id, - json.loads(binascii.a2b_base64( - result['_attachments']['u1db_conflicts']['data'])))) + if decode: + conflicts = json.loads(binascii.a2b_base64( + result['_attachments']['u1db_conflicts']['data'])) + else: + conflicts = result['_attachments']['u1db_conflicts']['data'] + doc.set_conflicts(self._build_conflicts(doc.doc_id, conflicts)) # store couch revision doc.couch_rev = result['_rev'] return doc -- cgit v1.2.3 From ffe15f154541b6f929c569caf07560d117ad5efa Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 11 Aug 2016 11:34:47 -0300 Subject: [feature] use transactions on sync We were using 1 transaction per doc, which is bad. Reference: http://stackoverflow.com/questions/1711631/improve-insert-per-second-performance-of-sqlite Code now uses 1 transaction for the whole sync. 
--- client/src/leap/soledad/client/sqlcipher.py | 3 +++ client/src/leap/soledad/client/sync.py | 1 + .../src/leap/soledad/common/l2db/backends/sqlite_backend.py | 11 +++++------ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 3921c323..b198607d 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -458,6 +458,9 @@ class SQLCipherU1DBSync(SQLCipherDatabase): if DO_STATS: self.sync_phase = None + def commit(self): + self._db_handle.commit() + @property def _replica_uid(self): return str(self.__replica_uid) diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 7ed5f693..8303f65d 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -176,6 +176,7 @@ class SoledadSynchronizer(Synchronizer): defer_decryption=defer_decryption) logger.debug("target gen after sync: %d" % new_gen) logger.debug("target trans_id after sync: %s" % new_trans_id) + self.source.commit() # sync worked, commit info = { "target_replica_uid": self.target_replica_uid, "new_gen": new_gen, diff --git a/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py b/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py index d73c0d16..295f3132 100644 --- a/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py +++ b/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py @@ -505,12 +505,11 @@ class SQLiteDatabase(CommonBackend): def _put_doc_if_newer(self, doc, save_conflict, replica_uid=None, replica_gen=None, replica_trans_id=None): - with self._db_handle: - return super(SQLiteDatabase, self)._put_doc_if_newer( - doc, - save_conflict=save_conflict, - replica_uid=replica_uid, replica_gen=replica_gen, - replica_trans_id=replica_trans_id) + return super(SQLiteDatabase, self)._put_doc_if_newer( + doc, + 
save_conflict=save_conflict, + replica_uid=replica_uid, replica_gen=replica_gen, + replica_trans_id=replica_trans_id) def _add_conflict(self, c, doc_id, my_doc_rev, my_content): c.execute("INSERT INTO conflicts VALUES (?, ?, ?)", -- cgit v1.2.3 From 1e3de25ce10156655bcb1bc879f5340baa889ead Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 16 Sep 2016 04:37:20 -0300 Subject: [bug] disable decpool Temporary fix for server streaming --- .../src/leap/soledad/client/http_target/fetch.py | 128 ++++++++------------- testing/tests/perf/test_sync.py | 3 +- 2 files changed, 51 insertions(+), 80 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 184c5883..0fb5040f 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -51,6 +51,7 @@ class HTTPDocFetcher(object): @defer.inlineCallbacks def _receive_docs(self, last_known_generation, last_known_trans_id, ensure_callback, sync_id, defer_decryption): + defer_decryption = False self._queue_for_decrypt = defer_decryption \ and self._sync_db is not None @@ -73,42 +74,17 @@ class HTTPDocFetcher(object): # to know the total number of documents to be received, and this # information comes as metadata to each request. - doc = yield self._receive_one_doc( + docs = yield self._fetch_all( last_known_generation, last_known_trans_id, sync_id, 0) self._received_docs = 0 - number_of_changes, ngen, ntrans = self._insert_received_doc(doc, 1, 1) + number_of_changes, ngen, ntrans =\ + self._insert_received_docs(docs, 1, 1) if ngen: new_generation = ngen new_transaction_id = ntrans - # --------------------------------------------------------------------- - # maybe receive the rest of the documents - # --------------------------------------------------------------------- - - # launch many asynchronous fetches and inserts of received documents - # in the temporary sync db. 
Will wait for all results before - # continuing. - - received = 1 - deferreds = [] - while received < number_of_changes: - d = self._receive_one_doc( - last_known_generation, - last_known_trans_id, sync_id, received) - d.addCallback( - self._insert_received_doc, - received + 1, # the index of the current received doc - number_of_changes) - deferreds.append(d) - received += 1 - results = yield defer.gatherResults(deferreds) - - # get generation and transaction id of target after insertions - if deferreds: - _, new_generation, new_transaction_id = results.pop() - # --------------------------------------------------------------------- # wait for async decryption to finish # --------------------------------------------------------------------- @@ -119,8 +95,8 @@ class HTTPDocFetcher(object): defer.returnValue([new_generation, new_transaction_id]) - def _receive_one_doc(self, last_known_generation, - last_known_trans_id, sync_id, received): + def _fetch_all(self, last_known_generation, + last_known_trans_id, sync_id, received): # add remote replica metadata to the request body = RequestBody( last_known_generation=last_known_generation, @@ -136,7 +112,7 @@ class HTTPDocFetcher(object): body=str(body), content_type='application/x-soledad-sync-get') - def _insert_received_doc(self, response, idx, total): + def _insert_received_docs(self, response, idx, total): """ Insert a received document into the local replica. @@ -147,47 +123,47 @@ class HTTPDocFetcher(object): :param total: The total number of operations. 
:type total: int """ - new_generation, new_transaction_id, number_of_changes, doc_id, \ - rev, content, gen, trans_id = \ + new_generation, new_transaction_id, number_of_changes, entries =\ self._parse_received_doc_response(response) if self._sync_decr_pool and not self._sync_decr_pool.running: self._sync_decr_pool.start(number_of_changes) - if doc_id is not None: - # decrypt incoming document and insert into local database - # ------------------------------------------------------------- - # symmetric decryption of document's contents - # ------------------------------------------------------------- - # If arriving content was symmetrically encrypted, we decrypt it. - # We do it inline if defer_decryption flag is False or no sync_db - # was defined, otherwise we defer it writing it to the received - # docs table. - doc = SoledadDocument(doc_id, rev, content) - if is_symmetrically_encrypted(doc): - if self._queue_for_decrypt: - self._sync_decr_pool.insert_encrypted_received_doc( - doc.doc_id, doc.rev, doc.content, gen, trans_id, - idx) - else: - # defer_decryption is False or no-sync-db fallback - doc.set_json(self._crypto.decrypt_doc(doc)) - self._insert_doc_cb(doc, gen, trans_id) - else: - # not symmetrically encrypted doc, insert it directly - # or save it in the decrypted stage. - if self._queue_for_decrypt: - self._sync_decr_pool.insert_received_doc( - doc.doc_id, doc.rev, doc.content, gen, trans_id, - idx) + for doc_id, rev, content, gen, trans_id in entries: + if doc_id is not None: + # decrypt incoming document and insert into local database + # --------------------------------------------------------- + # symmetric decryption of document's contents + # --------------------------------------------------------- + # If arriving content was symmetrically encrypted, we decrypt + # it. We do it inline if defer_decryption flag is False or no + # sync_db was defined, otherwise we defer it writing it to the + # received docs table. 
+ doc = SoledadDocument(doc_id, rev, content) + if is_symmetrically_encrypted(doc): + if self._queue_for_decrypt: + self._sync_decr_pool.insert_encrypted_received_doc( + doc.doc_id, doc.rev, doc.content, gen, trans_id, + idx) + else: + # defer_decryption is False or no-sync-db fallback + doc.set_json(self._crypto.decrypt_doc(doc)) + self._insert_doc_cb(doc, gen, trans_id) else: - self._insert_doc_cb(doc, gen, trans_id) - # ------------------------------------------------------------- - # end of symmetric decryption - # ------------------------------------------------------------- - self._received_docs += 1 - user_data = {'uuid': self.uuid, 'userid': self.userid} - _emit_receive_status(user_data, self._received_docs, total) + # not symmetrically encrypted doc, insert it directly + # or save it in the decrypted stage. + if self._queue_for_decrypt: + self._sync_decr_pool.insert_received_doc( + doc.doc_id, doc.rev, doc.content, gen, trans_id, + idx) + else: + self._insert_doc_cb(doc, gen, trans_id) + # ------------------------------------------------------------- + # end of symmetric decryption + # ------------------------------------------------------------- + self._received_docs += 1 + user_data = {'uuid': self.uuid, 'userid': self.userid} + _emit_receive_status(user_data, self._received_docs, total) return number_of_changes, new_generation, new_transaction_id def _parse_received_doc_response(self, response): @@ -223,23 +199,17 @@ class HTTPDocFetcher(object): if self._ensure_callback and 'replica_uid' in metadata: self._ensure_callback(metadata['replica_uid']) # parse incoming document info - doc_id = None - rev = None - content = None - gen = None - trans_id = None - if number_of_changes > 0: + entries = [] + for data in data[1:]: try: - entry = json.loads(data[1]) - doc_id = entry['id'] - rev = entry['rev'] - content = entry['content'] - gen = entry['gen'] - trans_id = entry['trans_id'] + line, comma = utils.check_and_strip_comma(data) + entry = json.loads(line) + 
entries.append((entry['id'], entry['rev'], entry['content'], + entry['gen'], entry['trans_id'])) except (IndexError, KeyError): raise errors.BrokenSyncStream return new_generation, new_transaction_id, number_of_changes, \ - doc_id, rev, content, gen, trans_id + entries def _setup_sync_decr_pool(self): """ diff --git a/testing/tests/perf/test_sync.py b/testing/tests/perf/test_sync.py index 0b48a0b9..4d42395b 100644 --- a/testing/tests/perf/test_sync.py +++ b/testing/tests/perf/test_sync.py @@ -23,7 +23,8 @@ def create_upload(uploads, size): def setup(): return load_up(client, uploads, payload(size)) - yield txbenchmark_with_setup(setup, client.sync) + yield txbenchmark_with_setup(setup, client.sync, + defer_decryption=False) return test -- cgit v1.2.3 From 5d056170357acd0945899d7f0c40f530cbe816e0 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 16 Sep 2016 19:33:06 -0300 Subject: [feature] server download stream from file object couchdb lib returns a file object representing the attachment. This commit dumps the read() call into the wsgi write() call. Doc representation uses 2 lines also, separating metadata from content. 
--- common/src/leap/soledad/common/couch/__init__.py | 5 ++--- common/src/leap/soledad/common/l2db/remote/http_app.py | 4 +++- server/src/leap/soledad/server/sync.py | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index d751747d..1a95e590 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -348,8 +348,7 @@ class CouchDatabase(object): result['_attachments'] = {} for file_name in attachment_file_names: result['_attachments'][file_name] = { - 'data': json.load( - self._database.get_attachment(result, file_name)) + 'data': self._database.get_attachment(result, file_name) } doc = self.__parse_doc_from_couch( result, result['_id'], @@ -432,7 +431,7 @@ class CouchDatabase(object): binascii.a2b_base64( result['_attachments']['u1db_content']['data'])) else: - doc.content = result['_attachments']['u1db_content']['data'] + doc._json = result['_attachments']['u1db_content']['data'] # determine if there are conflicts if check_for_conflicts \ and '_attachments' in result \ diff --git a/common/src/leap/soledad/common/l2db/remote/http_app.py b/common/src/leap/soledad/common/l2db/remote/http_app.py index 5cf6645e..a9680890 100644 --- a/common/src/leap/soledad/common/l2db/remote/http_app.py +++ b/common/src/leap/soledad/common/l2db/remote/http_app.py @@ -501,7 +501,9 @@ class HTTPResponder(object): self._write('\r\n') else: self._write(',\r\n') - self._write(json.dumps(entry)) + if type(entry) == dict: + entry = json.dumps(entry) + self._write(entry) def end_stream(self): "end stream (array)." 
diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index a0324a27..253139a9 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -248,9 +248,10 @@ class SyncResource(http_app.SyncResource): """ def send_doc(doc, gen, trans_id): - entry = dict(id=doc.doc_id, rev=doc.rev, content=doc.get_json(), + entry = dict(id=doc.doc_id, rev=doc.rev, gen=gen, trans_id=trans_id) self.responder.stream_entry(entry) + self.responder.stream_entry(doc.get_json().read()) new_gen, number_of_changes = \ self.sync_exch.find_changes_to_return(received) -- cgit v1.2.3 From ea3eea052069d5cc933937fd077d94569e4336a4 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 16 Sep 2016 19:34:37 -0300 Subject: [feature] simple adaptation to let the client run Make the client parse a 2-line doc on sync download stream. --- client/src/leap/soledad/client/http_target/fetch.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 0fb5040f..24d73025 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -200,11 +200,12 @@ class HTTPDocFetcher(object): self._ensure_callback(metadata['replica_uid']) # parse incoming document info entries = [] - for data in data[1:]: + for index in xrange(1, len(data[1:]), 2): try: - line, comma = utils.check_and_strip_comma(data) + line, comma = utils.check_and_strip_comma(data[index]) + content, _ = utils.check_and_strip_comma(data[index + 1]) entry = json.loads(line) - entries.append((entry['id'], entry['rev'], entry['content'], + entries.append((entry['id'], entry['rev'], content, entry['gen'], entry['trans_id'])) except (IndexError, KeyError): raise errors.BrokenSyncStream -- cgit v1.2.3 From 01625647c291a90b72a5c5caa9793fbf0d98a8f7 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: 
Sat, 17 Sep 2016 04:22:37 -0300 Subject: [test] commit isn't part of the backend api Check if the backend provides a commit method before calling or we will break the tests with InMemoryDatabase --- client/src/leap/soledad/client/sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 8303f65d..4cbd9f2a 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -176,7 +176,8 @@ class SoledadSynchronizer(Synchronizer): defer_decryption=defer_decryption) logger.debug("target gen after sync: %d" % new_gen) logger.debug("target trans_id after sync: %s" % new_trans_id) - self.source.commit() # sync worked, commit + if hasattr(self.source, 'commit'): + self.source.commit() # sync worked, commit info = { "target_replica_uid": self.target_replica_uid, "new_gen": new_gen, -- cgit v1.2.3 From 2f7dc19efc8b89820cb44ed8b0b9cb225555d446 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 17 Sep 2016 04:25:00 -0300 Subject: [bug] use an empty string to represent tombstones If a doc doesn't have a content it means it was deleted. Sync stream was unable to represent this state. 
--- client/src/leap/soledad/client/http_target/fetch.py | 2 +- server/src/leap/soledad/server/sync.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 24d73025..26606e9b 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -205,7 +205,7 @@ class HTTPDocFetcher(object): line, comma = utils.check_and_strip_comma(data[index]) content, _ = utils.check_and_strip_comma(data[index + 1]) entry = json.loads(line) - entries.append((entry['id'], entry['rev'], content, + entries.append((entry['id'], entry['rev'], content or None, entry['gen'], entry['trans_id'])) except (IndexError, KeyError): raise errors.BrokenSyncStream diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 253139a9..77d4b840 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -251,7 +251,8 @@ class SyncResource(http_app.SyncResource): entry = dict(id=doc.doc_id, rev=doc.rev, gen=gen, trans_id=trans_id) self.responder.stream_entry(entry) - self.responder.stream_entry(doc.get_json().read()) + content = doc.get_json() + self.responder.stream_entry(content.read() if content else '') new_gen, number_of_changes = \ self.sync_exch.find_changes_to_return(received) -- cgit v1.2.3 From b774387754ecae77d3ae00de2a9e072cef2eb2e7 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 17 Sep 2016 04:26:08 -0300 Subject: [feature] make reading attachments optional Will put a file object on doc json string if read_content is False, otherwise it will fetch and fill as usual. This is useful for improving server throughput on sync download stream by receiving a bulk-get without attachments and consuming the file-objects as they come. 
--- common/src/leap/soledad/common/backend.py | 4 +-- common/src/leap/soledad/common/couch/__init__.py | 42 ++++++++---------------- server/src/leap/soledad/server/sync.py | 6 ++-- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/common/src/leap/soledad/common/backend.py b/common/src/leap/soledad/common/backend.py index f4f48f86..5c995d38 100644 --- a/common/src/leap/soledad/common/backend.py +++ b/common/src/leap/soledad/common/backend.py @@ -570,7 +570,7 @@ class SoledadBackend(CommonBackend): self._put_doc(cur_doc, doc) def get_docs(self, doc_ids, check_for_conflicts=True, - include_deleted=False): + include_deleted=False, read_content=True): """ Get the JSON content for many documents. @@ -588,7 +588,7 @@ class SoledadBackend(CommonBackend): :rtype: iterable """ return self._database.get_docs(doc_ids, check_for_conflicts, - include_deleted) + include_deleted, read_content) def _prune_conflicts(self, doc, doc_vcr): """ diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 1a95e590..f19b0acb 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -296,31 +296,14 @@ class CouchDatabase(object): generation, _ = self.get_generation_info() results = list( - self._get_docs(None, True, include_deleted)) + self.get_docs(None, True, include_deleted)) return (generation, results) def get_docs(self, doc_ids, check_for_conflicts=True, - include_deleted=False): + include_deleted=False, read_content=True): """ Get the JSON content for many documents. - :param doc_ids: A list of document identifiers or None for all. - :type doc_ids: list - :param check_for_conflicts: If set to False, then the conflict check - will be skipped, and 'None' will be - returned instead of True/False. - :type check_for_conflicts: bool - :param include_deleted: If set to True, deleted documents will be - returned with empty content. 
Otherwise deleted - documents will not be included in the results. - :return: iterable giving the Document object for each document id - in matching doc_ids order. - :rtype: iterable - """ - return self._get_docs(doc_ids, check_for_conflicts, include_deleted) - - def _get_docs(self, doc_ids, check_for_conflicts, include_deleted): - """ Use couch's `_all_docs` view to get the documents indicated in `doc_ids`, @@ -344,12 +327,12 @@ class CouchDatabase(object): view = self._database.view("_all_docs", **params) for row in view.rows: result = copy.deepcopy(row['doc']) - attachment_file_names = result['_attachments'].keys() - result['_attachments'] = {} - for file_name in attachment_file_names: - result['_attachments'][file_name] = { - 'data': self._database.get_attachment(result, file_name) - } + for file_name in result.get('_attachments', {}).keys(): + data = self._database.get_attachment(result, file_name) + if data: + if read_content: + data = data.read() + result['_attachments'][file_name] = {'data': data} doc = self.__parse_doc_from_couch( result, result['_id'], check_for_conflicts=check_for_conflicts, decode=False) @@ -416,8 +399,8 @@ class CouchDatabase(object): self.batch_docs.clear() return rev - def __parse_doc_from_couch(self, result, doc_id, decode=True, - check_for_conflicts=False): + def __parse_doc_from_couch(self, result, doc_id, + check_for_conflicts=False, decode=True): # restrict to u1db documents if 'u1db_rev' not in result: return None @@ -437,10 +420,11 @@ class CouchDatabase(object): and '_attachments' in result \ and 'u1db_conflicts' in result['_attachments']: if decode: - conflicts = json.loads(binascii.a2b_base64( - result['_attachments']['u1db_conflicts']['data'])) + conflicts = binascii.a2b_base64( + result['_attachments']['u1db_conflicts']['data']) else: conflicts = result['_attachments']['u1db_conflicts']['data'] + conflicts = json.loads(conflicts) doc.set_conflicts(self._build_conflicts(doc.doc_id, conflicts)) # store couch revision 
doc.couch_rev = result['_rev'] diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 77d4b840..6f2ffe9f 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -102,10 +102,12 @@ class SyncExchange(sync.SyncExchange): :return: None """ changes_to_return = self.changes_to_return - # return docs, including conflicts + # return docs, including conflicts. + # content as a file-object (will be read when writing) changed_doc_ids = [doc_id for doc_id, _, _ in changes_to_return] docs = self._db.get_docs( - changed_doc_ids, check_for_conflicts=False, include_deleted=True) + changed_doc_ids, check_for_conflicts=False, + include_deleted=True, read_content=False) docs_by_gen = izip( docs, (gen for _, gen, _ in changes_to_return), -- cgit v1.2.3 From 35563cb74fcfd7f6ae969ed3af3a74d3c18cbf5b Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 18 Sep 2016 00:26:45 -0300 Subject: [refactor] remove decpool It's not being used --- client/src/leap/soledad/client/api.py | 31 +- client/src/leap/soledad/client/encdecpool.py | 380 +-------------------- client/src/leap/soledad/client/http_target/api.py | 12 +- .../src/leap/soledad/client/http_target/fetch.py | 66 +--- client/src/leap/soledad/client/interfaces.py | 7 +- client/src/leap/soledad/client/sqlcipher.py | 11 +- client/src/leap/soledad/client/sync.py | 18 +- testing/tests/perf/test_encdecpool.py | 41 --- testing/tests/perf/test_sync.py | 3 +- testing/tests/sync/test_encdecpool.py | 258 -------------- testing/tests/sync/test_sync.py | 2 +- testing/tests/sync/test_sync_deferred.py | 15 +- testing/tests/sync/test_sync_mutex.py | 4 +- testing/tests/sync/test_sync_target.py | 27 +- 14 files changed, 36 insertions(+), 839 deletions(-) diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 6870d5ba..cbcae4f7 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ 
-169,7 +169,7 @@ class Soledad(object): :type auth_token: str :param defer_encryption: - Whether to defer encryption/decryption of documents, or do it + Whether to defer encryption of documents, or do it inline while syncing. :type defer_encryption: bool @@ -299,9 +299,9 @@ class Soledad(object): ) self._sqlcipher_opts = opts - # the sync_db is used both for deferred encryption and decryption, so + # the sync_db is used both for deferred encryption, so # we want to initialize it anyway to allow for all combinations of - # deferred encryption and decryption configurations. + # deferred encryption configurations. self._initialize_sync_db(opts) self._dbpool = adbapi.getConnectionPool( opts, sync_enc_pool=self._sync_enc_pool) @@ -700,37 +700,26 @@ class Soledad(object): if syncable and not self._dbsyncer: self._init_u1db_syncer() - def sync(self, defer_decryption=True): + def sync(self): """ Synchronize documents with the server replica. This method uses a lock to prevent multiple concurrent sync processes over the same local db file. - :param defer_decryption: - Whether to defer decryption of documents, or do it inline while - syncing. - :type defer_decryption: bool - :return: A deferred lock that will run the actual sync process when the lock is acquired, and which will fire with with the local generation before the synchronization was performed. :rtype: twisted.internet.defer.Deferred """ d = self.sync_lock.run( - self._sync, - defer_decryption) + self._sync) return d - def _sync(self, defer_decryption): + def _sync(self): """ Synchronize documents with the server replica. - :param defer_decryption: - Whether to defer decryption of documents, or do it inline while - syncing. - :type defer_decryption: bool - :return: A deferred whose callback will be invoked with the local generation before the synchronization was performed. 
:rtype: twisted.internet.defer.Deferred @@ -740,8 +729,7 @@ class Soledad(object): return d = self._dbsyncer.sync( sync_url, - creds=self._creds, - defer_decryption=defer_decryption) + creds=self._creds) def _sync_callback(local_gen): self._last_received_docs = docs = self._dbsyncer.received_docs @@ -874,12 +862,9 @@ class Soledad(object): """ maybe_create = "CREATE TABLE IF NOT EXISTS %s (%s)" encr = encdecpool.SyncEncrypterPool - decr = encdecpool.SyncDecrypterPool sql_encr_table_query = (maybe_create % ( encr.TABLE_NAME, encr.FIELD_NAMES)) - sql_decr_table_query = (maybe_create % ( - decr.TABLE_NAME, decr.FIELD_NAMES)) - return (sql_encr_table_query, sql_decr_table_query) + return (sql_encr_table_query,) # # ISecretsStorage diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index 056b012f..8eaefa77 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -22,14 +22,9 @@ during synchronization. """ -import json -from uuid import uuid4 - -from twisted.internet.task import LoopingCall from twisted.internet import threads from twisted.internet import defer -from leap.soledad.common.document import SoledadDocument from leap.soledad.common import soledad_assert from leap.soledad.common.log import getLogger @@ -41,7 +36,7 @@ logger = getLogger(__name__) # -# Encrypt/decrypt pools of workers +# Encrypt pool of workers # class SyncEncryptDecryptPool(object): @@ -282,376 +277,3 @@ def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret, """ decrypted_content = decrypt_doc_dict(content, doc_id, doc_rev, key, secret) return doc_id, doc_rev, decrypted_content, gen, trans_id, idx - - -class SyncDecrypterPool(SyncEncryptDecryptPool): - """ - Pool of workers that spawn subprocesses to execute the symmetric decryption - of documents that were received. - - The decryption of the received documents is done in two steps: - - 1. 
Encrypted documents are stored in the sync db by the actual soledad - sync loop. - 2. The soledad sync loop tells us how many documents we should expect - to process. - 3. We start a decrypt-and-process loop: - - a. Encrypted documents are fetched. - b. Encrypted documents are decrypted. - c. The longest possible list of decrypted documents are inserted - in the soledad db (this depends on which documents have already - arrived and which documents have already been decrypte, because - the order of insertion in the local soledad db matters). - d. Processed documents are deleted from the database. - - 4. When we have processed as many documents as we should, the loop - finishes. - """ - TABLE_NAME = "docs_received" - FIELD_NAMES = "doc_id PRIMARY KEY, rev, content, gen, " \ - "trans_id, encrypted, idx, sync_id" - - """ - Period of recurrence of the periodic decrypting task, in seconds. - """ - DECRYPT_LOOP_PERIOD = 0.5 - - def __init__(self, *args, **kwargs): - """ - Initialize the decrypter pool, and setup a dict for putting the - results of the decrypted docs until they are picked by the insert - routine that gets them in order. - - :param insert_doc_cb: A callback for inserting received documents from - target. If not overriden, this will call u1db - insert_doc_from_target in synchronizer, which - implements the TAKE OTHER semantics. - :type insert_doc_cb: function - :param source_replica_uid: The source replica uid, used to find the - correct callback for inserting documents. 
- :type source_replica_uid: str - """ - self._insert_doc_cb = kwargs.pop("insert_doc_cb") - self.source_replica_uid = kwargs.pop("source_replica_uid") - - SyncEncryptDecryptPool.__init__(self, *args, **kwargs) - - self._docs_to_process = None - self._processed_docs = 0 - self._last_inserted_idx = 0 - - self._loop = LoopingCall(self._decrypt_and_recurse) - - def _start_pool(self, period): - self._loop.start(period) - - def start(self, docs_to_process): - """ - Set the number of documents we expect to process. - - This should be called by the during the sync exchange process as soon - as we know how many documents are arriving from the server. - - :param docs_to_process: The number of documents to process. - :type docs_to_process: int - """ - SyncEncryptDecryptPool.start(self) - self._decrypted_docs_indexes = set() - self._sync_id = uuid4().hex - self._docs_to_process = docs_to_process - self._deferred = defer.Deferred() - d = self._init_db() - d.addCallback(lambda _: self._start_pool(self.DECRYPT_LOOP_PERIOD)) - return d - - def stop(self): - if self._loop.running: - self._loop.stop() - self._finish() - SyncEncryptDecryptPool.stop(self) - - def _init_db(self): - """ - Ensure sync_id column is present then - Empty the received docs table of the sync database. - - :return: A deferred that will fire when the operation in the database - has finished. - :rtype: twisted.internet.defer.Deferred - """ - ensure_sync_id_column = ("ALTER TABLE %s ADD COLUMN sync_id" % - self.TABLE_NAME) - d = self._runQuery(ensure_sync_id_column) - - def empty_received_docs(_): - query = "DELETE FROM %s WHERE sync_id <> ?" 
% (self.TABLE_NAME,) - return self._runOperation(query, (self._sync_id,)) - - d.addCallbacks(empty_received_docs, empty_received_docs) - return d - - def _errback(self, failure): - logger.error(failure) - self._deferred.errback(failure) - self._processed_docs = 0 - self._last_inserted_idx = 0 - - @property - def deferred(self): - """ - Deferred that will be fired when the decryption loop has finished - processing all the documents. - """ - return self._deferred - - def insert_encrypted_received_doc( - self, doc_id, doc_rev, content, gen, trans_id, idx): - """ - Decrypt and insert a received document into local staging area to be - processed later on. - - :param doc_id: The document ID. - :type doc_id: str - :param doc_rev: The document Revision - :param doc_rev: str - :param content: The content of the document - :type content: dict - :param gen: The document Generation - :type gen: int - :param trans_id: Transaction ID - :type trans_id: str - :param idx: The index of this document in the current sync process. - :type idx: int - - :return: A deferred that will fire after the decrypted document has - been inserted in the sync db. - :rtype: twisted.internet.defer.Deferred - """ - soledad_assert(self._crypto is not None, "need a crypto object") - - key = self._crypto.doc_passphrase(doc_id) - secret = self._crypto.secret - args = doc_id, doc_rev, content, gen, trans_id, key, secret, idx - # decrypt asynchronously - # TODO use dedicated threadpool / move to ampoule - d = threads.deferToThread( - decrypt_doc_task, *args) - # callback will insert it for later processing - d.addCallback(self._decrypt_doc_cb) - return d - - def insert_received_doc( - self, doc_id, doc_rev, content, gen, trans_id, idx): - """ - Insert a document that is not symmetrically encrypted. - We store it in the staging area (the decrypted_docs dictionary) to be - picked up in order as the preceding documents are decrypted. 
- - :param doc_id: The document id - :type doc_id: str - :param doc_rev: The document revision - :param doc_rev: str or dict - :param content: The content of the document - :type content: dict - :param gen: The document generation - :type gen: int - :param trans_id: The transaction id - :type trans_id: str - :param idx: The index of this document in the current sync process. - :type idx: int - - :return: A deferred that will fire when the operation in the database - has finished. - :rtype: twisted.internet.defer.Deferred - """ - if not isinstance(content, str): - content = json.dumps(content) - query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?, ?)" \ - % self.TABLE_NAME - d = self._runOperation( - query, (doc_id, doc_rev, content, gen, trans_id, 0, - idx, self._sync_id)) - d.addCallback(lambda _: self._decrypted_docs_indexes.add(idx)) - return d - - def _delete_received_docs(self, doc_ids): - """ - Delete a list of received docs after get them inserted into the db. - - :param doc_id: Document ID list. - :type doc_id: list - - :return: A deferred that will fire when the operation in the database - has finished. - :rtype: twisted.internet.defer.Deferred - """ - placeholders = ', '.join('?' for _ in doc_ids) - query = "DELETE FROM '%s' WHERE doc_id in (%s)" \ - % (self.TABLE_NAME, placeholders) - return self._runOperation(query, (doc_ids)) - - def _decrypt_doc_cb(self, result): - """ - Store the decryption result in the sync db from where it will later be - picked by _process_decrypted_docs. - - :param result: A tuple containing the document's id, revision, - content, generation, transaction id and sync index. - :type result: tuple(str, str, str, int, str, int) - - :return: A deferred that will fire after the document has been - inserted in the sync db. 
- :rtype: twisted.internet.defer.Deferred - """ - doc_id, rev, content, gen, trans_id, idx = result - logger.debug("sync decrypter pool: decrypted doc %s: %s %s %s" - % (doc_id, rev, gen, trans_id)) - return self.insert_received_doc( - doc_id, rev, content, gen, trans_id, idx) - - def _get_docs(self, encrypted=None, sequence=None): - """ - Get documents from the received docs table in the sync db. - - :param encrypted: If not None, only return documents with encrypted - field equal to given parameter. - :type encrypted: bool or None - :param order_by: The name of the field to order results. - - :return: A deferred that will fire with the results of the database - query. - :rtype: twisted.internet.defer.Deferred - """ - query = "SELECT doc_id, rev, content, gen, trans_id, encrypted, " \ - "idx FROM %s" % self.TABLE_NAME - parameters = [] - if encrypted or sequence: - query += " WHERE sync_id = ? and" - parameters += [self._sync_id] - if encrypted: - query += " encrypted = ?" - parameters += [int(encrypted)] - if sequence: - query += " idx in (" + ', '.join('?' * len(sequence)) + ")" - parameters += [int(i) for i in sequence] - query += " ORDER BY idx ASC" - return self._runQuery(query, parameters) - - @defer.inlineCallbacks - def _get_insertable_docs(self): - """ - Return a list of non-encrypted documents ready to be inserted. - - :return: A deferred that will fire with the list of insertable - documents. 
- :rtype: twisted.internet.defer.Deferred - """ - # Here, check in memory what are the insertable indexes that can - # form a sequence starting from the last inserted index - sequence = [] - insertable_docs = [] - next_index = self._last_inserted_idx + 1 - while next_index in self._decrypted_docs_indexes: - sequence.append(str(next_index)) - next_index += 1 - if len(sequence) > 900: - # 999 is the default value of SQLITE_MAX_VARIABLE_NUMBER - # if we try to query more, SQLite will refuse - # we need to find a way to improve this - # being researched in #7669 - break - # Then fetch all the ones ready for insertion. - if sequence: - insertable_docs = yield self._get_docs(encrypted=False, - sequence=sequence) - defer.returnValue(insertable_docs) - - @defer.inlineCallbacks - def _process_decrypted_docs(self): - """ - Fetch as many decrypted documents as can be taken from the expected - order and insert them in the local replica. - - :return: A deferred that will fire with the list of inserted - documents. - :rtype: twisted.internet.defer.Deferred - """ - insertable = yield self._get_insertable_docs() - processed_docs_ids = [] - for doc_fields in insertable: - method = self._insert_decrypted_local_doc - # FIXME: This is used only because SQLCipherU1DBSync is synchronous - # When adbapi is used there is no need for an external thread - # Without this the reactor can freeze and fail docs download - yield threads.deferToThread(method, *doc_fields) - processed_docs_ids.append(doc_fields[0]) - yield self._delete_received_docs(processed_docs_ids) - - def _insert_decrypted_local_doc(self, doc_id, doc_rev, content, - gen, trans_id, encrypted, idx): - """ - Insert the decrypted document into the local replica. - - Make use of the passed callback `insert_doc_cb` passed to the caller - by u1db sync. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The serialized content of the document. 
- :type content: str - :param gen: The generation corresponding to the modification of that - document. - :type gen: int - :param trans_id: The transaction id corresponding to the modification - of that document. - :type trans_id: str - """ - # could pass source_replica in params for callback chain - logger.debug("sync decrypter pool: inserting doc in local db: " - "%s:%s %s" % (doc_id, doc_rev, gen)) - - # convert deleted documents to avoid error on document creation - if content == 'null': - content = None - doc = SoledadDocument(doc_id, doc_rev, content) - gen = int(gen) - self._insert_doc_cb(doc, gen, trans_id) - - # store info about processed docs - self._last_inserted_idx = idx - self._processed_docs += 1 - - @defer.inlineCallbacks - def _decrypt_and_recurse(self): - """ - Decrypt the documents received from remote replica and insert them - into the local one. - - This method implicitelly returns a defferred (see the decorator - above). It should only be called by _launch_decrypt_and_process(). - because this way any exceptions raised here will be stored by the - errback attached to the deferred returned. - - :return: A deferred which will fire after all decrypt, process and - delete operations have been executed. 
- :rtype: twisted.internet.defer.Deferred - """ - if not self.running: - defer.returnValue(None) - processed = self._processed_docs - pending = self._docs_to_process - - if processed < pending: - yield self._process_decrypted_docs() - else: - self._finish() - - def _finish(self): - self._processed_docs = 0 - self._last_inserted_idx = 0 - self._decrypted_docs_indexes = set() - if not self._deferred.called: - self._deferred.callback(None) diff --git a/client/src/leap/soledad/client/http_target/api.py b/client/src/leap/soledad/client/http_target/api.py index 3c8e3764..c9da939c 100644 --- a/client/src/leap/soledad/client/http_target/api.py +++ b/client/src/leap/soledad/client/http_target/api.py @@ -43,8 +43,6 @@ class SyncTargetAPI(SyncTarget): def close(self): if self._sync_enc_pool: self._sync_enc_pool.stop() - if self._sync_decr_pool: - self._sync_decr_pool.stop() yield self._http.close() @property @@ -153,7 +151,7 @@ class SyncTargetAPI(SyncTarget): def sync_exchange(self, docs_by_generation, source_replica_uid, last_known_generation, last_known_trans_id, insert_doc_cb, ensure_callback=None, - defer_decryption=True, sync_id=None): + sync_id=None): """ Find out which documents the remote database does not know about, encrypt and send them. After that, receive documents from the remote @@ -185,11 +183,6 @@ class SyncTargetAPI(SyncTarget): created. :type ensure_callback: function - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. - :type defer_decryption: bool - :return: A deferred which fires with the new generation and transaction id of the target replica. 
:rtype: twisted.internet.defer.Deferred @@ -221,8 +214,7 @@ class SyncTargetAPI(SyncTarget): cur_target_gen, cur_target_trans_id = yield self._receive_docs( last_known_generation, last_known_trans_id, - ensure_callback, sync_id, - defer_decryption=defer_decryption) + ensure_callback, sync_id) # update gen and trans id info in case we just sent and did not # receive docs. diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 26606e9b..1f1bc480 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -21,7 +21,6 @@ from twisted.internet import defer from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS from leap.soledad.client.events import emit_async from leap.soledad.client.crypto import is_symmetrically_encrypted -from leap.soledad.client.encdecpool import SyncDecrypterPool from leap.soledad.client.http_target.support import RequestBody from leap.soledad.common.log import getLogger from leap.soledad.common.document import SoledadDocument @@ -50,26 +49,11 @@ class HTTPDocFetcher(object): @defer.inlineCallbacks def _receive_docs(self, last_known_generation, last_known_trans_id, - ensure_callback, sync_id, defer_decryption): - defer_decryption = False - - self._queue_for_decrypt = defer_decryption \ - and self._sync_db is not None + ensure_callback, sync_id): new_generation = last_known_generation new_transaction_id = last_known_trans_id - if self._queue_for_decrypt: - logger.debug( - "Soledad sync: will queue received docs for decrypting.") - - if defer_decryption: - self._setup_sync_decr_pool() - - # --------------------------------------------------------------------- - # maybe receive the first document - # --------------------------------------------------------------------- - # we fetch the first document before fetching the rest because we need # to know the total number of documents to be received, and this # 
information comes as metadata to each request. @@ -85,14 +69,6 @@ class HTTPDocFetcher(object): new_generation = ngen new_transaction_id = ntrans - # --------------------------------------------------------------------- - # wait for async decryption to finish - # --------------------------------------------------------------------- - - if defer_decryption: - yield self._sync_decr_pool.deferred - self._sync_decr_pool.stop() - defer.returnValue([new_generation, new_transaction_id]) def _fetch_all(self, last_known_generation, @@ -126,9 +102,6 @@ class HTTPDocFetcher(object): new_generation, new_transaction_id, number_of_changes, entries =\ self._parse_received_doc_response(response) - if self._sync_decr_pool and not self._sync_decr_pool.running: - self._sync_decr_pool.start(number_of_changes) - for doc_id, rev, content, gen, trans_id in entries: if doc_id is not None: # decrypt incoming document and insert into local database @@ -136,31 +109,10 @@ class HTTPDocFetcher(object): # symmetric decryption of document's contents # --------------------------------------------------------- # If arriving content was symmetrically encrypted, we decrypt - # it. We do it inline if defer_decryption flag is False or no - # sync_db was defined, otherwise we defer it writing it to the - # received docs table. doc = SoledadDocument(doc_id, rev, content) if is_symmetrically_encrypted(doc): - if self._queue_for_decrypt: - self._sync_decr_pool.insert_encrypted_received_doc( - doc.doc_id, doc.rev, doc.content, gen, trans_id, - idx) - else: - # defer_decryption is False or no-sync-db fallback - doc.set_json(self._crypto.decrypt_doc(doc)) - self._insert_doc_cb(doc, gen, trans_id) - else: - # not symmetrically encrypted doc, insert it directly - # or save it in the decrypted stage. 
- if self._queue_for_decrypt: - self._sync_decr_pool.insert_received_doc( - doc.doc_id, doc.rev, doc.content, gen, trans_id, - idx) - else: - self._insert_doc_cb(doc, gen, trans_id) - # ------------------------------------------------------------- - # end of symmetric decryption - # ------------------------------------------------------------- + doc.set_json(self._crypto.decrypt_doc(doc)) + self._insert_doc_cb(doc, gen, trans_id) self._received_docs += 1 user_data = {'uuid': self.uuid, 'userid': self.userid} _emit_receive_status(user_data, self._received_docs, total) @@ -212,18 +164,6 @@ class HTTPDocFetcher(object): return new_generation, new_transaction_id, number_of_changes, \ entries - def _setup_sync_decr_pool(self): - """ - Set up the SyncDecrypterPool for deferred decryption. - """ - if self._sync_decr_pool is None and self._sync_db is not None: - # initialize syncing queue decryption pool - self._sync_decr_pool = SyncDecrypterPool( - self._crypto, - self._sync_db, - insert_doc_cb=self._insert_doc_cb, - source_replica_uid=self.source_replica_uid) - def _emit_receive_status(user_data, received_docs, total): content = {'received': received_docs, 'total': total} diff --git a/client/src/leap/soledad/client/interfaces.py b/client/src/leap/soledad/client/interfaces.py index 14b34d24..82927ff4 100644 --- a/client/src/leap/soledad/client/interfaces.py +++ b/client/src/leap/soledad/client/interfaces.py @@ -321,7 +321,7 @@ class ISyncableStorage(Interface): "Property, True if the syncer is syncing.") token = Attribute("The authentication Token.") - def sync(self, defer_decryption=True): + def sync(self): """ Synchronize the local encrypted replica with a remote replica. @@ -331,11 +331,6 @@ class ISyncableStorage(Interface): :param url: the url of the target replica to sync with :type url: str - :param defer_decryption: - Whether to defer the decryption process using the intermediate - database. If False, decryption will be done inline. 
- :type defer_decryption: bool - :return: A deferred that will fire with the local generation before the synchronisation was performed. diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index b198607d..ba341bbf 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -164,7 +164,7 @@ class SQLCipherOptions(object): :param cipher_page_size: The page size. :type cipher_page_size: int :param defer_encryption: - Whether to defer encryption/decryption of documents, or do it + Whether to defer encryption of documents, or do it inline while syncing. :type defer_encryption: bool """ @@ -480,7 +480,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): raise DatabaseAccessError(str(e)) @defer.inlineCallbacks - def sync(self, url, creds=None, defer_decryption=True): + def sync(self, url, creds=None): """ Synchronize documents with remote replica exposed at url. @@ -495,10 +495,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): :param creds: optional dictionary giving credentials to authorize the operation with the server. :type creds: dict - :param defer_decryption: - Whether to defer the decryption process using the intermediate - database. If False, decryption will be done inline. 
- :type defer_decryption: bool :return: A Deferred, that will fire with the local generation (type `int`) @@ -510,8 +506,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self.sync_phase = syncer.sync_phase self.syncer = syncer self.sync_exchange_phase = syncer.sync_exchange_phase - local_gen_before_sync = yield syncer.sync( - defer_decryption=defer_decryption) + local_gen_before_sync = yield syncer.sync() self.received_docs = syncer.received_docs defer.returnValue(local_gen_before_sync) diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 4cbd9f2a..d3cfe029 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -56,23 +56,10 @@ class SoledadSynchronizer(Synchronizer): self.sync_exchange_phase = None @defer.inlineCallbacks - def sync(self, defer_decryption=True): + def sync(self): """ Synchronize documents between source and target. - Differently from u1db `Synchronizer.sync` method, this one allows to - pass a `defer_decryption` flag that will postpone the last - step in the synchronization dance, namely, the setting of the last - known generation and transaction id for a given remote replica. - - This is done to allow the ongoing parallel decryption of the incoming - docs to proceed without `InvalidGeneration` conflicts. - - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. - :type defer_decryption: bool - :return: A deferred which will fire after the sync has finished with the local generation before the synchronization was performed. 
:rtype: twisted.internet.defer.Deferred @@ -172,8 +159,7 @@ class SoledadSynchronizer(Synchronizer): new_gen, new_trans_id = yield sync_target.sync_exchange( docs_by_generation, self.source._replica_uid, target_last_known_gen, target_last_known_trans_id, - self._insert_doc_from_target, ensure_callback=ensure_callback, - defer_decryption=defer_decryption) + self._insert_doc_from_target, ensure_callback=ensure_callback) logger.debug("target gen after sync: %d" % new_gen) logger.debug("target trans_id after sync: %s" % new_trans_id) if hasattr(self.source, 'commit'): diff --git a/testing/tests/perf/test_encdecpool.py b/testing/tests/perf/test_encdecpool.py index 77091a41..8e820b9c 100644 --- a/testing/tests/perf/test_encdecpool.py +++ b/testing/tests/perf/test_encdecpool.py @@ -3,7 +3,6 @@ import json from uuid import uuid4 from twisted.internet.defer import gatherResults from leap.soledad.client.encdecpool import SyncEncrypterPool -from leap.soledad.client.encdecpool import SyncDecrypterPool from leap.soledad.common.document import SoledadDocument # FIXME: test load is low due issue #7370, higher values will get out of memory @@ -36,43 +35,3 @@ def create_encrypt(amount, size): test_encdecpool_encrypt_100_10k = create_encrypt(100, 10*1000) test_encdecpool_encrypt_100_100k = create_encrypt(100, 100*1000) test_encdecpool_encrypt_100_500k = create_encrypt(100, 500*1000) - - -def create_decrypt(amount, size): - @pytest.mark.benchmark(group="test_pool_decrypt") - @pytest.inlineCallbacks - def test(soledad_client, txbenchmark_with_setup, request, payload): - DOC_CONTENT = {'payload': payload(size)} - client = soledad_client() - - def setup(): - pool = SyncDecrypterPool( - client._crypto, - client._sync_db, - source_replica_uid=client._dbpool.replica_uid, - insert_doc_cb=lambda x, y, z: False) # ignored - pool.start(amount) - request.addfinalizer(pool.stop) - crypto = client._crypto - docs = [] - for _ in xrange(amount): - doc = SoledadDocument( - doc_id=uuid4().hex, 
rev='rev', - json=json.dumps(DOC_CONTENT)) - encrypted_content = json.loads(crypto.encrypt_doc(doc)) - docs.append((doc.doc_id, encrypted_content)) - return pool, docs - - def put_and_wait(pool, docs): - deferreds = [] # fires on completion - for idx, (doc_id, content) in enumerate(docs, 1): - deferreds.append(pool.insert_encrypted_received_doc( - doc_id, 'rev', content, idx, "trans_id", idx)) - return gatherResults(deferreds) - - yield txbenchmark_with_setup(setup, put_and_wait) - return test - -test_encdecpool_decrypt_100_10k = create_decrypt(100, 10*1000) -test_encdecpool_decrypt_100_100k = create_decrypt(100, 100*1000) -test_encdecpool_decrypt_100_500k = create_decrypt(100, 500*1000) diff --git a/testing/tests/perf/test_sync.py b/testing/tests/perf/test_sync.py index 4d42395b..0b48a0b9 100644 --- a/testing/tests/perf/test_sync.py +++ b/testing/tests/perf/test_sync.py @@ -23,8 +23,7 @@ def create_upload(uploads, size): def setup(): return load_up(client, uploads, payload(size)) - yield txbenchmark_with_setup(setup, client.sync, - defer_decryption=False) + yield txbenchmark_with_setup(setup, client.sync) return test diff --git a/testing/tests/sync/test_encdecpool.py b/testing/tests/sync/test_encdecpool.py index 4a32885e..7055a765 100644 --- a/testing/tests/sync/test_encdecpool.py +++ b/testing/tests/sync/test_encdecpool.py @@ -1,34 +1,11 @@ # -*- coding: utf-8 -*- -# test_encdecpool.py -# Copyright (C) 2015 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -""" -Tests for encryption and decryption pool. -""" import json -from random import shuffle - -from mock import MagicMock from twisted.internet.defer import inlineCallbacks from leap.soledad.client.encdecpool import SyncEncrypterPool -from leap.soledad.client.encdecpool import SyncDecrypterPool from leap.soledad.common.document import SoledadDocument from test_soledad.util import BaseSoledadTest -from twisted.internet import defer DOC_ID = "mydoc" DOC_REV = "rev" @@ -69,238 +46,3 @@ class TestSyncEncrypterPool(BaseSoledadTest): encrypted = yield self._pool.get_encrypted_doc(DOC_ID, DOC_REV) self.assertIsNotNone(encrypted) - - -class TestSyncDecrypterPool(BaseSoledadTest): - - def _insert_doc_cb(self, doc, gen, trans_id): - """ - Method used to mock the sync's return_doc_cb callback. - """ - self._inserted_docs.append((doc, gen, trans_id)) - - def _setup_pool(self, sync_db=None): - sync_db = sync_db or self._soledad._sync_db - return SyncDecrypterPool( - self._soledad._crypto, - sync_db, - source_replica_uid=self._soledad._dbpool.replica_uid, - insert_doc_cb=self._insert_doc_cb) - - def setUp(self): - BaseSoledadTest.setUp(self) - # setup the pool - self._pool = self._setup_pool() - # reset the inserted docs mock - self._inserted_docs = [] - - def tearDown(self): - if self._pool.running: - self._pool.stop() - BaseSoledadTest.tearDown(self) - - def test_insert_received_doc(self): - """ - Test that one document added to the pool is inserted using the - callback. 
- """ - self._pool.start(1) - self._pool.insert_received_doc( - DOC_ID, DOC_REV, "{}", 1, "trans_id", 1) - - def _assert_doc_was_inserted(_): - self.assertEqual( - self._inserted_docs, - [(SoledadDocument(DOC_ID, DOC_REV, "{}"), 1, u"trans_id")]) - - self._pool.deferred.addCallback(_assert_doc_was_inserted) - return self._pool.deferred - - def test_looping_control(self): - """ - Start and stop cleanly. - """ - self._pool.start(10) - self.assertTrue(self._pool.running) - self._pool.stop() - self.assertFalse(self._pool.running) - self.assertTrue(self._pool.deferred.called) - - def test_sync_id_col_is_created_if_non_existing_in_docs_recvd_table(self): - """ - Test that docs_received table is migrated, and has the sync_id column - """ - mock_run_query = MagicMock(return_value=defer.succeed(None)) - mock_sync_db = MagicMock() - mock_sync_db.runQuery = mock_run_query - pool = self._setup_pool(mock_sync_db) - d = pool.start(10) - pool.stop() - - def assert_trial_to_create_sync_id_column(_): - mock_run_query.assert_called_once_with( - "ALTER TABLE docs_received ADD COLUMN sync_id") - - d.addCallback(assert_trial_to_create_sync_id_column) - return d - - def test_insert_received_doc_many(self): - """ - Test that many documents added to the pool are inserted using the - callback. 
- """ - many = 100 - self._pool.start(many) - - # insert many docs in the pool - for i in xrange(many): - gen = idx = i + 1 - doc_id = "doc_id: %d" % idx - rev = "rev: %d" % idx - content = {'idx': idx} - trans_id = "trans_id: %d" % idx - self._pool.insert_received_doc( - doc_id, rev, content, gen, trans_id, idx) - - def _assert_doc_was_inserted(_): - self.assertEqual(many, len(self._inserted_docs)) - idx = 1 - for doc, gen, trans_id in self._inserted_docs: - expected_gen = idx - expected_doc_id = "doc_id: %d" % idx - expected_rev = "rev: %d" % idx - expected_content = json.dumps({'idx': idx}) - expected_trans_id = "trans_id: %d" % idx - - self.assertEqual(expected_doc_id, doc.doc_id) - self.assertEqual(expected_rev, doc.rev) - self.assertEqual(expected_content, json.dumps(doc.content)) - self.assertEqual(expected_gen, gen) - self.assertEqual(expected_trans_id, trans_id) - - idx += 1 - - self._pool.deferred.addCallback(_assert_doc_was_inserted) - return self._pool.deferred - - def test_insert_encrypted_received_doc(self): - """ - Test that one encrypted document added to the pool is decrypted and - inserted using the callback. - """ - crypto = self._soledad._crypto - doc = SoledadDocument( - doc_id=DOC_ID, rev=DOC_REV, json=json.dumps(DOC_CONTENT)) - encrypted_content = json.loads(crypto.encrypt_doc(doc)) - - # insert the encrypted document in the pool - self._pool.start(1) - self._pool.insert_encrypted_received_doc( - DOC_ID, DOC_REV, encrypted_content, 1, "trans_id", 1) - - def _assert_doc_was_decrypted_and_inserted(_): - self.assertEqual(1, len(self._inserted_docs)) - self.assertEqual(self._inserted_docs, [(doc, 1, u"trans_id")]) - - self._pool.deferred.addCallback( - _assert_doc_was_decrypted_and_inserted) - return self._pool.deferred - - @inlineCallbacks - def test_processing_order(self): - """ - This test ensures that processing of documents only occur if there is - a sequence in place. 
- """ - crypto = self._soledad._crypto - - docs = [] - for i in xrange(1, 10): - i = str(i) - doc = SoledadDocument( - doc_id=DOC_ID + i, rev=DOC_REV + i, - json=json.dumps(DOC_CONTENT)) - encrypted_content = json.loads(crypto.encrypt_doc(doc)) - docs.append((doc, encrypted_content)) - - # insert the encrypted document in the pool - yield self._pool.start(10) # pool is expecting to process 10 docs - self._pool._loop.stop() # we are processing manually - # first three arrives, forming a sequence - for i, (doc, encrypted_content) in enumerate(docs[:3]): - gen = idx = i + 1 - yield self._pool.insert_encrypted_received_doc( - doc.doc_id, doc.rev, encrypted_content, gen, "trans_id", idx) - - # last one arrives alone, so it can't be processed - doc, encrypted_content = docs[-1] - yield self._pool.insert_encrypted_received_doc( - doc.doc_id, doc.rev, encrypted_content, 10, "trans_id", 10) - - yield self._pool._decrypt_and_recurse() - - self.assertEqual(3, self._pool._processed_docs) - - def test_insert_encrypted_received_doc_many(self, many=100): - """ - Test that many encrypted documents added to the pool are decrypted and - inserted using the callback. 
- """ - crypto = self._soledad._crypto - self._pool.start(many) - docs = [] - - # insert many encrypted docs in the pool - for i in xrange(many): - gen = idx = i + 1 - doc_id = "doc_id: %d" % idx - rev = "rev: %d" % idx - content = {'idx': idx} - trans_id = "trans_id: %d" % idx - - doc = SoledadDocument( - doc_id=doc_id, rev=rev, json=json.dumps(content)) - - encrypted_content = json.loads(crypto.encrypt_doc(doc)) - docs.append((doc_id, rev, encrypted_content, gen, - trans_id, idx)) - shuffle(docs) - - for doc in docs: - self._pool.insert_encrypted_received_doc(*doc) - - def _assert_docs_were_decrypted_and_inserted(_): - self.assertEqual(many, len(self._inserted_docs)) - idx = 1 - for doc, gen, trans_id in self._inserted_docs: - expected_gen = idx - expected_doc_id = "doc_id: %d" % idx - expected_rev = "rev: %d" % idx - expected_content = json.dumps({'idx': idx}) - expected_trans_id = "trans_id: %d" % idx - - self.assertEqual(expected_doc_id, doc.doc_id) - self.assertEqual(expected_rev, doc.rev) - self.assertEqual(expected_content, json.dumps(doc.content)) - self.assertEqual(expected_gen, gen) - self.assertEqual(expected_trans_id, trans_id) - - idx += 1 - - self._pool.deferred.addCallback( - _assert_docs_were_decrypted_and_inserted) - return self._pool.deferred - - @inlineCallbacks - def test_pool_reuse(self): - """ - The pool is reused between syncs, this test verifies that - reusing is fine. 
- """ - for i in xrange(3): - yield self.test_insert_encrypted_received_doc_many(5) - self._inserted_docs = [] - decrypted_docs = yield self._pool._get_docs(encrypted=False) - # check that decrypted docs staging is clean - self.assertEquals([], decrypted_docs) - self._pool.stop() diff --git a/testing/tests/sync/test_sync.py b/testing/tests/sync/test_sync.py index 5290003e..a7d0a92b 100644 --- a/testing/tests/sync/test_sync.py +++ b/testing/tests/sync/test_sync.py @@ -187,7 +187,7 @@ class TestSoledadDbSync( self.addCleanup(target.close) return sync.SoledadSynchronizer( self.db, - target).sync(defer_decryption=False) + target).sync() @defer.inlineCallbacks def test_db_sync(self): diff --git a/testing/tests/sync/test_sync_deferred.py b/testing/tests/sync/test_sync_deferred.py index 4948aaf8..482b150c 100644 --- a/testing/tests/sync/test_sync_deferred.py +++ b/testing/tests/sync/test_sync_deferred.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . """ -Test Leap backend bits: sync with deferred encryption/decryption. +Test Leap backend bits: sync with deferred encryption. """ import time import os @@ -22,12 +22,8 @@ import random import string import shutil -from urlparse import urljoin - from twisted.internet import defer -from leap.soledad.common import couch - from leap.soledad.client import sync from leap.soledad.client.sqlcipher import SQLCipherOptions from leap.soledad.client.sqlcipher import SQLCipherDatabase @@ -41,9 +37,6 @@ from test_soledad.util import make_soledad_app from test_soledad.util import soledad_sync_target -# Just to make clear how this test is different... 
:) -DEFER_DECRYPTION = True - WAIT_STEP = 1 MAX_WAIT = 10 DBPASS = "pass" @@ -52,7 +45,7 @@ DBPASS = "pass" class BaseSoledadDeferredEncTest(SoledadWithCouchServerMixin): """ - Another base class for testing the deferred encryption/decryption during + Another base class for testing the deferred encryption during the syncs, using the intermediate database. """ defer_sync_encryption = True @@ -109,7 +102,7 @@ class TestSoledadDbSyncDeferredEncDecr( """ Test db.sync remote sync shortcut. - Case with deferred encryption and decryption: using the intermediate + Case with deferred encryption: using the intermediate syncdb. """ @@ -158,7 +151,7 @@ class TestSoledadDbSyncDeferredEncDecr( self.addCleanup(target.close) return sync.SoledadSynchronizer( dbsyncer, - target).sync(defer_decryption=True) + target).sync() def wait_for_sync(self): """ diff --git a/testing/tests/sync/test_sync_mutex.py b/testing/tests/sync/test_sync_mutex.py index 2626ab2a..2bcb3aec 100644 --- a/testing/tests/sync/test_sync_mutex.py +++ b/testing/tests/sync/test_sync_mutex.py @@ -47,7 +47,7 @@ from test_soledad.util import soledad_sync_target _old_sync = SoledadSynchronizer.sync -def _timed_sync(self, defer_decryption=True): +def _timed_sync(self): t = time.time() sync_id = uuid.uuid4() @@ -62,7 +62,7 @@ def _timed_sync(self, defer_decryption=True): self.source.sync_times[sync_id]['end'] = t return passthrough - d = _old_sync(self, defer_decryption=defer_decryption) + d = _old_sync(self) d.addBoth(_store_finish_time) return d diff --git a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index 964468ce..a2935539 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -231,8 +231,7 @@ class TestSoledadSyncTarget( doc = self.make_document('doc-here', 'replica:1', '{"value": "here"}') new_gen, trans_id = yield remote_target.sync_exchange( [(doc, 10, 'T-sid')], 'replica', last_known_generation=0, - last_known_trans_id=None, 
insert_doc_cb=receive_doc, - defer_decryption=False) + last_known_trans_id=None, insert_doc_cb=receive_doc) self.assertEqual(1, new_gen) self.assertGetEncryptedDoc( db, 'doc-here', 'replica:1', '{"value": "here"}', False) @@ -285,8 +284,7 @@ class TestSoledadSyncTarget( 'replica', last_known_generation=0, last_known_trans_id=None, - insert_doc_cb=receive_doc, - defer_decryption=False) + insert_doc_cb=receive_doc) self.assertGetEncryptedDoc( db, 'doc-here', 'replica:1', '{"value": "here"}', @@ -298,8 +296,7 @@ class TestSoledadSyncTarget( trigger_ids = [] new_gen, trans_id = yield remote_target.sync_exchange( [(doc2, 11, 'T-sud')], 'replica', last_known_generation=0, - last_known_trans_id=None, insert_doc_cb=receive_doc, - defer_decryption=False) + last_known_trans_id=None, insert_doc_cb=receive_doc) self.assertGetEncryptedDoc( db, 'doc-here2', 'replica:1', '{"value": "here2"}', False) @@ -331,7 +328,7 @@ class TestSoledadSyncTarget( new_gen, trans_id = yield remote_target.sync_exchange( [(doc, 10, 'T-sid')], 'replica', last_known_generation=0, last_known_trans_id=None, insert_doc_cb=receive_doc, - ensure_callback=ensure_cb, defer_decryption=False) + ensure_callback=ensure_cb) self.assertEqual(1, new_gen) db = self.db2 self.assertEqual(1, len(replica_uid_box)) @@ -446,8 +443,7 @@ class SoledadDatabaseSyncTargetTests( 'T-sid')] new_gen, trans_id = yield self.st.sync_exchange( docs_by_gen, 'replica', last_known_generation=0, - last_known_trans_id=None, insert_doc_cb=self.receive_doc, - defer_decryption=False) + last_known_trans_id=None, insert_doc_cb=self.receive_doc) self.assertGetEncryptedDoc( self.db, 'doc-id', 'replica:1', tests.simple_doc, False) self.assertTransactionLog(['doc-id'], self.db) @@ -471,8 +467,7 @@ class SoledadDatabaseSyncTargetTests( 'doc-id2', 'replica:1', tests.nested_doc), 11, 'T-2')] new_gen, trans_id = yield self.st.sync_exchange( docs_by_gen, 'replica', last_known_generation=0, - last_known_trans_id=None, insert_doc_cb=self.receive_doc, - 
defer_decryption=False) + last_known_trans_id=None, insert_doc_cb=self.receive_doc) self.assertGetEncryptedDoc( self.db, 'doc-id', 'replica:1', tests.simple_doc, False) self.assertGetEncryptedDoc( @@ -498,8 +493,7 @@ class SoledadDatabaseSyncTargetTests( self.assertTransactionLog([doc.doc_id, doc2.doc_id], self.db) new_gen, _ = yield self.st.sync_exchange( [], 'other-replica', last_known_generation=0, - last_known_trans_id=None, insert_doc_cb=self.receive_doc, - defer_decryption=False) + last_known_trans_id=None, insert_doc_cb=self.receive_doc) self.assertTransactionLog([doc.doc_id, doc2.doc_id], self.db) self.assertEqual(2, new_gen) self.assertEqual( @@ -779,10 +773,6 @@ class SoledadDatabaseSyncTargetTests( yield self.st.record_sync_info('replica', 0, 'T-sid') self.assertEqual(expected, called) - -# Just to make clear how this test is different... :) -DEFER_DECRYPTION = False - WAIT_STEP = 1 MAX_WAIT = 10 DBPASS = "pass" @@ -890,8 +880,7 @@ class TestSoledadDbSync( defer_encryption=True) self.dbsyncer = dbsyncer return dbsyncer.sync(target_url, - creds=creds, - defer_decryption=DEFER_DECRYPTION) + creds=creds) else: return self._do_sync(self, target_name) -- cgit v1.2.3 From 07dcb2ae5240f20a26903f53a432fcd49c7f1ec9 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 19 Sep 2016 03:56:44 -0300 Subject: [feature] streaming download protocol This commit finishes reversion into u1db original streaming protocol for downloads. 
--- client/src/leap/soledad/client/http_target/api.py | 4 +- .../src/leap/soledad/client/http_target/fetch.py | 91 ++++----- .../soledad/client/http_target/fetch_protocol.py | 206 +++++++++++++++++++++ server/src/leap/soledad/server/sync.py | 6 +- testing/tests/sync/test_sync_target.py | 65 +++---- 5 files changed, 273 insertions(+), 99 deletions(-) create mode 100644 client/src/leap/soledad/client/http_target/fetch_protocol.py diff --git a/client/src/leap/soledad/client/http_target/api.py b/client/src/leap/soledad/client/http_target/api.py index c9da939c..4e068523 100644 --- a/client/src/leap/soledad/client/http_target/api.py +++ b/client/src/leap/soledad/client/http_target/api.py @@ -72,11 +72,11 @@ class SyncTargetAPI(SyncTarget): return self._sync_enc_pool is not None def _http_request(self, url, method='GET', body=None, headers=None, - content_type=None): + content_type=None, body_reader=readBody): headers = headers or self._base_header if content_type: headers.update({'content-type': [content_type]}) - d = self._http.request(url, method, body, headers, readBody) + d = self._http.request(url, method, body, headers, body_reader) d.addErrback(_unauth_to_invalid_token_error) return d diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 1f1bc480..063082e5 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -14,8 +14,6 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-import json - from twisted.internet import defer from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS @@ -25,7 +23,9 @@ from leap.soledad.client.http_target.support import RequestBody from leap.soledad.common.log import getLogger from leap.soledad.common.document import SoledadDocument from leap.soledad.common.l2db import errors -from leap.soledad.common.l2db.remote import utils +from datetime import datetime + +from . import fetch_protocol logger = getLogger(__name__) @@ -58,12 +58,12 @@ class HTTPDocFetcher(object): # to know the total number of documents to be received, and this # information comes as metadata to each request. - docs = yield self._fetch_all( - last_known_generation, last_known_trans_id, - sync_id, 0) self._received_docs = 0 + metadata = yield self._fetch_all( + last_known_generation, last_known_trans_id, + sync_id, self._received_docs) number_of_changes, ngen, ntrans =\ - self._insert_received_docs(docs, 1, 1) + self._parse_metadata(metadata) if ngen: new_generation = ngen @@ -81,14 +81,17 @@ class HTTPDocFetcher(object): ensure=self._ensure_callback is not None) # inform server of how many documents have already been received body.insert_info(received=received) - # send headers + # build a stream reader with doc parser callback + body_reader = fetch_protocol.build_body_reader(self._doc_parser) + # start download stream return self._http_request( self._url, method='POST', body=str(body), - content_type='application/x-soledad-sync-get') + content_type='application/x-soledad-sync-get', + body_reader=body_reader) - def _insert_received_docs(self, response, idx, total): + def _doc_parser(self, doc_info, content): """ Insert a received document into the local replica. @@ -99,26 +102,20 @@ class HTTPDocFetcher(object): :param total: The total number of operations. 
:type total: int """ - new_generation, new_transaction_id, number_of_changes, entries =\ - self._parse_received_doc_response(response) - - for doc_id, rev, content, gen, trans_id in entries: - if doc_id is not None: - # decrypt incoming document and insert into local database - # --------------------------------------------------------- - # symmetric decryption of document's contents - # --------------------------------------------------------- - # If arriving content was symmetrically encrypted, we decrypt - doc = SoledadDocument(doc_id, rev, content) - if is_symmetrically_encrypted(doc): - doc.set_json(self._crypto.decrypt_doc(doc)) - self._insert_doc_cb(doc, gen, trans_id) - self._received_docs += 1 - user_data = {'uuid': self.uuid, 'userid': self.userid} - _emit_receive_status(user_data, self._received_docs, total) - return number_of_changes, new_generation, new_transaction_id - - def _parse_received_doc_response(self, response): + # decrypt incoming document and insert into local database + # --------------------------------------------------------- + # symmetric decryption of document's contents + # --------------------------------------------------------- + # If arriving content was symmetrically encrypted, we decrypt + doc = SoledadDocument(doc_info['id'], doc_info['rev'], content) + if is_symmetrically_encrypted(doc): + doc.set_json(self._crypto.decrypt_doc(doc)) + self._insert_doc_cb(doc, doc_info['gen'], doc_info['trans_id']) + self._received_docs += 1 + user_data = {'uuid': self.uuid, 'userid': self.userid} + _emit_receive_status(user_data, self._received_docs, total=1000000) + + def _parse_metadata(self, metadata): """ Parse the response from the server containing the received document. 
@@ -130,18 +127,18 @@ class HTTPDocFetcher(object): :rtype: tuple """ # decode incoming stream - parts = response.splitlines() - if not parts or parts[0] != '[' or parts[-1] != ']': - raise errors.BrokenSyncStream - data = parts[1:-1] + # parts = response.splitlines() + # if not parts or parts[0] != '[' or parts[-1] != ']': + # raise errors.BrokenSyncStream + # data = parts[1:-1] # decode metadata + # try: + # line, comma = utils.check_and_strip_comma(data[0]) + # metadata = None + # except (IndexError): + # raise errors.BrokenSyncStream try: - line, comma = utils.check_and_strip_comma(data[0]) - metadata = None - except (IndexError): - raise errors.BrokenSyncStream - try: - metadata = json.loads(line) + # metadata = json.loads(line) new_generation = metadata['new_generation'] new_transaction_id = metadata['new_transaction_id'] number_of_changes = metadata['number_of_changes'] @@ -150,19 +147,7 @@ class HTTPDocFetcher(object): # make sure we have replica_uid from fresh new dbs if self._ensure_callback and 'replica_uid' in metadata: self._ensure_callback(metadata['replica_uid']) - # parse incoming document info - entries = [] - for index in xrange(1, len(data[1:]), 2): - try: - line, comma = utils.check_and_strip_comma(data[index]) - content, _ = utils.check_and_strip_comma(data[index + 1]) - entry = json.loads(line) - entries.append((entry['id'], entry['rev'], content or None, - entry['gen'], entry['trans_id'])) - except (IndexError, KeyError): - raise errors.BrokenSyncStream - return new_generation, new_transaction_id, number_of_changes, \ - entries + return number_of_changes, new_generation, new_transaction_id def _emit_receive_status(user_data, received_docs, total): diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py new file mode 100644 index 00000000..6ecba2b0 --- /dev/null +++ b/client/src/leap/soledad/client/http_target/fetch_protocol.py @@ -0,0 +1,206 @@ +#!/usr/bin/env 
python + +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Example using stdio, Deferreds, LineReceiver and twisted.web.client. + +Note that the WebCheckerCommandProtocol protocol could easily be used in e.g. +a telnet server instead; see the comments for details. + +Based on an example by Abe Fettig. +""" +import sys +import json +import warnings +from cStringIO import StringIO +from twisted.internet import reactor +from twisted.internet import defer +from twisted.internet import protocol +from twisted.web.client import HTTPConnectionPool +from twisted.web._newclient import ResponseDone +from twisted.web._newclient import PotentialDataLoss +from twisted.web.client import PartialDownloadError +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db.remote import utils +from leap.soledad.common.l2db.remote import http_errors +from leap.common.http import HTTPClient + + +class DocStreamReceiver(protocol.Protocol): + + def __init__(self, response, deferred, doc_reader): + self.deferred = deferred + self.status = response.code if response else None + self.message = response.phrase if response else None + self.headers = response.headers if response else {} + self.delimiter = '\r\n' + self._doc_reader = doc_reader + self.reset() + + def reset(self): + self._line = 0 + self._buffer = StringIO() + self._properly_finished = False + + # ---8<--- snippet from u1db.remote.http_client, modified to use errbacks + def _error(self, respdic): + descr = respdic.get("error") + exc_cls = errors.wire_description_to_exc.get(descr) + if exc_cls is not None: + message = respdic.get("message") + self.deferred.errback(exc_cls(message)) + # ---8<--- end of snippet from u1db.remote.http_client + + def connectionLost(self, reason): + """ + Deliver the accumulated response bytes to the waiting L{Deferred}, if + the response body has been completely received without error. 
+ """ + if reason.check(ResponseDone): + + try: + body = self.finish() + except errors.BrokenSyncStream, e: + return self.deferred.errback(e) + + # ---8<--- snippet from u1db.remote.http_client + if self.status in (200, 201): + self.deferred.callback(self.metadata) + elif self.status in http_errors.ERROR_STATUSES: + try: + respdic = json.loads(body) + except ValueError: + self.deferred.errback( + errors.HTTPError(self.status, body, self.headers)) + else: + self._error(respdic) + # special cases + elif self.status == 503: + self.deferred.errback(errors.Unavailable(body, self.headers)) + else: + self.deferred.errback( + errors.HTTPError(self.status, body, self.headers)) + # ---8<--- end of snippet from u1db.remote.http_client + + elif reason.check(PotentialDataLoss): + self.deferred.errback( + PartialDownloadError(self.status, self.message, + b''.join(body))) + else: + self.deferred.errback(reason) + + def consumeBufferLines(self): + content = self._buffer.getvalue()[0:self._buffer.tell()] + self._buffer.seek(0) + lines = content.split(self.delimiter) + self._buffer.write(lines.pop(-1)) + return lines + + def dataReceived(self, data): + self._buffer.write(data) + if '\n' not in data: + return + lines = self.consumeBufferLines() + while lines: + line, _ = utils.check_and_strip_comma(lines.pop(0)) + try: + self.lineReceived(line) + except AssertionError, e: + raise errors.BrokenSyncStream(e) + + def lineReceived(self, line): + assert not self._properly_finished + if ']' == line: + self._properly_finished = True + elif self._line == 0: + assert line == '[' + self._line += 1 + elif self._line == 1: + self._line += 1 + self.metadata = json.loads(line) + assert 'error' not in self.metadata + elif (self._line % 2) == 0: + self._line += 1 + self.current_doc = json.loads(line) + assert 'error' not in self.current_doc + else: + self._line += 1 + self._doc_reader(self.current_doc, line.strip() or None) + + def finish(self): + if not self._properly_finished: + raise 
errors.BrokenSyncStream() + content = self._buffer.getvalue()[0:self._buffer.tell()] + self._buffer.close() + return content + + +def build_body_reader(doc_reader): + """ + Get the documents from a sync stream and call doc_reader on each + doc received. + + @param doc_reader: Function to be called for processing an incoming doc. + Will be called with doc metadata (dict parsed from 1st line) and doc + content (string) + @type response: function + + @return: A L{Deferred} which will fire with the sync metadata. + Cancelling it will close the connection to the server immediately. + """ + def read(response): + def cancel(deferred): + """ + Cancel a L{readBody} call, close the connection to the HTTP server + immediately, if it is still open. + + @param deferred: The cancelled L{defer.Deferred}. + """ + abort = getAbort() + if abort is not None: + abort() + + def getAbort(): + return getattr(protocol.transport, 'abortConnection', None) + + d = defer.Deferred(cancel) + protocol = DocStreamReceiver(response, d, doc_reader) + response.deliverBody(protocol) + if protocol.transport is not None and getAbort() is None: + warnings.warn( + 'Using readBody with a transport that does not have an ' + 'abortConnection method', + category=DeprecationWarning, + stacklevel=2) + return d + return read + + +def read_doc(doc_info, content): + print doc_info, len(content) + + +def finish(args): + print args + reactor.stop() + + +def fetch(url, token, sync_id): + headers = {'Authorization': ['Token %s' % token]} + headers.update({'content-type': ['application/x-soledad-sync-get']}) + body = """[ +{"ensure": false, "last_known_trans_id": "", "sync_id": "%s", +"last_known_generation": 0}, +{"received": 0} +]""" % sync_id + http = HTTPClient(pool=HTTPConnectionPool(reactor)) + d = http.request(url, 'POST', body, headers, build_body_reader(read_doc)) + d.addBoth(finish) + + +if __name__ == "__main__": + assert len(sys.argv) == 4 + reactor.callWhenRunning(fetch, sys.argv[1], sys.argv[2], 
sys.argv[3]) + reactor.run() diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 6f2ffe9f..c958bfaa 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -254,7 +254,11 @@ class SyncResource(http_app.SyncResource): gen=gen, trans_id=trans_id) self.responder.stream_entry(entry) content = doc.get_json() - self.responder.stream_entry(content.read() if content else '') + if content: + self.responder.stream_entry(content.read()) + content.close() + else: + self.responder.stream_entry('') new_gen, number_of_changes = \ self.sync_exch.find_changes_to_return(received) diff --git a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index a2935539..997dcdcd 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -30,6 +30,7 @@ from testscenarios import TestWithScenarios from twisted.internet import defer from leap.soledad.client import http_target as target +from leap.soledad.client.http_target.fetch_protocol import DocStreamReceiver from leap.soledad.client import crypto from leap.soledad.client.sqlcipher import SQLCipherU1DBSync from leap.soledad.client.sqlcipher import SQLCipherOptions @@ -44,6 +45,7 @@ from test_soledad.util import make_soledad_app from test_soledad.util import make_token_soledad_app from test_soledad.util import make_soledad_document_for_test from test_soledad.util import soledad_sync_target +from twisted.trial import unittest from test_soledad.util import SoledadWithCouchServerMixin from test_soledad.util import ADDRESS from test_soledad.util import SQLCIPHER_SCENARIOS @@ -53,92 +55,69 @@ from test_soledad.util import SQLCIPHER_SCENARIOS # The following tests come from `u1db.tests.test_remote_sync_target`. 
# ----------------------------------------------------------------------------- -class TestSoledadParseReceivedDocResponse(SoledadWithCouchServerMixin): +class TestSoledadParseReceivedDocResponse(unittest.TestCase): """ Some tests had to be copied to this class so we can instantiate our own target. """ - def setUp(self): - SoledadWithCouchServerMixin.setUp(self) - creds = {'token': { - 'uuid': 'user-uuid', - 'token': 'auth-token', - }} - self.target = target.SoledadHTTPSyncTarget( - self.couch_url, - uuid4().hex, - creds, - self._soledad._crypto, - None) - - def tearDown(self): - self.target.close() - SoledadWithCouchServerMixin.tearDown(self) + def parse(self, stream): + parser = DocStreamReceiver(None, None, lambda x, y: 42) + parser.dataReceived(stream) + parser.finish() def test_extra_comma(self): - """ - Test adapted to use encrypted content. - """ doc = SoledadDocument('i', rev='r') - doc.content = {} - _crypto = self._soledad._crypto - key = _crypto.doc_passphrase(doc.doc_id) - secret = _crypto.secret + doc.content = {'a': 'b'} - enc_json = crypto.encrypt_docstr( - doc.get_json(), doc.doc_id, doc.rev, - key, secret) + encrypted_docstr = crypto.SoledadCrypto('').encrypt_doc(doc) with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response("[\r\n{},\r\n]") + self.parse("[\r\n{},\r\n]") with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response( + self.parse( ('[\r\n{},\r\n{"id": "i", "rev": "r", ' + - '"content": %s, "gen": 3, "trans_id": "T-sid"}' + - ',\r\n]') % json.dumps(enc_json)) + '"gen": 3, "trans_id": "T-sid"},\r\n' + + '%s,\r\n]') % encrypted_docstr) def test_wrong_start(self): with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response("{}\r\n]") - - with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response("\r\n{}\r\n]") + self.parse("{}\r\n]") with self.assertRaises(l2db.errors.BrokenSyncStream): - 
self.target._parse_received_doc_response("") + self.parse("\r\n{}\r\n]") def test_wrong_end(self): with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response("[\r\n{}") + self.parse("[\r\n{}") with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response("[\r\n") + self.parse("[\r\n") def test_missing_comma(self): with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response( + self.parse( '[\r\n{}\r\n{"id": "i", "rev": "r", ' '"content": "c", "gen": 3}\r\n]') def test_no_entries(self): with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response("[\r\n]") + self.parse("[\r\n]") def test_error_in_stream(self): with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response( + self.parse( '[\r\n{"new_generation": 0},' '\r\n{"error": "unavailable"}\r\n') with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response( + self.parse( '[\r\n{"error": "unavailable"}\r\n') with self.assertRaises(l2db.errors.BrokenSyncStream): - self.target._parse_received_doc_response('[\r\n{"error": "?"}\r\n') + self.parse('[\r\n{"error": "?"}\r\n') # # functions for TestRemoteSyncTargets -- cgit v1.2.3 From a8182bb4f954c02d53d699bfe2a645667d770269 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 19 Sep 2016 21:48:56 -0300 Subject: [feature] upload streaming 1) enable HTTP 1.1 chunked upload on server 2) make the client sync.py generate a list of function calls instead of a list of full docs 3) disable encryption pool 4) make the doc encryption a list of function calls 5) create a twisted protocol for sending 6) make a producer that calls the doc generation as necessary --- .../leap/soledad/client/http_target/__init__.py | 5 +- client/src/leap/soledad/client/http_target/api.py | 12 +++- .../src/leap/soledad/client/http_target/fetch.py | 1 - 
client/src/leap/soledad/client/http_target/send.py | 64 ++++++++-------------- .../soledad/client/http_target/send_protocol.py | 61 +++++++++++++++++++++ .../src/leap/soledad/client/http_target/support.py | 27 +++++---- client/src/leap/soledad/client/sync.py | 24 ++++---- server/src/leap/soledad/server/__init__.py | 8 +-- server/src/leap/soledad/server/sync.py | 5 +- testing/tests/perf/conftest.py | 2 +- 10 files changed, 135 insertions(+), 74 deletions(-) create mode 100644 client/src/leap/soledad/client/http_target/send_protocol.py diff --git a/client/src/leap/soledad/client/http_target/__init__.py b/client/src/leap/soledad/client/http_target/__init__.py index 62e8bcf0..94de2feb 100644 --- a/client/src/leap/soledad/client/http_target/__init__.py +++ b/client/src/leap/soledad/client/http_target/__init__.py @@ -26,6 +26,8 @@ import os from leap.soledad.common.log import getLogger from leap.common.http import HTTPClient +from twisted.web.client import HTTPConnectionPool +from twisted.internet import reactor from leap.soledad.client.http_target.send import HTTPDocSender from leap.soledad.client.http_target.api import SyncTargetAPI from leap.soledad.client.http_target.fetch import HTTPDocFetcher @@ -99,7 +101,8 @@ class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): # XXX Increasing timeout of simple requests to avoid chances of hitting # the duplicated syncing bug. This could be reduced to the 30s default # after implementing Cancellable Sync. 
See #7382 - self._http = HTTPClient(cert_file, timeout=90) + self._http = HTTPClient(cert_file, timeout=90, + pool=HTTPConnectionPool(reactor)) if DO_STATS: self.sync_exchange_phase = [0] diff --git a/client/src/leap/soledad/client/http_target/api.py b/client/src/leap/soledad/client/http_target/api.py index 4e068523..00b943e1 100644 --- a/client/src/leap/soledad/client/http_target/api.py +++ b/client/src/leap/soledad/client/http_target/api.py @@ -22,6 +22,7 @@ from uuid import uuid4 from twisted.web.error import Error from twisted.internet import defer +from twisted.web.http_headers import Headers from leap.soledad.client.http_target.support import readBody from leap.soledad.common.errors import InvalidAuthTokenError @@ -72,11 +73,18 @@ class SyncTargetAPI(SyncTarget): return self._sync_enc_pool is not None def _http_request(self, url, method='GET', body=None, headers=None, - content_type=None, body_reader=readBody): + content_type=None, body_reader=readBody, + body_producer=None): headers = headers or self._base_header if content_type: headers.update({'content-type': [content_type]}) - d = self._http.request(url, method, body, headers, body_reader) + if not body_producer: + d = self._http.request(url, method, body, headers, body_reader) + else: + d = self._http._agent.request( + method, url, headers=Headers(headers), + bodyProducer=body_producer(body)) + d.addCallback(body_reader) d.addErrback(_unauth_to_invalid_token_error) return d diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 063082e5..50e89a2a 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -23,7 +23,6 @@ from leap.soledad.client.http_target.support import RequestBody from leap.soledad.common.log import getLogger from leap.soledad.common.document import SoledadDocument from leap.soledad.common.l2db import errors -from datetime import datetime from . 
import fetch_protocol diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index c7bd057e..fcda9bd7 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -22,6 +22,7 @@ from leap.soledad.common.log import getLogger from leap.soledad.client.events import emit_async from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS from leap.soledad.client.http_target.support import RequestBody +from .send_protocol import DocStreamProducer logger = getLogger(__name__) @@ -54,73 +55,56 @@ class HTTPDocSender(object): last_known_trans_id=last_known_trans_id, sync_id=sync_id, ensure=self._ensure_callback is not None) - total = len(docs_by_generation) - while body.consumed < total: - result = yield self._send_batch(total, body, docs_by_generation) + result = yield self._send_batch(body, docs_by_generation) response_dict = json.loads(result)[0] gen_after_send = response_dict['new_generation'] trans_id_after_send = response_dict['new_transaction_id'] defer.returnValue([gen_after_send, trans_id_after_send]) - def _delete_sent(self, docs): - for doc, gen, trans_id in docs: - self._sync_enc_pool.delete_encrypted_doc( - doc.doc_id, doc.rev) - @defer.inlineCallbacks - def _send_batch(self, total, body, docs): - sent = [] + def _send_batch(self, body, docs): + total = len(docs) missing = total - body.consumed + calls = [] for i in xrange(1, missing + 1): - if body.pending_size > self.MAX_BATCH_SIZE: - break idx = body.consumed + i entry = docs[idx - 1] - sent.append(entry) - yield self._prepare_one_doc(entry, body, idx, total) - result = yield self._send_request(body.pop()) - if self._defer_encryption: - self._delete_sent(sent) + calls.append((self._prepare_one_doc, + entry, body, idx, total)) + result = yield self._send_request(body, calls) _emit_send_status(self.uuid, body.consumed, total) defer.returnValue(result) - def _send_request(self, body): 
+ def _send_request(self, body, calls): return self._http_request( self._url, method='POST', - body=body, - content_type='application/x-soledad-sync-put') + body=(body, calls), + content_type='application/x-soledad-sync-put', + body_producer=DocStreamProducer) @defer.inlineCallbacks def _prepare_one_doc(self, entry, body, idx, total): - doc, gen, trans_id = entry - content = yield self._encrypt_doc(doc) + get_doc, gen, trans_id = entry + doc, content = yield self._encrypt_doc(get_doc) body.insert_info( id=doc.doc_id, rev=doc.rev, content=content, gen=gen, trans_id=trans_id, number_of_docs=total, doc_idx=idx) - def _encrypt_doc(self, doc): - d = None + @defer.inlineCallbacks + def _encrypt_doc(self, get_doc): + if type(get_doc) == tuple: + f, args = get_doc + doc = yield f(args) + else: + # tests + doc = get_doc if doc.is_tombstone(): - d = defer.succeed(None) - elif not self._defer_encryption: - # fallback case, for tests - d = defer.succeed(self._crypto.encrypt_doc(doc)) + defer.returnValue((doc, None)) else: - - def _maybe_encrypt_doc_inline(doc_json): - if doc_json is None: - # the document is not marked as tombstone, but we got - # nothing from the sync db. As it is not encrypted - # yet, we force inline encryption. 
- return self._crypto.encrypt_doc(doc) - return doc_json - - d = self._sync_enc_pool.get_encrypted_doc(doc.doc_id, doc.rev) - d.addCallback(_maybe_encrypt_doc_inline) - return d + defer.returnValue((doc, self._crypto.encrypt_doc(doc))) def _emit_send_status(user_data, idx, total): diff --git a/client/src/leap/soledad/client/http_target/send_protocol.py b/client/src/leap/soledad/client/http_target/send_protocol.py new file mode 100644 index 00000000..c72c6d13 --- /dev/null +++ b/client/src/leap/soledad/client/http_target/send_protocol.py @@ -0,0 +1,61 @@ +from zope.interface import implements +from twisted.internet import defer +from twisted.internet import reactor +from twisted.web.iweb import IBodyProducer +from twisted.web.iweb import UNKNOWN_LENGTH + + +class DocStreamProducer(object): + """ + A producer that writes the body of a request to a consumer. + """ + + implements(IBodyProducer) + + def __init__(self, parser_producer): + """ + Initialize the string produer. + + :param body: The body of the request. + :type body: str + """ + self.body, self.producer = parser_producer + self.length = UNKNOWN_LENGTH + self.pause = False + self.stop = False + + @defer.inlineCallbacks + def startProducing(self, consumer): + """ + Write the body to the consumer. + + :param consumer: Any IConsumer provider. + :type consumer: twisted.internet.interfaces.IConsumer + + :return: A successful deferred. 
+ :rtype: twisted.internet.defer.Deferred + """ + call = self.producer.pop(0) + yield call[0](*call[1:]) + while self.producer and not self.stop: + if self.pause: + yield self.sleep(0.01) + continue + call = self.producer.pop(0) + yield call[0](*call[1:]) + consumer.write(self.body.pop(1)) + consumer.write(self.body.pop(1)) + + def sleep(self, secs): + d = defer.Deferred() + reactor.callLater(secs, d.callback, None) + return d + + def pauseProducing(self): + self.pause = True + + def stopProducing(self): + self.stop = True + + def resumeProducing(self): + self.pause = False diff --git a/client/src/leap/soledad/client/http_target/support.py b/client/src/leap/soledad/client/http_target/support.py index 6ec98ed4..40e5eb55 100644 --- a/client/src/leap/soledad/client/http_target/support.py +++ b/client/src/leap/soledad/client/http_target/support.py @@ -155,7 +155,6 @@ class RequestBody(object): self.headers = header_dict self.entries = [] self.consumed = 0 - self.pending_size = 0 def insert_info(self, **entry_dict): """ @@ -169,9 +168,8 @@ class RequestBody(object): """ entry = json.dumps(entry_dict) self.entries.append(entry) - self.pending_size += len(entry) - def pop(self): + def pop(self, amount=10): """ Removes all entries and returns it formatted and ready to be sent. 
@@ -182,19 +180,20 @@ class RequestBody(object): :return: formatted body ready to be sent :rtype: str """ - entries = self.entries[:] - self.entries = [] - self.pending_size = 0 - self.consumed += len(entries) - return self.entries_to_str(entries) + start = self.consumed == 0 + amount = min([len(self.entries), amount]) + entries = [self.entries.pop(0) for i in xrange(amount)] + self.consumed += amount + end = len(self.entries) == 0 + return self.entries_to_str(entries, start, end) def __str__(self): - return self.entries_to_str(self.entries) + return self.pop(len(self.entries)) def __len__(self): return len(self.entries) - def entries_to_str(self, entries=None): + def entries_to_str(self, entries=None, start=True, end=True): """ Format a list of entries into the body format expected by the server. @@ -205,6 +204,10 @@ class RequestBody(object): :return: formatted body ready to be sent :rtype: str """ - data = '[\r\n' + json.dumps(self.headers) + data = '' + if start: + data = '[\r\n' + json.dumps(self.headers) data += ''.join(',\r\n' + entry for entry in entries) - return data + '\r\n]' + if end: + data += '\r\n]' + return data diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index d3cfe029..9d237d98 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -142,17 +142,21 @@ class SoledadSynchronizer(Synchronizer): # -------------------------------------------------------------------- # prepare to send all the changed docs - changed_doc_ids = [doc_id for doc_id, _, _ in changes] - docs_to_send = self.source.get_docs( - changed_doc_ids, check_for_conflicts=False, include_deleted=True) - ids_sent = [] + # changed_doc_ids = [doc_id for doc_id, _, _ in changes] + # docs_to_send = self.source.get_docs( + # changed_doc_ids, check_for_conflicts=False, include_deleted=True) + ids_sent = [doc_id for doc_id, _, _ in changes] + # docs_by_generation = [] + # idx = 0 + # for doc in 
docs_to_send: + # _, gen, trans = changes[idx] + # docs_by_generation.append((doc, gen, trans)) + # idx += 1 + # ids_sent.append(doc.doc_id) docs_by_generation = [] - idx = 0 - for doc in docs_to_send: - _, gen, trans = changes[idx] - docs_by_generation.append((doc, gen, trans)) - idx += 1 - ids_sent.append(doc.doc_id) + for doc_id, gen, trans in changes: + get_doc = (self.source.get_doc, doc_id) + docs_by_generation.append((get_doc, gen, trans)) # exchange documents and try to insert the returned ones with # the target, return target synced-up-to gen. diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index d8243c19..889bf48f 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -193,7 +193,8 @@ class HTTPInvocationByMethodWithBody( try: content_length = int(self.environ['CONTENT_LENGTH']) except (ValueError, KeyError): - raise http_app.BadRequest + # raise http_app.BadRequest + content_length = self.max_request_size if content_length <= 0: raise http_app.BadRequest if content_length > self.max_request_size: @@ -219,7 +220,6 @@ class HTTPInvocationByMethodWithBody( if content_type == 'application/x-soledad-sync-put': meth_put = self._lookup('%s_put' % method) meth_end = self._lookup('%s_end' % method) - entries = [] while True: line = body_getline() entry = line.strip() @@ -228,11 +228,9 @@ class HTTPInvocationByMethodWithBody( if not entry or not comma: # empty or no prec comma raise http_app.BadRequest entry, comma = utils.check_and_strip_comma(entry) - entries.append(entry) + meth_put({}, entry) if comma or body_getline(): # extra comma or data raise http_app.BadRequest - for entry in entries: - meth_put({}, entry) return meth_end() # handle outgoing documents elif content_type == 'application/x-soledad-sync-get': diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index c958bfaa..0bf7b236 100644 --- 
a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -237,7 +237,9 @@ class SyncResource(http_app.SyncResource): :type doc_idx: int """ doc = Document(id, rev, content) - self._staging.append((doc, gen, trans_id, number_of_docs, doc_idx)) + self.sync_exch.insert_doc_from_source( + doc, gen, trans_id, number_of_docs=None, + doc_idx=None, sync_id=None) @http_app.http_method(received=int, content_as_args=True) def post_get(self, received): @@ -282,7 +284,6 @@ class SyncResource(http_app.SyncResource): Return the current generation and transaction_id after inserting one incoming document. """ - self.sync_exch.batched_insert_from_source(self._staging, self._sync_id) self.responder.content_type = 'application/x-soledad-sync-response' self.responder.start_response(200) self.responder.start_stream(), diff --git a/testing/tests/perf/conftest.py b/testing/tests/perf/conftest.py index 6fa6b2c0..09567b88 100644 --- a/testing/tests/perf/conftest.py +++ b/testing/tests/perf/conftest.py @@ -243,7 +243,7 @@ def soledad_client(tmpdir, soledad_server, remote_db, soledad_dbs, request): server_url=server_url, cert_file=None, auth_token=token, - defer_encryption=True) + defer_encryption=False) request.addfinalizer(soledad_client.close) return soledad_client return create -- cgit v1.2.3 From 16f73007db6ec74435a25a95ba2150d5d14d8138 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 21 Sep 2016 16:51:07 -0300 Subject: [feature] make the test accept large uploads We enabled chunking, which means that a user can upload his entire db on a single request. This commit makes server enable this and throttle download as Twisted can't control the payload producer code as it's synchronous and blocking code.
--- server/src/leap/soledad/server/sync.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 0bf7b236..bc977210 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -17,14 +17,16 @@ """ Server side synchronization infrastructure. """ -from leap.soledad.common.l2db import sync, Document +import time +from leap.soledad.common.l2db import sync from leap.soledad.common.l2db.remote import http_app from leap.soledad.server.caching import get_cache_for from leap.soledad.server.state import ServerSyncState +from leap.soledad.common.document import ServerDocument from itertools import izip -MAX_REQUEST_SIZE = 200 # in Mb +MAX_REQUEST_SIZE = 6000 # in Mb MAX_ENTRY_SIZE = 200 # in Mb @@ -236,7 +238,8 @@ class SyncResource(http_app.SyncResource): :param doc_idx: The index of the current document. :type doc_idx: int """ - doc = Document(id, rev, content) + doc = ServerDocument(id, rev) + doc._json = content self.sync_exch.insert_doc_from_source( doc, gen, trans_id, number_of_docs=None, doc_idx=None, sync_id=None) @@ -255,10 +258,15 @@ class SyncResource(http_app.SyncResource): entry = dict(id=doc.doc_id, rev=doc.rev, gen=gen, trans_id=trans_id) self.responder.stream_entry(entry) - content = doc.get_json() - if content: - self.responder.stream_entry(content.read()) - content.close() + content_reader = doc.get_json() + if content_reader: + content = content_reader.read() + self.responder.stream_entry(content) + content_reader.close() + # throttle at 5mb/s + # FIXME: twistd cant control througput + # we need to either use gunicorn or go async + time.sleep(len(content) / (5.0 * 1024 * 1024)) else: self.responder.stream_entry('') -- cgit v1.2.3 From a302322e53878a6212532d33ac0a0f9e0c34b176 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 21 Sep 2016 17:21:09 -0300 Subject: [bug] handle 400 There was an if 
without an else on the error handler, so errors that fell through the current logic were never handled. Added a generic one to the tail so we don't miss them. --- client/src/leap/soledad/client/http_target/support.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/client/src/leap/soledad/client/http_target/support.py b/client/src/leap/soledad/client/http_target/support.py index 40e5eb55..fe91c5b1 100644 --- a/client/src/leap/soledad/client/http_target/support.py +++ b/client/src/leap/soledad/client/http_target/support.py @@ -53,6 +53,9 @@ class ReadBodyProtocol(_ReadBodyProtocol): if exc_cls is not None: message = respdic.get("message") self.deferred.errback(exc_cls(message)) + else: + self.deferred.errback( + errors.HTTPError(self.status, respdic, self.headers)) # ---8<--- end of snippet from u1db.remote.http_client def connectionLost(self, reason): -- cgit v1.2.3 From 7680ec18f26ce6bab48c8a57a05e08cba7c6ba5e Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 22 Sep 2016 01:00:23 -0300 Subject: [feature] stream content in a separate line This allows different paths for raw data and metadata, avoiding unnecessary json parsing.
--- client/src/leap/soledad/client/http_target/send_protocol.py | 2 +- client/src/leap/soledad/client/http_target/support.py | 8 +++++++- server/src/leap/soledad/server/__init__.py | 7 ++++--- server/src/leap/soledad/server/sync.py | 11 ++++++++--- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/send_protocol.py b/client/src/leap/soledad/client/http_target/send_protocol.py index c72c6d13..b93a4284 100644 --- a/client/src/leap/soledad/client/http_target/send_protocol.py +++ b/client/src/leap/soledad/client/http_target/send_protocol.py @@ -39,7 +39,7 @@ class DocStreamProducer(object): yield call[0](*call[1:]) while self.producer and not self.stop: if self.pause: - yield self.sleep(0.01) + yield self.sleep(0.001) continue call = self.producer.pop(0) yield call[0](*call[1:]) diff --git a/client/src/leap/soledad/client/http_target/support.py b/client/src/leap/soledad/client/http_target/support.py index fe91c5b1..c066331c 100644 --- a/client/src/leap/soledad/client/http_target/support.py +++ b/client/src/leap/soledad/client/http_target/support.py @@ -169,7 +169,13 @@ class RequestBody(object): :return: length of the entry after JSON dumps :rtype: int """ - entry = json.dumps(entry_dict) + if 'content' in entry_dict: + content = entry_dict['content'] or '' + del entry_dict['content'] + entry = json.dumps(entry_dict) + entry = entry + ',\r\n' + content + else: + entry = json.dumps(entry_dict) self.entries.append(entry) def pop(self, amount=10): diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index 889bf48f..7ba95543 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -221,14 +221,15 @@ class HTTPInvocationByMethodWithBody( meth_put = self._lookup('%s_put' % method) meth_end = self._lookup('%s_end' % method) while True: - line = body_getline() - entry = line.strip() + entry = body_getline().strip() if entry 
== ']': # end of incoming document stream break if not entry or not comma: # empty or no prec comma raise http_app.BadRequest entry, comma = utils.check_and_strip_comma(entry) - meth_put({}, entry) + content = body_getline().strip() + content, comma = utils.check_and_strip_comma(content) + meth_put({'content': content or None}, entry) if comma or body_getline(): # extra comma or data raise http_app.BadRequest return meth_end() diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index bc977210..6fcfe240 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -212,6 +212,7 @@ class SyncResource(http_app.SyncResource): db, self.source_replica_uid, last_known_generation, sync_id) self._sync_id = sync_id self._staging = [] + self._staging_size = 0 @http_app.http_method(content_as_args=True) def post_put( @@ -240,9 +241,13 @@ class SyncResource(http_app.SyncResource): """ doc = ServerDocument(id, rev) doc._json = content - self.sync_exch.insert_doc_from_source( - doc, gen, trans_id, number_of_docs=None, - doc_idx=None, sync_id=None) + self._staging_size += len(content or '') + self._staging.append((doc, gen, trans_id, number_of_docs, doc_idx)) + if self._staging_size > 8192 * 1024 or doc_idx == number_of_docs: + self.sync_exch.batched_insert_from_source(self._staging, + self._sync_id) + self._staging = [] + self._staging_size = 0 @http_app.http_method(received=int, content_as_args=True) def post_get(self, received): -- cgit v1.2.3 From 9ea98145abd130227b33d691b82dbcca76ef70de Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 22 Sep 2016 01:02:28 -0300 Subject: [feature] fix and enable batch Batching is now decided by the server; this commit enables it.
--- common/src/leap/soledad/common/backend.py | 2 +- common/src/leap/soledad/common/couch/__init__.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/common/src/leap/soledad/common/backend.py b/common/src/leap/soledad/common/backend.py index 5c995d38..4a29ca87 100644 --- a/common/src/leap/soledad/common/backend.py +++ b/common/src/leap/soledad/common/backend.py @@ -73,8 +73,8 @@ class SoledadBackend(CommonBackend): def batch_end(self): if not self.BATCH_SUPPORT: return - self.batching = False self._database.batch_end() + self.batching = False for name in self.after_batch_callbacks: self.after_batch_callbacks[name]() self.after_batch_callbacks = None diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index f19b0acb..6f233b26 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -658,7 +658,7 @@ class CouchDatabase(object): _, _, data = resource.get_json(**kwargs) return data - def _allocate_new_generation(self, doc_id, transaction_id): + def _allocate_new_generation(self, doc_id, transaction_id, save=True): """ Allocate a new generation number for a document modification. @@ -698,10 +698,12 @@ class CouchDatabase(object): DOC_ID_KEY: doc_id, TRANSACTION_ID_KEY: transaction_id, } - self._database.save(gen_doc) + if save: + self._database.save(gen_doc) break # succeeded allocating a new generation, proceed except ResourceConflict: pass # try again! 
+ return gen_doc def save_document(self, old_doc, doc, transaction_id): """ @@ -780,6 +782,7 @@ class CouchDatabase(object): headers=envelope.headers) except ResourceConflict: raise RevisionConflict() + self._allocate_new_generation(doc.doc_id, transaction_id) else: for name, attachment in attachments.items(): del attachment['follows'] @@ -788,12 +791,12 @@ class CouchDatabase(object): attachment['data'] = binascii.b2a_base64( parts[index]).strip() couch_doc['_attachments'] = attachments + gen_doc = self._allocate_new_generation(doc.doc_id, transaction_id, save=False) self.batch_docs[doc.doc_id] = couch_doc + self.batch_docs[gen_doc['_id']] = gen_doc last_gen, last_trans_id = self.batch_generation self.batch_generation = (last_gen + 1, transaction_id) - self._allocate_new_generation(doc.doc_id, transaction_id) - def _new_resource(self, *path): """ Return a new resource for accessing a couch database. -- cgit v1.2.3 From 32d73ec50d6147d2511d6679bb12c17dc01210e4 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 22 Sep 2016 05:03:59 -0300 Subject: [feature] batch based on payload size batch is slower than usual insert for a single doc, so, if a document exceeds the buffer, commit the batch (if any) and put the huge load by traditional insert. refactor coming. 
--- common/src/leap/soledad/common/couch/__init__.py | 3 ++- server/src/leap/soledad/server/sync.py | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 6f233b26..2e6f734e 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -791,7 +791,8 @@ class CouchDatabase(object): attachment['data'] = binascii.b2a_base64( parts[index]).strip() couch_doc['_attachments'] = attachments - gen_doc = self._allocate_new_generation(doc.doc_id, transaction_id, save=False) + gen_doc = self._allocate_new_generation( + doc.doc_id, transaction_id, save=False) self.batch_docs[doc.doc_id] = couch_doc self.batch_docs[gen_doc['_id']] = gen_doc last_gen, last_trans_id = self.batch_generation diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 6fcfe240..e12ebf8a 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -118,6 +118,8 @@ class SyncExchange(sync.SyncExchange): return_doc_cb(doc, gen, trans_id) def batched_insert_from_source(self, entries, sync_id): + if not entries: + return self._db.batch_start() for entry in entries: doc, gen, trans_id, number_of_docs, doc_idx = entry @@ -241,8 +243,19 @@ class SyncResource(http_app.SyncResource): """ doc = ServerDocument(id, rev) doc._json = content - self._staging_size += len(content or '') - self._staging.append((doc, gen, trans_id, number_of_docs, doc_idx)) + if (len(content or '') > (8192 * 1024) / 4) or number_of_docs < 4: + self.sync_exch.batched_insert_from_source(self._staging, + self._sync_id) + self._staging = [] + self._staging_size = 0 + self.sync_exch.insert_doc_from_source( + doc, gen, trans_id, + number_of_docs=number_of_docs, + doc_idx=doc_idx, + sync_id=self._sync_id) + else: + self._staging_size += len(content or '') + 
self._staging.append((doc, gen, trans_id, number_of_docs, doc_idx)) if self._staging_size > 8192 * 1024 or doc_idx == number_of_docs: self.sync_exch.batched_insert_from_source(self._staging, self._sync_id) -- cgit v1.2.3 From 77b952eeec20623e3b2e6f47597c59124c83f3d4 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 14 Sep 2016 01:50:52 -0400 Subject: [feature] streaming crypto implementation --- client/src/leap/soledad/client/_crypto.py | 200 ++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 client/src/leap/soledad/client/_crypto.py diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py new file mode 100644 index 00000000..e4093a9e --- /dev/null +++ b/client/src/leap/soledad/client/_crypto.py @@ -0,0 +1,200 @@ +import binascii +import hashlib +import hmac +import os + +from cStringIO import StringIO + +from twisted.persisted import dirdbm +from twisted.internet import defer +from twisted.internet import interfaces +from twisted.internet import reactor +from twisted.web import client +from twisted.web.client import FileBodyProducer + +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +from cryptography.hazmat.backends.multibackend import MultiBackend +from cryptography.hazmat.backends.openssl.backend \ + import Backend as OpenSSLBackend + +from zope.interface import implements + +from leap.common.config import get_path_prefix +from leap.soledad.client.secrets import SoledadSecrets + + +MAC_KEY_LENGTH = 64 + +crypto_backend = MultiBackend([OpenSSLBackend()]) + + +class AESWriter(object): + + implements(interfaces.IConsumer) + + def __init__(self, key, fd, iv=None): + if iv is None: + iv = os.urandom(16) + + cipher = _get_aes_ctr_cipher(key, iv) + self.encryptor = cipher.encryptor() + + self.fd = fd + self.done = False + self.deferred = defer.Deferred() + + def write(self, data): + encrypted = self.encryptor.update(data) + self.fd.write(encrypted) + return 
encrypted + + def end(self): + if not self.done: + self.encryptor.finalize() + self.fd.seek(0) + self.deferred.callback(self.fd) + self.done = True + + +class HMACWriter(object): + + implements(interfaces.IConsumer) + + def __init__(self, key): + self.done = False + self.deferred = defer.Deferred() + + self.digest = '' + self._hmac = hmac.new(key, '', hashlib.sha256) + + def write(self, data): + self._hmac.update(data) + + def end(self): + if not self.done: + self.digest = self._hmac.digest() + self.deferred.callback(self.digest) + self.done = True + + +class EncryptAndHMAC(object): + + implements(interfaces.IConsumer) + + def __init__(self, crypter, hmac): + self.crypter = crypter + self.hmac = hmac + + def write(self, data): + enc_chunk = self.crypter.write(data) + self.hmac.write(enc_chunk) + + + +class NewDocCryptoStreamer(object): + + staging_path = os.path.join(get_path_prefix(), 'leap', 'soledad', 'staging') + staged_template = """ + {"_enc_scheme": "symkey", + "_enc_method": "aes-256-ctr", + "_mac_method": "hmac", + "_mac_hash": "sha256", + "_encoding": "ENCODING", + "_enc_json": "ENC", + "_enc_iv": "IV", + "_mac": "MAC"}""" + + + def __init__(self, content_fd, doc_id, rev, secret=None): + self._content_fd = content_fd + self._contentFileProducer = FileBodyProducer( + content_fd, readSize=2**8) + self.doc_id = doc_id + self.rev = rev + self._encrypted_fd = StringIO() + + self.iv = os.urandom(16) + + sym_key = _get_sym_key_for_doc(doc_id, secret) + mac_key = _get_mac_key_for_doc(doc_id, secret) + + crypter = AESWriter(sym_key, self._encrypted_fd, self.iv) + hmac = HMACWriter(mac_key) + + self.crypter_consumer = crypter + self.hmac_consumer = hmac + + self._prime_hmac() + self.encrypt_and_mac_consumer = EncryptAndHMAC(crypter, hmac) + + def encrypt_stream(self): + d = self._contentFileProducer.startProducing( + self.encrypt_and_mac_consumer) + d.addCallback(self.end_crypto_stream) + d.addCallback(self.persist_encrypted_doc) + return d + + def 
end_crypto_stream(self, ignored): + self.crypter_consumer.end() + self._post_hmac() + self.hmac_consumer.end() + return defer.succeed('ok') + + def persist_encrypted_doc(self, ignored, encoding='hex'): + # TODO to avoid blocking on io, this can use a + # version of dbm that chunks the writes to the + # disk fd by using the same FileBodyProducer strategy + # that we're using here, long live to the Cooperator. + # this will benefit + + # TODO -- transition to hex: needs migration FIXME + if encoding == 'b64': + encode = binascii.b2a_base64 + elif encoding == 'hex': + encode = binascii.b2a_hex + else: + raise RuntimeError('Unknown encoding: %s' % encoding) + + db = dirdbm.DirDBM(self.staging_path) + key = '{doc_id}@{rev}'.format( + doc_id=self.doc_id, rev=self.rev) + value = self.staged_template.replace( + 'ENCODING', encoding).replace( + 'ENC', encode(self._encrypted_fd.read())).replace( + 'IV', binascii.b2a_base64(self.iv)).replace( + 'MAC', encode(self.hmac_consumer.digest)) + db[key] = value + + self._content_fd.close() + self._encrypted_fd.close() + + def _prime_hmac(self): + pre = '{doc_id}{rev}'.format( + doc_id=self.doc_id, rev=self.rev) + self.hmac_consumer.write(pre) + + def _post_hmac(self): + # FIXME -- original impl passed b64 encoded iv + post = '{enc_scheme}{enc_method}{enc_iv}'.format( + enc_scheme='symkey', + enc_method='aes-256-ctr', + enc_iv=binascii.b2a_base64(self.iv)) + self.hmac_consumer.write(post) + + +def _hmac_sha256(key, data): + return hmac.new(key, data, hashlib.sha256).digest() + + +def _get_mac_key_for_doc(doc_id, secret): + key = secret[:MAC_KEY_LENGTH] + return _hmac_sha256(key, doc_id) + + +def _get_sym_key_for_doc(doc_id, secret): + key = secret[MAC_KEY_LENGTH:] + return _hmac_sha256(key, doc_id) + + +def _get_aes_ctr_cipher(key, iv): + return Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) -- cgit v1.2.3 From fcf3b3046dd2005992638ebf993d53897af8ed3a Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 14 Sep 
2016 01:52:36 -0400 Subject: [refactor] remove encryption pool --- client/src/leap/soledad/client/__init__.py | 1 + client/src/leap/soledad/client/_crypto.py | 83 +++++--- client/src/leap/soledad/client/adbapi.py | 18 +- client/src/leap/soledad/client/api.py | 46 +++-- client/src/leap/soledad/client/crypto.py | 212 ++++++++++++--------- client/src/leap/soledad/client/encdecpool.py | 145 -------------- .../leap/soledad/client/http_target/__init__.py | 7 +- client/src/leap/soledad/client/http_target/api.py | 6 - .../src/leap/soledad/client/http_target/fetch.py | 15 +- client/src/leap/soledad/client/http_target/send.py | 5 + client/src/leap/soledad/client/secrets.py | 6 +- client/src/leap/soledad/client/sqlcipher.py | 30 +-- testing/tests/client/test_crypto2.py | 63 ++++++ 13 files changed, 298 insertions(+), 339 deletions(-) create mode 100644 testing/tests/client/test_crypto2.py diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 245a8971..3a114021 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -21,6 +21,7 @@ from leap.soledad.client.api import Soledad from leap.soledad.common import soledad_assert from ._version import get_versions + __version__ = get_versions()['version'] del get_versions diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index e4093a9e..ed861fdd 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -1,14 +1,37 @@ +# -*- coding: utf-8 -*- +# _crypto.py +# Copyright (C) 2016 LEAP Encryption Access Project +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +Cryptographic operations for the soledad client +""" + import binascii +import base64 import hashlib import hmac import os from cStringIO import StringIO -from twisted.persisted import dirdbm from twisted.internet import defer from twisted.internet import interfaces from twisted.internet import reactor +from twisted.logger import Logger +from twisted.persisted import dirdbm from twisted.web import client from twisted.web.client import FileBodyProducer @@ -23,11 +46,17 @@ from leap.common.config import get_path_prefix from leap.soledad.client.secrets import SoledadSecrets +log = Logger() + MAC_KEY_LENGTH = 64 crypto_backend = MultiBackend([OpenSSLBackend()]) +class EncryptionError(Exception): + pass + + class AESWriter(object): implements(interfaces.IConsumer) @@ -35,6 +64,10 @@ class AESWriter(object): def __init__(self, key, fd, iv=None): if iv is None: iv = os.urandom(16) + if len(key) != 32: + raise EncryptionError('key is not 256 bits') + if len(iv) != 16: + raise EncryptionError('iv is not 128 bits') cipher = _get_aes_ctr_cipher(key, iv) self.encryptor = cipher.encryptor() @@ -51,7 +84,6 @@ class AESWriter(object): def end(self): if not self.done: self.encryptor.finalize() - self.fd.seek(0) self.deferred.callback(self.fd) self.done = True @@ -91,18 +123,12 @@ class EncryptAndHMAC(object): -class NewDocCryptoStreamer(object): +class DocEncrypter(object): staging_path = os.path.join(get_path_prefix(), 'leap', 'soledad', 'staging') - staged_template = """ - {"_enc_scheme": "symkey", - "_enc_method": "aes-256-ctr", - "_mac_method": "hmac", - "_mac_hash": "sha256", - "_encoding": "ENCODING", - 
"_enc_json": "ENC", - "_enc_iv": "IV", - "_mac": "MAC"}""" + staged_template = """{"_enc_scheme": "symkey", "_enc_method": + "aes-256-ctr", "_mac_method": "hmac", "_mac_hash": "sha256", + "_encoding": "ENCODING", "_enc_json": "CIPHERTEXT", "_enc_iv": "IV", "_mac": "MAC"}""" def __init__(self, content_fd, doc_id, rev, secret=None): @@ -140,14 +166,12 @@ class NewDocCryptoStreamer(object): self.hmac_consumer.end() return defer.succeed('ok') - def persist_encrypted_doc(self, ignored, encoding='hex'): - # TODO to avoid blocking on io, this can use a - # version of dbm that chunks the writes to the - # disk fd by using the same FileBodyProducer strategy - # that we're using here, long live to the Cooperator. - # this will benefit + # TODO make this pluggable: + # pass another class (CryptoSerializer) to which we pass + # the doc info, the encrypted_fd and the mac_digest - # TODO -- transition to hex: needs migration FIXME + def persist_encrypted_doc(self, ignored, encoding='hex'): + # TODO -- transition to b64: needs migration FIXME if encoding == 'b64': encode = binascii.b2a_base64 elif encoding == 'hex': @@ -155,14 +179,25 @@ class NewDocCryptoStreamer(object): else: raise RuntimeError('Unknown encoding: %s' % encoding) + # TODO to avoid blocking on io, this can use a + # version of dbm that chunks the writes to the + # disk fd by using the same FileBodyProducer strategy + # that we're using here, long live to the Cooperator. 
+ + db = dirdbm.DirDBM(self.staging_path) key = '{doc_id}@{rev}'.format( doc_id=self.doc_id, rev=self.rev) + ciphertext = encode(self._encrypted_fd.getvalue()) value = self.staged_template.replace( 'ENCODING', encoding).replace( - 'ENC', encode(self._encrypted_fd.read())).replace( - 'IV', binascii.b2a_base64(self.iv)).replace( - 'MAC', encode(self.hmac_consumer.digest)) + 'CIPHERTEXT', ciphertext).replace( + 'IV', encode(self.iv)).replace( + 'MAC', encode(self.hmac_consumer.digest)).replace( + '\n', '') + self._encrypted_fd.seek(0) + + log.debug('persisting %s' % key) db[key] = value self._content_fd.close() @@ -174,14 +209,14 @@ class NewDocCryptoStreamer(object): self.hmac_consumer.write(pre) def _post_hmac(self): - # FIXME -- original impl passed b64 encoded iv post = '{enc_scheme}{enc_method}{enc_iv}'.format( enc_scheme='symkey', enc_method='aes-256-ctr', - enc_iv=binascii.b2a_base64(self.iv)) + enc_iv=binascii.b2a_hex(self.iv)) self.hmac_consumer.write(post) + def _hmac_sha256(key, data): return hmac.new(key, data, hashlib.sha256).digest() diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py index ce9bec05..a5328d2b 100644 --- a/client/src/leap/soledad/client/adbapi.py +++ b/client/src/leap/soledad/client/adbapi.py @@ -50,8 +50,7 @@ How many times a SQLCipher query should be retried in case of timeout. SQLCIPHER_MAX_RETRIES = 10 -def getConnectionPool(opts, openfun=None, driver="pysqlcipher", - sync_enc_pool=None): +def getConnectionPool(opts, openfun=None, driver="pysqlcipher"): """ Return a connection pool. @@ -72,7 +71,7 @@ def getConnectionPool(opts, openfun=None, driver="pysqlcipher", if openfun is None and driver == "pysqlcipher": openfun = partial(set_init_pragmas, opts=opts) return U1DBConnectionPool( - opts, sync_enc_pool, + opts, # the following params are relayed "as is" to twisted's # ConnectionPool. 
"%s.dbapi2" % driver, opts.path, timeout=SQLCIPHER_CONNECTION_TIMEOUT, @@ -89,7 +88,7 @@ class U1DBConnection(adbapi.Connection): The U1DB wrapper to use. """ - def __init__(self, pool, sync_enc_pool, init_u1db=False): + def __init__(self, pool, init_u1db=False): """ :param pool: The pool of connections to that owns this connection. :type pool: adbapi.ConnectionPool @@ -97,7 +96,6 @@ class U1DBConnection(adbapi.Connection): :type init_u1db: bool """ self.init_u1db = init_u1db - self._sync_enc_pool = sync_enc_pool try: adbapi.Connection.__init__(self, pool) except dbapi2.DatabaseError as e: @@ -116,8 +114,7 @@ class U1DBConnection(adbapi.Connection): if self.init_u1db: self._u1db = self.u1db_wrapper( self._connection, - self._pool.opts, - self._sync_enc_pool) + self._pool.opts) def __getattr__(self, name): """ @@ -162,12 +159,11 @@ class U1DBConnectionPool(adbapi.ConnectionPool): connectionFactory = U1DBConnection transactionFactory = U1DBTransaction - def __init__(self, opts, sync_enc_pool, *args, **kwargs): + def __init__(self, opts, *args, **kwargs): """ Initialize the connection pool. 
""" self.opts = opts - self._sync_enc_pool = sync_enc_pool try: adbapi.ConnectionPool.__init__(self, *args, **kwargs) except dbapi2.DatabaseError as e: @@ -182,7 +178,7 @@ class U1DBConnectionPool(adbapi.ConnectionPool): try: conn = self.connectionFactory( - self, self._sync_enc_pool, init_u1db=True) + self, init_u1db=True) replica_uid = conn._u1db._real_replica_uid setProxiedObject(self.replica_uid, replica_uid) except DatabaseAccessError as e: @@ -257,7 +253,7 @@ class U1DBConnectionPool(adbapi.ConnectionPool): tid = self.threadID() u1db = self._u1dbconnections.get(tid) conn = self.connectionFactory( - self, self._sync_enc_pool, init_u1db=not bool(u1db)) + self, init_u1db=not bool(u1db)) if self.replica_uid is None: replica_uid = conn._u1db._real_replica_uid diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index cbcae4f7..6b257669 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -61,6 +61,7 @@ from leap.soledad.client.secrets import SoledadSecrets from leap.soledad.client.shared_db import SoledadSharedDatabase from leap.soledad.client import sqlcipher from leap.soledad.client import encdecpool +from leap.soledad.client._crypto import DocEncrypter logger = getLogger(__name__) @@ -190,7 +191,6 @@ class Soledad(object): self._server_url = server_url self._defer_encryption = defer_encryption self._secrets_path = None - self._sync_enc_pool = None self._dbsyncer = None self.shared_db = shared_db @@ -299,12 +299,7 @@ class Soledad(object): ) self._sqlcipher_opts = opts - # the sync_db is used both for deferred encryption, so - # we want to initialize it anyway to allow for all combinations of - # deferred encryption configurations. 
- self._initialize_sync_db(opts) - self._dbpool = adbapi.getConnectionPool( - opts, sync_enc_pool=self._sync_enc_pool) + self._dbpool = adbapi.getConnectionPool(opts) def _init_u1db_syncer(self): """ @@ -314,9 +309,7 @@ class Soledad(object): self._dbsyncer = sqlcipher.SQLCipherU1DBSync( self._sqlcipher_opts, self._crypto, replica_uid, SOLEDAD_CERT, - defer_encryption=self._defer_encryption, - sync_db=self._sync_db, - sync_enc_pool=self._sync_enc_pool) + sync_db=self._sync_db) def sync_stats(self): sync_phase = 0 @@ -345,8 +338,6 @@ class Soledad(object): if self._sync_db: self._sync_db.close() self._sync_db = None - if self._defer_encryption: - self._sync_enc_pool.stop() # # ILocalStorage @@ -363,6 +354,19 @@ class Soledad(object): """ return self._dbpool.runU1DBQuery(meth, *args, **kw) + def stream_encryption(self, result, doc): + contentfd = StringIO() + contentfd.write(doc.get_json()) + contentfd.seek(0) + + sikret = self._secrets.remote_storage_secret + crypter = DocEncrypter( + contentfd, doc.doc_id, doc.rev, secret=sikret) + d = crypter.encrypt_stream() + d.addCallback(lambda _: result) + return d + + def put_doc(self, doc): """ Update a document. @@ -385,7 +389,9 @@ class Soledad(object): also be updated. :rtype: twisted.internet.defer.Deferred """ - return self._defer("put_doc", doc) + d = self._defer("put_doc", doc) + d.addCallback(self.stream_encryption, doc) + return d def delete_doc(self, doc): """ @@ -479,7 +485,9 @@ class Soledad(object): # create_doc (and probably to put_doc too). There are cases (mail # payloads for example) in which we already have the encoding in the # headers, so we don't need to guess it. 
- return self._defer("create_doc", content, doc_id=doc_id) + d = self._defer("create_doc", content, doc_id=doc_id) + d.addCallback(lambda doc: self.stream_encryption('', doc)) + return d def create_doc_from_json(self, json, doc_id=None): """ @@ -846,11 +854,6 @@ class Soledad(object): opts, path=sync_db_path, create=True) self._sync_db = sqlcipher.getConnectionPool( sync_opts, extra_queries=self._sync_db_extra_init) - if self._defer_encryption: - # initialize syncing queue encryption pool - self._sync_enc_pool = encdecpool.SyncEncrypterPool( - self._crypto, self._sync_db) - self._sync_enc_pool.start() @property def _sync_db_extra_init(self): @@ -860,11 +863,6 @@ class Soledad(object): :rtype: tuple of strings """ - maybe_create = "CREATE TABLE IF NOT EXISTS %s (%s)" - encr = encdecpool.SyncEncrypterPool - sql_encr_table_query = (maybe_create % ( - encr.TABLE_NAME, encr.FIELD_NAMES)) - return (sql_encr_table_query,) # # ISecretsStorage diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index d81c883b..da067237 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -42,6 +42,9 @@ MAC_KEY_LENGTH = 64 crypto_backend = MultiBackend([OpenSSLBackend()]) +# TODO -- deprecate. +# Secrets still using this. + def encrypt_sym(data, key): """ Encrypt data using AES-256 cipher in CTR mode. @@ -68,7 +71,10 @@ def encrypt_sym(data, key): return binascii.b2a_base64(iv), ciphertext -def decrypt_sym(data, key, iv): +# FIXME decryption of the secrets doc is still using b64 +# Deprecate that, move to hex. + +def decrypt_sym(data, key, iv, encoding='base64'): """ Decrypt some data previously encrypted using AES-256 cipher in CTR mode. @@ -78,7 +84,7 @@ def decrypt_sym(data, key, iv): long). :type key: str :param iv: The initialization vector. - :type iv: long + :type iv: str (it's b64 encoded by secrets, hex by deserializing from wire) :return: The decrypted data. 
:rtype: str @@ -88,7 +94,12 @@ def decrypt_sym(data, key, iv): soledad_assert( len(key) == 32, # 32 x 8 = 256 bits. 'Wrong key size: %s (must be 256 bits long).' % len(key)) - iv = binascii.a2b_base64(iv) + + if encoding == 'base64': + iv = binascii.a2b_base64(iv) + elif encoding == 'hex': + iv = binascii.a2b_hex(iv) + cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) decryptor = cipher.decryptor() return decryptor.update(data) + decryptor.finalize() @@ -159,17 +170,17 @@ class SoledadCrypto(object): doc_id, hashlib.sha256).digest() - def encrypt_doc(self, doc): - """ - Wrapper around encrypt_docstr that accepts the document as argument. - - :param doc: the document. - :type doc: SoledadDocument - """ - key = self.doc_passphrase(doc.doc_id) - - return encrypt_docstr( - doc.get_json(), doc.doc_id, doc.rev, key, self._secret) + #def encrypt_doc(self, doc): + #""" + #Wrapper around encrypt_docstr that accepts the document as argument. +# + #:param doc: the document. + #:type doc: SoledadDocument + #""" + #key = self.doc_passphrase(doc.doc_id) +# + #return encrypt_docstr( + #doc.get_json(), doc.doc_id, doc.rev, key, self._secret) def decrypt_doc(self, doc): """ @@ -194,6 +205,8 @@ class SoledadCrypto(object): # Crypto utilities for a SoledadDocument. # +# TODO should be ported to streaming consumer + def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, mac_method, secret): """ @@ -212,7 +225,7 @@ def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, :param ciphertext: The content of the document. :type ciphertext: str :param enc_scheme: The encryption scheme. - :type enc_scheme: str + :type enc_scheme: bytes :param enc_method: The encryption method. :type enc_method: str :param enc_iv: The encryption initialization vector. 
@@ -231,6 +244,7 @@ def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, soledad_assert(mac_method == crypto.MacMethods.HMAC) except AssertionError: raise crypto.UnknownMacMethodError + template = "{doc_id}{doc_rev}{ciphertext}{enc_scheme}{enc_method}{enc_iv}" content = template.format( doc_id=doc_id, @@ -239,78 +253,82 @@ def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, enc_scheme=enc_scheme, enc_method=enc_method, enc_iv=enc_iv) - return hmac.new( + + digest = hmac.new( doc_mac_key(doc_id, secret), content, hashlib.sha256).digest() + return digest -def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): - """ - Encrypt C{doc}'s content. - - Encrypt doc's contents using AES-256 CTR mode and return a valid JSON - string representing the following: - - { - crypto.ENC_JSON_KEY: '', - crypto.ENC_SCHEME_KEY: 'symkey', - crypto.ENC_METHOD_KEY: crypto.EncryptionMethods.AES_256_CTR, - crypto.ENC_IV_KEY: '', - MAC_KEY: '' - crypto.MAC_METHOD_KEY: 'hmac' - } - - :param docstr: A representation of the document to be encrypted. - :type docstr: str or unicode. - - :param doc_id: The document id. - :type doc_id: str - - :param doc_rev: The document revision. - :type doc_rev: str - - :param key: The key used to encrypt ``data`` (must be 256 bits long). - :type key: str - - :param secret: The Soledad storage secret (used for MAC auth). - :type secret: str - - :return: The JSON serialization of the dict representing the encrypted - content. - :rtype: str - """ - enc_scheme = crypto.EncryptionSchemes.SYMKEY - enc_method = crypto.EncryptionMethods.AES_256_CTR - mac_method = crypto.MacMethods.HMAC - enc_iv, ciphertext = encrypt_sym( - str(docstr), # encryption/decryption routines expect str - key) - mac = binascii.b2a_hex( # store the mac as hex. 
- mac_doc( - doc_id, - doc_rev, - ciphertext, - enc_scheme, - enc_method, - enc_iv, - mac_method, - secret)) +#def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): + #""" + #Encrypt C{doc}'s content. +# + #Encrypt doc's contents using AES-256 CTR mode and return a valid JSON + #string representing the following: +# + #{ + #crypto.ENC_JSON_KEY: '', + #crypto.ENC_SCHEME_KEY: 'symkey', + #crypto.ENC_METHOD_KEY: crypto.EncryptionMethods.AES_256_CTR, + #crypto.ENC_IV_KEY: '', + #MAC_KEY: '' + #crypto.MAC_METHOD_KEY: 'hmac' + #} +# + #:param docstr: A representation of the document to be encrypted. + #:type docstr: str or unicode. +# + #:param doc_id: The document id. + #:type doc_id: str +# + #:param doc_rev: The document revision. + #:type doc_rev: str +# + #:param key: The key used to encrypt ``data`` (must be 256 bits long). + #:type key: str +# + #:param secret: The Soledad storage secret (used for MAC auth). + #:type secret: str +# + #:return: The JSON serialization of the dict representing the encrypted + #content. + #:rtype: str + #""" + #enc_scheme = crypto.EncryptionSchemes.SYMKEY + #enc_method = crypto.EncryptionMethods.AES_256_CTR + #mac_method = crypto.MacMethods.HMAC + #enc_iv, ciphertext = encrypt_sym( + #str(docstr), # encryption/decryption routines expect str + #key) + #mac = binascii.b2a_hex( # store the mac as hex. + #mac_doc( + #doc_id, + #doc_rev, + #ciphertext, + #enc_scheme, + #enc_method, + #enc_iv, + #mac_method, + #secret)) # Return a representation for the encrypted content. In the following, we # convert binary data to hexadecimal representation so the JSON # serialization does not complain about what it tries to serialize. 
- hex_ciphertext = binascii.b2a_hex(ciphertext) - logger.debug("encrypting doc: %s" % doc_id) - return json.dumps({ - crypto.ENC_JSON_KEY: hex_ciphertext, - crypto.ENC_SCHEME_KEY: enc_scheme, - crypto.ENC_METHOD_KEY: enc_method, - crypto.ENC_IV_KEY: enc_iv, - crypto.MAC_KEY: mac, - crypto.MAC_METHOD_KEY: mac_method, - }) + #hex_ciphertext = binascii.b2a_hex(ciphertext) + #log.debug("Encrypting doc: %s" % doc_id) + #return json.dumps({ + #crypto.ENC_JSON_KEY: hex_ciphertext, + #crypto.ENC_SCHEME_KEY: enc_scheme, + #crypto.ENC_METHOD_KEY: enc_method, + #crypto.ENC_IV_KEY: enc_iv, + #crypto.MAC_KEY: mac, + #crypto.MAC_METHOD_KEY: mac_method, + #}) +# +# TODO port to _crypto def _verify_doc_mac(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, mac_method, secret, doc_mac): """ @@ -338,6 +356,7 @@ def _verify_doc_mac(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown. :raise crypto.WrongMacError: Raised when MAC could not be verified. """ + # TODO mac_doc should be ported to Streaming also calculated_mac = mac_doc( doc_id, doc_rev, @@ -347,16 +366,15 @@ def _verify_doc_mac(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, mac_method, secret) - # we compare mac's hashes to avoid possible timing attacks that might - # exploit python's builtin comparison operator behaviour, which fails - # immediatelly when non-matching bytes are found. 
- doc_mac_hash = hashlib.sha256( - binascii.a2b_hex( # the mac is stored as hex - doc_mac)).digest() - calculated_mac_hash = hashlib.sha256(calculated_mac).digest() - - if doc_mac_hash != calculated_mac_hash: - logger.warn("wrong MAC while decrypting doc...") + + ok = hmac.compare_digest( + str(calculated_mac), + binascii.a2b_hex(doc_mac)) + + if not ok: + loggger.warn("wrong MAC while decrypting doc...") + loggger.info(u'given: %s' % doc_mac) + loggger.info(u'calculated: %s' % binascii.b2a_hex(calculated_mac)) raise crypto.WrongMacError("Could not authenticate document's " "contents.") @@ -415,12 +433,17 @@ def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): ]) soledad_assert(expected_keys.issubset(set(doc_dict.keys()))) - ciphertext = binascii.a2b_hex(doc_dict[crypto.ENC_JSON_KEY]) - enc_scheme = doc_dict[crypto.ENC_SCHEME_KEY] - enc_method = doc_dict[crypto.ENC_METHOD_KEY] - enc_iv = doc_dict[crypto.ENC_IV_KEY] - doc_mac = doc_dict[crypto.MAC_KEY] - mac_method = doc_dict[crypto.MAC_METHOD_KEY] + d = doc_dict + decode = binascii.a2b_hex + + enc_scheme = d[crypto.ENC_SCHEME_KEY] + enc_method = d[crypto.ENC_METHOD_KEY] + doc_mac = d[crypto.MAC_KEY] + mac_method = d[crypto.MAC_METHOD_KEY] + enc_iv = d[crypto.ENC_IV_KEY] + + ciphertext_hex = d[crypto.ENC_JSON_KEY] + ciphertext = decode(ciphertext_hex) soledad_assert(enc_scheme == crypto.EncryptionSchemes.SYMKEY) @@ -428,7 +451,8 @@ def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, mac_method, secret, doc_mac) - return decrypt_sym(ciphertext, key, enc_iv) + decr = decrypt_sym(ciphertext, key, enc_iv, encoding='hex') + return decr def is_symmetrically_encrypted(doc): diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index 8eaefa77..b5cfb3ca 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -28,7 +28,6 @@ from 
twisted.internet import defer from leap.soledad.common import soledad_assert from leap.soledad.common.log import getLogger -from leap.soledad.client.crypto import encrypt_docstr from leap.soledad.client.crypto import decrypt_doc_dict @@ -104,150 +103,6 @@ class SyncEncryptDecryptPool(object): return self._sync_db.runQuery(query, *args) -def encrypt_doc_task(doc_id, doc_rev, content, key, secret): - """ - Encrypt the content of the given document. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The serialized content of the document. - :type content: str - :param key: The encryption key. - :type key: str - :param secret: The Soledad storage secret (used for MAC auth). - :type secret: str - - :return: A tuple containing the doc id, revision and encrypted content. - :rtype: tuple(str, str, str) - """ - encrypted_content = encrypt_docstr( - content, doc_id, doc_rev, key, secret) - return doc_id, doc_rev, encrypted_content - - -class SyncEncrypterPool(SyncEncryptDecryptPool): - """ - Pool of workers that spawn subprocesses to execute the symmetric encryption - of documents to be synced. - """ - TABLE_NAME = "docs_tosync" - FIELD_NAMES = "doc_id PRIMARY KEY, rev, content" - - ENCRYPT_LOOP_PERIOD = 2 - - def __init__(self, *args, **kwargs): - """ - Initialize the sync encrypter pool. - """ - SyncEncryptDecryptPool.__init__(self, *args, **kwargs) - # TODO delete already synced files from database - - def start(self): - """ - Start the encrypter pool. - """ - SyncEncryptDecryptPool.start(self) - logger.debug("starting the encryption loop...") - - def stop(self): - """ - Stop the encrypter pool. - """ - - SyncEncryptDecryptPool.stop(self) - - def encrypt_doc(self, doc): - """ - Encrypt document asynchronously then insert it on - local staging database. - - :param doc: The document to be encrypted. 
- :type doc: SoledadDocument - """ - soledad_assert(self._crypto is not None, "need a crypto object") - docstr = doc.get_json() - key = self._crypto.doc_passphrase(doc.doc_id) - secret = self._crypto.secret - args = doc.doc_id, doc.rev, docstr, key, secret - # encrypt asynchronously - # TODO use dedicated threadpool / move to ampoule - d = threads.deferToThread( - encrypt_doc_task, *args) - d.addCallback(self._encrypt_doc_cb) - return d - - def _encrypt_doc_cb(self, result): - """ - Insert results of encryption routine into the local sync database. - - :param result: A tuple containing the doc id, revision and encrypted - content. - :type result: tuple(str, str, str) - """ - doc_id, doc_rev, content = result - return self._insert_encrypted_local_doc(doc_id, doc_rev, content) - - def _insert_encrypted_local_doc(self, doc_id, doc_rev, content): - """ - Insert the contents of the encrypted doc into the local sync - database. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The serialized content of the document. - :type content: str - """ - query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?)" \ - % (self.TABLE_NAME,) - return self._runOperation(query, (doc_id, doc_rev, content)) - - @defer.inlineCallbacks - def get_encrypted_doc(self, doc_id, doc_rev): - """ - Get an encrypted document from the sync db. - - :param doc_id: The id of the document. - :type doc_id: str - :param doc_rev: The revision of the document. - :type doc_rev: str - - :return: A deferred that will fire with the encrypted content of the - document or None if the document was not found in the sync - db. - :rtype: twisted.internet.defer.Deferred - """ - query = "SELECT content FROM %s WHERE doc_id=? and rev=?" 
\ - % self.TABLE_NAME - result = yield self._runQuery(query, (doc_id, doc_rev)) - if result: - logger.debug("found doc on sync db: %s" % doc_id) - val = result.pop() - defer.returnValue(val[0]) - logger.debug("did not find doc on sync db: %s" % doc_id) - defer.returnValue(None) - - def delete_encrypted_doc(self, doc_id, doc_rev): - """ - Delete an encrypted document from the sync db. - - :param doc_id: The id of the document. - :type doc_id: str - :param doc_rev: The revision of the document. - :type doc_rev: str - - :return: A deferred that will fire when the operation in the database - has finished. - :rtype: twisted.internet.defer.Deferred - """ - query = "DELETE FROM %s WHERE doc_id=? and rev=?" \ - % self.TABLE_NAME - self._runOperation(query, (doc_id, doc_rev)) - - def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret, idx): """ diff --git a/client/src/leap/soledad/client/http_target/__init__.py b/client/src/leap/soledad/client/http_target/__init__.py index 94de2feb..5dc87fcb 100644 --- a/client/src/leap/soledad/client/http_target/__init__.py +++ b/client/src/leap/soledad/client/http_target/__init__.py @@ -54,7 +54,7 @@ class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): written to the main database. """ def __init__(self, url, source_replica_uid, creds, crypto, cert_file, - sync_db=None, sync_enc_pool=None): + sync_db=None): """ Initialize the sync target. @@ -78,10 +78,6 @@ class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): instead of retreiving it from the dedicated database. :type sync_db: Sqlite handler - :param sync_enc_pool: The encryption pool to use to defer encryption. - If None is passed the encryption will not be - deferred. 
- :type sync_enc_pool: leap.soledad.client.encdecpool.SyncEncrypterPool """ if url.endswith("/"): url = url[:-1] @@ -92,7 +88,6 @@ class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): self.set_creds(creds) self._crypto = crypto self._sync_db = sync_db - self._sync_enc_pool = sync_enc_pool self._insert_doc_cb = None # asynchronous encryption/decryption attributes self._decryption_callback = None diff --git a/client/src/leap/soledad/client/http_target/api.py b/client/src/leap/soledad/client/http_target/api.py index 00b943e1..2d51d94f 100644 --- a/client/src/leap/soledad/client/http_target/api.py +++ b/client/src/leap/soledad/client/http_target/api.py @@ -42,8 +42,6 @@ class SyncTargetAPI(SyncTarget): @defer.inlineCallbacks def close(self): - if self._sync_enc_pool: - self._sync_enc_pool.stop() yield self._http.close() @property @@ -68,10 +66,6 @@ class SyncTargetAPI(SyncTarget): def _base_header(self): return self._auth_header.copy() if self._auth_header else {} - @property - def _defer_encryption(self): - return self._sync_enc_pool is not None - def _http_request(self, url, method='GET', body=None, headers=None, content_type=None, body_reader=readBody, body_producer=None): diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 50e89a2a..541ec1d2 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -146,7 +146,20 @@ class HTTPDocFetcher(object): # make sure we have replica_uid from fresh new dbs if self._ensure_callback and 'replica_uid' in metadata: self._ensure_callback(metadata['replica_uid']) - return number_of_changes, new_generation, new_transaction_id + # parse incoming document info + entries = [] + for index in xrange(1, len(data[1:]), 2): + try: + line, comma = utils.check_and_strip_comma(data[index]) + content, _ = utils.check_and_strip_comma(data[index + 1]) + entry = json.loads(line) + 
entries.append((entry['id'], entry['rev'], content, + entry['gen'], entry['trans_id'])) + except (IndexError, KeyError): + raise errors.BrokenSyncStream + return new_generation, new_transaction_id, number_of_changes, \ + entries + def _emit_receive_status(user_data, received_docs, total): diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index fcda9bd7..86744ec2 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -15,10 +15,13 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . import json +import os from twisted.internet import defer +from twisted.persisted import dirdbm from leap.soledad.common.log import getLogger +from leap.common.config import get_path_prefix from leap.soledad.client.events import emit_async from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS from leap.soledad.client.http_target.support import RequestBody @@ -39,6 +42,8 @@ class HTTPDocSender(object): # Any class inheriting from this one should provide a meaningful attribute # if the sync status event is meant to be used somewhere else. 
+ staging_path = os.path.join(get_path_prefix(), 'leap', 'soledad', 'staging') + uuid = 'undefined' userid = 'undefined' diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 1eb6f31d..ad1db2b8 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -266,7 +266,11 @@ class SoledadSecrets(object): # read storage secrets from file content = None with open(self._secrets_path, 'r') as f: - content = json.loads(f.read()) + raw = f.read() + raw = raw.replace('\n', '') + content = json.loads(raw) + + print "LOADING", content _, active_secret, version = self._import_recovery_document(content) self._maybe_set_active_secret(active_secret) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index ba341bbf..8cbc3aea 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -266,26 +266,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): 'ALTER TABLE document ' 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') - # - # Document operations - # - - def put_doc(self, doc): - """ - Overwrite the put_doc method, to enqueue the modified document for - encryption before sync. - - :param doc: The document to be put. - :type doc: u1db.Document - - :return: The new document revision. - :rtype: str - """ - doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(self, doc) - if self.defer_encryption: - # TODO move to api? 
- self._sync_enc_pool.encrypt_doc(doc) - return doc_rev # # SQLCipher API methods @@ -426,14 +406,13 @@ class SQLCipherU1DBSync(SQLCipherDatabase): ENCRYPT_LOOP_PERIOD = 1 def __init__(self, opts, soledad_crypto, replica_uid, cert_file, - defer_encryption=False, sync_db=None, sync_enc_pool=None): + sync_db=None): self._opts = opts self._path = opts.path self._crypto = soledad_crypto self.__replica_uid = replica_uid self._cert_file = cert_file - self._sync_enc_pool = sync_enc_pool self._sync_db = sync_db @@ -538,8 +517,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): creds=creds, crypto=self._crypto, cert_file=self._cert_file, - sync_db=self._sync_db, - sync_enc_pool=self._sync_enc_pool)) + sync_db=self._sync_db)) self._syncers[url] = (h, syncer) # in order to reuse the same synchronizer multiple times we have to # reset its state (i.e. the number of documents received from target @@ -597,14 +575,12 @@ class SoledadSQLCipherWrapper(SQLCipherDatabase): It can be used from adbapi to initialize a soledad database after getting a regular connection to a sqlcipher database. 
""" - def __init__(self, conn, opts, sync_enc_pool): + def __init__(self, conn, opts): self._db_handle = conn self._real_replica_uid = None self._ensure_schema() self.set_document_factory(soledad_doc_factory) self._prime_replica_uid() - self.defer_encryption = opts.defer_encryption - self._sync_enc_pool = sync_enc_pool def _assert_db_is_encrypted(opts): diff --git a/testing/tests/client/test_crypto2.py b/testing/tests/client/test_crypto2.py new file mode 100644 index 00000000..ae280020 --- /dev/null +++ b/testing/tests/client/test_crypto2.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# test_crypto2.py +# Copyright (C) 2016 LEAP Encryption Access Project +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" +Tests for the _crypto module +""" + +import StringIO + + +import leap.soledad.client +from leap.soledad.client import _crypto + + +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +from cryptography.hazmat.backends import default_backend + + +def _aes_encrypt(key, iv, data): + backend = default_backend() + cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=backend) + encryptor = cipher.encryptor() + return encryptor.update(data) + encryptor.finalize() + + +def test_chunked_encryption(): + key = 'A' * 32 + iv = 'A' * 16 + data = ( + "You can't come up against " + "the world's most powerful intelligence " + "agencies and not accept the risk. 
" + "If they want to get you, over time " + "they will.") + + fd = StringIO.StringIO() + aes = _crypto.AESWriter(key, fd, iv) + + block = 16 + + for i in range(len(data)/block): + chunk = data[i * block:(i+1)*block] + aes.write(chunk) + aes.end() + + ciphertext_chunked = fd.getvalue() + ciphertext = _aes_encrypt(key, iv, data) + + assert ciphertext_chunked == ciphertext -- cgit v1.2.3 From 510c0ba3a0c0ade334090a1c36dab9ccae0ba1b4 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 16 Sep 2016 19:24:55 -0400 Subject: [feature] blob encryptor / decryptor --- client/src/leap/soledad/client/_crypto.py | 316 ++++++++++++++++++++---------- client/src/leap/soledad/client/api.py | 8 +- testing/tests/client/test_crypto2.py | 126 +++++++++++- 3 files changed, 334 insertions(+), 116 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index ed861fdd..61a190c7 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -24,9 +24,14 @@ import base64 import hashlib import hmac import os +import struct +import time +from io import BytesIO from cStringIO import StringIO +import six + from twisted.internet import defer from twisted.internet import interfaces from twisted.internet import reactor @@ -35,6 +40,9 @@ from twisted.persisted import dirdbm from twisted.web import client from twisted.web.client import FileBodyProducer +from cryptography.exceptions import InvalidSignature +from cryptography.hazmat.primitives import hashes +from cryptography.hazmat.primitives.hmac import HMAC from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from cryptography.hazmat.backends.multibackend import MultiBackend from cryptography.hazmat.backends.openssl.backend \ @@ -53,63 +61,223 @@ MAC_KEY_LENGTH = 64 crypto_backend = MultiBackend([OpenSSLBackend()]) -class EncryptionError(Exception): +class ENC_SCHEME: + symkey = 1 + + +class ENC_METHOD: + aes_256_ctr = 1 + + 
+class EncryptionDecryptionError(Exception): pass -class AESWriter(object): +class InvalidBlob(Exception): + pass - implements(interfaces.IConsumer) - def __init__(self, key, fd, iv=None): + +class BlobEncryptor(object): + + """ + Encrypts a payload associated with a given Document. + """ + + def __init__(self, doc_info, content_fd, result=None, secret=None, iv=None): + if iv is None: iv = os.urandom(16) + else: + log.warn('Using a fixed IV. Use only for testing!') + self.iv = iv + if not secret: + raise EncryptionDecryptionError('no secret given') + + self.doc_id = doc_info.doc_id + self.rev = doc_info.rev + + self._producer = FileBodyProducer(content_fd, readSize=2**8) + + self._preamble = BytesIO() + if result is None: + result = BytesIO() + self.result = result + + sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) + mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) + + self._aes_fd = BytesIO() + self._aes = AESEncryptor(sym_key, self.iv, self._aes_fd) + self._hmac = HMACWriter(mac_key) + self._write_preamble() + + self._crypter = VerifiedEncrypter(self._aes, self._hmac) + + def encrypt(self): + d = self._producer.startProducing(self._crypter) + d.addCallback(self._end_crypto_stream) + return d + + def _write_preamble(self): + + def write(data): + self._preamble.write(data) + self._hmac.write(data) + + current_time = int(time.time()) + + write(b'\x80') + write(struct.pack( + 'Qbb', + current_time, + ENC_SCHEME.symkey, + ENC_METHOD.aes_256_ctr)) + write(self.iv) + write(self.doc_id) + write(self.rev) + + def _end_crypto_stream(self, ignored): + self._aes.end() + self._hmac.end() + + preamble = self._preamble.getvalue() + encrypted = self._aes_fd.getvalue() + hmac = self._hmac.result.getvalue() + + self.result.write( + base64.urlsafe_b64encode(preamble + encrypted + hmac)) + self._preamble.close() + self._aes_fd.close() + self._hmac.result.close() + self.result.seek(0) + return defer.succeed('ok') + + +class BlobDecryptor(object): + """ + Decrypts an 
encrypted blob associated with a given Document. + + Will raise an exception if the blob doesn't have the expected structure, or + if the HMAC doesn't verify. + """ + + def __init__(self, doc_info, ciphertext_fd, result=None, + secret=None): + self.doc_id = doc_info.doc_id + self.rev = doc_info.rev + + self.ciphertext = ciphertext_fd + + self.sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) + self.mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) + + if result is None: + result = BytesIO() + self.result = result + + def decrypt(self): + + try: + data = base64.urlsafe_b64decode(self.ciphertext.getvalue()) + except (TypeError, binascii.Error): + raise InvalidBlob + self.ciphertext.close() + + current_time = int(time.time()) + if not data or six.indexbytes(data, 0) != 0x80: + raise InvalidBlob + try: + ts, sch, meth = struct.unpack("Qbb", data[1:11]) + except struct.error: + raise InvalidBlob + + # TODO check timestamp + if sch != ENC_SCHEME.symkey: + raise InvalidBlob('invalid scheme') + # TODO should adapt the assymetric-gpg too, rigth? 
+ if meth != ENC_METHOD.aes_256_ctr: + raise InvalidBlob('invalid encryption scheme') + + iv = data[11:27] + docidlen = len(self.doc_id) + ciph_idx = 26 + docidlen + doc_id = data[26:ciph_idx] + revlen = len(self.rev) + rev_idx = ciph_idx + 1 + revlen + rev = data[ciph_idx + 1:rev_idx] + + if rev != self.rev: + raise InvalidBlob('invalid revision') + + ciphertext = data[rev_idx:-64] + hmac = data[-64:] + + h = HMAC(self.mac_key, hashes.SHA512(), backend=crypto_backend) + h.update(data[:-64]) + try: + h.verify(hmac) + except InvalidSignature: + raise InvalidBlob('HMAC could not be verifed') + + decryptor = _get_aes_ctr_cipher(self.sym_key, iv).decryptor() + + # TODO pass chunks, streaming, instead + # Use AESDecryptor below + self.result.write(decryptor.update(ciphertext)) + self.result.write(decryptor.finalize()) + return self.result + + +class AESEncryptor(object): + + implements(interfaces.IConsumer) + + def __init__(self, key, iv, fd=None): if len(key) != 32: - raise EncryptionError('key is not 256 bits') + raise EncryptionDecryptionError('key is not 256 bits') if len(iv) != 16: - raise EncryptionError('iv is not 128 bits') + raise EncryptionDecryptionError('iv is not 128 bits') cipher = _get_aes_ctr_cipher(key, iv) self.encryptor = cipher.encryptor() - + + if fd is None: + fd = BytesIO() self.fd = fd + self.done = False - self.deferred = defer.Deferred() def write(self, data): encrypted = self.encryptor.update(data) + encode = binascii.b2a_hex self.fd.write(encrypted) return encrypted def end(self): if not self.done: - self.encryptor.finalize() - self.deferred.callback(self.fd) + final = self.encryptor.finalize() self.done = True class HMACWriter(object): implements(interfaces.IConsumer) + hashtype = 'sha512' def __init__(self, key): - self.done = False - self.deferred = defer.Deferred() - - self.digest = '' - self._hmac = hmac.new(key, '', hashlib.sha256) + self._hmac = hmac.new(key, '', getattr(hashlib, self.hashtype)) + self.result = BytesIO('') def 
write(self, data): self._hmac.update(data) def end(self): - if not self.done: - self.digest = self._hmac.digest() - self.deferred.callback(self.digest) - self.done = True + self.result.write(self._hmac.digest()) + -class EncryptAndHMAC(object): +class VerifiedEncrypter(object): implements(interfaces.IConsumer) @@ -122,99 +290,39 @@ class EncryptAndHMAC(object): self.hmac.write(enc_chunk) +class AESDecryptor(object): -class DocEncrypter(object): - - staging_path = os.path.join(get_path_prefix(), 'leap', 'soledad', 'staging') - staged_template = """{"_enc_scheme": "symkey", "_enc_method": - "aes-256-ctr", "_mac_method": "hmac", "_mac_hash": "sha256", - "_encoding": "ENCODING", "_enc_json": "CIPHERTEXT", "_enc_iv": "IV", "_mac": "MAC"}""" - - - def __init__(self, content_fd, doc_id, rev, secret=None): - self._content_fd = content_fd - self._contentFileProducer = FileBodyProducer( - content_fd, readSize=2**8) - self.doc_id = doc_id - self.rev = rev - self._encrypted_fd = StringIO() - - self.iv = os.urandom(16) - - sym_key = _get_sym_key_for_doc(doc_id, secret) - mac_key = _get_mac_key_for_doc(doc_id, secret) + implements(interfaces.IConsumer) - crypter = AESWriter(sym_key, self._encrypted_fd, self.iv) - hmac = HMACWriter(mac_key) + def __init__(self, key, iv, fd): + if iv is None: + iv = os.urandom(16) + if len(key) != 32: + raise EncryptionhDecryptionError('key is not 256 bits') + if len(iv) != 16: + raise EncryptionDecryptionError('iv is not 128 bits') - self.crypter_consumer = crypter - self.hmac_consumer = hmac + cipher = _get_aes_ctr_cipher(key, iv) + self.decryptor = cipher.decryptor() - self._prime_hmac() - self.encrypt_and_mac_consumer = EncryptAndHMAC(crypter, hmac) + self.fd = fd + self.done = False + self.deferred = defer.Deferred() - def encrypt_stream(self): - d = self._contentFileProducer.startProducing( - self.encrypt_and_mac_consumer) - d.addCallback(self.end_crypto_stream) - d.addCallback(self.persist_encrypted_doc) - return d - def 
end_crypto_stream(self, ignored): - self.crypter_consumer.end() - self._post_hmac() - self.hmac_consumer.end() - return defer.succeed('ok') + def write(self, data): + decrypted = self.decryptor.update(data) + self.fd.write(decrypted) + return decrypted - # TODO make this pluggable: - # pass another class (CryptoSerializer) to which we pass - # the doc info, the encrypted_fd and the mac_digest + def end(self): + if not self.done: + self.decryptor.finalize() + self.deferred.callback(self.fd) + self.done = True - def persist_encrypted_doc(self, ignored, encoding='hex'): - # TODO -- transition to b64: needs migration FIXME - if encoding == 'b64': - encode = binascii.b2a_base64 - elif encoding == 'hex': - encode = binascii.b2a_hex - else: - raise RuntimeError('Unknown encoding: %s' % encoding) - - # TODO to avoid blocking on io, this can use a - # version of dbm that chunks the writes to the - # disk fd by using the same FileBodyProducer strategy - # that we're using here, long live to the Cooperator. 
- - - db = dirdbm.DirDBM(self.staging_path) - key = '{doc_id}@{rev}'.format( - doc_id=self.doc_id, rev=self.rev) - ciphertext = encode(self._encrypted_fd.getvalue()) - value = self.staged_template.replace( - 'ENCODING', encoding).replace( - 'CIPHERTEXT', ciphertext).replace( - 'IV', encode(self.iv)).replace( - 'MAC', encode(self.hmac_consumer.digest)).replace( - '\n', '') - self._encrypted_fd.seek(0) - - log.debug('persisting %s' % key) - db[key] = value - - self._content_fd.close() - self._encrypted_fd.close() - - def _prime_hmac(self): - pre = '{doc_id}{rev}'.format( - doc_id=self.doc_id, rev=self.rev) - self.hmac_consumer.write(pre) - - def _post_hmac(self): - post = '{enc_scheme}{enc_method}{enc_iv}'.format( - enc_scheme='symkey', - enc_method='aes-256-ctr', - enc_iv=binascii.b2a_hex(self.iv)) - self.hmac_consumer.write(post) +# utils def _hmac_sha256(key, data): diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 6b257669..98613df2 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -61,7 +61,7 @@ from leap.soledad.client.secrets import SoledadSecrets from leap.soledad.client.shared_db import SoledadSharedDatabase from leap.soledad.client import sqlcipher from leap.soledad.client import encdecpool -from leap.soledad.client._crypto import DocEncrypter +#from leap.soledad.client._crypto import DocEncrypter logger = getLogger(__name__) @@ -360,8 +360,10 @@ class Soledad(object): contentfd.seek(0) sikret = self._secrets.remote_storage_secret - crypter = DocEncrypter( - contentfd, doc.doc_id, doc.rev, secret=sikret) + + # TODO use BlobEncrypter + #crypter = DocEncrypter( + #contentfd, doc.doc_id, doc.rev, secret=sikret) d = crypter.encrypt_stream() d.addCallback(lambda _: result) return d diff --git a/testing/tests/client/test_crypto2.py b/testing/tests/client/test_crypto2.py index ae280020..f0f6c4af 100644 --- a/testing/tests/client/test_crypto2.py +++ 
b/testing/tests/client/test_crypto2.py @@ -19,16 +19,28 @@ Tests for the _crypto module """ +import base64 +import binascii +import time +import struct import StringIO - import leap.soledad.client from leap.soledad.client import _crypto - from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from cryptography.hazmat.backends import default_backend +from twisted.trial import unittest + + +snowden1 = ( + "You can't come up against " + "the world's most powerful intelligence " + "agencies and not accept the risk. " + "If they want to get you, over time " + "they will.") + def _aes_encrypt(key, iv, data): backend = default_backend() @@ -36,20 +48,21 @@ def _aes_encrypt(key, iv, data): encryptor = cipher.encryptor() return encryptor.update(data) + encryptor.finalize() +def _aes_decrypt(key, iv, data): + backend = default_backend() + cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=backend) + decryptor = cipher.decryptor() + return decryptor.update(data) + decryptor.finalize() + def test_chunked_encryption(): key = 'A' * 32 iv = 'A' * 16 - data = ( - "You can't come up against " - "the world's most powerful intelligence " - "agencies and not accept the risk. 
" - "If they want to get you, over time " - "they will.") fd = StringIO.StringIO() - aes = _crypto.AESWriter(key, fd, iv) + aes = _crypto.AESEncryptor(key, iv, fd) + data = snowden1 block = 16 for i in range(len(data)/block): @@ -61,3 +74,98 @@ def test_chunked_encryption(): ciphertext = _aes_encrypt(key, iv, data) assert ciphertext_chunked == ciphertext + + +def test_decrypt(): + key = 'A' * 32 + iv = 'A' * 16 + + data = snowden1 + block = 16 + + ciphertext = _aes_encrypt(key, iv, data) + + fd = StringIO.StringIO() + aes = _crypto.AESDecryptor(key, iv, fd) + + for i in range(len(ciphertext)/block): + chunk = ciphertext[i * block:(i+1)*block] + aes.write(chunk) + aes.end() + + cleartext_chunked = fd.getvalue() + assert cleartext_chunked == data + + + +class BlobTestCase(unittest.TestCase): + + class doc_info: + doc_id = 'D-deadbeef' + rev = '397932e0c77f45fcb7c3732930e7e9b2:1' + + def test_blob_encryptor(self): + + inf = StringIO.StringIO() + inf.write(snowden1) + inf.seek(0) + outf = StringIO.StringIO() + + blob = _crypto.BlobEncryptor( + self.doc_info, inf, result=outf, + secret='A' * 96, iv='B'*16) + + d = blob.encrypt() + d.addCallback(self._test_blob_encryptor_cb, outf) + return d + + def _test_blob_encryptor_cb(self, _, outf): + encrypted = outf.getvalue() + data = base64.urlsafe_b64decode(encrypted) + + assert data[0] == '\x80' + ts, sch, meth = struct.unpack( + 'Qbb', data[1:11]) + assert sch == 1 + assert meth == 1 + iv = data[11:27] + assert iv == 'B' * 16 + doc_id = data[27:37] + assert doc_id == 'D-deadbeef' + + rev = data[37:71] + assert rev == self.doc_info.rev + + ciphertext = data[71:-64] + aes_key = _crypto._get_sym_key_for_doc( + self.doc_info.doc_id, 'A'*96) + assert ciphertext == _aes_encrypt(aes_key, 'B'*16, snowden1) + + decrypted = _aes_decrypt(aes_key, 'B'*16, ciphertext) + assert str(decrypted) == snowden1 + + def test_blob_decryptor(self): + + inf = StringIO.StringIO() + inf.write(snowden1) + inf.seek(0) + outf = StringIO.StringIO() + + 
blob = _crypto.BlobEncryptor( + self.doc_info, inf, result=outf, + secret='A' * 96, iv='B' * 16) + + def do_decrypt(_, outf): + decryptor = _crypto.BlobDecryptor( + self.doc_info, outf, + secret='A' * 96) + d = decryptor.decrypt() + return d + + d = blob.encrypt() + d.addCallback(do_decrypt, outf) + d.addCallback(self._test_blob_decryptor_cb) + return d + + def _test_blob_decryptor_cb(self, decrypted): + assert decrypted.getvalue() == snowden1 -- cgit v1.2.3 From d7740272be029db6229ec5372f277d2934815e98 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 16 Sep 2016 21:43:35 -0400 Subject: [refactor] adapt fetcher to decryptor --- client/src/leap/soledad/client/_crypto.py | 36 ++++++++++- client/src/leap/soledad/client/api.py | 73 +++++++++++++--------- client/src/leap/soledad/client/crypto.py | 72 +-------------------- .../src/leap/soledad/client/http_target/fetch.py | 47 +++++--------- client/src/leap/soledad/client/http_target/send.py | 5 +- server/src/leap/soledad/server/sync.py | 12 ++++ 6 files changed, 110 insertions(+), 135 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index 61a190c7..a2de0ae1 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -77,6 +77,31 @@ class InvalidBlob(Exception): pass +docinfo = namedtuple('docinfo', 'doc_id rev') + + +class SoledadCrypto(object): + + def __init__(self, secret): + self.secret = secret + + def encrypt_doc(self, doc): + content = BytesIO() + content.write(str(doc.get_json())) + info = docinfo(doc.doc_id, doc.rev) + del doc + encryptor = BlobEncryptor(info, content, secret=self.secret) + return encryptor.encrypt() + + def decrypt_doc(self, doc): + info = docinfo(doc.doc_id, doc.rev) + ciphertext = BytesIO() + ciphertext.write(doc.get_json()) + ciphertext.seek(0) + del doc + decryptor = BlobDecryptor(info, ciphertext, secret=self.secret) + return decryptor.decrypt() + class BlobEncryptor(object): 
@@ -134,8 +159,8 @@ class BlobEncryptor(object): ENC_SCHEME.symkey, ENC_METHOD.aes_256_ctr)) write(self.iv) - write(self.doc_id) - write(self.rev) + write(str(self.doc_id)) + write(str(self.rev)) def _end_crypto_stream(self, ignored): self._aes.end() @@ -177,7 +202,6 @@ class BlobDecryptor(object): self.result = result def decrypt(self): - try: data = base64.urlsafe_b64decode(self.ciphertext.getvalue()) except (TypeError, binascii.Error): @@ -341,3 +365,9 @@ def _get_sym_key_for_doc(doc_id, secret): def _get_aes_ctr_cipher(key, iv): return Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) + + +def is_symmetrically_encrypted(payload): + header = base64.urlsafe_b64decode(enc[:15] + '===') + ts, sch, meth = struct.unpack('Qbb', header[1:11]) + return sch == ENC_SCHEME.symkey diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 98613df2..74ebaddc 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -56,12 +56,11 @@ from leap.soledad.common.errors import DatabaseAccessError from leap.soledad.client import adbapi from leap.soledad.client import events as soledad_events from leap.soledad.client import interfaces as soledad_interfaces -from leap.soledad.client.crypto import SoledadCrypto +from leap.soledad.client import sqlcipher from leap.soledad.client.secrets import SoledadSecrets from leap.soledad.client.shared_db import SoledadSharedDatabase -from leap.soledad.client import sqlcipher -from leap.soledad.client import encdecpool -#from leap.soledad.client._crypto import DocEncrypter +from leap.soledad.client._crypto import SoledadCrypto +from leap.soledad.client._crypto import BlobEncryptor logger = getLogger(__name__) @@ -308,8 +307,7 @@ class Soledad(object): replica_uid = self._dbpool.replica_uid self._dbsyncer = sqlcipher.SQLCipherU1DBSync( self._sqlcipher_opts, self._crypto, replica_uid, - SOLEDAD_CERT, - sync_db=self._sync_db) + SOLEDAD_CERT) def 
sync_stats(self): sync_phase = 0 @@ -354,19 +352,38 @@ class Soledad(object): """ return self._dbpool.runU1DBQuery(meth, *args, **kw) - def stream_encryption(self, result, doc): - contentfd = StringIO() - contentfd.write(doc.get_json()) - contentfd.seek(0) - - sikret = self._secrets.remote_storage_secret - - # TODO use BlobEncrypter - #crypter = DocEncrypter( - #contentfd, doc.doc_id, doc.rev, secret=sikret) - d = crypter.encrypt_stream() - d.addCallback(lambda _: result) - return d + #def stream_encryption(self, result, doc): + #print 'streaming encryption' + #contentfd = StringIO() + #contentfd.write(str(doc.get_json())) + #contentfd.seek(0) +# + #sikret = self._secrets.remote_storage_secret + #docinfo = DocInfo(doc.doc_id, doc.rev) +# + # ------------------------------------------------------- + # TODO need to pass a fd to stage this!!! + # in the long run, we could connect this to the uploader + # but in the meantime, I thikn it's easy if we just + # serialize this to disk. + # + # To do this: + # 1. open a file, with a known name: + # soledad/staging/docid@rev.bin + # 2. pass that fd to BlobEncrypter as result (it's a fd) + # 3. On the upload part of the sync, just open again a read-only fd + # to this staging path and read it. + # that's the encrypted blob, ready to upload! + # ------------------------------------------------------- +# + #crypter = BlobEncryptor( + #docinfo, contentfd, secret=sikret) + #del doc +# +# + #d = crypter.encrypt() + #d.addCallback(lambda _: result) + #return d def put_doc(self, doc): @@ -392,7 +409,6 @@ class Soledad(object): :rtype: twisted.internet.defer.Deferred """ d = self._defer("put_doc", doc) - d.addCallback(self.stream_encryption, doc) return d def delete_doc(self, doc): @@ -488,7 +504,6 @@ class Soledad(object): # payloads for example) in which we already have the encoding in the # headers, so we don't need to guess it. 
d = self._defer("create_doc", content, doc_id=doc_id) - d.addCallback(lambda doc: self.stream_encryption('', doc)) return d def create_doc_from_json(self, json, doc_id=None): @@ -857,14 +872,6 @@ class Soledad(object): self._sync_db = sqlcipher.getConnectionPool( sync_opts, extra_queries=self._sync_db_extra_init) - @property - def _sync_db_extra_init(self): - """ - Queries for creating tables for the local sync documents db if needed. - They are passed as extra initialization to initialize_sqlciphjer_db - - :rtype: tuple of strings - """ # # ISecretsStorage @@ -1034,5 +1041,13 @@ class VerifiedHTTPSConnection(httplib.HTTPSConnection): match_hostname(self.sock.getpeercert(), self.host) +# TODO move this to a common module + +class DocInfo: + def __init__(self, doc_id, rev): + self.doc_id = doc_id + self.rev = rev + + old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index da067237..55c49d9c 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -130,77 +130,6 @@ def doc_mac_key(doc_id, secret): hashlib.sha256).digest() -class SoledadCrypto(object): - """ - General cryptographic functionality encapsulated in a - object that can be passed along. - """ - def __init__(self, secret): - """ - Initialize the crypto object. - - :param secret: The Soledad remote storage secret. - :type secret: str - """ - self._secret = secret - - def doc_mac_key(self, doc_id): - return doc_mac_key(doc_id, self._secret) - - def doc_passphrase(self, doc_id): - """ - Generate a passphrase for symmetric encryption of document's contents. - - The password is derived using HMAC having sha256 as underlying hash - function. 
The key used for HMAC are the first - C{soledad.REMOTE_STORAGE_SECRET_LENGTH} bytes of Soledad's storage - secret stripped from the first MAC_KEY_LENGTH characters. The HMAC - message is C{doc_id}. - - :param doc_id: The id of the document that will be encrypted using - this passphrase. - :type doc_id: str - - :return: The passphrase. - :rtype: str - """ - soledad_assert(self._secret is not None) - return hmac.new( - self._secret[MAC_KEY_LENGTH:], - doc_id, - hashlib.sha256).digest() - - #def encrypt_doc(self, doc): - #""" - #Wrapper around encrypt_docstr that accepts the document as argument. -# - #:param doc: the document. - #:type doc: SoledadDocument - #""" - #key = self.doc_passphrase(doc.doc_id) -# - #return encrypt_docstr( - #doc.get_json(), doc.doc_id, doc.rev, key, self._secret) - - def decrypt_doc(self, doc): - """ - Wrapper around decrypt_doc_dict that accepts the document as argument. - - :param doc: the document. - :type doc: SoledadDocument - - :return: json string with the decrypted document - :rtype: str - """ - key = self.doc_passphrase(doc.doc_id) - return decrypt_doc_dict( - doc.content, doc.doc_id, doc.rev, key, self._secret) - - @property - def secret(self): - return self._secret - - # # Crypto utilities for a SoledadDocument. # @@ -455,6 +384,7 @@ def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): return decr +# TODO deprecate def is_symmetrically_encrypted(doc): """ Return True if the document was symmetrically encrypted. 
diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 541ec1d2..2e54ca70 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -18,9 +18,9 @@ from twisted.internet import defer from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS from leap.soledad.client.events import emit_async -from leap.soledad.client.crypto import is_symmetrically_encrypted from leap.soledad.client.http_target.support import RequestBody from leap.soledad.common.log import getLogger +from leap.soledad.client._crypto import is_symmetrically_encrypted from leap.soledad.common.document import SoledadDocument from leap.soledad.common.l2db import errors @@ -50,6 +50,8 @@ class HTTPDocFetcher(object): def _receive_docs(self, last_known_generation, last_known_trans_id, ensure_callback, sync_id): + print 'receiving.....', sync_id + new_generation = last_known_generation new_transaction_id = last_known_trans_id @@ -90,6 +92,7 @@ class HTTPDocFetcher(object): content_type='application/x-soledad-sync-get', body_reader=body_reader) + @defer.inlineCallbacks def _doc_parser(self, doc_info, content): """ Insert a received document into the local replica. 
@@ -102,13 +105,19 @@ class HTTPDocFetcher(object): :type total: int """ # decrypt incoming document and insert into local database - # --------------------------------------------------------- - # symmetric decryption of document's contents - # --------------------------------------------------------- # If arriving content was symmetrically encrypted, we decrypt + doc = SoledadDocument(doc_info['id'], doc_info['rev'], content) - if is_symmetrically_encrypted(doc): - doc.set_json(self._crypto.decrypt_doc(doc)) + + print "GOT.....", doc + + payload = doc['raw'] + if is_symmetrically_encrypted(payload): + print "SHOULD DECRYPT!!!!", content + decrypted = yield self._crypto.decrypt_doc(doc) + doc.set_json(decrypted) + + # TODO insert blobs here on the blob backend self._insert_doc_cb(doc, doc_info['gen'], doc_info['trans_id']) self._received_docs += 1 user_data = {'uuid': self.uuid, 'userid': self.userid} @@ -125,17 +134,6 @@ class HTTPDocFetcher(object): content, gen, trans_id) :rtype: tuple """ - # decode incoming stream - # parts = response.splitlines() - # if not parts or parts[0] != '[' or parts[-1] != ']': - # raise errors.BrokenSyncStream - # data = parts[1:-1] - # decode metadata - # try: - # line, comma = utils.check_and_strip_comma(data[0]) - # metadata = None - # except (IndexError): - # raise errors.BrokenSyncStream try: # metadata = json.loads(line) new_generation = metadata['new_generation'] @@ -146,20 +144,7 @@ class HTTPDocFetcher(object): # make sure we have replica_uid from fresh new dbs if self._ensure_callback and 'replica_uid' in metadata: self._ensure_callback(metadata['replica_uid']) - # parse incoming document info - entries = [] - for index in xrange(1, len(data[1:]), 2): - try: - line, comma = utils.check_and_strip_comma(data[index]) - content, _ = utils.check_and_strip_comma(data[index + 1]) - entry = json.loads(line) - entries.append((entry['id'], entry['rev'], content, - entry['gen'], entry['trans_id'])) - except (IndexError, KeyError): - 
raise errors.BrokenSyncStream - return new_generation, new_transaction_id, number_of_changes, \ - entries - + return number_of_changes, new_generation, new_transaction_id def _emit_receive_status(user_data, received_docs, total): diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index 86744ec2..6f5893b1 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -109,7 +109,10 @@ class HTTPDocSender(object): if doc.is_tombstone(): defer.returnValue((doc, None)) else: - defer.returnValue((doc, self._crypto.encrypt_doc(doc))) + # TODO -- for blobs, should stream the doc raw content + # TODO -- get rid of this json encoding + content = yield self._crypto.encrypt_doc(doc) + defer.returnValue((doc, content.getvalue())) def _emit_send_status(user_data, idx, total): diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index e12ebf8a..533ce778 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -17,6 +17,7 @@ """ Server side synchronization infrastructure. """ +<<<<<<< a64e0fad3a8b1a07887c567d99fd32e3dcf54b23 import time from leap.soledad.common.l2db import sync from leap.soledad.common.l2db.remote import http_app @@ -24,6 +25,15 @@ from leap.soledad.server.caching import get_cache_for from leap.soledad.server.state import ServerSyncState from leap.soledad.common.document import ServerDocument from itertools import izip +======= +from itertools import izip +import cjson + +from leap.soledad.common.l2db import sync, Document +from leap.soledad.common.l2db.remote import http_app +from leap.soledad.server.caching import get_cache_for +from leap.soledad.server.state import ServerSyncState +>>>>>>> wip: adapt crypto to streaming flow MAX_REQUEST_SIZE = 6000 # in Mb @@ -199,6 +209,7 @@ class SyncResource(http_app.SyncResource): not already exist. 
:type ensure: bool """ + print "POST ARGS" # create or open the database cache = get_cache_for('db-' + sync_id + self.dbname, expire=120) if ensure: @@ -271,6 +282,7 @@ class SyncResource(http_app.SyncResource): client on the current sync session. :type received: int """ + print 'IN POST GET' def send_doc(doc, gen, trans_id): entry = dict(id=doc.doc_id, rev=doc.rev, -- cgit v1.2.3 From 0098849ffd6d9d7514a2eff7b6ced9403a9062ca Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 19 Sep 2016 22:00:04 -0400 Subject: [refactor] add SoledadCrypto interface --- client/src/leap/soledad/client/_crypto.py | 43 +++- client/src/leap/soledad/client/api.py | 33 +-- client/src/leap/soledad/client/crypto.py | 367 ------------------------------ client/src/leap/soledad/client/secrets.py | 2 +- testing/tests/perf/conftest.py | 3 +- testing/tests/perf/test_crypto.py | 79 ++++--- testing/tests/perf/test_encdecpool.py | 37 --- 7 files changed, 85 insertions(+), 479 deletions(-) delete mode 100644 testing/tests/perf/test_encdecpool.py diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index a2de0ae1..2a523144 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -29,6 +29,7 @@ import time from io import BytesIO from cStringIO import StringIO +from collections import namedtuple import six @@ -51,7 +52,6 @@ from cryptography.hazmat.backends.openssl.backend \ from zope.interface import implements from leap.common.config import get_path_prefix -from leap.soledad.client.secrets import SoledadSecrets log = Logger() @@ -86,23 +86,49 @@ class SoledadCrypto(object): self.secret = secret def encrypt_doc(self, doc): + + def put_raw(blob): + return '{"raw": "' + blob.getvalue() + '"}' + content = BytesIO() content.write(str(doc.get_json())) info = docinfo(doc.doc_id, doc.rev) del doc encryptor = BlobEncryptor(info, content, secret=self.secret) - return encryptor.encrypt() + d = encryptor.encrypt() + 
d.addCallback(put_raw) + return d def decrypt_doc(self, doc): info = docinfo(doc.doc_id, doc.rev) ciphertext = BytesIO() - ciphertext.write(doc.get_json()) - ciphertext.seek(0) + payload = doc.content['raw'] del doc + ciphertext.write(str(payload)) + ciphertext.seek(0) decryptor = BlobDecryptor(info, ciphertext, secret=self.secret) return decryptor.decrypt() +def encrypt_sym(data, key): + iv = os.urandom(16) + encryptor = AESEncryptor(key, iv) + encryptor.write(data) + encryptor.end() + ciphertext = encryptor.fd.getvalue() + return base64.urlsafe_b64encode(iv), ciphertext + + +def decrypt_sym(data, key, iv): + _iv = base64.urlsafe_b64decode(iv) + decryptor = AESDecryptor(key, _iv) + decryptor.write(data) + decryptor.end() + plaintext = decryptor.fd.getvalue() + return plaintext + + + class BlobEncryptor(object): """ @@ -122,7 +148,7 @@ class BlobEncryptor(object): self.doc_id = doc_info.doc_id self.rev = doc_info.rev - self._producer = FileBodyProducer(content_fd, readSize=2**8) + self._producer = FileBodyProducer(content_fd, readSize=2**16) self._preamble = BytesIO() if result is None: @@ -176,7 +202,7 @@ class BlobEncryptor(object): self._aes_fd.close() self._hmac.result.close() self.result.seek(0) - return defer.succeed('ok') + return defer.succeed(self.result) class BlobDecryptor(object): @@ -274,7 +300,6 @@ class AESEncryptor(object): def write(self, data): encrypted = self.encryptor.update(data) - encode = binascii.b2a_hex self.fd.write(encrypted) return encrypted @@ -318,7 +343,7 @@ class AESDecryptor(object): implements(interfaces.IConsumer) - def __init__(self, key, iv, fd): + def __init__(self, key, iv, fd=None): if iv is None: iv = os.urandom(16) if len(key) != 32: @@ -329,6 +354,8 @@ class AESDecryptor(object): cipher = _get_aes_ctr_cipher(key, iv) self.decryptor = cipher.decryptor() + if fd is None: + fd = BytesIO() self.fd = fd self.done = False self.deferred = defer.Deferred() diff --git a/client/src/leap/soledad/client/api.py 
b/client/src/leap/soledad/client/api.py index 74ebaddc..de44f526 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -225,7 +225,6 @@ class Soledad(object): # have to close any thread-related stuff we have already opened # here, otherwise there might be zombie threads that may clog the # reactor. - self._sync_db.close() if hasattr(self, '_dbpool'): self._dbpool.close() raise @@ -288,16 +287,12 @@ class Soledad(object): tohex = binascii.b2a_hex # sqlcipher only accepts the hex version key = tohex(self._secrets.get_local_storage_key()) - sync_db_key = tohex(self._secrets.get_sync_db_key()) opts = sqlcipher.SQLCipherOptions( self._local_db_path, key, is_raw_key=True, create=True, - defer_encryption=self._defer_encryption, - sync_db_key=sync_db_key, - ) + defer_encryption=self._defer_encryption) self._sqlcipher_opts = opts - self._dbpool = adbapi.getConnectionPool(opts) def _init_u1db_syncer(self): @@ -332,10 +327,6 @@ class Soledad(object): self._dbpool.close() if getattr(self, '_dbsyncer', None): self._dbsyncer.close() - # close the sync database - if self._sync_db: - self._sync_db.close() - self._sync_db = None # # ILocalStorage @@ -850,28 +841,6 @@ class Soledad(object): token = property(_get_token, _set_token, doc='The authentication Token.') - def _initialize_sync_db(self, opts): - """ - Initialize the Symmetrically-Encrypted document to be synced database, - and the queue to communicate with subprocess workers. 
- - :param opts: - :type opts: SQLCipherOptions - """ - soledad_assert(opts.sync_db_key is not None) - sync_db_path = None - if opts.path != ":memory:": - sync_db_path = "%s-sync" % opts.path - else: - sync_db_path = ":memory:" - - # we copy incoming options because the opts object might be used - # somewhere else - sync_opts = sqlcipher.SQLCipherOptions.copy( - opts, path=sync_db_path, create=True) - self._sync_db = sqlcipher.getConnectionPool( - sync_opts, extra_queries=self._sync_db_extra_init) - # # ISecretsStorage diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 55c49d9c..ecc0a0cf 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -32,370 +32,3 @@ from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type from leap.soledad.common import crypto from leap.soledad.common.log import getLogger - - -logger = getLogger(__name__) - - -MAC_KEY_LENGTH = 64 - -crypto_backend = MultiBackend([OpenSSLBackend()]) - - -# TODO -- deprecate. -# Secrets still using this. - -def encrypt_sym(data, key): - """ - Encrypt data using AES-256 cipher in CTR mode. - - :param data: The data to be encrypted. - :type data: str - :param key: The key used to encrypt data (must be 256 bits long). - :type key: str - - :return: A tuple with the initialization vector and the encrypted data. - :rtype: (long, str) - """ - soledad_assert_type(key, str) - soledad_assert( - len(key) == 32, # 32 x 8 = 256 bits. - 'Wrong key size: %s bits (must be 256 bits long).' % - (len(key) * 8)) - - iv = os.urandom(16) - cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) - encryptor = cipher.encryptor() - ciphertext = encryptor.update(data) + encryptor.finalize() - - return binascii.b2a_base64(iv), ciphertext - - -# FIXME decryption of the secrets doc is still using b64 -# Deprecate that, move to hex. 
- -def decrypt_sym(data, key, iv, encoding='base64'): - """ - Decrypt some data previously encrypted using AES-256 cipher in CTR mode. - - :param data: The data to be decrypted. - :type data: str - :param key: The symmetric key used to decrypt data (must be 256 bits - long). - :type key: str - :param iv: The initialization vector. - :type iv: str (it's b64 encoded by secrets, hex by deserializing from wire) - - :return: The decrypted data. - :rtype: str - """ - soledad_assert_type(key, str) - # assert params - soledad_assert( - len(key) == 32, # 32 x 8 = 256 bits. - 'Wrong key size: %s (must be 256 bits long).' % len(key)) - - if encoding == 'base64': - iv = binascii.a2b_base64(iv) - elif encoding == 'hex': - iv = binascii.a2b_hex(iv) - - cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) - decryptor = cipher.decryptor() - return decryptor.update(data) + decryptor.finalize() - - -def doc_mac_key(doc_id, secret): - """ - Generate a key for calculating a MAC for a document whose id is - C{doc_id}. - - The key is derived using HMAC having sha256 as underlying hash - function. The key used for HMAC is the first MAC_KEY_LENGTH characters - of Soledad's storage secret. The HMAC message is C{doc_id}. - - :param doc_id: The id of the document. - :type doc_id: str - - :param secret: The Soledad storage secret - :type secret: str - - :return: The key. - :rtype: str - """ - soledad_assert(secret is not None) - return hmac.new( - secret[:MAC_KEY_LENGTH], - doc_id, - hashlib.sha256).digest() - - -# -# Crypto utilities for a SoledadDocument. -# - -# TODO should be ported to streaming consumer - -def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, - mac_method, secret): - """ - Calculate a MAC for C{doc} using C{ciphertext}. - - Current MAC method used is HMAC, with the following parameters: - - * key: sha256(storage_secret, doc_id) - * msg: doc_id + doc_rev + ciphertext - * digestmod: sha256 - - :param doc_id: The id of the document. 
- :type doc_id: str - :param doc_rev: The revision of the document. - :type doc_rev: str - :param ciphertext: The content of the document. - :type ciphertext: str - :param enc_scheme: The encryption scheme. - :type enc_scheme: bytes - :param enc_method: The encryption method. - :type enc_method: str - :param enc_iv: The encryption initialization vector. - :type enc_iv: str - :param mac_method: The MAC method to use. - :type mac_method: str - :param secret: The Soledad storage secret - :type secret: str - - :return: The calculated MAC. - :rtype: str - - :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown. - """ - try: - soledad_assert(mac_method == crypto.MacMethods.HMAC) - except AssertionError: - raise crypto.UnknownMacMethodError - - template = "{doc_id}{doc_rev}{ciphertext}{enc_scheme}{enc_method}{enc_iv}" - content = template.format( - doc_id=doc_id, - doc_rev=doc_rev, - ciphertext=ciphertext, - enc_scheme=enc_scheme, - enc_method=enc_method, - enc_iv=enc_iv) - - digest = hmac.new( - doc_mac_key(doc_id, secret), - content, - hashlib.sha256).digest() - return digest - - -#def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): - #""" - #Encrypt C{doc}'s content. -# - #Encrypt doc's contents using AES-256 CTR mode and return a valid JSON - #string representing the following: -# - #{ - #crypto.ENC_JSON_KEY: '', - #crypto.ENC_SCHEME_KEY: 'symkey', - #crypto.ENC_METHOD_KEY: crypto.EncryptionMethods.AES_256_CTR, - #crypto.ENC_IV_KEY: '', - #MAC_KEY: '' - #crypto.MAC_METHOD_KEY: 'hmac' - #} -# - #:param docstr: A representation of the document to be encrypted. - #:type docstr: str or unicode. -# - #:param doc_id: The document id. - #:type doc_id: str -# - #:param doc_rev: The document revision. - #:type doc_rev: str -# - #:param key: The key used to encrypt ``data`` (must be 256 bits long). - #:type key: str -# - #:param secret: The Soledad storage secret (used for MAC auth). 
- #:type secret: str -# - #:return: The JSON serialization of the dict representing the encrypted - #content. - #:rtype: str - #""" - #enc_scheme = crypto.EncryptionSchemes.SYMKEY - #enc_method = crypto.EncryptionMethods.AES_256_CTR - #mac_method = crypto.MacMethods.HMAC - #enc_iv, ciphertext = encrypt_sym( - #str(docstr), # encryption/decryption routines expect str - #key) - #mac = binascii.b2a_hex( # store the mac as hex. - #mac_doc( - #doc_id, - #doc_rev, - #ciphertext, - #enc_scheme, - #enc_method, - #enc_iv, - #mac_method, - #secret)) - # Return a representation for the encrypted content. In the following, we - # convert binary data to hexadecimal representation so the JSON - # serialization does not complain about what it tries to serialize. - #hex_ciphertext = binascii.b2a_hex(ciphertext) - #log.debug("Encrypting doc: %s" % doc_id) - #return json.dumps({ - #crypto.ENC_JSON_KEY: hex_ciphertext, - #crypto.ENC_SCHEME_KEY: enc_scheme, - #crypto.ENC_METHOD_KEY: enc_method, - #crypto.ENC_IV_KEY: enc_iv, - #crypto.MAC_KEY: mac, - #crypto.MAC_METHOD_KEY: mac_method, - #}) -# - - -# TODO port to _crypto -def _verify_doc_mac(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, - enc_iv, mac_method, secret, doc_mac): - """ - Verify that C{doc_mac} is a correct MAC for the given document. - - :param doc_id: The id of the document. - :type doc_id: str - :param doc_rev: The revision of the document. - :type doc_rev: str - :param ciphertext: The content of the document. - :type ciphertext: str - :param enc_scheme: The encryption scheme. - :type enc_scheme: str - :param enc_method: The encryption method. - :type enc_method: str - :param enc_iv: The encryption initialization vector. - :type enc_iv: str - :param mac_method: The MAC method to use. - :type mac_method: str - :param secret: The Soledad storage secret - :type secret: str - :param doc_mac: The MAC to be verified against. 
- :type doc_mac: str - - :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown. - :raise crypto.WrongMacError: Raised when MAC could not be verified. - """ - # TODO mac_doc should be ported to Streaming also - calculated_mac = mac_doc( - doc_id, - doc_rev, - ciphertext, - enc_scheme, - enc_method, - enc_iv, - mac_method, - secret) - - ok = hmac.compare_digest( - str(calculated_mac), - binascii.a2b_hex(doc_mac)) - - if not ok: - loggger.warn("wrong MAC while decrypting doc...") - loggger.info(u'given: %s' % doc_mac) - loggger.info(u'calculated: %s' % binascii.b2a_hex(calculated_mac)) - raise crypto.WrongMacError("Could not authenticate document's " - "contents.") - - -def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): - """ - Decrypt a symmetrically encrypted C{doc}'s content. - - Return the JSON string representation of the document's decrypted content. - - The passed doc_dict argument should have the following structure: - - { - crypto.ENC_JSON_KEY: '', - crypto.ENC_SCHEME_KEY: '', - crypto.ENC_METHOD_KEY: '', - crypto.ENC_IV_KEY: '', # (optional) - MAC_KEY: '' - crypto.MAC_METHOD_KEY: 'hmac' - } - - C{enc_blob} is the encryption of the JSON serialization of the document's - content. For now Soledad just deals with documents whose C{enc_scheme} is - crypto.EncryptionSchemes.SYMKEY and C{enc_method} is - crypto.EncryptionMethods.AES_256_CTR. - - :param doc_dict: The content of the document to be decrypted. - :type doc_dict: dict - - :param doc_id: The document id. - :type doc_id: str - - :param doc_rev: The document revision. - :type doc_rev: str - - :param key: The key used to encrypt ``data`` (must be 256 bits long). - :type key: str - - :param secret: The Soledad storage secret. - :type secret: str - - :return: The JSON serialization of the decrypted content. - :rtype: str - - :raise UnknownEncryptionMethodError: Raised when trying to decrypt from an - unknown encryption method. 
- """ - # assert document dictionary structure - expected_keys = set([ - crypto.ENC_JSON_KEY, - crypto.ENC_SCHEME_KEY, - crypto.ENC_METHOD_KEY, - crypto.ENC_IV_KEY, - crypto.MAC_KEY, - crypto.MAC_METHOD_KEY, - ]) - soledad_assert(expected_keys.issubset(set(doc_dict.keys()))) - - d = doc_dict - decode = binascii.a2b_hex - - enc_scheme = d[crypto.ENC_SCHEME_KEY] - enc_method = d[crypto.ENC_METHOD_KEY] - doc_mac = d[crypto.MAC_KEY] - mac_method = d[crypto.MAC_METHOD_KEY] - enc_iv = d[crypto.ENC_IV_KEY] - - ciphertext_hex = d[crypto.ENC_JSON_KEY] - ciphertext = decode(ciphertext_hex) - - soledad_assert(enc_scheme == crypto.EncryptionSchemes.SYMKEY) - - _verify_doc_mac( - doc_id, doc_rev, ciphertext, enc_scheme, enc_method, - enc_iv, mac_method, secret, doc_mac) - - decr = decrypt_sym(ciphertext, key, enc_iv, encoding='hex') - return decr - - -# TODO deprecate -def is_symmetrically_encrypted(doc): - """ - Return True if the document was symmetrically encrypted. - - :param doc: The document to check. 
- :type doc: SoledadDocument - - :rtype: bool - """ - if doc.content and crypto.ENC_SCHEME_KEY in doc.content: - if doc.content[crypto.ENC_SCHEME_KEY] \ - == crypto.EncryptionSchemes.SYMKEY: - return True - return False diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index ad1db2b8..8543df01 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -34,7 +34,7 @@ from leap.soledad.common import soledad_assert_type from leap.soledad.common import document from leap.soledad.common.log import getLogger from leap.soledad.client import events -from leap.soledad.client.crypto import encrypt_sym, decrypt_sym +from leap.soledad.client._crypto import encrypt_sym, decrypt_sym logger = getLogger(__name__) diff --git a/testing/tests/perf/conftest.py b/testing/tests/perf/conftest.py index 09567b88..2964936b 100644 --- a/testing/tests/perf/conftest.py +++ b/testing/tests/perf/conftest.py @@ -242,8 +242,7 @@ def soledad_client(tmpdir, soledad_server, remote_db, soledad_dbs, request): local_db_path=local_db_path, server_url=server_url, cert_file=None, - auth_token=token, - defer_encryption=False) + auth_token=token) request.addfinalizer(soledad_client.close) return soledad_client return create diff --git a/testing/tests/perf/test_crypto.py b/testing/tests/perf/test_crypto.py index be00560b..9ce418ba 100644 --- a/testing/tests/perf/test_crypto.py +++ b/testing/tests/perf/test_crypto.py @@ -1,9 +1,21 @@ +""" +Benchmarks for crypto operations. +If you don't want to stress your local machine too much, you can pass the +SIZE_LIMT environment variable. 
+ +For instance, to keep the maximum payload at 1MB: + +SIZE_LIMIT=1E6 py.test -s tests/perf/test_crypto.py +""" import pytest +import os import json from uuid import uuid4 + from leap.soledad.common.document import SoledadDocument -from leap.soledad.client.crypto import encrypt_sym -from leap.soledad.client.crypto import decrypt_sym +from leap.soledad.client import _crypto + +LIMIT = int(float(os.environ.get('SIZE_LIMIT', 50 * 1000 * 1000))) def create_doc_encryption(size): @@ -20,7 +32,11 @@ def create_doc_encryption(size): return test_doc_encryption +# TODO this test is really bullshit, because it's still including +# the json serialization. + def create_doc_decryption(size): + @pytest.inlineCallbacks @pytest.mark.benchmark(group="test_crypto_decrypt_doc") def test_doc_decryption(soledad_client, benchmark, payload): crypto = soledad_client()._crypto @@ -29,32 +45,19 @@ def create_doc_decryption(size): doc = SoledadDocument( doc_id=uuid4().hex, rev='rev', json=json.dumps(DOC_CONTENT)) - encrypted_doc = crypto.encrypt_doc(doc) + + encrypted_doc = yield crypto.encrypt_doc(doc) doc.set_json(encrypted_doc) benchmark(crypto.decrypt_doc, doc) return test_doc_decryption -test_encrypt_doc_10k = create_doc_encryption(10*1000) -test_encrypt_doc_100k = create_doc_encryption(100*1000) -test_encrypt_doc_500k = create_doc_encryption(500*1000) -test_encrypt_doc_1M = create_doc_encryption(1000*1000) -test_encrypt_doc_10M = create_doc_encryption(10*1000*1000) -test_encrypt_doc_50M = create_doc_encryption(50*1000*1000) -test_decrypt_doc_10k = create_doc_decryption(10*1000) -test_decrypt_doc_100k = create_doc_decryption(100*1000) -test_decrypt_doc_500k = create_doc_decryption(500*1000) -test_decrypt_doc_1M = create_doc_decryption(1000*1000) -test_decrypt_doc_10M = create_doc_decryption(10*1000*1000) -test_decrypt_doc_50M = create_doc_decryption(50*1000*1000) - - def create_raw_encryption(size): @pytest.mark.benchmark(group="test_crypto_raw_encrypt") def test_raw_encrypt(benchmark, 
payload): key = payload(32) - benchmark(encrypt_sym, payload(size), key) + benchmark(_crypto.encrypt_sym, payload(size), key) return test_raw_encrypt @@ -62,20 +65,32 @@ def create_raw_decryption(size): @pytest.mark.benchmark(group="test_crypto_raw_decrypt") def test_raw_decrypt(benchmark, payload): key = payload(32) - iv, ciphertext = encrypt_sym(payload(size), key) - benchmark(decrypt_sym, ciphertext, key, iv) + iv, ciphertext = _crypto.encrypt_sym(payload(size), key) + benchmark(_crypto.decrypt_sym, ciphertext, key, iv) return test_raw_decrypt -test_encrypt_raw_10k = create_raw_encryption(10*1000) -test_encrypt_raw_100k = create_raw_encryption(100*1000) -test_encrypt_raw_500k = create_raw_encryption(500*1000) -test_encrypt_raw_1M = create_raw_encryption(1000*1000) -test_encrypt_raw_10M = create_raw_encryption(10*1000*1000) -test_encrypt_raw_50M = create_raw_encryption(50*1000*1000) -test_decrypt_raw_10k = create_raw_decryption(10*1000) -test_decrypt_raw_100k = create_raw_decryption(100*1000) -test_decrypt_raw_500k = create_raw_decryption(500*1000) -test_decrypt_raw_1M = create_raw_decryption(1000*1000) -test_decrypt_raw_10M = create_raw_decryption(10*1000*1000) -test_decrypt_raw_50M = create_raw_decryption(50*1000*1000) +# Create the TESTS in the global namespace, they'll be picked by the benchmark +# plugin. 
+ +encryption_tests = [ + ('10k', 1E4), + ('100k', 1E5), + ('500k', 5E5), + ('1M', 1E6), + ('10M', 1E7), + ('50M', 5E7), +] + +for name, size in encryption_tests: + if size < LIMIT: + sz = int(size) + globals()['test_encrypt_doc_' + name] = create_doc_encryption(sz) + globals()['test_decrypt_doc_' + name] = create_doc_decryption(sz) + + +for name, size in encryption_tests: + if size < LIMIT: + sz = int(size) + globals()['test_encrypt_raw_' + name] = create_raw_encryption(sz) + globals()['test_decrypt_raw_' + name] = create_raw_decryption(sz) diff --git a/testing/tests/perf/test_encdecpool.py b/testing/tests/perf/test_encdecpool.py deleted file mode 100644 index 8e820b9c..00000000 --- a/testing/tests/perf/test_encdecpool.py +++ /dev/null @@ -1,37 +0,0 @@ -import pytest -import json -from uuid import uuid4 -from twisted.internet.defer import gatherResults -from leap.soledad.client.encdecpool import SyncEncrypterPool -from leap.soledad.common.document import SoledadDocument -# FIXME: test load is low due issue #7370, higher values will get out of memory - - -def create_encrypt(amount, size): - @pytest.mark.benchmark(group="test_pool_encrypt") - @pytest.inlineCallbacks - def test(soledad_client, txbenchmark_with_setup, request, payload): - DOC_CONTENT = {'payload': payload(size)} - - def setup(): - client = soledad_client() - pool = SyncEncrypterPool(client._crypto, client._sync_db) - pool.start() - request.addfinalizer(pool.stop) - docs = [ - SoledadDocument(doc_id=uuid4().hex, rev='rev', - json=json.dumps(DOC_CONTENT)) - for _ in xrange(amount) - ] - return pool, docs - - @pytest.inlineCallbacks - def put_and_wait(pool, docs): - yield gatherResults([pool.encrypt_doc(doc) for doc in docs]) - - yield txbenchmark_with_setup(setup, put_and_wait) - return test - -test_encdecpool_encrypt_100_10k = create_encrypt(100, 10*1000) -test_encdecpool_encrypt_100_100k = create_encrypt(100, 100*1000) -test_encdecpool_encrypt_100_500k = create_encrypt(100, 500*1000) -- cgit v1.2.3 
From 6d1e6a9a8a0a70c43f9cd23c4541cd08618d3d11 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 21 Sep 2016 16:06:28 -0400 Subject: [bug] fix wrong content parsing --- client/src/leap/soledad/client/http_target/fetch.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 2e54ca70..a0c35063 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -49,9 +49,6 @@ class HTTPDocFetcher(object): @defer.inlineCallbacks def _receive_docs(self, last_known_generation, last_known_trans_id, ensure_callback, sync_id): - - print 'receiving.....', sync_id - new_generation = last_known_generation new_transaction_id = last_known_trans_id @@ -104,16 +101,13 @@ class HTTPDocFetcher(object): :param total: The total number of operations. :type total: int """ - # decrypt incoming document and insert into local database # If arriving content was symmetrically encrypted, we decrypt + # decrypt incoming document and insert into local database doc = SoledadDocument(doc_info['id'], doc_info['rev'], content) - print "GOT.....", doc - - payload = doc['raw'] + payload = doc.content['raw'] if is_symmetrically_encrypted(payload): - print "SHOULD DECRYPT!!!!", content decrypted = yield self._crypto.decrypt_doc(doc) doc.set_json(decrypted) -- cgit v1.2.3 From d738c0c166312a8f39f115a0773e6a3f30075977 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 21 Sep 2016 19:00:12 -0400 Subject: [refactor] remove encdecpool, finally --- client/src/leap/soledad/client/encdecpool.py | 134 --------------------------- 1 file changed, 134 deletions(-) delete mode 100644 client/src/leap/soledad/client/encdecpool.py diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py deleted file mode 100644 index b5cfb3ca..00000000 --- 
a/client/src/leap/soledad/client/encdecpool.py +++ /dev/null @@ -1,134 +0,0 @@ -# -*- coding: utf-8 -*- -# encdecpool.py -# Copyright (C) 2015 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -""" -A pool of encryption/decryption concurrent and parallel workers for using -during synchronization. -""" - - -from twisted.internet import threads -from twisted.internet import defer - -from leap.soledad.common import soledad_assert -from leap.soledad.common.log import getLogger - -from leap.soledad.client.crypto import decrypt_doc_dict - - -logger = getLogger(__name__) - - -# -# Encrypt pool of workers -# - -class SyncEncryptDecryptPool(object): - """ - Base class for encrypter/decrypter pools. - """ - - def __init__(self, crypto, sync_db): - """ - Initialize the pool of encryption-workers. - - :param crypto: A SoledadCryto instance to perform the encryption. 
- :type crypto: leap.soledad.crypto.SoledadCrypto - - :param sync_db: A database connection handle - :type sync_db: pysqlcipher.dbapi2.Connection - """ - self._crypto = crypto - self._sync_db = sync_db - self._delayed_call = None - self._started = False - - def start(self): - self._started = True - - def stop(self): - self._started = False - # maybe cancel the next delayed call - if self._delayed_call \ - and not self._delayed_call.called: - self._delayed_call.cancel() - - @property - def running(self): - return self._started - - def _runOperation(self, query, *args): - """ - Run an operation on the sync db. - - :param query: The query to be executed. - :type query: str - :param args: A list of query arguments. - :type args: list - - :return: A deferred that will fire when the operation in the database - has finished. - :rtype: twisted.internet.defer.Deferred - """ - return self._sync_db.runOperation(query, *args) - - def _runQuery(self, query, *args): - """ - Run a query on the sync db. - - :param query: The query to be executed. - :type query: str - :param args: A list of query arguments. - :type args: list - - :return: A deferred that will fire with the results of the database - query. - :rtype: twisted.internet.defer.Deferred - """ - return self._sync_db.runQuery(query, *args) - - -def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret, - idx): - """ - Decrypt the content of the given document. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The encrypted content of the document as JSON dict. - :type content: dict - :param gen: The generation corresponding to the modification of that - document. - :type gen: int - :param trans_id: The transaction id corresponding to the modification of - that document. - :type trans_id: str - :param key: The encryption key. - :type key: str - :param secret: The Soledad storage secret (used for MAC auth). 
- :type secret: str - :param idx: The index of this document in the current sync process. - :type idx: int - - :return: A tuple containing the doc id, revision and encrypted content. - :rtype: tuple(str, str, str) - """ - decrypted_content = decrypt_doc_dict(content, doc_id, doc_rev, key, secret) - return doc_id, doc_rev, decrypted_content, gen, trans_id, idx -- cgit v1.2.3 From 75208477a2f1634664b80b8501818e5a905aa0f3 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 22 Sep 2016 01:42:26 -0400 Subject: [tests] adapt tests --- client/src/leap/soledad/client/_crypto.py | 35 ++- client/src/leap/soledad/client/http_target/send.py | 2 +- client/src/leap/soledad/client/secrets.py | 6 +- server/pkg/requirements.pip | 1 + testing/pytest.ini | 1 + testing/test_soledad/util.py | 17 +- testing/tests/client/test_crypto.py | 263 +++++++++++++++------ testing/tests/client/test_crypto2.py | 171 -------------- testing/tests/sync/test_encdecpool.py | 48 ---- testing/tests/sync/test_sqlcipher_sync.py | 14 +- testing/tox.ini | 3 + 11 files changed, 228 insertions(+), 333 deletions(-) delete mode 100644 testing/tests/client/test_crypto2.py delete mode 100644 testing/tests/sync/test_encdecpool.py diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index 2a523144..deba5590 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -35,7 +35,6 @@ import six from twisted.internet import defer from twisted.internet import interfaces -from twisted.internet import reactor from twisted.logger import Logger from twisted.persisted import dirdbm from twisted.web import client @@ -88,7 +87,8 @@ class SoledadCrypto(object): def encrypt_doc(self, doc): def put_raw(blob): - return '{"raw": "' + blob.getvalue() + '"}' + raw = blob.getvalue() + return '{"raw": "' + raw + '"}' content = BytesIO() content.write(str(doc.get_json())) @@ -105,9 +105,9 @@ class SoledadCrypto(object): payload = 
doc.content['raw'] del doc ciphertext.write(str(payload)) - ciphertext.seek(0) decryptor = BlobDecryptor(info, ciphertext, secret=self.secret) - return decryptor.decrypt() + buf = decryptor.decrypt() + return buf.getvalue() def encrypt_sym(data, key): @@ -116,11 +116,11 @@ def encrypt_sym(data, key): encryptor.write(data) encryptor.end() ciphertext = encryptor.fd.getvalue() - return base64.urlsafe_b64encode(iv), ciphertext + return base64.b64encode(iv), ciphertext def decrypt_sym(data, key, iv): - _iv = base64.urlsafe_b64decode(iv) + _iv = base64.b64decode(str(iv)) decryptor = AESDecryptor(key, _iv) decryptor.write(data) decryptor.end() @@ -136,7 +136,6 @@ class BlobEncryptor(object): """ def __init__(self, doc_info, content_fd, result=None, secret=None, iv=None): - if iv is None: iv = os.urandom(16) else: @@ -148,7 +147,9 @@ class BlobEncryptor(object): self.doc_id = doc_info.doc_id self.rev = doc_info.rev + content_fd.seek(0) self._producer = FileBodyProducer(content_fd, readSize=2**16) + self._content_fd = content_fd self._preamble = BytesIO() if result is None: @@ -170,6 +171,11 @@ class BlobEncryptor(object): d.addCallback(self._end_crypto_stream) return d + def encrypt_whole(self): + self._crypter.write(self._content_fd.getvalue()) + self._end_crypto_stream(None) + return '{"raw":"' + self.result.getvalue() + '"}' + def _write_preamble(self): def write(data): @@ -191,6 +197,7 @@ class BlobEncryptor(object): def _end_crypto_stream(self, ignored): self._aes.end() self._hmac.end() + self._content_fd.close() preamble = self._preamble.getvalue() encrypted = self._aes_fd.getvalue() @@ -274,6 +281,7 @@ class BlobDecryptor(object): # TODO pass chunks, streaming, instead # Use AESDecryptor below + self.result.write(decryptor.update(ciphertext)) self.result.write(decryptor.finalize()) return self.result @@ -296,6 +304,7 @@ class AESEncryptor(object): fd = BytesIO() self.fd = fd + self.done = False def write(self, data): @@ -373,6 +382,12 @@ class AESDecryptor(object): 
self.done = True +def is_symmetrically_encrypted(payload): + header = base64.urlsafe_b64decode(enc[:15] + '===') + ts, sch, meth = struct.unpack('Qbb', header[1:11]) + return sch == ENC_SCHEME.symkey + + # utils @@ -392,9 +407,3 @@ def _get_sym_key_for_doc(doc_id, secret): def _get_aes_ctr_cipher(key, iv): return Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) - - -def is_symmetrically_encrypted(payload): - header = base64.urlsafe_b64decode(enc[:15] + '===') - ts, sch, meth = struct.unpack('Qbb', header[1:11]) - return sch == ENC_SCHEME.symkey diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index 6f5893b1..e562a128 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -112,7 +112,7 @@ class HTTPDocSender(object): # TODO -- for blobs, should stream the doc raw content # TODO -- get rid of this json encoding content = yield self._crypto.encrypt_doc(doc) - defer.returnValue((doc, content.getvalue())) + defer.returnValue((doc, content)) def _emit_send_status(user_data, idx, total): diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 8543df01..21c4f291 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -266,11 +266,7 @@ class SoledadSecrets(object): # read storage secrets from file content = None with open(self._secrets_path, 'r') as f: - raw = f.read() - raw = raw.replace('\n', '') - content = json.loads(raw) - - print "LOADING", content + content = json.loads(f.read()) _, active_secret, version = self._import_recovery_document(content) self._maybe_set_active_secret(active_secret) diff --git a/server/pkg/requirements.pip b/server/pkg/requirements.pip index e92dfde6..e4a87e74 100644 --- a/server/pkg/requirements.pip +++ b/server/pkg/requirements.pip @@ -3,3 +3,4 @@ PyOpenSSL twisted>=12.3.0 Beaker couchdb 
+python-cjson diff --git a/testing/pytest.ini b/testing/pytest.ini index 2d34c607..39d1e1c6 100644 --- a/testing/pytest.ini +++ b/testing/pytest.ini @@ -1,3 +1,4 @@ [pytest] testpaths = tests norecursedirs = tests/perf +twisted = yes diff --git a/testing/test_soledad/util.py b/testing/test_soledad/util.py index d53f6cda..bde0b1b7 100644 --- a/testing/test_soledad/util.py +++ b/testing/test_soledad/util.py @@ -15,12 +15,10 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . - """ Utilities used by multiple test suites. """ - import os import random import string @@ -45,14 +43,13 @@ from leap.soledad.common.document import SoledadDocument from leap.soledad.common.couch import CouchDatabase from leap.soledad.common.couch.state import CouchServerState -from leap.soledad.common.crypto import ENC_SCHEME_KEY from leap.soledad.client import Soledad from leap.soledad.client import http_target from leap.soledad.client import auth -from leap.soledad.client.crypto import decrypt_doc_dict from leap.soledad.client.sqlcipher import SQLCipherDatabase from leap.soledad.client.sqlcipher import SQLCipherOptions +from leap.soledad.client._crypto import is_symmetrically_encrypted from leap.soledad.server import SoledadApp from leap.soledad.server.auth import SoledadTokenAuthMiddleware @@ -212,6 +209,7 @@ def soledad_sync_target( # redefine the base leap test class so it inherits from twisted trial's # TestCase. This is needed so trial knows that it has to manage a reactor and # wait for deferreds returned by tests to be fired. 
+ BaseLeapTest = type( 'BaseLeapTest', (unittest.TestCase,), dict(BaseLeapTest.__dict__)) @@ -311,6 +309,7 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest): self.addCleanup(soledad.close) return soledad + @pytest.inlineCallbacks def assertGetEncryptedDoc( self, db, doc_id, doc_rev, content, has_conflicts): """ @@ -320,13 +319,9 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest): has_conflicts=has_conflicts) doc = db.get_doc(doc_id) - if ENC_SCHEME_KEY in doc.content: - # XXX check for SYM_KEY too - key = self._soledad._crypto.doc_passphrase(doc.doc_id) - secret = self._soledad._crypto.secret - decrypted = decrypt_doc_dict( - doc.content, doc.doc_id, doc.rev, - key, secret) + if is_symmetrically_encrypted(doc.content['raw']): + crypt = self._soledad._crypto + decrypted = yield crypt.decrypt_doc(doc) doc.set_json(decrypted) self.assertEqual(exp_doc.doc_id, doc.doc_id) self.assertEqual(exp_doc.rev, doc.rev) diff --git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index 77252b46..dc3054f2 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -17,47 +17,184 @@ """ Tests for cryptographic related stuff. 
""" -import os -import hashlib import binascii +import base64 +import hashlib +import json +import os +import struct + +from io import BytesIO + +import pytest + +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +from cryptography.hazmat.backends import default_backend -from leap.soledad.client import crypto from leap.soledad.common.document import SoledadDocument from test_soledad.util import BaseSoledadTest -from leap.soledad.common.crypto import WrongMacError -from leap.soledad.common.crypto import UnknownMacMethodError -from leap.soledad.common.crypto import ENC_JSON_KEY -from leap.soledad.common.crypto import ENC_SCHEME_KEY -from leap.soledad.common.crypto import MAC_KEY -from leap.soledad.common.crypto import MAC_METHOD_KEY +from leap.soledad.client import _crypto + +from twisted.trial import unittest +from twisted.internet import defer + + +snowden1 = ( + "You can't come up against " + "the world's most powerful intelligence " + "agencies and not accept the risk. 
" + "If they want to get you, over time " + "they will.") + + +class AESTest(unittest.TestCase): + + def test_chunked_encryption(self): + key = 'A' * 32 + iv = 'A' * 16 + + fd = BytesIO() + aes = _crypto.AESEncryptor(key, iv, fd) + + data = snowden1 + block = 16 + + for i in range(len(data)/block): + chunk = data[i * block:(i+1)*block] + aes.write(chunk) + aes.end() + + ciphertext_chunked = fd.getvalue() + ciphertext = _aes_encrypt(key, iv, data) + + assert ciphertext_chunked == ciphertext + + + def test_decrypt(self): + key = 'A' * 32 + iv = 'A' * 16 + + data = snowden1 + block = 16 + + ciphertext = _aes_encrypt(key, iv, data) + + fd = BytesIO() + aes = _crypto.AESDecryptor(key, iv, fd) + + for i in range(len(ciphertext)/block): + chunk = ciphertext[i * block:(i+1)*block] + aes.write(chunk) + aes.end() + + cleartext_chunked = fd.getvalue() + assert cleartext_chunked == data + + + +class BlobTestCase(unittest.TestCase): + + class doc_info: + doc_id = 'D-deadbeef' + rev = '397932e0c77f45fcb7c3732930e7e9b2:1' + + @defer.inlineCallbacks + def test_blob_encryptor(self): + + inf = BytesIO() + inf.write(snowden1) + inf.seek(0) + outf = BytesIO() + + blob = _crypto.BlobEncryptor( + self.doc_info, inf, result=outf, + secret='A' * 96, iv='B'*16) + + encrypted = yield blob.encrypt() + data = base64.urlsafe_b64decode(encrypted.getvalue()) + assert data[0] == '\x80' + ts, sch, meth = struct.unpack( + 'Qbb', data[1:11]) + assert sch == 1 + assert meth == 1 + iv = data[11:27] + assert iv == 'B' * 16 + doc_id = data[27:37] + assert doc_id == 'D-deadbeef' -class EncryptedSyncTestCase(BaseSoledadTest): + rev = data[37:71] + assert rev == self.doc_info.rev - """ - Tests that guarantee that data will always be encrypted when syncing. 
- """ + ciphertext = data[71:-64] + aes_key = _crypto._get_sym_key_for_doc( + self.doc_info.doc_id, 'A'*96) + assert ciphertext == _aes_encrypt(aes_key, 'B'*16, snowden1) - def test_encrypt_decrypt_json(self): + decrypted = _aes_decrypt(aes_key, 'B'*16, ciphertext) + assert str(decrypted) == snowden1 + + + @defer.inlineCallbacks + def test_blob_decryptor(self): + + inf = BytesIO() + inf.write(snowden1) + inf.seek(0) + outf = BytesIO() + + blob = _crypto.BlobEncryptor( + self.doc_info, inf, result=outf, + secret='A' * 96, iv='B' * 16) + yield blob.encrypt() + + decryptor = _crypto.BlobDecryptor( + self.doc_info, outf, + secret='A' * 96) + decrypted = yield decryptor.decrypt() + assert decrypted.getvalue() == snowden1 + + + @defer.inlineCallbacks + def test_encrypt_and_decrypt(self): + """ + Check that encrypting and decrypting gives same doc. """ - Test encrypting and decrypting documents. + crypto = _crypto.SoledadCrypto('A' * 96) + payload = {'key': 'someval'} + doc1 = SoledadDocument('id1', '1', json.dumps(payload)) + + encrypted = yield crypto.encrypt_doc(doc1) + assert encrypted != payload + assert 'raw' in encrypted + doc2 = SoledadDocument('id1', '1') + doc2.set_json(encrypted) + decrypted = yield crypto.decrypt_doc(doc2) + assert len(decrypted) != 0 + assert json.loads(decrypted) == payload + + + @defer.inlineCallbacks + def test_decrypt_with_wrong_mac_raises(self): """ - simpledoc = {'key': 'val'} - doc1 = SoledadDocument(doc_id='id') - doc1.content = simpledoc - - # encrypt doc - doc1.set_json(self._soledad._crypto.encrypt_doc(doc1)) - # assert content is different and includes keys - self.assertNotEqual( - simpledoc, doc1.content, - 'incorrect document encryption') - self.assertTrue(ENC_JSON_KEY in doc1.content) - self.assertTrue(ENC_SCHEME_KEY in doc1.content) - # decrypt doc - doc1.set_json(self._soledad._crypto.decrypt_doc(doc1)) - self.assertEqual( - simpledoc, doc1.content, 'incorrect document encryption') + Trying to decrypt a document with wrong 
MAC should raise. + """ + crypto = _crypto.SoledadCrypto('A' * 96) + payload = {'key': 'someval'} + doc1 = SoledadDocument('id1', '1', json.dumps(payload)) + + encrypted = yield crypto.encrypt_doc(doc1) + encdict = json.loads(encrypted) + raw = base64.urlsafe_b64decode(str(encdict['raw'])) + # mess with MAC + messed = raw[:-64] + '0' * 64 + newraw = base64.urlsafe_b64encode(str(messed)) + doc2 = SoledadDocument('id1', '1') + doc2.set_json(json.dumps({"raw": str(newraw)})) + + with pytest.raises(_crypto.InvalidBlob): + decrypted = yield crypto.decrypt_doc(doc2) + class RecoveryDocumentTestCase(BaseSoledadTest): @@ -146,60 +283,22 @@ class SoledadSecretsTestCase(BaseSoledadTest): "Should have a secret at this point") -class MacAuthTestCase(BaseSoledadTest): - - def test_decrypt_with_wrong_mac_raises(self): - """ - Trying to decrypt a document with wrong MAC should raise. - """ - simpledoc = {'key': 'val'} - doc = SoledadDocument(doc_id='id') - doc.content = simpledoc - # encrypt doc - doc.set_json(self._soledad._crypto.encrypt_doc(doc)) - self.assertTrue(MAC_KEY in doc.content) - self.assertTrue(MAC_METHOD_KEY in doc.content) - # mess with MAC - doc.content[MAC_KEY] = '1234567890ABCDEF' - # try to decrypt doc - self.assertRaises( - WrongMacError, - self._soledad._crypto.decrypt_doc, doc) - - def test_decrypt_with_unknown_mac_method_raises(self): - """ - Trying to decrypt a document with unknown MAC method should raise. 
- """ - simpledoc = {'key': 'val'} - doc = SoledadDocument(doc_id='id') - doc.content = simpledoc - # encrypt doc - doc.set_json(self._soledad._crypto.encrypt_doc(doc)) - self.assertTrue(MAC_KEY in doc.content) - self.assertTrue(MAC_METHOD_KEY in doc.content) - # mess with MAC method - doc.content[MAC_METHOD_KEY] = 'mymac' - # try to decrypt doc - self.assertRaises( - UnknownMacMethodError, - self._soledad._crypto.decrypt_doc, doc) - class SoledadCryptoAESTestCase(BaseSoledadTest): def test_encrypt_decrypt_sym(self): # generate 256-bit key key = os.urandom(32) - iv, cyphertext = crypto.encrypt_sym('data', key) + iv, cyphertext = _crypto.encrypt_sym('data', key) self.assertTrue(cyphertext is not None) self.assertTrue(cyphertext != '') self.assertTrue(cyphertext != 'data') - plaintext = crypto.decrypt_sym(cyphertext, key, iv) + plaintext = _crypto.decrypt_sym(cyphertext, key, iv) self.assertEqual('data', plaintext) def test_decrypt_with_wrong_iv_fails(self): key = os.urandom(32) - iv, cyphertext = crypto.encrypt_sym('data', key) + iv, cyphertext = _crypto.encrypt_sym('data', key) self.assertTrue(cyphertext is not None) self.assertTrue(cyphertext != '') self.assertTrue(cyphertext != 'data') @@ -208,13 +307,13 @@ class SoledadCryptoAESTestCase(BaseSoledadTest): wrongiv = rawiv while wrongiv == rawiv: wrongiv = os.urandom(1) + rawiv[1:] - plaintext = crypto.decrypt_sym( + plaintext = _crypto.decrypt_sym( cyphertext, key, iv=binascii.b2a_base64(wrongiv)) self.assertNotEqual('data', plaintext) def test_decrypt_with_wrong_key_fails(self): key = os.urandom(32) - iv, cyphertext = crypto.encrypt_sym('data', key) + iv, cyphertext = _crypto.encrypt_sym('data', key) self.assertTrue(cyphertext is not None) self.assertTrue(cyphertext != '') self.assertTrue(cyphertext != 'data') @@ -222,5 +321,19 @@ class SoledadCryptoAESTestCase(BaseSoledadTest): # ensure keys are different in case we are extremely lucky while wrongkey == key: wrongkey = os.urandom(32) - plaintext = 
crypto.decrypt_sym(cyphertext, wrongkey, iv) + plaintext = _crypto.decrypt_sym(cyphertext, wrongkey, iv) self.assertNotEqual('data', plaintext) + + +def _aes_encrypt(key, iv, data): + backend = default_backend() + cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=backend) + encryptor = cipher.encryptor() + return encryptor.update(data) + encryptor.finalize() + + +def _aes_decrypt(key, iv, data): + backend = default_backend() + cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=backend) + decryptor = cipher.decryptor() + return decryptor.update(data) + decryptor.finalize() diff --git a/testing/tests/client/test_crypto2.py b/testing/tests/client/test_crypto2.py deleted file mode 100644 index f0f6c4af..00000000 --- a/testing/tests/client/test_crypto2.py +++ /dev/null @@ -1,171 +0,0 @@ -# -*- coding: utf-8 -*- -# test_crypto2.py -# Copyright (C) 2016 LEAP Encryption Access Project -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
- -""" -Tests for the _crypto module -""" - -import base64 -import binascii -import time -import struct -import StringIO - -import leap.soledad.client -from leap.soledad.client import _crypto - -from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes -from cryptography.hazmat.backends import default_backend - -from twisted.trial import unittest - - -snowden1 = ( - "You can't come up against " - "the world's most powerful intelligence " - "agencies and not accept the risk. " - "If they want to get you, over time " - "they will.") - - -def _aes_encrypt(key, iv, data): - backend = default_backend() - cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=backend) - encryptor = cipher.encryptor() - return encryptor.update(data) + encryptor.finalize() - -def _aes_decrypt(key, iv, data): - backend = default_backend() - cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=backend) - decryptor = cipher.decryptor() - return decryptor.update(data) + decryptor.finalize() - - -def test_chunked_encryption(): - key = 'A' * 32 - iv = 'A' * 16 - - fd = StringIO.StringIO() - aes = _crypto.AESEncryptor(key, iv, fd) - - data = snowden1 - block = 16 - - for i in range(len(data)/block): - chunk = data[i * block:(i+1)*block] - aes.write(chunk) - aes.end() - - ciphertext_chunked = fd.getvalue() - ciphertext = _aes_encrypt(key, iv, data) - - assert ciphertext_chunked == ciphertext - - -def test_decrypt(): - key = 'A' * 32 - iv = 'A' * 16 - - data = snowden1 - block = 16 - - ciphertext = _aes_encrypt(key, iv, data) - - fd = StringIO.StringIO() - aes = _crypto.AESDecryptor(key, iv, fd) - - for i in range(len(ciphertext)/block): - chunk = ciphertext[i * block:(i+1)*block] - aes.write(chunk) - aes.end() - - cleartext_chunked = fd.getvalue() - assert cleartext_chunked == data - - - -class BlobTestCase(unittest.TestCase): - - class doc_info: - doc_id = 'D-deadbeef' - rev = '397932e0c77f45fcb7c3732930e7e9b2:1' - - def test_blob_encryptor(self): - - inf = 
StringIO.StringIO() - inf.write(snowden1) - inf.seek(0) - outf = StringIO.StringIO() - - blob = _crypto.BlobEncryptor( - self.doc_info, inf, result=outf, - secret='A' * 96, iv='B'*16) - - d = blob.encrypt() - d.addCallback(self._test_blob_encryptor_cb, outf) - return d - - def _test_blob_encryptor_cb(self, _, outf): - encrypted = outf.getvalue() - data = base64.urlsafe_b64decode(encrypted) - - assert data[0] == '\x80' - ts, sch, meth = struct.unpack( - 'Qbb', data[1:11]) - assert sch == 1 - assert meth == 1 - iv = data[11:27] - assert iv == 'B' * 16 - doc_id = data[27:37] - assert doc_id == 'D-deadbeef' - - rev = data[37:71] - assert rev == self.doc_info.rev - - ciphertext = data[71:-64] - aes_key = _crypto._get_sym_key_for_doc( - self.doc_info.doc_id, 'A'*96) - assert ciphertext == _aes_encrypt(aes_key, 'B'*16, snowden1) - - decrypted = _aes_decrypt(aes_key, 'B'*16, ciphertext) - assert str(decrypted) == snowden1 - - def test_blob_decryptor(self): - - inf = StringIO.StringIO() - inf.write(snowden1) - inf.seek(0) - outf = StringIO.StringIO() - - blob = _crypto.BlobEncryptor( - self.doc_info, inf, result=outf, - secret='A' * 96, iv='B' * 16) - - def do_decrypt(_, outf): - decryptor = _crypto.BlobDecryptor( - self.doc_info, outf, - secret='A' * 96) - d = decryptor.decrypt() - return d - - d = blob.encrypt() - d.addCallback(do_decrypt, outf) - d.addCallback(self._test_blob_decryptor_cb) - return d - - def _test_blob_decryptor_cb(self, decrypted): - assert decrypted.getvalue() == snowden1 diff --git a/testing/tests/sync/test_encdecpool.py b/testing/tests/sync/test_encdecpool.py deleted file mode 100644 index 7055a765..00000000 --- a/testing/tests/sync/test_encdecpool.py +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- -import json -from twisted.internet.defer import inlineCallbacks - -from leap.soledad.client.encdecpool import SyncEncrypterPool - -from leap.soledad.common.document import SoledadDocument -from test_soledad.util import BaseSoledadTest - -DOC_ID = 
"mydoc" -DOC_REV = "rev" -DOC_CONTENT = {'simple': 'document'} - - -class TestSyncEncrypterPool(BaseSoledadTest): - - def setUp(self): - BaseSoledadTest.setUp(self) - crypto = self._soledad._crypto - sync_db = self._soledad._sync_db - self._pool = SyncEncrypterPool(crypto, sync_db) - self._pool.start() - - def tearDown(self): - self._pool.stop() - BaseSoledadTest.tearDown(self) - - @inlineCallbacks - def test_get_encrypted_doc_returns_none(self): - """ - Test that trying to get an encrypted doc from the pool returns None if - the document was never added for encryption. - """ - doc = yield self._pool.get_encrypted_doc(DOC_ID, DOC_REV) - self.assertIsNone(doc) - - @inlineCallbacks - def test_encrypt_doc_and_get_it_back(self): - """ - Test that the pool actually encrypts a document added to the queue. - """ - doc = SoledadDocument( - doc_id=DOC_ID, rev=DOC_REV, json=json.dumps(DOC_CONTENT)) - - yield self._pool.encrypt_doc(doc) - encrypted = yield self._pool.get_encrypted_doc(DOC_ID, DOC_REV) - - self.assertIsNotNone(encrypted) diff --git a/testing/tests/sync/test_sqlcipher_sync.py b/testing/tests/sync/test_sqlcipher_sync.py index 3cbefc8b..2528600d 100644 --- a/testing/tests/sync/test_sqlcipher_sync.py +++ b/testing/tests/sync/test_sqlcipher_sync.py @@ -27,8 +27,6 @@ from leap.soledad.common.l2db import sync from leap.soledad.common.l2db import vectorclock from leap.soledad.common.l2db import errors -from leap.soledad.common.crypto import ENC_SCHEME_KEY -from leap.soledad.client.crypto import decrypt_doc_dict from leap.soledad.client.http_target import SoledadHTTPSyncTarget from test_soledad import u1db_tests as tests @@ -545,13 +543,11 @@ class SQLCipherDatabaseSyncTests( self.assertFalse(doc2.has_conflicts) self.sync(self.db2, db3) doc3 = db3.get_doc('the-doc') - if ENC_SCHEME_KEY in doc3.content: - _crypto = self._soledad._crypto - key = _crypto.doc_passphrase(doc3.doc_id) - secret = _crypto.secret - doc3.set_json(decrypt_doc_dict( - doc3.content, - doc3.doc_id, 
doc3.rev, key, secret)) + + _crypto = self._soledad._crypto + decrypted = _crypto.decrypt_doc(doc3) + doc3.set_json(decrypted) + self.assertEqual(doc4.get_json(), doc3.get_json()) self.assertFalse(doc3.has_conflicts) self.db1.close() diff --git a/testing/tox.ini b/testing/tox.ini index 31cb8a4f..0eeeab9e 100644 --- a/testing/tox.ini +++ b/testing/tox.ini @@ -1,5 +1,6 @@ [tox] envlist = py27 +skipsdist=True [testenv] basepython = python2.7 @@ -7,6 +8,7 @@ commands = py.test --cov-report=html \ --cov-report=term \ --cov=leap.soledad \ {posargs} +usedevelop = True deps = coverage pytest @@ -18,6 +20,7 @@ deps = pdbpp couchdb requests + service_identity # install soledad local packages -e../common -e../client -- cgit v1.2.3 From e7bf2bcf974b27d1a52f08e45ad35635226ffa92 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 22 Sep 2016 02:58:58 -0400 Subject: [refactor] remove legacy crypto implementation --- client/src/leap/soledad/client/crypto.py | 34 -------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 client/src/leap/soledad/client/crypto.py diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py deleted file mode 100644 index ecc0a0cf..00000000 --- a/client/src/leap/soledad/client/crypto.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- -# crypto.py -# Copyright (C) 2013, 2014 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. 
If not, see . -""" -Cryptographic utilities for Soledad. -""" -import os -import binascii -import hmac -import hashlib -import json - -from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes -from cryptography.hazmat.backends.multibackend import MultiBackend -from cryptography.hazmat.backends.openssl.backend \ - import Backend as OpenSSLBackend - -from leap.soledad.common import soledad_assert -from leap.soledad.common import soledad_assert_type -from leap.soledad.common import crypto -from leap.soledad.common.log import getLogger -- cgit v1.2.3 From 69958161af00958b1d92ab6153a591aa89199e35 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 22 Sep 2016 03:08:20 -0400 Subject: [refactor] remove lingering refs to syncdb --- testing/test_soledad/util.py | 7 ++----- testing/tests/sync/test_sqlcipher_sync.py | 6 +----- testing/tests/sync/test_sync_deferred.py | 10 ++-------- testing/tests/sync/test_sync_target.py | 19 ++++--------------- 4 files changed, 9 insertions(+), 33 deletions(-) diff --git a/testing/test_soledad/util.py b/testing/test_soledad/util.py index bde0b1b7..b1965aa6 100644 --- a/testing/test_soledad/util.py +++ b/testing/test_soledad/util.py @@ -190,8 +190,7 @@ class MockedSharedDBTest(object): def soledad_sync_target( - test, path, source_replica_uid=uuid4().hex, - sync_db=None, sync_enc_pool=None): + test, path, source_replica_uid=uuid4().hex): creds = {'token': { 'uuid': 'user-uuid', 'token': 'auth-token', @@ -201,9 +200,7 @@ def soledad_sync_target( source_replica_uid, creds, test._soledad._crypto, - None, # cert_file - sync_db=sync_db, - sync_enc_pool=sync_enc_pool) + None) # cert_file # redefine the base leap test class so it inherits from twisted trial's diff --git a/testing/tests/sync/test_sqlcipher_sync.py b/testing/tests/sync/test_sqlcipher_sync.py index 2528600d..c3cd8444 100644 --- a/testing/tests/sync/test_sqlcipher_sync.py +++ b/testing/tests/sync/test_sqlcipher_sync.py @@ -709,13 +709,9 @@ def 
make_local_db_and_soledad_target( test.startTwistedServer() replica_uid = os.path.basename(path) db = test.request_state._create_database(replica_uid) - sync_db = test._soledad._sync_db - sync_enc_pool = test._soledad._sync_enc_pool st = soledad_sync_target( test, db._dbname, - source_replica_uid=source_replica_uid, - sync_db=sync_db, - sync_enc_pool=sync_enc_pool) + source_replica_uid=source_replica_uid) return db, st target_scenarios = [ diff --git a/testing/tests/sync/test_sync_deferred.py b/testing/tests/sync/test_sync_deferred.py index 482b150c..eb71ea73 100644 --- a/testing/tests/sync/test_sync_deferred.py +++ b/testing/tests/sync/test_sync_deferred.py @@ -71,12 +71,10 @@ class BaseSoledadDeferredEncTest(SoledadWithCouchServerMixin): import binascii tohex = binascii.b2a_hex key = tohex(self._soledad.secrets.get_local_storage_key()) - sync_db_key = tohex(self._soledad.secrets.get_sync_db_key()) dbpath = self._soledad._local_db_path self.opts = SQLCipherOptions( - dbpath, key, is_raw_key=True, create=False, - defer_encryption=True, sync_db_key=sync_db_key) + dbpath, key, is_raw_key=True, create=False) self.db1 = SQLCipherDatabase(self.opts) self.db2 = self.request_state._create_database('test') @@ -139,15 +137,11 @@ class TestSoledadDbSyncDeferredEncDecr( and Token auth. 
""" replica_uid = self._soledad._dbpool.replica_uid - sync_db = self._soledad._sync_db - sync_enc_pool = self._soledad._sync_enc_pool dbsyncer = self._soledad._dbsyncer # Soledad.sync uses the dbsyncer target = soledad_sync_target( self, self.db2._dbname, - source_replica_uid=replica_uid, - sync_db=sync_db, - sync_enc_pool=sync_enc_pool) + source_replica_uid=replica_uid) self.addCleanup(target.close) return sync.SoledadSynchronizer( dbsyncer, diff --git a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index 997dcdcd..b301c48b 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -130,13 +130,9 @@ def make_local_db_and_soledad_target( test.startTwistedServer() replica_uid = os.path.basename(path) db = test.request_state._create_database(replica_uid) - sync_db = test._soledad._sync_db - sync_enc_pool = test._soledad._sync_enc_pool st = soledad_sync_target( test, db._dbname, - source_replica_uid=source_replica_uid, - sync_db=sync_db, - sync_enc_pool=sync_enc_pool) + source_replica_uid=source_replica_uid) return db, st @@ -167,15 +163,11 @@ class TestSoledadSyncTarget( def getSyncTarget(self, path=None, source_replica_uid=uuid4().hex): if self.port is None: self.startTwistedServer() - sync_db = self._soledad._sync_db - sync_enc_pool = self._soledad._sync_enc_pool if path is None: path = self.db2._dbname target = self.sync_target( self, path, - source_replica_uid=source_replica_uid, - sync_db=sync_db, - sync_enc_pool=sync_enc_pool) + source_replica_uid=source_replica_uid) self.addCleanup(target.close) return target @@ -811,12 +803,10 @@ class TestSoledadDbSync( import binascii tohex = binascii.b2a_hex key = tohex(self._soledad.secrets.get_local_storage_key()) - sync_db_key = tohex(self._soledad.secrets.get_sync_db_key()) dbpath = self._soledad._local_db_path self.opts = SQLCipherOptions( - dbpath, key, is_raw_key=True, create=False, - defer_encryption=True, sync_db_key=sync_db_key) + dbpath, 
key, is_raw_key=True, create=False) self.db1 = SQLCipherDatabase(self.opts) self.db2 = self.request_state._create_database(replica_uid='test') @@ -855,8 +845,7 @@ class TestSoledadDbSync( self.opts, crypto, replica_uid, - None, - defer_encryption=True) + None) self.dbsyncer = dbsyncer return dbsyncer.sync(target_url, creds=creds) -- cgit v1.2.3 From 7d6373566120d1211b60e4a926c6bc9a78015637 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 22 Sep 2016 15:41:03 -0400 Subject: [bug] fix bad merge in imports block --- server/src/leap/soledad/server/sync.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 533ce778..337d9ecf 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -17,23 +17,14 @@ """ Server side synchronization infrastructure. """ -<<<<<<< a64e0fad3a8b1a07887c567d99fd32e3dcf54b23 import time -from leap.soledad.common.l2db import sync -from leap.soledad.common.l2db.remote import http_app -from leap.soledad.server.caching import get_cache_for -from leap.soledad.server.state import ServerSyncState -from leap.soledad.common.document import ServerDocument -from itertools import izip -======= from itertools import izip -import cjson -from leap.soledad.common.l2db import sync, Document +from leap.soledad.common.l2db import sync from leap.soledad.common.l2db.remote import http_app from leap.soledad.server.caching import get_cache_for from leap.soledad.server.state import ServerSyncState ->>>>>>> wip: adapt crypto to streaming flow +from leap.soledad.common.document import ServerDocument MAX_REQUEST_SIZE = 6000 # in Mb -- cgit v1.2.3 From 457ee65731bfe5c6a70ef990f5469b896f4b80fe Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 22 Sep 2016 16:05:29 -0400 Subject: [style] pep8 --- client/src/leap/soledad/client/_crypto.py | 15 ++++---- client/src/leap/soledad/client/api.py | 42 
+--------------------- client/src/leap/soledad/client/http_target/send.py | 2 -- client/src/leap/soledad/client/sqlcipher.py | 1 - 4 files changed, 7 insertions(+), 53 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index deba5590..cee4f0f4 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -128,14 +128,14 @@ def decrypt_sym(data, key, iv): return plaintext - class BlobEncryptor(object): """ Encrypts a payload associated with a given Document. """ - def __init__(self, doc_info, content_fd, result=None, secret=None, iv=None): + def __init__(self, doc_info, content_fd, result=None, secret=None, + iv=None): if iv is None: iv = os.urandom(16) else: @@ -181,12 +181,12 @@ class BlobEncryptor(object): def write(data): self._preamble.write(data) self._hmac.write(data) - + current_time = int(time.time()) write(b'\x80') write(struct.pack( - 'Qbb', + 'Qbb', current_time, ENC_SCHEME.symkey, ENC_METHOD.aes_256_ctr)) @@ -299,12 +299,11 @@ class AESEncryptor(object): cipher = _get_aes_ctr_cipher(key, iv) self.encryptor = cipher.encryptor() - + if fd is None: fd = BytesIO() self.fd = fd - self.done = False def write(self, data): @@ -334,7 +333,6 @@ class HMACWriter(object): self.result.write(self._hmac.digest()) - class VerifiedEncrypter(object): implements(interfaces.IConsumer) @@ -346,7 +344,7 @@ class VerifiedEncrypter(object): def write(self, data): enc_chunk = self.crypter.write(data) self.hmac.write(enc_chunk) - + class AESDecryptor(object): @@ -369,7 +367,6 @@ class AESDecryptor(object): self.done = False self.deferred = defer.Deferred() - def write(self, data): decrypted = self.decryptor.update(data) self.fd.write(decrypted) diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index de44f526..8ce77d24 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -131,7 +131,7 @@ 
class Soledad(object): def __init__(self, uuid, passphrase, secrets_path, local_db_path, server_url, cert_file, shared_db=None, - auth_token=None, defer_encryption=False, syncable=True): + auth_token=None, syncable=True): """ Initialize configuration, cryptographic keys and dbs. @@ -168,11 +168,6 @@ class Soledad(object): Authorization token for accessing remote databases. :type auth_token: str - :param defer_encryption: - Whether to defer encryption of documents, or do it - inline while syncing. - :type defer_encryption: bool - :param syncable: If set to ``False``, this database will not attempt to synchronize with remote replicas (default is ``True``) @@ -343,40 +338,6 @@ class Soledad(object): """ return self._dbpool.runU1DBQuery(meth, *args, **kw) - #def stream_encryption(self, result, doc): - #print 'streaming encryption' - #contentfd = StringIO() - #contentfd.write(str(doc.get_json())) - #contentfd.seek(0) -# - #sikret = self._secrets.remote_storage_secret - #docinfo = DocInfo(doc.doc_id, doc.rev) -# - # ------------------------------------------------------- - # TODO need to pass a fd to stage this!!! - # in the long run, we could connect this to the uploader - # but in the meantime, I thikn it's easy if we just - # serialize this to disk. - # - # To do this: - # 1. open a file, with a known name: - # soledad/staging/docid@rev.bin - # 2. pass that fd to BlobEncrypter as result (it's a fd) - # 3. On the upload part of the sync, just open again a read-only fd - # to this staging path and read it. - # that's the encrypted blob, ready to upload! - # ------------------------------------------------------- -# - #crypter = BlobEncryptor( - #docinfo, contentfd, secret=sikret) - #del doc -# -# - #d = crypter.encrypt() - #d.addCallback(lambda _: result) - #return d - - def put_doc(self, doc): """ Update a document. 
@@ -841,7 +802,6 @@ class Soledad(object): token = property(_get_token, _set_token, doc='The authentication Token.') - # # ISecretsStorage # diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index e562a128..431f06f6 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -42,8 +42,6 @@ class HTTPDocSender(object): # Any class inheriting from this one should provide a meaningful attribute # if the sync status event is meant to be used somewhere else. - staging_path = os.path.join(get_path_prefix(), 'leap', 'soledad', 'staging') - uuid = 'undefined' userid = 'undefined' diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 8cbc3aea..618b17b9 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -266,7 +266,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): 'ALTER TABLE document ' 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') - # # SQLCipher API methods # -- cgit v1.2.3 From fae45ba56d35c2bf7b8f00f2cfe5c423718bf12e Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 22 Sep 2016 16:07:59 -0400 Subject: [bug] remove print debug statements --- server/src/leap/soledad/server/sync.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 337d9ecf..8a05b91f 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -200,7 +200,6 @@ class SyncResource(http_app.SyncResource): not already exist. :type ensure: bool """ - print "POST ARGS" # create or open the database cache = get_cache_for('db-' + sync_id + self.dbname, expire=120) if ensure: @@ -273,8 +272,6 @@ class SyncResource(http_app.SyncResource): client on the current sync session. 
:type received: int """ - print 'IN POST GET' - def send_doc(doc, gen, trans_id): entry = dict(id=doc.doc_id, rev=doc.rev, gen=gen, trans_id=trans_id) -- cgit v1.2.3 From 40742021a8beeb68b159456b423e4c3674f7926d Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 22 Sep 2016 16:12:33 -0400 Subject: [bug] fix import --- testing/tests/sync/test_sync_target.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index b301c48b..7c93cd7c 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -31,10 +31,10 @@ from twisted.internet import defer from leap.soledad.client import http_target as target from leap.soledad.client.http_target.fetch_protocol import DocStreamReceiver -from leap.soledad.client import crypto from leap.soledad.client.sqlcipher import SQLCipherU1DBSync from leap.soledad.client.sqlcipher import SQLCipherOptions from leap.soledad.client.sqlcipher import SQLCipherDatabase +from leap.soledad.client import _crypto from leap.soledad.common import l2db @@ -71,7 +71,7 @@ class TestSoledadParseReceivedDocResponse(unittest.TestCase): doc = SoledadDocument('i', rev='r') doc.content = {'a': 'b'} - encrypted_docstr = crypto.SoledadCrypto('').encrypt_doc(doc) + encrypted_docstr = _crypto.SoledadCrypto('').encrypt_doc(doc) with self.assertRaises(l2db.errors.BrokenSyncStream): self.parse("[\r\n{},\r\n]") -- cgit v1.2.3 From 781984f3485b1fd479d09278a665f599c1bd10dc Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 22 Sep 2016 20:37:23 -0300 Subject: [test] fix test and remove leftovers defer_encryption --- client/src/leap/soledad/client/_crypto.py | 18 +++++++----------- client/src/leap/soledad/client/api.py | 5 +---- .../src/leap/soledad/client/examples/soledad_sync.py | 2 +- client/src/leap/soledad/client/http_target/fetch.py | 8 ++++---- client/src/leap/soledad/client/sqlcipher.py | 12 +++--------- 
scripts/db_access/client_side_db.py | 3 +-- scripts/docker/files/bin/client_side_db.py | 3 +-- scripts/profiling/mail/soledad_client.py | 3 +-- scripts/profiling/sync/profile-sync.py | 1 - testing/test_soledad/util.py | 2 -- testing/tests/server/test_server.py | 11 +++-------- testing/tests/sync/test_sqlcipher_sync.py | 4 ---- testing/tests/sync/test_sync_target.py | 4 ++-- 13 files changed, 24 insertions(+), 52 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index cee4f0f4..1492c1ab 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -28,7 +28,6 @@ import struct import time from io import BytesIO -from cStringIO import StringIO from collections import namedtuple import six @@ -36,8 +35,6 @@ import six from twisted.internet import defer from twisted.internet import interfaces from twisted.logger import Logger -from twisted.persisted import dirdbm -from twisted.web import client from twisted.web.client import FileBodyProducer from cryptography.exceptions import InvalidSignature @@ -50,8 +47,6 @@ from cryptography.hazmat.backends.openssl.backend \ from zope.interface import implements -from leap.common.config import get_path_prefix - log = Logger() @@ -241,7 +236,6 @@ class BlobDecryptor(object): raise InvalidBlob self.ciphertext.close() - current_time = int(time.time()) if not data or six.indexbytes(data, 0) != 0x80: raise InvalidBlob try: @@ -259,7 +253,6 @@ class BlobDecryptor(object): iv = data[11:27] docidlen = len(self.doc_id) ciph_idx = 26 + docidlen - doc_id = data[26:ciph_idx] revlen = len(self.rev) rev_idx = ciph_idx + 1 + revlen rev = data[ciph_idx + 1:rev_idx] @@ -313,7 +306,7 @@ class AESEncryptor(object): def end(self): if not self.done: - final = self.encryptor.finalize() + self.fd.write(self.encryptor.finalize()) self.done = True @@ -354,7 +347,7 @@ class AESDecryptor(object): if iv is None: iv = os.urandom(16) if len(key) != 32: - 
raise EncryptionhDecryptionError('key is not 256 bits') + raise EncryptionDecryptionError('key is not 256 bits') if len(iv) != 16: raise EncryptionDecryptionError('iv is not 128 bits') @@ -380,9 +373,12 @@ class AESDecryptor(object): def is_symmetrically_encrypted(payload): - header = base64.urlsafe_b64decode(enc[:15] + '===') + if not payload or len(payload) < 24 \ + or not payload.startswith('{"raw": "'): + return False + header = base64.urlsafe_b64decode(payload[9:24] + '==') ts, sch, meth = struct.unpack('Qbb', header[1:11]) - return sch == ENC_SCHEME.symkey + return sch == ENC_SCHEME.symkey and meth == ENC_METHOD.aes_256_ctr # utils diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 8ce77d24..1f151e7d 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -60,7 +60,6 @@ from leap.soledad.client import sqlcipher from leap.soledad.client.secrets import SoledadSecrets from leap.soledad.client.shared_db import SoledadSharedDatabase from leap.soledad.client._crypto import SoledadCrypto -from leap.soledad.client._crypto import BlobEncryptor logger = getLogger(__name__) @@ -183,7 +182,6 @@ class Soledad(object): self._passphrase = passphrase self._local_db_path = local_db_path self._server_url = server_url - self._defer_encryption = defer_encryption self._secrets_path = None self._dbsyncer = None @@ -285,8 +283,7 @@ class Soledad(object): opts = sqlcipher.SQLCipherOptions( self._local_db_path, key, - is_raw_key=True, create=True, - defer_encryption=self._defer_encryption) + is_raw_key=True, create=True) self._sqlcipher_opts = opts self._dbpool = adbapi.getConnectionPool(opts) diff --git a/client/src/leap/soledad/client/examples/soledad_sync.py b/client/src/leap/soledad/client/examples/soledad_sync.py index 63077ee3..3aed10eb 100644 --- a/client/src/leap/soledad/client/examples/soledad_sync.py +++ b/client/src/leap/soledad/client/examples/soledad_sync.py @@ -40,7 +40,7 @@ def 
init_soledad(_): global soledad soledad = Soledad(uuid, _pass, secrets_path, local_db_path, server_url, cert_file, - auth_token=token, defer_encryption=False) + auth_token=token) def getall(_): d = soledad.get_all_docs() diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index a0c35063..5356f872 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -106,10 +106,10 @@ class HTTPDocFetcher(object): doc = SoledadDocument(doc_info['id'], doc_info['rev'], content) - payload = doc.content['raw'] - if is_symmetrically_encrypted(payload): - decrypted = yield self._crypto.decrypt_doc(doc) - doc.set_json(decrypted) + if is_symmetrically_encrypted(content): + content = yield self._crypto.decrypt_doc(doc) + + doc.set_json(content) # TODO insert blobs here on the blob backend self._insert_doc_cb(doc, doc_info['gen'], doc_info['trans_id']) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 618b17b9..bd7d2cc1 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -117,7 +117,7 @@ class SQLCipherOptions(object): @classmethod def copy(cls, source, path=None, key=None, create=None, is_raw_key=None, cipher=None, kdf_iter=None, - cipher_page_size=None, defer_encryption=None, sync_db_key=None): + cipher_page_size=None, sync_db_key=None): """ Return a copy of C{source} with parameters different than None replaced by new values. 
@@ -134,7 +134,7 @@ class SQLCipherOptions(object): args.append(getattr(source, name)) for name in ["create", "is_raw_key", "cipher", "kdf_iter", - "cipher_page_size", "defer_encryption", "sync_db_key"]: + "cipher_page_size", "sync_db_key"]: val = local_vars[name] if val is not None: kwargs[name] = val @@ -145,7 +145,7 @@ class SQLCipherOptions(object): def __init__(self, path, key, create=True, is_raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, - defer_encryption=False, sync_db_key=None): + sync_db_key=None): """ :param path: The filesystem path for the database to open. :type path: str @@ -163,10 +163,6 @@ class SQLCipherOptions(object): :type kdf_iter: int :param cipher_page_size: The page size. :type cipher_page_size: int - :param defer_encryption: - Whether to defer encryption of documents, or do it - inline while syncing. - :type defer_encryption: bool """ self.path = path self.key = key @@ -175,7 +171,6 @@ class SQLCipherOptions(object): self.cipher = cipher self.kdf_iter = kdf_iter self.cipher_page_size = cipher_page_size - self.defer_encryption = defer_encryption self.sync_db_key = sync_db_key def __str__(self): @@ -201,7 +196,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ A U1DB implementation that uses SQLCipher as its persistence layer. """ - defer_encryption = False # The attribute _index_storage_value will be used as the lookup key for the # implementation of the SQLCipher storage backend. 
diff --git a/scripts/db_access/client_side_db.py b/scripts/db_access/client_side_db.py index 11d72791..2acee2b5 100644 --- a/scripts/db_access/client_side_db.py +++ b/scripts/db_access/client_side_db.py @@ -133,8 +133,7 @@ def _get_soledad_instance(uuid, passphrase, basedir, server_url, cert_file, local_db_path=local_db_path, server_url=server_url, cert_file=cert_file, - auth_token=token, - defer_encryption=True) + auth_token=token) def _get_keymanager_instance(username, provider, soledad, token, diff --git a/scripts/docker/files/bin/client_side_db.py b/scripts/docker/files/bin/client_side_db.py index 4be33d13..80da7392 100644 --- a/scripts/docker/files/bin/client_side_db.py +++ b/scripts/docker/files/bin/client_side_db.py @@ -136,8 +136,7 @@ def _get_soledad_instance(uuid, passphrase, basedir, server_url, cert_file, local_db_path=local_db_path, server_url=server_url, cert_file=cert_file, - auth_token=token, - defer_encryption=True) + auth_token=token) def _get_keymanager_instance(username, provider, soledad, token, diff --git a/scripts/profiling/mail/soledad_client.py b/scripts/profiling/mail/soledad_client.py index 5ac8ce39..dcd605aa 100644 --- a/scripts/profiling/mail/soledad_client.py +++ b/scripts/profiling/mail/soledad_client.py @@ -30,8 +30,7 @@ class SoledadClient(object): server_url=self._server_url, cert_file=None, auth_token=self._auth_token, - secret_id=None, - defer_encryption=True) + secret_id=None) def close(self): if self._soledad is not None: diff --git a/scripts/profiling/sync/profile-sync.py b/scripts/profiling/sync/profile-sync.py index 34e66f03..1d59217a 100755 --- a/scripts/profiling/sync/profile-sync.py +++ b/scripts/profiling/sync/profile-sync.py @@ -91,7 +91,6 @@ def _get_soledad_instance_from_uuid(uuid, passphrase, basedir, server_url, server_url=server_url, cert_file=cert_file, auth_token=token, - defer_encryption=True, syncable=True) diff --git a/testing/test_soledad/util.py b/testing/test_soledad/util.py index b1965aa6..f44ce166 100644 
--- a/testing/test_soledad/util.py +++ b/testing/test_soledad/util.py @@ -216,7 +216,6 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest): """ Instantiates Soledad for usage in tests. """ - defer_sync_encryption = False @pytest.mark.usefixtures("method_tmpdir") def setUp(self): @@ -300,7 +299,6 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest): self.tempdir, prefix, local_db_path), server_url=server_url, # Soledad will fail if not given an url cert_file=cert_file, - defer_encryption=self.defer_sync_encryption, shared_db=MockSharedDB(), auth_token=auth_token) self.addCleanup(soledad.close) diff --git a/testing/tests/server/test_server.py b/testing/tests/server/test_server.py index 6bbcf002..a7cc97d4 100644 --- a/testing/tests/server/test_server.py +++ b/testing/tests/server/test_server.py @@ -41,7 +41,7 @@ from test_soledad.util import ( BaseSoledadTest, ) -from leap.soledad.common import crypto +from leap.soledad.client import _crypto from leap.soledad.client import Soledad from leap.soledad.server.config import load_configuration from leap.soledad.server.config import CONFIG_DEFAULTS @@ -412,13 +412,8 @@ class EncryptedSyncTestCase( self.assertEqual(soldoc.doc_id, couchdoc.doc_id) self.assertEqual(soldoc.rev, couchdoc.rev) couch_content = couchdoc.content.keys() - self.assertEqual(6, len(couch_content)) - self.assertTrue(crypto.ENC_JSON_KEY in couch_content) - self.assertTrue(crypto.ENC_SCHEME_KEY in couch_content) - self.assertTrue(crypto.ENC_METHOD_KEY in couch_content) - self.assertTrue(crypto.ENC_IV_KEY in couch_content) - self.assertTrue(crypto.MAC_KEY in couch_content) - self.assertTrue(crypto.MAC_METHOD_KEY in couch_content) + self.assertEqual(['raw'], couch_content) + self.assertTrue(_crypto.is_symmetrically_encrypted(couchdoc.get_json())) d = sol1.get_all_docs() d.addCallback(_db1AssertEmptyDocList) diff --git a/testing/tests/sync/test_sqlcipher_sync.py b/testing/tests/sync/test_sqlcipher_sync.py index c3cd8444..029164eb 100644 --- 
a/testing/tests/sync/test_sqlcipher_sync.py +++ b/testing/tests/sync/test_sqlcipher_sync.py @@ -544,10 +544,6 @@ class SQLCipherDatabaseSyncTests( self.sync(self.db2, db3) doc3 = db3.get_doc('the-doc') - _crypto = self._soledad._crypto - decrypted = _crypto.decrypt_doc(doc3) - doc3.set_json(decrypted) - self.assertEqual(doc4.get_json(), doc3.get_json()) self.assertFalse(doc3.has_conflicts) self.db1.close() diff --git a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index 7c93cd7c..ef034142 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -71,7 +71,7 @@ class TestSoledadParseReceivedDocResponse(unittest.TestCase): doc = SoledadDocument('i', rev='r') doc.content = {'a': 'b'} - encrypted_docstr = _crypto.SoledadCrypto('').encrypt_doc(doc) + encrypted_docstr = _crypto.SoledadCrypto('safe').encrypt_doc(doc) with self.assertRaises(l2db.errors.BrokenSyncStream): self.parse("[\r\n{},\r\n]") @@ -589,9 +589,9 @@ class SoledadDatabaseSyncTargetTests( [], 'other-replica', last_known_generation=0, last_known_trans_id=None, insert_doc_cb=self.receive_doc) self.assertTransactionLog([doc.doc_id, doc.doc_id], self.db) + self.assertEqual(2, new_gen) self.assertEqual( (doc.doc_id, doc.rev, None, 2), self.other_changes[0][:-1]) - self.assertEqual(2, new_gen) if self.whitebox: self.assertEqual(self.db._last_exchange_log['return'], {'last_gen': 2, 'docs': [(doc.doc_id, doc.rev)]}) -- cgit v1.2.3 From 3a93b3d33e4e1c44397e3ad377b04d0b140a65bf Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 30 Sep 2016 17:37:42 -0300 Subject: [style] remove unused imports (pep8) --- client/src/leap/soledad/client/http_target/send.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index 431f06f6..ae0b126f 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ 
b/client/src/leap/soledad/client/http_target/send.py @@ -15,13 +15,10 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . import json -import os from twisted.internet import defer -from twisted.persisted import dirdbm from leap.soledad.common.log import getLogger -from leap.common.config import get_path_prefix from leap.soledad.client.events import emit_async from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS from leap.soledad.client.http_target.support import RequestBody -- cgit v1.2.3 From efea94cf241afb845aabba0870b33566aaaeafbc Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 30 Sep 2016 19:26:55 -0300 Subject: [bug] defer insertion to threads during download Insertion is synchronous and blocks the reactor. That's a temporary solution as we used to have on decpool. --- client/src/leap/soledad/client/http_target/fetch.py | 11 ++++++++++- client/src/leap/soledad/client/http_target/fetch_protocol.py | 5 +---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 5356f872..dc7bbd2c 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
from twisted.internet import defer +from twisted.internet import threads from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS from leap.soledad.client.events import emit_async @@ -51,6 +52,8 @@ class HTTPDocFetcher(object): ensure_callback, sync_id): new_generation = last_known_generation new_transaction_id = last_known_trans_id + # Acts as a queue, ensuring line order on async processing + self.semaphore = defer.DeferredSemaphore(1) # we fetch the first document before fetching the rest because we need # to know the total number of documents to be received, and this @@ -62,6 +65,7 @@ class HTTPDocFetcher(object): sync_id, self._received_docs) number_of_changes, ngen, ntrans =\ self._parse_metadata(metadata) + yield self.semaphore.acquire() if ngen: new_generation = ngen @@ -112,7 +116,12 @@ class HTTPDocFetcher(object): doc.set_json(content) # TODO insert blobs here on the blob backend - self._insert_doc_cb(doc, doc_info['gen'], doc_info['trans_id']) + # FIXME: This is wrong. Using a SQLite connection from multiple threads + # is dangerous. We should bring the dbpool here or find an alternative. + # Current fix only helps releasing the reactor for other tasks as this + # is an IO intensive call. 
+ yield self.semaphore.run(threads.deferToThread, self._insert_doc_cb, + doc, doc_info['gen'], doc_info['trans_id']) self._received_docs += 1 user_data = {'uuid': self.uuid, 'userid': self.userid} _emit_receive_status(user_data, self._received_docs, total=1000000) diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py index 6ecba2b0..4d45c9d4 100644 --- a/client/src/leap/soledad/client/http_target/fetch_protocol.py +++ b/client/src/leap/soledad/client/http_target/fetch_protocol.py @@ -107,6 +107,7 @@ class DocStreamReceiver(protocol.Protocol): line, _ = utils.check_and_strip_comma(lines.pop(0)) try: self.lineReceived(line) + self._line += 1 except AssertionError, e: raise errors.BrokenSyncStream(e) @@ -116,17 +117,13 @@ class DocStreamReceiver(protocol.Protocol): self._properly_finished = True elif self._line == 0: assert line == '[' - self._line += 1 elif self._line == 1: - self._line += 1 self.metadata = json.loads(line) assert 'error' not in self.metadata elif (self._line % 2) == 0: - self._line += 1 self.current_doc = json.loads(line) assert 'error' not in self.current_doc else: - self._line += 1 self._doc_reader(self.current_doc, line.strip() or None) def finish(self): -- cgit v1.2.3 From eeed046f3fb9b4b4e90ac9e31803f469314ab8c6 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 30 Sep 2016 21:46:21 -0300 Subject: [refactor] DocStreamReceiver <- ReadBodyProtocol Both classes holds u1db error handling. Making DocStreamReceiver a subclass reduces the error handling to a single place thus removing duplicated code. 
--- .../src/leap/soledad/client/http_target/fetch.py | 3 +- .../soledad/client/http_target/fetch_protocol.py | 56 ++++------------------ 2 files changed, 12 insertions(+), 47 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index dc7bbd2c..1b4351ea 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -14,6 +14,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import json from twisted.internet import defer from twisted.internet import threads @@ -138,7 +139,7 @@ class HTTPDocFetcher(object): :rtype: tuple """ try: - # metadata = json.loads(line) + metadata = json.loads(metadata) new_generation = metadata['new_generation'] new_transaction_id = metadata['new_transaction_id'] number_of_changes = metadata['number_of_changes'] diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py index 4d45c9d4..902607ea 100644 --- a/client/src/leap/soledad/client/http_target/fetch_protocol.py +++ b/client/src/leap/soledad/client/http_target/fetch_protocol.py @@ -17,18 +17,15 @@ import warnings from cStringIO import StringIO from twisted.internet import reactor from twisted.internet import defer -from twisted.internet import protocol from twisted.web.client import HTTPConnectionPool from twisted.web._newclient import ResponseDone -from twisted.web._newclient import PotentialDataLoss -from twisted.web.client import PartialDownloadError from leap.soledad.common.l2db import errors from leap.soledad.common.l2db.remote import utils -from leap.soledad.common.l2db.remote import http_errors from leap.common.http import HTTPClient +from .support import ReadBodyProtocol -class DocStreamReceiver(protocol.Protocol): +class DocStreamReceiver(ReadBodyProtocol): def __init__(self, response, deferred, 
doc_reader): self.deferred = deferred @@ -44,52 +41,19 @@ class DocStreamReceiver(protocol.Protocol): self._buffer = StringIO() self._properly_finished = False - # ---8<--- snippet from u1db.remote.http_client, modified to use errbacks - def _error(self, respdic): - descr = respdic.get("error") - exc_cls = errors.wire_description_to_exc.get(descr) - if exc_cls is not None: - message = respdic.get("message") - self.deferred.errback(exc_cls(message)) - # ---8<--- end of snippet from u1db.remote.http_client - def connectionLost(self, reason): """ Deliver the accumulated response bytes to the waiting L{Deferred}, if the response body has been completely received without error. """ - if reason.check(ResponseDone): - - try: - body = self.finish() - except errors.BrokenSyncStream, e: - return self.deferred.errback(e) - - # ---8<--- snippet from u1db.remote.http_client - if self.status in (200, 201): - self.deferred.callback(self.metadata) - elif self.status in http_errors.ERROR_STATUSES: - try: - respdic = json.loads(body) - except ValueError: - self.deferred.errback( - errors.HTTPError(self.status, body, self.headers)) - else: - self._error(respdic) - # special cases - elif self.status == 503: - self.deferred.errback(errors.Unavailable(body, self.headers)) + try: + if reason.check(ResponseDone): + self.dataBuffer = self.metadata else: - self.deferred.errback( - errors.HTTPError(self.status, body, self.headers)) - # ---8<--- end of snippet from u1db.remote.http_client - - elif reason.check(PotentialDataLoss): - self.deferred.errback( - PartialDownloadError(self.status, self.message, - b''.join(body))) - else: - self.deferred.errback(reason) + self.dataBuffer = self.finish() + except errors.BrokenSyncStream, e: + return self.deferred.errback(e) + return ReadBodyProtocol.connectionLost(self, reason) def consumeBufferLines(self): content = self._buffer.getvalue()[0:self._buffer.tell()] @@ -118,7 +82,7 @@ class DocStreamReceiver(protocol.Protocol): elif self._line == 0: 
assert line == '[' elif self._line == 1: - self.metadata = json.loads(line) + self.metadata = line assert 'error' not in self.metadata elif (self._line % 2) == 0: self.current_doc = json.loads(line) -- cgit v1.2.3 From af0189dda80f9a5cb720841378e635fc06881edf Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 1 Oct 2016 01:16:40 -0300 Subject: [refactor] more comments, less code Some code were duplicated, got removed. Additional comments added for documenting such a critical and complex part as a protocol. --- .../soledad/client/http_target/fetch_protocol.py | 132 +++++++++------------ .../soledad/client/http_target/send_protocol.py | 2 +- .../src/leap/soledad/client/http_target/support.py | 4 +- 3 files changed, 59 insertions(+), 79 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py index 902607ea..dac82d8e 100644 --- a/client/src/leap/soledad/client/http_target/fetch_protocol.py +++ b/client/src/leap/soledad/client/http_target/fetch_protocol.py @@ -1,31 +1,44 @@ -#!/usr/bin/env python - -# Copyright (c) Twisted Matrix Laboratories. -# See LICENSE for details. - -""" -Example using stdio, Deferreds, LineReceiver and twisted.web.client. - -Note that the WebCheckerCommandProtocol protocol could easily be used in e.g. -a telnet server instead; see the comments for details. - -Based on an example by Abe Fettig. -""" -import sys +# -*- coding: utf-8 -*- +# support.py +# Copyright (C) 2016 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . import json -import warnings +from functools import partial from cStringIO import StringIO -from twisted.internet import reactor -from twisted.internet import defer -from twisted.web.client import HTTPConnectionPool from twisted.web._newclient import ResponseDone from leap.soledad.common.l2db import errors from leap.soledad.common.l2db.remote import utils -from leap.common.http import HTTPClient from .support import ReadBodyProtocol +from .support import readBody class DocStreamReceiver(ReadBodyProtocol): + """ + A protocol implementation that can parse incoming data from server based + on a line format specified on u1db implementation. Except that we split doc + attributes from content to ease parsing and increment throughput for larger + documents. + [\r\n + {metadata},\r\n + {doc_info},\r\n + {content},\r\n + ... + {doc_info},\r\n + {content},\r\n + ] + """ def __init__(self, response, deferred, doc_reader): self.deferred = deferred @@ -56,6 +69,10 @@ class DocStreamReceiver(ReadBodyProtocol): return ReadBodyProtocol.connectionLost(self, reason) def consumeBufferLines(self): + """ + Consumes lines from buffer and rewind it, writing remaining data + that didn't formed a line back into buffer. + """ content = self._buffer.getvalue()[0:self._buffer.tell()] self._buffer.seek(0) lines = content.split(self.delimiter) @@ -63,6 +80,10 @@ class DocStreamReceiver(ReadBodyProtocol): return lines def dataReceived(self, data): + """ + Buffer incoming data until a line breaks comes in. We check only + the incoming data for efficiency. + """ self._buffer.write(data) if '\n' not in data: return @@ -76,6 +97,14 @@ class DocStreamReceiver(ReadBodyProtocol): raise errors.BrokenSyncStream(e) def lineReceived(self, line): + """ + Protocol implementation. 
+ 0: [\r\n + 1: {metadata},\r\n + (even): {doc_info},\r\n + (odd): {data},\r\n + (last): ] + """ assert not self._properly_finished if ']' == line: self._properly_finished = True @@ -91,6 +120,9 @@ class DocStreamReceiver(ReadBodyProtocol): self._doc_reader(self.current_doc, line.strip() or None) def finish(self): + """ + Checks that ']' came and stream was properly closed. + """ if not self._properly_finished: raise errors.BrokenSyncStream() content = self._buffer.getvalue()[0:self._buffer.tell()] @@ -106,62 +138,10 @@ def build_body_reader(doc_reader): @param doc_reader: Function to be called for processing an incoming doc. Will be called with doc metadata (dict parsed from 1st line) and doc content (string) - @type response: function + @type doc_reader: function - @return: A L{Deferred} which will fire with the sync metadata. - Cancelling it will close the connection to the server immediately. + @return: A function that can be called by the http Agent to create and + configure the proper protocol. """ - def read(response): - def cancel(deferred): - """ - Cancel a L{readBody} call, close the connection to the HTTP server - immediately, if it is still open. - - @param deferred: The cancelled L{defer.Deferred}. 
- """ - abort = getAbort() - if abort is not None: - abort() - - def getAbort(): - return getattr(protocol.transport, 'abortConnection', None) - - d = defer.Deferred(cancel) - protocol = DocStreamReceiver(response, d, doc_reader) - response.deliverBody(protocol) - if protocol.transport is not None and getAbort() is None: - warnings.warn( - 'Using readBody with a transport that does not have an ' - 'abortConnection method', - category=DeprecationWarning, - stacklevel=2) - return d - return read - - -def read_doc(doc_info, content): - print doc_info, len(content) - - -def finish(args): - print args - reactor.stop() - - -def fetch(url, token, sync_id): - headers = {'Authorization': ['Token %s' % token]} - headers.update({'content-type': ['application/x-soledad-sync-get']}) - body = """[ -{"ensure": false, "last_known_trans_id": "", "sync_id": "%s", -"last_known_generation": 0}, -{"received": 0} -]""" % sync_id - http = HTTPClient(pool=HTTPConnectionPool(reactor)) - d = http.request(url, 'POST', body, headers, build_body_reader(read_doc)) - d.addBoth(finish) - - -if __name__ == "__main__": - assert len(sys.argv) == 4 - reactor.callWhenRunning(fetch, sys.argv[1], sys.argv[2], sys.argv[3]) - reactor.run() + protocolClass = partial(DocStreamReceiver, doc_reader=doc_reader) + return partial(readBody, protocolClass=protocolClass) diff --git a/client/src/leap/soledad/client/http_target/send_protocol.py b/client/src/leap/soledad/client/http_target/send_protocol.py index b93a4284..61e95e56 100644 --- a/client/src/leap/soledad/client/http_target/send_protocol.py +++ b/client/src/leap/soledad/client/http_target/send_protocol.py @@ -32,7 +32,7 @@ class DocStreamProducer(object): :param consumer: Any IConsumer provider. :type consumer: twisted.internet.interfaces.IConsumer - :return: A successful deferred. + :return: A Deferred that fires when production ends. 
:rtype: twisted.internet.defer.Deferred """ call = self.producer.pop(0) diff --git a/client/src/leap/soledad/client/http_target/support.py b/client/src/leap/soledad/client/http_target/support.py index c066331c..feb306e8 100644 --- a/client/src/leap/soledad/client/http_target/support.py +++ b/client/src/leap/soledad/client/http_target/support.py @@ -94,7 +94,7 @@ class ReadBodyProtocol(_ReadBodyProtocol): self.deferred.errback(reason) -def readBody(response): +def readBody(response, protocolClass=ReadBodyProtocol): """ Get the body of an L{IResponse} and return it as a byte string. @@ -119,7 +119,7 @@ def readBody(response): abort() d = defer.Deferred(cancel) - protocol = ReadBodyProtocol(response, d) + protocol = protocolClass(response, d) def getAbort(): return getattr(protocol.transport, 'abortConnection', None) -- cgit v1.2.3 From 8ff1f0b781c49b88da13af390e4d118ad3e77b43 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 17 Oct 2016 03:25:43 -0300 Subject: [refactor] Issues from code review --- client/src/leap/soledad/client/api.py | 8 ------- client/src/leap/soledad/client/http_target/api.py | 7 ++++-- .../src/leap/soledad/client/http_target/fetch.py | 28 ++++++++++++---------- .../soledad/client/http_target/fetch_protocol.py | 2 +- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 1f151e7d..c560f661 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -967,13 +967,5 @@ class VerifiedHTTPSConnection(httplib.HTTPSConnection): match_hostname(self.sock.getpeercert(), self.host) -# TODO move this to a common module - -class DocInfo: - def __init__(self, doc_id, rev): - self.doc_id = doc_id - self.rev = rev - - old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection diff --git a/client/src/leap/soledad/client/http_target/api.py 
b/client/src/leap/soledad/client/http_target/api.py index 2d51d94f..0e24b37f 100644 --- a/client/src/leap/soledad/client/http_target/api.py +++ b/client/src/leap/soledad/client/http_target/api.py @@ -40,9 +40,8 @@ class SyncTargetAPI(SyncTarget): Declares public methods and implements u1db.SyncTarget. """ - @defer.inlineCallbacks def close(self): - yield self._http.close() + return self._http.close() @property def uuid(self): @@ -75,6 +74,10 @@ class SyncTargetAPI(SyncTarget): if not body_producer: d = self._http.request(url, method, body, headers, body_reader) else: + # Upload case, check send.py + # Used to lazy produce body from docs with a custom protocol + # FIXME: _agent usage to bypass timeout, there is an ongoing + # discussion on how to properly do it. d = self._http._agent.request( method, url, headers=Headers(headers), bodyProducer=body_producer(body)) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 1b4351ea..036b5b21 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -53,19 +53,21 @@ class HTTPDocFetcher(object): ensure_callback, sync_id): new_generation = last_known_generation new_transaction_id = last_known_trans_id + self._received_docs = 0 # Acts as a queue, ensuring line order on async processing + # as `self._insert_doc_cb` cant be run concurrently or out of order. + # DeferredSemaphore solves the concurrency and its implementation uses + # a queue, solving the ordering. + # FIXME: Find a proper solution to avoid surprises on Twisted changes self.semaphore = defer.DeferredSemaphore(1) - # we fetch the first document before fetching the rest because we need - # to know the total number of documents to be received, and this - # information comes as metadata to each request. 
- - self._received_docs = 0 metadata = yield self._fetch_all( last_known_generation, last_known_trans_id, sync_id, self._received_docs) - number_of_changes, ngen, ntrans =\ - self._parse_metadata(metadata) + metadata = self._parse_metadata(metadata) + number_of_changes, ngen, ntrans = metadata + + # wait for pending inserts yield self.semaphore.acquire() if ngen: @@ -106,8 +108,8 @@ class HTTPDocFetcher(object): :param total: The total number of operations. :type total: int """ - # If arriving content was symmetrically encrypted, we decrypt - # decrypt incoming document and insert into local database + # If arriving content was symmetrically encrypted, we decrypt incoming + # document and insert into local database doc = SoledadDocument(doc_info['id'], doc_info['rev'], content) @@ -117,10 +119,10 @@ class HTTPDocFetcher(object): doc.set_json(content) # TODO insert blobs here on the blob backend - # FIXME: This is wrong. Using a SQLite connection from multiple threads - # is dangerous. We should bring the dbpool here or find an alternative. - # Current fix only helps releasing the reactor for other tasks as this - # is an IO intensive call. + # FIXME: This is wrong. Using the very same SQLite connection object + # from multiple threads is dangerous. We should bring the dbpool here + # or find an alternative. Deferring to a thread only helps releasing + # the reactor for other tasks as this is an IO intensive call. 
yield self.semaphore.run(threads.deferToThread, self._insert_doc_cb, doc, doc_info['gen'], doc_info['trans_id']) self._received_docs += 1 diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py index dac82d8e..29801819 100644 --- a/client/src/leap/soledad/client/http_target/fetch_protocol.py +++ b/client/src/leap/soledad/client/http_target/fetch_protocol.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# support.py +# fetch_protocol.py # Copyright (C) 2016 LEAP # # This program is free software: you can redistribute it and/or modify -- cgit v1.2.3 From 288434178a4e89f86b9740cfe77a4dc0ce9e45f7 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 19 Oct 2016 02:14:46 -0300 Subject: [bug] total number of documents Parsing from metadata we can store the total of docs and handle it for the doc parser in order to be able to keep consistent events info. --- client/src/leap/soledad/client/http_target/fetch.py | 4 ++-- client/src/leap/soledad/client/http_target/fetch_protocol.py | 4 +++- testing/tests/sync/test_sync_target.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 036b5b21..bbc743e1 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -97,7 +97,7 @@ class HTTPDocFetcher(object): body_reader=body_reader) @defer.inlineCallbacks - def _doc_parser(self, doc_info, content): + def _doc_parser(self, doc_info, content, total): """ Insert a received document into the local replica. 
@@ -127,7 +127,7 @@ class HTTPDocFetcher(object): doc, doc_info['gen'], doc_info['trans_id']) self._received_docs += 1 user_data = {'uuid': self.uuid, 'userid': self.userid} - _emit_receive_status(user_data, self._received_docs, total=1000000) + _emit_receive_status(user_data, self._received_docs, total=total) def _parse_metadata(self, metadata): """ diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py index 29801819..4290785d 100644 --- a/client/src/leap/soledad/client/http_target/fetch_protocol.py +++ b/client/src/leap/soledad/client/http_target/fetch_protocol.py @@ -113,11 +113,13 @@ class DocStreamReceiver(ReadBodyProtocol): elif self._line == 1: self.metadata = line assert 'error' not in self.metadata + self.total = json.loads(line).get('number_of_changes', -1) elif (self._line % 2) == 0: self.current_doc = json.loads(line) assert 'error' not in self.current_doc else: - self._doc_reader(self.current_doc, line.strip() or None) + self._doc_reader( + self.current_doc, line.strip() or None, self.total) def finish(self): """ diff --git a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index ef034142..17223606 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -63,7 +63,7 @@ class TestSoledadParseReceivedDocResponse(unittest.TestCase): """ def parse(self, stream): - parser = DocStreamReceiver(None, None, lambda x, y: 42) + parser = DocStreamReceiver(None, None, lambda *_: 42) parser.dataReceived(stream) parser.finish() -- cgit v1.2.3 From 2505f61f7374cd0afeb9392c03589607d7b63b64 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 19 Oct 2016 14:05:27 -0300 Subject: [refactor] stop using leap.common.http We aren't using leap.common.http implementation and we need specific features from original Twisted Web Agent. This commit implements it on HTTP Targer. 
--- .../leap/soledad/client/http_target/__init__.py | 8 ++++---- client/src/leap/soledad/client/http_target/api.py | 22 +++++++++------------- client/src/leap/soledad/client/sqlcipher.py | 6 +----- client/src/leap/soledad/client/sync.py | 6 ------ testing/tests/sync/test_sync.py | 1 - testing/tests/sync/test_sync_deferred.py | 1 - testing/tests/sync/test_sync_target.py | 2 -- 7 files changed, 14 insertions(+), 32 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/__init__.py b/client/src/leap/soledad/client/http_target/__init__.py index 5dc87fcb..17b7307c 100644 --- a/client/src/leap/soledad/client/http_target/__init__.py +++ b/client/src/leap/soledad/client/http_target/__init__.py @@ -25,8 +25,8 @@ after receiving. import os from leap.soledad.common.log import getLogger -from leap.common.http import HTTPClient -from twisted.web.client import HTTPConnectionPool +from leap.common.certs import get_compatible_ssl_context_factory +from twisted.web.client import Agent from twisted.internet import reactor from leap.soledad.client.http_target.send import HTTPDocSender from leap.soledad.client.http_target.api import SyncTargetAPI @@ -96,8 +96,8 @@ class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): # XXX Increasing timeout of simple requests to avoid chances of hitting # the duplicated syncing bug. This could be reduced to the 30s default # after implementing Cancellable Sync. 
See #7382 - self._http = HTTPClient(cert_file, timeout=90, - pool=HTTPConnectionPool(reactor)) + self._http = Agent(reactor, + get_compatible_ssl_context_factory(cert_file)) if DO_STATS: self.sync_exchange_phase = [0] diff --git a/client/src/leap/soledad/client/http_target/api.py b/client/src/leap/soledad/client/http_target/api.py index 0e24b37f..1b086a00 100644 --- a/client/src/leap/soledad/client/http_target/api.py +++ b/client/src/leap/soledad/client/http_target/api.py @@ -18,11 +18,13 @@ import os import json import base64 +from StringIO import StringIO from uuid import uuid4 from twisted.web.error import Error from twisted.internet import defer from twisted.web.http_headers import Headers +from twisted.web.client import FileBodyProducer from leap.soledad.client.http_target.support import readBody from leap.soledad.common.errors import InvalidAuthTokenError @@ -40,9 +42,6 @@ class SyncTargetAPI(SyncTarget): Declares public methods and implements u1db.SyncTarget. """ - def close(self): - return self._http.close() - @property def uuid(self): return self._uuid @@ -71,17 +70,14 @@ class SyncTargetAPI(SyncTarget): headers = headers or self._base_header if content_type: headers.update({'content-type': [content_type]}) - if not body_producer: - d = self._http.request(url, method, body, headers, body_reader) - else: + if not body_producer and body: + body = FileBodyProducer(StringIO(body)) + elif body_producer: # Upload case, check send.py - # Used to lazy produce body from docs with a custom protocol - # FIXME: _agent usage to bypass timeout, there is an ongoing - # discussion on how to properly do it. 
- d = self._http._agent.request( - method, url, headers=Headers(headers), - bodyProducer=body_producer(body)) - d.addCallback(body_reader) + body = body_producer(body) + d = self._http.request( + method, url, headers=Headers(headers), bodyProducer=body) + d.addCallback(body_reader) d.addErrback(_unauth_to_invalid_token_error) return d diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index bd7d2cc1..f4a3ba6e 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -533,11 +533,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): """ super(SQLCipherU1DBSync, self).close() # close all open syncers - for url in self._syncers.keys(): - _, syncer = self._syncers[url] - syncer.close() - del self._syncers[url] - self.running = False + self._syncers = {} class U1DBSQLiteBackend(sqlite_backend.SQLitePartialExpandDatabase): diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 9d237d98..272b3f57 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -217,12 +217,6 @@ class SoledadSynchronizer(Synchronizer): # if gapless record current reached generation with target return self._record_sync_info_with_the_target(info["my_gen"]) - def close(self): - """ - Close the synchronizer. - """ - self.sync_target.close() - def _record_sync_info_with_the_target(self, start_generation): """ Store local replica metadata in server. 
diff --git a/testing/tests/sync/test_sync.py b/testing/tests/sync/test_sync.py index a7d0a92b..a434e944 100644 --- a/testing/tests/sync/test_sync.py +++ b/testing/tests/sync/test_sync.py @@ -184,7 +184,6 @@ class TestSoledadDbSync( target = soledad_sync_target( self, self.db2._dbname, source_replica_uid=self._soledad._dbpool.replica_uid) - self.addCleanup(target.close) return sync.SoledadSynchronizer( self.db, target).sync() diff --git a/testing/tests/sync/test_sync_deferred.py b/testing/tests/sync/test_sync_deferred.py index eb71ea73..001612a6 100644 --- a/testing/tests/sync/test_sync_deferred.py +++ b/testing/tests/sync/test_sync_deferred.py @@ -142,7 +142,6 @@ class TestSoledadDbSyncDeferredEncDecr( target = soledad_sync_target( self, self.db2._dbname, source_replica_uid=replica_uid) - self.addCleanup(target.close) return sync.SoledadSynchronizer( dbsyncer, target).sync() diff --git a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index 17223606..fd1d413e 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -168,7 +168,6 @@ class TestSoledadSyncTarget( target = self.sync_target( self, path, source_replica_uid=source_replica_uid) - self.addCleanup(target.close) return target def setUp(self): @@ -389,7 +388,6 @@ class SoledadDatabaseSyncTargetTests( def tearDown(self): self.db.close() - self.st.close() tests.TestCaseWithServer.tearDown(self) SoledadWithCouchServerMixin.tearDown(self) -- cgit v1.2.3 From a45084e4beb3fa16962735d7cebfa9fdac73dc6c Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 28 Oct 2016 17:42:26 -0300 Subject: [tests] fall back to TestCase classes This test was using pytest, but it was making other trial based tests fail. I couldn't figure out why, but falling back to TestCase solved it. 
--- testing/tests/couch/conftest.py | 31 ------------------------------- testing/tests/couch/test_state.py | 33 ++++++++++++++++++++------------- 2 files changed, 20 insertions(+), 44 deletions(-) delete mode 100644 testing/tests/couch/conftest.py diff --git a/testing/tests/couch/conftest.py b/testing/tests/couch/conftest.py deleted file mode 100644 index 1074f091..00000000 --- a/testing/tests/couch/conftest.py +++ /dev/null @@ -1,31 +0,0 @@ -import couchdb -import pytest -import random -import string - - -@pytest.fixture -def random_name(): - return 'user-' + ''.join( - random.choice( - string.ascii_lowercase) for _ in range(10)) - - -class RandomDatabase(object): - - def __init__(self, couch_url, name): - self.couch_url = couch_url - self.name = name - self.server = couchdb.client.Server(couch_url) - self.database = self.server.create(name) - - def teardown(self): - self.server.delete(self.name) - - -@pytest.fixture -def db(random_name, request): - couch_url = request.config.getoption('--couch-url') - db = RandomDatabase(couch_url, random_name) - request.addfinalizer(db.teardown) - return db diff --git a/testing/tests/couch/test_state.py b/testing/tests/couch/test_state.py index e293b5b8..e5ac3704 100644 --- a/testing/tests/couch/test_state.py +++ b/testing/tests/couch/test_state.py @@ -1,25 +1,32 @@ import pytest - from leap.soledad.common.couch import CONFIG_DOC_ID from leap.soledad.common.couch import SCHEMA_VERSION from leap.soledad.common.couch import SCHEMA_VERSION_KEY from leap.soledad.common.couch.state import CouchServerState +from uuid import uuid4 from leap.soledad.common.errors import WrongCouchSchemaVersionError from leap.soledad.common.errors import MissingCouchConfigDocumentError +from test_soledad.util import CouchDBTestCase + +class CouchDesignDocsTests(CouchDBTestCase): -def test_wrong_couch_version_raises(db): - wrong_schema_version = SCHEMA_VERSION + 1 - db.database.create( - {'_id': CONFIG_DOC_ID, SCHEMA_VERSION_KEY: wrong_schema_version}) - 
with pytest.raises(WrongCouchSchemaVersionError): - CouchServerState(db.couch_url, create_cmd='/bin/echo', - check_schema_versions=True) + def setUp(self): + CouchDBTestCase.setUp(self) + self.db = self.couch_server.create('user-' + uuid4().hex) + self.addCleanup(self.delete_db, self.db.name) + def test_wrong_couch_version_raises(self): + wrong_schema_version = SCHEMA_VERSION + 1 + self.db.create( + {'_id': CONFIG_DOC_ID, SCHEMA_VERSION_KEY: wrong_schema_version}) + with pytest.raises(WrongCouchSchemaVersionError): + CouchServerState(self.couch_url, create_cmd='/bin/echo', + check_schema_versions=True) -def test_missing_config_doc_raises(db): - db.database.create({}) - with pytest.raises(MissingCouchConfigDocumentError): - CouchServerState(db.couch_url, create_cmd='/bin/echo', - check_schema_versions=True) + def test_missing_config_doc_raises(self): + self.db.create({}) + with pytest.raises(MissingCouchConfigDocumentError): + CouchServerState(self.couch_url, create_cmd='/bin/echo', + check_schema_versions=True) -- cgit v1.2.3 From 304739a5b7335a521c37680235bd3452cf3c8d0f Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 28 Oct 2016 17:55:14 -0300 Subject: [refactor] stop using a dict for syncers A dict was used to store references for the synchronizers based on a URL. This commit removes it as it doesnt make sense with current code. --- client/src/leap/soledad/client/sqlcipher.py | 49 ++++++----------------------- 1 file changed, 10 insertions(+), 39 deletions(-) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index f4a3ba6e..6caa39cd 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -42,9 +42,7 @@ SQLCipher 1.1 databases, we do not implement them as all SQLCipher databases handled by Soledad should be created by SQLCipher >= 2.0. 
""" import os -import json -from hashlib import sha256 from functools import partial from pysqlcipher import dbapi2 as sqlcipher_dbapi2 @@ -409,13 +407,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._sync_db = sync_db - # we store syncers in a dictionary indexed by the target URL. We also - # store a hash of the auth info in case auth info expires and we need - # to rebuild the syncer for that target. The final self._syncers - # format is the following: - # - # self._syncers = {'': ('', syncer), ...} - self._syncers = {} # storage for the documents received during a sync self.received_docs = [] @@ -495,28 +486,16 @@ class SQLCipherU1DBSync(SQLCipherDatabase): :return: A synchronizer. :rtype: Synchronizer """ - # we want to store at most one syncer for each url, so we also store a - # hash of the connection credentials and replace the stored syncer for - # a certain url if credentials have changed. - h = sha256(json.dumps([url, creds])).hexdigest() - cur_h, syncer = self._syncers.get(url, (None, None)) - if syncer is None or h != cur_h: - syncer = SoledadSynchronizer( - self, - SoledadHTTPSyncTarget( - url, - # XXX is the replica_uid ready? - self._replica_uid, - creds=creds, - crypto=self._crypto, - cert_file=self._cert_file, - sync_db=self._sync_db)) - self._syncers[url] = (h, syncer) - # in order to reuse the same synchronizer multiple times we have to - # reset its state (i.e. the number of documents received from target - # and inserted in the local replica). - syncer.num_inserted = 0 - return syncer + return SoledadSynchronizer( + self, + SoledadHTTPSyncTarget( + url, + # XXX is the replica_uid ready? 
+ self._replica_uid, + creds=creds, + crypto=self._crypto, + cert_file=self._cert_file, + sync_db=self._sync_db)) # # Symmetric encryption of syncing docs @@ -527,14 +506,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # XXX this SHOULD BE a callback return self._get_generation() - def close(self): - """ - Close the syncer and syncdb orderly - """ - super(SQLCipherU1DBSync, self).close() - # close all open syncers - self._syncers = {} - class U1DBSQLiteBackend(sqlite_backend.SQLitePartialExpandDatabase): """ -- cgit v1.2.3 From 349e42d73225282935b2d4677e778821db25634b Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 28 Oct 2016 21:37:56 -0300 Subject: [tests] improve doc creation on benchmarks If we create all at once we cant test higher loads because it will try to hold all in memory at the same time. Also, this code is smaller and more readable. --- testing/tests/perf/test_sync.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/testing/tests/perf/test_sync.py b/testing/tests/perf/test_sync.py index 0b48a0b9..7b3c4bf0 100644 --- a/testing/tests/perf/test_sync.py +++ b/testing/tests/perf/test_sync.py @@ -1,17 +1,11 @@ import pytest -from twisted.internet.defer import gatherResults - +@pytest.inlineCallbacks def load_up(client, amount, payload): - deferreds = [] # create a bunch of local documents for i in xrange(amount): - d = client.create_doc({'content': payload}) - deferreds.append(d) - d = gatherResults(deferreds) - d.addCallback(lambda _: None) - return d + yield client.create_doc({'content': payload}) def create_upload(uploads, size): -- cgit v1.2.3 From e14a50f15f34898a00f4afa1eb288ef708b4fbd4 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 28 Oct 2016 21:40:05 -0300 Subject: [tests] remove test_sync_very_large_files We have benchmarks now to test sync limits and 100mb is too far from current needs. 
--- testing/tests/server/test_server.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/testing/tests/server/test_server.py b/testing/tests/server/test_server.py index a7cc97d4..2a3cb751 100644 --- a/testing/tests/server/test_server.py +++ b/testing/tests/server/test_server.py @@ -468,16 +468,6 @@ class EncryptedSyncTestCase( """ return self._test_encrypted_sym_sync(passphrase=u'ãáàäéàëíìïóòöõúùüñç') - def test_sync_very_large_files(self): - """ - Test if Soledad can sync very large files. - """ - self.skipTest( - "Work in progress. For reference, see: " - "https://leap.se/code/issues/7370") - length = 100 * (10 ** 6) # 100 MB - return self._test_encrypted_sym_sync(doc_size=length, number_of_docs=1) - def test_sync_many_small_files(self): """ Test if Soledad can sync many smallfiles. -- cgit v1.2.3 From efdbd3b58520dd998f5625ea1311d513fcce4e1c Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 29 Oct 2016 23:26:36 -0300 Subject: [refactor] simplify server insert Moved out magic numbers into a constant and simplified logic during doc upload. --- server/src/leap/soledad/server/sync.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 8a05b91f..d43fc822 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -29,6 +29,7 @@ from leap.soledad.common.document import ServerDocument MAX_REQUEST_SIZE = 6000 # in Mb MAX_ENTRY_SIZE = 200 # in Mb +ENTRY_CACHE_SIZE = 8192 * 1024 class SyncExchange(sync.SyncExchange): @@ -242,22 +243,10 @@ class SyncResource(http_app.SyncResource): :param doc_idx: The index of the current document. 
:type doc_idx: int """ - doc = ServerDocument(id, rev) - doc._json = content - if (len(content or '') > (8192 * 1024) / 4) or number_of_docs < 4: - self.sync_exch.batched_insert_from_source(self._staging, - self._sync_id) - self._staging = [] - self._staging_size = 0 - self.sync_exch.insert_doc_from_source( - doc, gen, trans_id, - number_of_docs=number_of_docs, - doc_idx=doc_idx, - sync_id=self._sync_id) - else: - self._staging_size += len(content or '') - self._staging.append((doc, gen, trans_id, number_of_docs, doc_idx)) - if self._staging_size > 8192 * 1024 or doc_idx == number_of_docs: + doc = ServerDocument(id, rev, json=content) + self._staging_size += len(content or '') + self._staging.append((doc, gen, trans_id, number_of_docs, doc_idx)) + if self._staging_size > ENTRY_CACHE_SIZE or doc_idx == number_of_docs: self.sync_exch.batched_insert_from_source(self._staging, self._sync_id) self._staging = [] -- cgit v1.2.3 From 6fe7e61bfd8f8fd6140b931e55b8c5aae6432321 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 11 Nov 2016 16:38:00 -0300 Subject: [feature] Adds deprecated crypto module This is supposed to be used only for temporary backwards compatibility, while we develop a proper migration tool. --- client/src/leap/soledad/client/crypto.py | 450 +++++++++++++++++++++++++++++++ 1 file changed, 450 insertions(+) create mode 100644 client/src/leap/soledad/client/crypto.py diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py new file mode 100644 index 00000000..09e90171 --- /dev/null +++ b/client/src/leap/soledad/client/crypto.py @@ -0,0 +1,450 @@ +# -*- coding: utf-8 -*- +# crypto.py +# Copyright (C) 2013, 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Cryptographic utilities for Soledad. +""" +import os +import binascii +import hmac +import hashlib +import json + +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +from cryptography.hazmat.backends.multibackend import MultiBackend +from cryptography.hazmat.backends.openssl.backend \ + import Backend as OpenSSLBackend + +from leap.soledad.common import soledad_assert +from leap.soledad.common import soledad_assert_type +from leap.soledad.common import crypto +from leap.soledad.common.log import getLogger +import warnings + + +logger = getLogger(__name__) +warnings.warn("'soledad.client.crypto' MODULE DEPRECATED", + DeprecationWarning, stacklevel=2) + + +MAC_KEY_LENGTH = 64 + +crypto_backend = MultiBackend([OpenSSLBackend()]) + + +def encrypt_sym(data, key): + """ + Encrypt data using AES-256 cipher in CTR mode. + + :param data: The data to be encrypted. + :type data: str + :param key: The key used to encrypt data (must be 256 bits long). + :type key: str + + :return: A tuple with the initialization vector and the encrypted data. + :rtype: (long, str) + """ + soledad_assert_type(key, str) + soledad_assert( + len(key) == 32, # 32 x 8 = 256 bits. + 'Wrong key size: %s bits (must be 256 bits long).' % + (len(key) * 8)) + + iv = os.urandom(16) + cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) + encryptor = cipher.encryptor() + ciphertext = encryptor.update(data) + encryptor.finalize() + + return binascii.b2a_base64(iv), ciphertext + + +def decrypt_sym(data, key, iv): + """ + Decrypt some data previously encrypted using AES-256 cipher in CTR mode. 
+ + :param data: The data to be decrypted. + :type data: str + :param key: The symmetric key used to decrypt data (must be 256 bits + long). + :type key: str + :param iv: The initialization vector. + :type iv: long + + :return: The decrypted data. + :rtype: str + """ + soledad_assert_type(key, str) + # assert params + soledad_assert( + len(key) == 32, # 32 x 8 = 256 bits. + 'Wrong key size: %s (must be 256 bits long).' % len(key)) + iv = binascii.a2b_base64(iv) + cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) + decryptor = cipher.decryptor() + return decryptor.update(data) + decryptor.finalize() + + +def doc_mac_key(doc_id, secret): + """ + Generate a key for calculating a MAC for a document whose id is + C{doc_id}. + + The key is derived using HMAC having sha256 as underlying hash + function. The key used for HMAC is the first MAC_KEY_LENGTH characters + of Soledad's storage secret. The HMAC message is C{doc_id}. + + :param doc_id: The id of the document. + :type doc_id: str + + :param secret: The Soledad storage secret + :type secret: str + + :return: The key. + :rtype: str + """ + soledad_assert(secret is not None) + return hmac.new( + secret[:MAC_KEY_LENGTH], + doc_id, + hashlib.sha256).digest() + + +class SoledadCrypto(object): + """ + General cryptographic functionality encapsulated in a + object that can be passed along. + """ + def __init__(self, secret): + """ + Initialize the crypto object. + + :param secret: The Soledad remote storage secret. + :type secret: str + """ + self._secret = secret + + def doc_mac_key(self, doc_id): + return doc_mac_key(doc_id, self._secret) + + def doc_passphrase(self, doc_id): + """ + Generate a passphrase for symmetric encryption of document's contents. + + The password is derived using HMAC having sha256 as underlying hash + function. 
The key used for HMAC are the first + C{soledad.REMOTE_STORAGE_SECRET_LENGTH} bytes of Soledad's storage + secret stripped from the first MAC_KEY_LENGTH characters. The HMAC + message is C{doc_id}. + + :param doc_id: The id of the document that will be encrypted using + this passphrase. + :type doc_id: str + + :return: The passphrase. + :rtype: str + """ + soledad_assert(self._secret is not None) + return hmac.new( + self._secret[MAC_KEY_LENGTH:], + doc_id, + hashlib.sha256).digest() + + def encrypt_doc(self, doc): + """ + Wrapper around encrypt_docstr that accepts the document as argument. + + :param doc: the document. + :type doc: SoledadDocument + """ + key = self.doc_passphrase(doc.doc_id) + + return encrypt_docstr( + doc.get_json(), doc.doc_id, doc.rev, key, self._secret) + + def decrypt_doc(self, doc): + """ + Wrapper around decrypt_doc_dict that accepts the document as argument. + + :param doc: the document. + :type doc: SoledadDocument + + :return: json string with the decrypted document + :rtype: str + """ + key = self.doc_passphrase(doc.doc_id) + return decrypt_doc_dict( + doc.content, doc.doc_id, doc.rev, key, self._secret) + + @property + def secret(self): + return self._secret + + +# +# Crypto utilities for a SoledadDocument. +# + +def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, + mac_method, secret): + """ + Calculate a MAC for C{doc} using C{ciphertext}. + + Current MAC method used is HMAC, with the following parameters: + + * key: sha256(storage_secret, doc_id) + * msg: doc_id + doc_rev + ciphertext + * digestmod: sha256 + + :param doc_id: The id of the document. + :type doc_id: str + :param doc_rev: The revision of the document. + :type doc_rev: str + :param ciphertext: The content of the document. + :type ciphertext: str + :param enc_scheme: The encryption scheme. + :type enc_scheme: str + :param enc_method: The encryption method. + :type enc_method: str + :param enc_iv: The encryption initialization vector. 
+ :type enc_iv: str + :param mac_method: The MAC method to use. + :type mac_method: str + :param secret: The Soledad storage secret + :type secret: str + + :return: The calculated MAC. + :rtype: str + + :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown. + """ + try: + soledad_assert(mac_method == crypto.MacMethods.HMAC) + except AssertionError: + raise crypto.UnknownMacMethodError + template = "{doc_id}{doc_rev}{ciphertext}{enc_scheme}{enc_method}{enc_iv}" + content = template.format( + doc_id=doc_id, + doc_rev=doc_rev, + ciphertext=ciphertext, + enc_scheme=enc_scheme, + enc_method=enc_method, + enc_iv=enc_iv) + return hmac.new( + doc_mac_key(doc_id, secret), + content, + hashlib.sha256).digest() + + +def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): + """ + Encrypt C{doc}'s content. + + Encrypt doc's contents using AES-256 CTR mode and return a valid JSON + string representing the following: + + { + crypto.ENC_JSON_KEY: '', + crypto.ENC_SCHEME_KEY: 'symkey', + crypto.ENC_METHOD_KEY: crypto.EncryptionMethods.AES_256_CTR, + crypto.ENC_IV_KEY: '', + MAC_KEY: '' + crypto.MAC_METHOD_KEY: 'hmac' + } + + :param docstr: A representation of the document to be encrypted. + :type docstr: str or unicode. + + :param doc_id: The document id. + :type doc_id: str + + :param doc_rev: The document revision. + :type doc_rev: str + + :param key: The key used to encrypt ``data`` (must be 256 bits long). + :type key: str + + :param secret: The Soledad storage secret (used for MAC auth). + :type secret: str + + :return: The JSON serialization of the dict representing the encrypted + content. + :rtype: str + """ + enc_scheme = crypto.EncryptionSchemes.SYMKEY + enc_method = crypto.EncryptionMethods.AES_256_CTR + mac_method = crypto.MacMethods.HMAC + enc_iv, ciphertext = encrypt_sym( + str(docstr), # encryption/decryption routines expect str + key) + mac = binascii.b2a_hex( # store the mac as hex. 
+ mac_doc( + doc_id, + doc_rev, + ciphertext, + enc_scheme, + enc_method, + enc_iv, + mac_method, + secret)) + # Return a representation for the encrypted content. In the following, we + # convert binary data to hexadecimal representation so the JSON + # serialization does not complain about what it tries to serialize. + hex_ciphertext = binascii.b2a_hex(ciphertext) + logger.debug("encrypting doc: %s" % doc_id) + return json.dumps({ + crypto.ENC_JSON_KEY: hex_ciphertext, + crypto.ENC_SCHEME_KEY: enc_scheme, + crypto.ENC_METHOD_KEY: enc_method, + crypto.ENC_IV_KEY: enc_iv, + crypto.MAC_KEY: mac, + crypto.MAC_METHOD_KEY: mac_method, + }) + + +def _verify_doc_mac(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, + enc_iv, mac_method, secret, doc_mac): + """ + Verify that C{doc_mac} is a correct MAC for the given document. + + :param doc_id: The id of the document. + :type doc_id: str + :param doc_rev: The revision of the document. + :type doc_rev: str + :param ciphertext: The content of the document. + :type ciphertext: str + :param enc_scheme: The encryption scheme. + :type enc_scheme: str + :param enc_method: The encryption method. + :type enc_method: str + :param enc_iv: The encryption initialization vector. + :type enc_iv: str + :param mac_method: The MAC method to use. + :type mac_method: str + :param secret: The Soledad storage secret + :type secret: str + :param doc_mac: The MAC to be verified against. + :type doc_mac: str + + :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown. + :raise crypto.WrongMacError: Raised when MAC could not be verified. + """ + calculated_mac = mac_doc( + doc_id, + doc_rev, + ciphertext, + enc_scheme, + enc_method, + enc_iv, + mac_method, + secret) + # we compare mac's hashes to avoid possible timing attacks that might + # exploit python's builtin comparison operator behaviour, which fails + # immediatelly when non-matching bytes are found. 
+ doc_mac_hash = hashlib.sha256( + binascii.a2b_hex( # the mac is stored as hex + doc_mac)).digest() + calculated_mac_hash = hashlib.sha256(calculated_mac).digest() + + if doc_mac_hash != calculated_mac_hash: + logger.warn("wrong MAC while decrypting doc...") + raise crypto.WrongMacError("Could not authenticate document's " + "contents.") + + +def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): + """ + Decrypt a symmetrically encrypted C{doc}'s content. + + Return the JSON string representation of the document's decrypted content. + + The passed doc_dict argument should have the following structure: + + { + crypto.ENC_JSON_KEY: '', + crypto.ENC_SCHEME_KEY: '', + crypto.ENC_METHOD_KEY: '', + crypto.ENC_IV_KEY: '', # (optional) + MAC_KEY: '' + crypto.MAC_METHOD_KEY: 'hmac' + } + + C{enc_blob} is the encryption of the JSON serialization of the document's + content. For now Soledad just deals with documents whose C{enc_scheme} is + crypto.EncryptionSchemes.SYMKEY and C{enc_method} is + crypto.EncryptionMethods.AES_256_CTR. + + :param doc_dict: The content of the document to be decrypted. + :type doc_dict: dict + + :param doc_id: The document id. + :type doc_id: str + + :param doc_rev: The document revision. + :type doc_rev: str + + :param key: The key used to encrypt ``data`` (must be 256 bits long). + :type key: str + + :param secret: The Soledad storage secret. + :type secret: str + + :return: The JSON serialization of the decrypted content. + :rtype: str + + :raise UnknownEncryptionMethodError: Raised when trying to decrypt from an + unknown encryption method. 
+ """ + # assert document dictionary structure + expected_keys = set([ + crypto.ENC_JSON_KEY, + crypto.ENC_SCHEME_KEY, + crypto.ENC_METHOD_KEY, + crypto.ENC_IV_KEY, + crypto.MAC_KEY, + crypto.MAC_METHOD_KEY, + ]) + soledad_assert(expected_keys.issubset(set(doc_dict.keys()))) + + ciphertext = binascii.a2b_hex(doc_dict[crypto.ENC_JSON_KEY]) + enc_scheme = doc_dict[crypto.ENC_SCHEME_KEY] + enc_method = doc_dict[crypto.ENC_METHOD_KEY] + enc_iv = doc_dict[crypto.ENC_IV_KEY] + doc_mac = doc_dict[crypto.MAC_KEY] + mac_method = doc_dict[crypto.MAC_METHOD_KEY] + + soledad_assert(enc_scheme == crypto.EncryptionSchemes.SYMKEY) + + _verify_doc_mac( + doc_id, doc_rev, ciphertext, enc_scheme, enc_method, + enc_iv, mac_method, secret, doc_mac) + + return decrypt_sym(ciphertext, key, enc_iv) + + +def is_symmetrically_encrypted(doc): + """ + Return True if the document was symmetrically encrypted. + + :param doc: The document to check. + :type doc: SoledadDocument + + :rtype: bool + """ + if doc.content and crypto.ENC_SCHEME_KEY in doc.content: + if doc.content[crypto.ENC_SCHEME_KEY] \ + == crypto.EncryptionSchemes.SYMKEY: + return True + return False -- cgit v1.2.3 From cc81bb2a8ed0e989159f17061a567230a5059c21 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 11 Nov 2016 22:47:56 -0300 Subject: [feature] Adds back support to deprecated crypto Will be removed when we have the proper tool to migrate data. 
--- client/src/leap/soledad/client/_crypto.py | 13 +++++++++---- client/src/leap/soledad/client/http_target/__init__.py | 3 +++ client/src/leap/soledad/client/http_target/fetch.py | 7 ++++--- testing/tests/server/test_server.py | 2 +- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index 1492c1ab..d8d37f55 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -372,11 +372,16 @@ class AESDecryptor(object): self.done = True -def is_symmetrically_encrypted(payload): - if not payload or len(payload) < 24 \ - or not payload.startswith('{"raw": "'): +def is_symmetrically_encrypted(doc): + payload = doc.content + if not payload or 'raw' not in payload: + return False + payload = str(payload['raw']) + if len(payload) < 16: + return False + header = base64.urlsafe_b64decode(payload[:18] + '==') + if six.indexbytes(header, 0) != 0x80: return False - header = base64.urlsafe_b64decode(payload[9:24] + '==') ts, sch, meth = struct.unpack('Qbb', header[1:11]) return sch == ENC_SCHEME.symkey and meth == ENC_METHOD.aes_256_ctr diff --git a/client/src/leap/soledad/client/http_target/__init__.py b/client/src/leap/soledad/client/http_target/__init__.py index 17b7307c..91d87f0c 100644 --- a/client/src/leap/soledad/client/http_target/__init__.py +++ b/client/src/leap/soledad/client/http_target/__init__.py @@ -31,6 +31,7 @@ from twisted.internet import reactor from leap.soledad.client.http_target.send import HTTPDocSender from leap.soledad.client.http_target.api import SyncTargetAPI from leap.soledad.client.http_target.fetch import HTTPDocFetcher +from leap.soledad.client import crypto as old_crypto logger = getLogger(__name__) @@ -87,6 +88,8 @@ class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): self._uuid = None self.set_creds(creds) self._crypto = crypto + # TODO: DEPRECATED CRYPTO + self._deprecated_crypto = 
old_crypto.SoledadCrypto(crypto.secret) self._sync_db = sync_db self._insert_doc_cb = None # asynchronous encryption/decryption attributes diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index bbc743e1..53650de4 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -25,6 +25,7 @@ from leap.soledad.common.log import getLogger from leap.soledad.client._crypto import is_symmetrically_encrypted from leap.soledad.common.document import SoledadDocument from leap.soledad.common.l2db import errors +from leap.soledad.client import crypto as old_crypto from . import fetch_protocol @@ -112,10 +113,10 @@ class HTTPDocFetcher(object): # document and insert into local database doc = SoledadDocument(doc_info['id'], doc_info['rev'], content) - - if is_symmetrically_encrypted(content): + if is_symmetrically_encrypted(doc): content = yield self._crypto.decrypt_doc(doc) - + elif old_crypto.is_symmetrically_encrypted(doc): + content = self._deprecated_crypto.decrypt_doc(doc) doc.set_json(content) # TODO insert blobs here on the blob backend diff --git a/testing/tests/server/test_server.py b/testing/tests/server/test_server.py index 2a3cb751..2f958b29 100644 --- a/testing/tests/server/test_server.py +++ b/testing/tests/server/test_server.py @@ -413,7 +413,7 @@ class EncryptedSyncTestCase( self.assertEqual(soldoc.rev, couchdoc.rev) couch_content = couchdoc.content.keys() self.assertEqual(['raw'], couch_content) - self.assertTrue(_crypto.is_symmetrically_encrypted(couchdoc.get_json())) + self.assertTrue(_crypto.is_symmetrically_encrypted(couchdoc)) d = sol1.get_all_docs() d.addCallback(_db1AssertEmptyDocList) -- cgit v1.2.3 From ec4a1d773609a922734f36bfe2360c7e622ff155 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 16 Nov 2016 19:09:15 -0300 Subject: [refactor] remove assert logic from fetch_protocol Asserts aren't a good solution for 
stream parsing, it's cleaner to check and raise in place. Also, asserts can be ignored. --- .../leap/soledad/client/http_target/fetch_protocol.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py index 4290785d..a15991f3 100644 --- a/client/src/leap/soledad/client/http_target/fetch_protocol.py +++ b/client/src/leap/soledad/client/http_target/fetch_protocol.py @@ -90,11 +90,8 @@ class DocStreamReceiver(ReadBodyProtocol): lines = self.consumeBufferLines() while lines: line, _ = utils.check_and_strip_comma(lines.pop(0)) - try: - self.lineReceived(line) - self._line += 1 - except AssertionError, e: - raise errors.BrokenSyncStream(e) + self.lineReceived(line) + self._line += 1 def lineReceived(self, line): """ @@ -105,18 +102,22 @@ class DocStreamReceiver(ReadBodyProtocol): (odd): {data},\r\n (last): ] """ - assert not self._properly_finished + if self._properly_finished: + raise errors.BrokenSyncStream("Reading a finished stream") if ']' == line: self._properly_finished = True elif self._line == 0: - assert line == '[' + if line is not '[': + raise errors.BrokenSyncStream("Invalid start") elif self._line == 1: self.metadata = line - assert 'error' not in self.metadata + if 'error' in self.metadata: + raise errors.BrokenSyncStream("Error from server: %s" % line) self.total = json.loads(line).get('number_of_changes', -1) elif (self._line % 2) == 0: self.current_doc = json.loads(line) - assert 'error' not in self.current_doc + if 'error' in self.current_doc: + raise errors.BrokenSyncStream("Error from server: %s" % line) else: self._doc_reader( self.current_doc, line.strip() or None, self.total) -- cgit v1.2.3 From 3bb6cad3878f2b3338e36765fdb4ecacc6a7270a Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 16 Nov 2016 19:11:03 -0300 Subject: [style] add license header --- 
.../src/leap/soledad/client/http_target/send_protocol.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/client/src/leap/soledad/client/http_target/send_protocol.py b/client/src/leap/soledad/client/http_target/send_protocol.py index 61e95e56..9980309a 100644 --- a/client/src/leap/soledad/client/http_target/send_protocol.py +++ b/client/src/leap/soledad/client/http_target/send_protocol.py @@ -1,3 +1,19 @@ +# -*- coding: utf-8 -*- +# send_protocol.py +# Copyright (C) 2016 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
from zope.interface import implements from twisted.internet import defer from twisted.internet import reactor -- cgit v1.2.3 From d297b54c45271dbb6fd1aa0f3e6fd209220e1038 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 16 Nov 2016 19:32:17 -0300 Subject: [refactor] improve readability of stream producer --- .../src/leap/soledad/client/http_target/send_protocol.py | 14 ++++++-------- client/src/leap/soledad/client/http_target/support.py | 13 ++++++++----- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/send_protocol.py b/client/src/leap/soledad/client/http_target/send_protocol.py index 9980309a..63ce6b42 100644 --- a/client/src/leap/soledad/client/http_target/send_protocol.py +++ b/client/src/leap/soledad/client/http_target/send_protocol.py @@ -28,14 +28,14 @@ class DocStreamProducer(object): implements(IBodyProducer) - def __init__(self, parser_producer): + def __init__(self, producer): """ Initialize the string produer. - :param body: The body of the request. - :type body: str + :param producer: A RequestBody instance and a list of producer calls + :type producer: (.support.RequestBody, [(function, *args)]) """ - self.body, self.producer = parser_producer + self.body, self.producer = producer self.length = UNKNOWN_LENGTH self.pause = False self.stop = False @@ -51,16 +51,14 @@ class DocStreamProducer(object): :return: A Deferred that fires when production ends. 
:rtype: twisted.internet.defer.Deferred """ - call = self.producer.pop(0) - yield call[0](*call[1:]) while self.producer and not self.stop: if self.pause: yield self.sleep(0.001) continue call = self.producer.pop(0) yield call[0](*call[1:]) - consumer.write(self.body.pop(1)) - consumer.write(self.body.pop(1)) + consumer.write(self.body.pop(1, leave_open=True)) + consumer.write(self.body.pop(0)) # close stream def sleep(self, secs): d = defer.Deferred() diff --git a/client/src/leap/soledad/client/http_target/support.py b/client/src/leap/soledad/client/http_target/support.py index feb306e8..19e07838 100644 --- a/client/src/leap/soledad/client/http_target/support.py +++ b/client/src/leap/soledad/client/http_target/support.py @@ -178,13 +178,16 @@ class RequestBody(object): entry = json.dumps(entry_dict) self.entries.append(entry) - def pop(self, amount=10): + def pop(self, amount=10, leave_open=False): """ - Removes all entries and returns it formatted and ready + Removes entries and returns it formatted and ready to be sent. - :param number: number of entries to pop and format - :type number: int + :param amount: number of entries to pop and format + :type amount: int + + :param leave_open: flag to skip stream closing + :type amount: bool :return: formatted body ready to be sent :rtype: str @@ -193,7 +196,7 @@ class RequestBody(object): amount = min([len(self.entries), amount]) entries = [self.entries.pop(0) for i in xrange(amount)] self.consumed += amount - end = len(self.entries) == 0 + end = len(self.entries) == 0 if not leave_open else False return self.entries_to_str(entries, start, end) def __str__(self): -- cgit v1.2.3 From b9ef460f39a9f0b62894367c4260986e35fbbb1c Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 16 Nov 2016 21:57:26 -0300 Subject: [refactor] simplify content as a new line Code was complex and raised a flag during review. 
--- client/src/leap/soledad/client/http_target/support.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/support.py b/client/src/leap/soledad/client/http_target/support.py index 19e07838..d8d8e420 100644 --- a/client/src/leap/soledad/client/http_target/support.py +++ b/client/src/leap/soledad/client/http_target/support.py @@ -162,20 +162,15 @@ class RequestBody(object): def insert_info(self, **entry_dict): """ Dumps an entry into JSON format and add it to entries list. + Adds 'content' key on a new line if it's present. :param entry_dict: Entry as a dictionary :type entry_dict: dict - - :return: length of the entry after JSON dumps - :rtype: int """ + content = '' if 'content' in entry_dict: - content = entry_dict['content'] or '' - del entry_dict['content'] - entry = json.dumps(entry_dict) - entry = entry + ',\r\n' + content - else: - entry = json.dumps(entry_dict) + content = ',\r\n' + (entry_dict['content'] or '') + entry = json.dumps(entry_dict) + content self.entries.append(entry) def pop(self, amount=10, leave_open=False): -- cgit v1.2.3 From 3ec2aea071bdc1a76f90761adf9c0e38fa4cb832 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 16 Nov 2016 22:03:02 -0300 Subject: [style] improve comments for sync.py --- client/src/leap/soledad/client/sync.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 272b3f57..6bd58f8f 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -141,18 +141,7 @@ class SoledadSynchronizer(Synchronizer): self.sync_phase[0] += 1 # -------------------------------------------------------------------- - # prepare to send all the changed docs - # changed_doc_ids = [doc_id for doc_id, _, _ in changes] - # docs_to_send = self.source.get_docs( - # changed_doc_ids, check_for_conflicts=False, 
include_deleted=True) ids_sent = [doc_id for doc_id, _, _ in changes] - # docs_by_generation = [] - # idx = 0 - # for doc in docs_to_send: - # _, gen, trans = changes[idx] - # docs_by_generation.append((doc, gen, trans)) - # idx += 1 - # ids_sent.append(doc.doc_id) docs_by_generation = [] for doc_id, gen, trans in changes: get_doc = (self.source.get_doc, doc_id) @@ -166,8 +155,8 @@ class SoledadSynchronizer(Synchronizer): self._insert_doc_from_target, ensure_callback=ensure_callback) logger.debug("target gen after sync: %d" % new_gen) logger.debug("target trans_id after sync: %s" % new_trans_id) - if hasattr(self.source, 'commit'): - self.source.commit() # sync worked, commit + if hasattr(self.source, 'commit'): # sqlcipher backend speed up + self.source.commit() # insert it all in a single transaction info = { "target_replica_uid": self.target_replica_uid, "new_gen": new_gen, -- cgit v1.2.3 From 5989db73d6aff56ade7ca9526f9f5241616aa72a Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 16 Nov 2016 22:08:03 -0300 Subject: [style] explicit unlimited request size Request size on a stream can't be measured upfront and a limit doesn't make much sense. The real limit is user's Quota, to be implemented. --- server/src/leap/soledad/server/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index d43fc822..f505a044 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -27,7 +27,7 @@ from leap.soledad.server.state import ServerSyncState from leap.soledad.common.document import ServerDocument -MAX_REQUEST_SIZE = 6000 # in Mb +MAX_REQUEST_SIZE = float('inf') # It's a stream. 
MAX_ENTRY_SIZE = 200 # in Mb ENTRY_CACHE_SIZE = 8192 * 1024 -- cgit v1.2.3 From 45763ed191296df9d0c5a3b9b37a07ebee3293ad Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 16 Nov 2016 22:14:01 -0300 Subject: [tests] remove test_sync_deferred Deferred encryption option is gone. --- testing/tests/sync/test_sync_deferred.py | 182 ------------------------------- 1 file changed, 182 deletions(-) delete mode 100644 testing/tests/sync/test_sync_deferred.py diff --git a/testing/tests/sync/test_sync_deferred.py b/testing/tests/sync/test_sync_deferred.py deleted file mode 100644 index 001612a6..00000000 --- a/testing/tests/sync/test_sync_deferred.py +++ /dev/null @@ -1,182 +0,0 @@ -# test_sync_deferred.py -# Copyright (C) 2014 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -""" -Test Leap backend bits: sync with deferred encryption. 
-""" -import time -import os -import random -import string -import shutil - -from twisted.internet import defer - -from leap.soledad.client import sync -from leap.soledad.client.sqlcipher import SQLCipherOptions -from leap.soledad.client.sqlcipher import SQLCipherDatabase - -from testscenarios import TestWithScenarios - -from test_soledad import u1db_tests as tests -from test_soledad.util import ADDRESS -from test_soledad.util import SoledadWithCouchServerMixin -from test_soledad.util import make_soledad_app -from test_soledad.util import soledad_sync_target - - -WAIT_STEP = 1 -MAX_WAIT = 10 -DBPASS = "pass" - - -class BaseSoledadDeferredEncTest(SoledadWithCouchServerMixin): - - """ - Another base class for testing the deferred encryption during - the syncs, using the intermediate database. - """ - defer_sync_encryption = True - - def setUp(self): - SoledadWithCouchServerMixin.setUp(self) - self.startTwistedServer() - # config info - self.db1_file = os.path.join(self.tempdir, "db1.u1db") - os.unlink(self.db1_file) - self.db_pass = DBPASS - self.email = ADDRESS - - # get a random prefix for each test, so we do not mess with - # concurrency during initialization and shutting down of - # each local db. - self.rand_prefix = ''.join( - map(lambda x: random.choice(string.ascii_letters), range(6))) - - # open test dbs: db1 will be the local sqlcipher db (which - # instantiates a syncdb). We use the self._soledad instance that was - # already created on some setUp method. 
- import binascii - tohex = binascii.b2a_hex - key = tohex(self._soledad.secrets.get_local_storage_key()) - dbpath = self._soledad._local_db_path - - self.opts = SQLCipherOptions( - dbpath, key, is_raw_key=True, create=False) - self.db1 = SQLCipherDatabase(self.opts) - - self.db2 = self.request_state._create_database('test') - - def tearDown(self): - # XXX should not access "private" attrs - shutil.rmtree(os.path.dirname(self._soledad._local_db_path)) - SoledadWithCouchServerMixin.tearDown(self) - - -class SyncTimeoutError(Exception): - - """ - Dummy exception to notify timeout during sync. - """ - pass - - -class TestSoledadDbSyncDeferredEncDecr( - TestWithScenarios, - BaseSoledadDeferredEncTest, - tests.TestCaseWithServer): - - """ - Test db.sync remote sync shortcut. - Case with deferred encryption: using the intermediate - syncdb. - """ - - scenarios = [ - ('http', { - 'make_app_with_state': make_soledad_app, - 'make_database_for_test': tests.make_memory_database_for_test, - }), - ] - - oauth = False - token = True - - def setUp(self): - """ - Need to explicitely invoke inicialization on all bases. - """ - BaseSoledadDeferredEncTest.setUp(self) - self.server = self.server_thread = None - self.syncer = None - - def tearDown(self): - """ - Need to explicitely invoke destruction on all bases. - """ - dbsyncer = getattr(self, 'dbsyncer', None) - if dbsyncer: - dbsyncer.close() - BaseSoledadDeferredEncTest.tearDown(self) - - def do_sync(self): - """ - Perform sync using SoledadSynchronizer, SoledadSyncTarget - and Token auth. - """ - replica_uid = self._soledad._dbpool.replica_uid - dbsyncer = self._soledad._dbsyncer # Soledad.sync uses the dbsyncer - - target = soledad_sync_target( - self, self.db2._dbname, - source_replica_uid=replica_uid) - return sync.SoledadSynchronizer( - dbsyncer, - target).sync() - - def wait_for_sync(self): - """ - Wait for sync to finish. 
- """ - wait = 0 - syncer = self.syncer - if syncer is not None: - while syncer.syncing: - time.sleep(WAIT_STEP) - wait += WAIT_STEP - if wait >= MAX_WAIT: - raise SyncTimeoutError - - @defer.inlineCallbacks - def test_db_sync(self): - """ - Test sync. - - Adapted to check for encrypted content. - """ - doc1 = self.db1.create_doc_from_json(tests.simple_doc) - doc2 = self.db2.create_doc_from_json(tests.nested_doc) - local_gen_before_sync = yield self.do_sync() - - gen, _, changes = self.db1.whats_changed(local_gen_before_sync) - self.assertEqual(1, len(changes)) - - self.assertEqual(doc2.doc_id, changes[0][0]) - self.assertEqual(1, gen - local_gen_before_sync) - - self.assertGetEncryptedDoc( - self.db2, doc1.doc_id, doc1.rev, tests.simple_doc, False) - self.assertGetEncryptedDoc( - self.db1, doc2.doc_id, doc2.rev, tests.nested_doc, False) -- cgit v1.2.3 From d656f671b3784575ba4c5bf2a30478c98198a95c Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 16 Nov 2016 22:16:35 -0300 Subject: [pkg] add six and cryptography Those are already present, but we are using the ones coming from our dependencies. Explicit is better than implicit. --- client/pkg/requirements.pip | 2 ++ 1 file changed, 2 insertions(+) diff --git a/client/pkg/requirements.pip b/client/pkg/requirements.pip index 2ae844e1..a18fe124 100644 --- a/client/pkg/requirements.pip +++ b/client/pkg/requirements.pip @@ -2,3 +2,5 @@ pysqlcipher>2.6.3 scrypt zope.proxy twisted +six +cryptography -- cgit v1.2.3 From 1be8f24264d7c0e9c8616faa64c206954fa5c342 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 16 Nov 2016 23:05:07 -0300 Subject: [refactor] better naming for producer calling Giving the proper name to the function and arguments helps to make the producer wizardry less magic. 
--- client/src/leap/soledad/client/http_target/send_protocol.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client/src/leap/soledad/client/http_target/send_protocol.py b/client/src/leap/soledad/client/http_target/send_protocol.py index 63ce6b42..0cb6d039 100644 --- a/client/src/leap/soledad/client/http_target/send_protocol.py +++ b/client/src/leap/soledad/client/http_target/send_protocol.py @@ -56,7 +56,8 @@ class DocStreamProducer(object): yield self.sleep(0.001) continue call = self.producer.pop(0) - yield call[0](*call[1:]) + fun, args = call[0], call[1:] + yield fun(*args) consumer.write(self.body.pop(1, leave_open=True)) consumer.write(self.body.pop(0)) # close stream -- cgit v1.2.3 From 632cd2414a47bc7f471d8f501ab6250293aedf51 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 17 Nov 2016 00:38:55 -0300 Subject: [bug] include_deleted=True on sync Also refactored tests and code to stop relying on old parameters which included docs instead of get_doc calls. 
--- client/src/leap/soledad/client/http_target/send.py | 14 +++----- client/src/leap/soledad/client/sync.py | 15 +++++--- testing/tests/sync/test_sync.py | 19 ++++++++++ testing/tests/sync/test_sync_target.py | 40 ++++++++++++---------- 4 files changed, 56 insertions(+), 32 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index ae0b126f..c15488a0 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -86,21 +86,17 @@ class HTTPDocSender(object): @defer.inlineCallbacks def _prepare_one_doc(self, entry, body, idx, total): - get_doc, gen, trans_id = entry - doc, content = yield self._encrypt_doc(get_doc) + get_doc_call, gen, trans_id = entry + doc, content = yield self._encrypt_doc(get_doc_call) body.insert_info( id=doc.doc_id, rev=doc.rev, content=content, gen=gen, trans_id=trans_id, number_of_docs=total, doc_idx=idx) @defer.inlineCallbacks - def _encrypt_doc(self, get_doc): - if type(get_doc) == tuple: - f, args = get_doc - doc = yield f(args) - else: - # tests - doc = get_doc + def _encrypt_doc(self, get_doc_call): + f, args, kwargs = get_doc_call + doc = yield f(*args, **kwargs) if doc.is_tombstone(): defer.returnValue((doc, None)) else: diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 6bd58f8f..70c841d6 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -141,11 +141,7 @@ class SoledadSynchronizer(Synchronizer): self.sync_phase[0] += 1 # -------------------------------------------------------------------- - ids_sent = [doc_id for doc_id, _, _ in changes] - docs_by_generation = [] - for doc_id, gen, trans in changes: - get_doc = (self.source.get_doc, doc_id) - docs_by_generation.append((get_doc, gen, trans)) + docs_by_generation = self._docs_by_gen_from_changes(changes) # exchange documents and try to insert the returned 
ones with # the target, return target synced-up-to gen. @@ -153,6 +149,7 @@ class SoledadSynchronizer(Synchronizer): docs_by_generation, self.source._replica_uid, target_last_known_gen, target_last_known_trans_id, self._insert_doc_from_target, ensure_callback=ensure_callback) + ids_sent = [doc_id for doc_id, _, _ in changes] logger.debug("target gen after sync: %d" % new_gen) logger.debug("target trans_id after sync: %s" % new_trans_id) if hasattr(self.source, 'commit'): # sqlcipher backend speed up @@ -185,6 +182,14 @@ class SoledadSynchronizer(Synchronizer): defer.returnValue(my_gen) + def _docs_by_gen_from_changes(self, changes): + docs_by_generation = [] + kwargs = {'include_deleted': True} + for doc_id, gen, trans in changes: + get_doc = (self.source.get_doc, (doc_id,), kwargs) + docs_by_generation.append((get_doc, gen, trans)) + return docs_by_generation + def complete_sync(self): """ Last stage of the synchronization: diff --git a/testing/tests/sync/test_sync.py b/testing/tests/sync/test_sync.py index a434e944..76757c5b 100644 --- a/testing/tests/sync/test_sync.py +++ b/testing/tests/sync/test_sync.py @@ -19,6 +19,7 @@ import threading import time from urlparse import urljoin +from mock import Mock from twisted.internet import defer from testscenarios import TestWithScenarios @@ -210,3 +211,21 @@ class TestSoledadDbSync( self.db, doc2.doc_id, doc2.rev, tests.nested_doc, False) # TODO: add u1db.tests.test_sync.TestRemoteSyncIntegration + + +class TestSoledadSynchronizer(BaseSoledadTest): + + def setUp(self): + BaseSoledadTest.setUp(self) + self.db = Mock() + self.target = Mock() + self.synchronizer = sync.SoledadSynchronizer( + self.db, + self.target) + + def test_docs_by_gen_includes_deleted(self): + changes = [('id', 'gen', 'trans')] + docs_by_gen = self.synchronizer._docs_by_gen_from_changes(changes) + f, args, kwargs = docs_by_gen[0][0] + self.assertIn('include_deleted', kwargs) + self.assertTrue(kwargs['include_deleted']) diff --git 
a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index fd1d413e..e32f08b3 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -199,8 +199,9 @@ class TestSoledadSyncTarget( other_docs.append((doc.doc_id, doc.rev, doc.get_json())) doc = self.make_document('doc-here', 'replica:1', '{"value": "here"}') + get_doc = (lambda _: doc, (1,), {}) new_gen, trans_id = yield remote_target.sync_exchange( - [(doc, 10, 'T-sid')], 'replica', last_known_generation=0, + [(get_doc, 10, 'T-sid')], 'replica', last_known_generation=0, last_known_trans_id=None, insert_doc_cb=receive_doc) self.assertEqual(1, new_gen) self.assertGetEncryptedDoc( @@ -247,10 +248,12 @@ class TestSoledadSyncTarget( doc1 = self.make_document('doc-here', 'replica:1', '{"value": "here"}') doc2 = self.make_document('doc-here2', 'replica:1', '{"value": "here2"}') + get_doc1 = (lambda _: doc1, (1,), {}) + get_doc2 = (lambda _: doc2, (2,), {}) with self.assertRaises(l2db.errors.U1DBError): yield remote_target.sync_exchange( - [(doc1, 10, 'T-sid'), (doc2, 11, 'T-sud')], + [(get_doc1, 10, 'T-sid'), (get_doc2, 11, 'T-sud')], 'replica', last_known_generation=0, last_known_trans_id=None, @@ -265,7 +268,7 @@ class TestSoledadSyncTarget( # retry trigger_ids = [] new_gen, trans_id = yield remote_target.sync_exchange( - [(doc2, 11, 'T-sud')], 'replica', last_known_generation=0, + [(get_doc2, 11, 'T-sud')], 'replica', last_known_generation=0, last_known_trans_id=None, insert_doc_cb=receive_doc) self.assertGetEncryptedDoc( db, 'doc-here2', 'replica:1', '{"value": "here2"}', @@ -295,8 +298,9 @@ class TestSoledadSyncTarget( replica_uid_box.append(replica_uid) doc = self.make_document('doc-here', 'replica:1', '{"value": "here"}') + get_doc = (lambda _: doc, (1,), {}) new_gen, trans_id = yield remote_target.sync_exchange( - [(doc, 10, 'T-sid')], 'replica', last_known_generation=0, + [(get_doc, 10, 'T-sid')], 'replica', last_known_generation=0, 
last_known_trans_id=None, insert_doc_cb=receive_doc, ensure_callback=ensure_cb) self.assertEqual(1, new_gen) @@ -408,8 +412,8 @@ class SoledadDatabaseSyncTargetTests( This test was adapted to decrypt remote content before assert. """ docs_by_gen = [ - (self.make_document('doc-id', 'replica:1', tests.simple_doc), 10, - 'T-sid')] + ((self.make_document, ('doc-id', 'replica:1', tests.simple_doc,), {}), + 10, 'T-sid')] new_gen, trans_id = yield self.st.sync_exchange( docs_by_gen, 'replica', last_known_generation=0, last_known_trans_id=None, insert_doc_cb=self.receive_doc) @@ -430,10 +434,10 @@ class SoledadDatabaseSyncTargetTests( This test was adapted to decrypt remote content before assert. """ docs_by_gen = [ - (self.make_document( - 'doc-id', 'replica:1', tests.simple_doc), 10, 'T-1'), - (self.make_document( - 'doc-id2', 'replica:1', tests.nested_doc), 11, 'T-2')] + ((self.make_document, + ('doc-id', 'replica:1', tests.simple_doc), {}), 10, 'T-1'), + ((self.make_document, + ('doc-id2', 'replica:1', tests.nested_doc), {}), 11, 'T-2')] new_gen, trans_id = yield self.st.sync_exchange( docs_by_gen, 'replica', last_known_generation=0, last_known_trans_id=None, insert_doc_cb=self.receive_doc) @@ -515,7 +519,7 @@ class SoledadDatabaseSyncTargetTests( doc = self.db.create_doc_from_json('{}') edit_rev = 'replica:1|' + doc.rev docs_by_gen = [ - (self.make_document(doc.doc_id, edit_rev, None), 10, 'T-sid')] + ((self.make_document, (doc.doc_id, edit_rev, None), {}), 10, 'T-sid')] new_gen, trans_id = yield self.st.sync_exchange( docs_by_gen, 'replica', last_known_generation=0, last_known_trans_id=None, insert_doc_cb=self.receive_doc) @@ -534,7 +538,7 @@ class SoledadDatabaseSyncTargetTests( self.assertTransactionLog([doc.doc_id], self.db) new_doc = '{"key": "altval"}' docs_by_gen = [ - (self.make_document(doc.doc_id, 'replica:1', new_doc), 10, + ((self.make_document, (doc.doc_id, 'replica:1', new_doc), {}), 10, 'T-sid')] new_gen, _ = yield self.st.sync_exchange( docs_by_gen, 
'replica', last_known_generation=0, @@ -554,7 +558,7 @@ class SoledadDatabaseSyncTargetTests( self.assertTransactionLog([doc.doc_id], self.db) gen, txid = self.db._get_generation_info() docs_by_gen = [ - (self.make_document(doc.doc_id, doc.rev, tests.simple_doc), + ((self.make_document, (doc.doc_id, doc.rev, tests.simple_doc), {}), 10, 'T-sid')] new_gen, _ = yield self.st.sync_exchange( docs_by_gen, 'replica', last_known_generation=gen, @@ -600,7 +604,7 @@ class SoledadDatabaseSyncTargetTests( self.assertTransactionLog([doc.doc_id], self.db) new_doc = '{"key": "altval"}' docs_by_gen = [ - (self.make_document(doc.doc_id, 'test:1|z:2', new_doc), 10, + ((self.make_document, (doc.doc_id, 'test:1|z:2', new_doc), {}), 10, 'T-sid')] new_gen, _ = yield self.st.sync_exchange( docs_by_gen, 'other-replica', last_known_generation=0, @@ -625,7 +629,7 @@ class SoledadDatabaseSyncTargetTests( self.assertTransactionLog([doc.doc_id], self.db) new_doc = '{"key": "altval"}' docs_by_gen = [ - (self.make_document(doc.doc_id, 'test:1|z:2', new_doc), 10, + ((self.make_document, (doc.doc_id, 'test:1|z:2', new_doc), {}), 10, 'T-sid')] new_gen, _ = yield self.st.sync_exchange( docs_by_gen, 'other-replica', last_known_generation=0, @@ -646,7 +650,7 @@ class SoledadDatabaseSyncTargetTests( self.assertTransactionLog([doc.doc_id], self.db) new_doc = '{"key": "altval"}' docs_by_gen = [ - (self.make_document(doc.doc_id, 'test:1|z:2', new_doc), 10, + ((self.make_document, (doc.doc_id, 'test:1|z:2', new_doc), {}), 10, 'T-sid')] new_gen, _ = yield self.st.sync_exchange( docs_by_gen, 'other-replica', last_known_generation=0, @@ -657,8 +661,8 @@ class SoledadDatabaseSyncTargetTests( def test_sync_exchange_converged_handling(self): doc = self.db.create_doc_from_json(tests.simple_doc) docs_by_gen = [ - (self.make_document('new', 'other:1', '{}'), 4, 'T-foo'), - (self.make_document(doc.doc_id, doc.rev, doc.get_json()), 5, + ((self.make_document, ('new', 'other:1', '{}'), {}), 4, 'T-foo'), + 
((self.make_document, (doc.doc_id, doc.rev, doc.get_json()), {}), 5, 'T-bar')] new_gen, _ = yield self.st.sync_exchange( docs_by_gen, 'other-replica', last_known_generation=0, -- cgit v1.2.3 From 529dbdf27804f12da80907d25c412d10e9fa3763 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 17 Nov 2016 01:33:04 -0300 Subject: [style] fix pep8 and confs Fixes setup.cfg, adding current exclude rules, simplified tox.ini to use setup.cfg and fixed all. --- client/src/leap/soledad/client/api.py | 1 + .../examples/benchmarks/measure_index_times.py | 3 +++ .../benchmarks/measure_index_times_custom_docid.py | 3 +++ .../leap/soledad/client/examples/run_benchmark.py | 1 + .../src/leap/soledad/client/examples/use_adbapi.py | 2 ++ client/src/leap/soledad/client/examples/use_api.py | 2 ++ client/src/leap/soledad/client/sqlcipher.py | 1 + .../soledad/common/l2db/backends/sqlite_backend.py | 1 + .../leap/soledad/common/l2db/remote/http_app.py | 1 + setup.cfg | 8 +++---- testing/tests/client/test_crypto.py | 27 ++++++++-------------- testing/tests/couch/test_command.py | 3 ++- testing/tests/perf/test_crypto.py | 14 +++++------ testing/tests/perf/test_sqlcipher.py | 12 +++++----- testing/tests/perf/test_sync.py | 12 +++++----- testing/tests/sync/test_sqlcipher_sync.py | 1 + testing/tests/sync/test_sync_mutex.py | 1 + testing/tests/sync/test_sync_target.py | 11 +++++---- testing/tox.ini | 4 ++-- 19 files changed, 61 insertions(+), 47 deletions(-) diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index c560f661..da6eec66 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -935,6 +935,7 @@ def create_path_if_not_exists(path): # Monkey patching u1db to be able to provide a custom SSL cert # ---------------------------------------------------------------------------- + # We need a more reasonable timeout (in seconds) SOLEDAD_TIMEOUT = 120 diff --git 
a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py index 4fc91d9d..92bc85d6 100644 --- a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py +++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py @@ -58,6 +58,7 @@ def debug(*args): if not silent: print(*args) + debug("[+] db path:", tmpdb) debug("[+] num docs", numdocs) @@ -74,6 +75,7 @@ dbpool = adbapi.getConnectionPool(opts) def createDoc(doc): return dbpool.runU1DBQuery("create_doc", doc) + db_indexes = { 'by-chash': ['chash'], 'by-number': ['number']} @@ -168,6 +170,7 @@ def insert_docs(_): deferreds.append(d) return defer.gatherResults(deferreds, consumeErrors=True) + d = create_indexes(None) d.addCallback(insert_docs) d.addCallback(get_from_index) diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py index 38ea18a3..429566c7 100644 --- a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py +++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py @@ -58,6 +58,7 @@ def debug(*args): if not silent: print(*args) + debug("[+] db path:", tmpdb) debug("[+] num docs", numdocs) @@ -74,6 +75,7 @@ dbpool = adbapi.getConnectionPool(opts) def createDoc(doc, doc_id): return dbpool.runU1DBQuery("create_doc", doc, doc_id=doc_id) + db_indexes = { 'by-chash': ['chash'], 'by-number': ['number']} @@ -168,6 +170,7 @@ def insert_docs(_): deferreds.append(d) return defer.gatherResults(deferreds, consumeErrors=True) + d = create_indexes(None) d.addCallback(insert_docs) d.addCallback(get_from_index) diff --git a/client/src/leap/soledad/client/examples/run_benchmark.py b/client/src/leap/soledad/client/examples/run_benchmark.py index 61621e89..ddedf433 100644 --- 
a/client/src/leap/soledad/client/examples/run_benchmark.py +++ b/client/src/leap/soledad/client/examples/run_benchmark.py @@ -14,6 +14,7 @@ cmd = "SILENT=1 TIMES={times} TMPDIR={tmpdir} python ./use_{version}api.py" def parse_time(r): return r.split('\n')[-1] + with open(CSVFILE, 'w') as log: for times in range(0, 10000, 500): diff --git a/client/src/leap/soledad/client/examples/use_adbapi.py b/client/src/leap/soledad/client/examples/use_adbapi.py index a2683836..39301b41 100644 --- a/client/src/leap/soledad/client/examples/use_adbapi.py +++ b/client/src/leap/soledad/client/examples/use_adbapi.py @@ -39,6 +39,7 @@ def debug(*args): if not silent: print(*args) + debug("[+] db path:", tmpdb) debug("[+] times", times) @@ -87,6 +88,7 @@ def allDone(_): print((end_time - start_time).total_seconds()) reactor.stop() + deferreds = [] payload = open('manifest.phk').read() diff --git a/client/src/leap/soledad/client/examples/use_api.py b/client/src/leap/soledad/client/examples/use_api.py index e2501c98..db77c4b3 100644 --- a/client/src/leap/soledad/client/examples/use_api.py +++ b/client/src/leap/soledad/client/examples/use_api.py @@ -36,6 +36,7 @@ def debug(*args): if not silent: print(*args) + debug("[+] db path:", tmpdb) debug("[+] times", times) @@ -52,6 +53,7 @@ db = sqlcipher.SQLCipherDatabase(opts) def allDone(): debug("ALL DONE!") + payload = open('manifest.phk').read() for i in range(times): diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 6caa39cd..b9db3674 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -594,6 +594,7 @@ def soledad_doc_factory(doc_id=None, rev=None, json='{}', has_conflicts=False, return SoledadDocument(doc_id=doc_id, rev=rev, json=json, has_conflicts=has_conflicts, syncable=syncable) + sqlite_backend.SQLiteDatabase.register_implementation(SQLCipherDatabase) diff --git 
a/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py b/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py index 295f3132..27db65af 100644 --- a/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py +++ b/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py @@ -923,4 +923,5 @@ class SQLitePartialExpandDatabase(SQLiteDatabase): raw_doc = json.loads(doc) self._update_indexes(doc_id, raw_doc, getters, c) + SQLiteDatabase.register_implementation(SQLitePartialExpandDatabase) diff --git a/common/src/leap/soledad/common/l2db/remote/http_app.py b/common/src/leap/soledad/common/l2db/remote/http_app.py index a9680890..496274b2 100644 --- a/common/src/leap/soledad/common/l2db/remote/http_app.py +++ b/common/src/leap/soledad/common/l2db/remote/http_app.py @@ -194,6 +194,7 @@ class URLToResource(object): resource_cls = params.pop('resource_cls') return resource_cls, params + url_to_resource = URLToResource() diff --git a/setup.cfg b/setup.cfg index 187616d5..f62466ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [pep8] -exclude = versioneer.py,_version.py,*.egg,build,docs,scripts,ddocs.py -ignore = E731 +exclude = versioneer.py,_version.py,*.egg,build,docs,scripts,ddocs.py,.tox +ignore = F812,E731 [flake8] -exclude = versioneer.py,_version.py,*.egg,build,docs,scripts,ddocs.py -ignore = E731 +exclude = versioneer.py,_version.py,*.egg,build,docs,scripts,ddocs.py,.tox +ignore = F812,E731 diff --git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index dc3054f2..483c7803 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -59,8 +59,8 @@ class AESTest(unittest.TestCase): data = snowden1 block = 16 - for i in range(len(data)/block): - chunk = data[i * block:(i+1)*block] + for i in range(len(data) / block): + chunk = data[i * block:(i + 1) * block] aes.write(chunk) aes.end() @@ -69,7 +69,6 @@ class AESTest(unittest.TestCase): assert ciphertext_chunked == ciphertext 
- def test_decrypt(self): key = 'A' * 32 iv = 'A' * 16 @@ -82,8 +81,8 @@ class AESTest(unittest.TestCase): fd = BytesIO() aes = _crypto.AESDecryptor(key, iv, fd) - for i in range(len(ciphertext)/block): - chunk = ciphertext[i * block:(i+1)*block] + for i in range(len(ciphertext) / block): + chunk = ciphertext[i * block:(i + 1) * block] aes.write(chunk) aes.end() @@ -91,7 +90,6 @@ class AESTest(unittest.TestCase): assert cleartext_chunked == data - class BlobTestCase(unittest.TestCase): class doc_info: @@ -108,13 +106,13 @@ class BlobTestCase(unittest.TestCase): blob = _crypto.BlobEncryptor( self.doc_info, inf, result=outf, - secret='A' * 96, iv='B'*16) + secret='A' * 96, iv='B' * 16) encrypted = yield blob.encrypt() data = base64.urlsafe_b64decode(encrypted.getvalue()) assert data[0] == '\x80' - ts, sch, meth = struct.unpack( + ts, sch, meth = struct.unpack( 'Qbb', data[1:11]) assert sch == 1 assert meth == 1 @@ -128,13 +126,12 @@ class BlobTestCase(unittest.TestCase): ciphertext = data[71:-64] aes_key = _crypto._get_sym_key_for_doc( - self.doc_info.doc_id, 'A'*96) - assert ciphertext == _aes_encrypt(aes_key, 'B'*16, snowden1) + self.doc_info.doc_id, 'A' * 96) + assert ciphertext == _aes_encrypt(aes_key, 'B' * 16, snowden1) - decrypted = _aes_decrypt(aes_key, 'B'*16, ciphertext) + decrypted = _aes_decrypt(aes_key, 'B' * 16, ciphertext) assert str(decrypted) == snowden1 - @defer.inlineCallbacks def test_blob_decryptor(self): @@ -154,7 +151,6 @@ class BlobTestCase(unittest.TestCase): decrypted = yield decryptor.decrypt() assert decrypted.getvalue() == snowden1 - @defer.inlineCallbacks def test_encrypt_and_decrypt(self): """ @@ -173,7 +169,6 @@ class BlobTestCase(unittest.TestCase): assert len(decrypted) != 0 assert json.loads(decrypted) == payload - @defer.inlineCallbacks def test_decrypt_with_wrong_mac_raises(self): """ @@ -193,8 +188,7 @@ class BlobTestCase(unittest.TestCase): doc2.set_json(json.dumps({"raw": str(newraw)})) with pytest.raises(_crypto.InvalidBlob): 
- decrypted = yield crypto.decrypt_doc(doc2) - + yield crypto.decrypt_doc(doc2) class RecoveryDocumentTestCase(BaseSoledadTest): @@ -283,7 +277,6 @@ class SoledadSecretsTestCase(BaseSoledadTest): "Should have a secret at this point") - class SoledadCryptoAESTestCase(BaseSoledadTest): def test_encrypt_decrypt_sym(self): diff --git a/testing/tests/couch/test_command.py b/testing/tests/couch/test_command.py index 68097fb1..9fb2c153 100644 --- a/testing/tests/couch/test_command.py +++ b/testing/tests/couch/test_command.py @@ -25,6 +25,7 @@ class CommandBasedDBCreationTest(unittest.TestCase): state.ensure_database, "user-1337") def test_raises_unauthorized_by_default(self): - state = couch_state.CouchServerState("url", check_schema_versions=False) + state = couch_state.CouchServerState("url", + check_schema_versions=False) self.assertRaises(u1db_errors.Unauthorized, state.ensure_database, "user-1337") diff --git a/testing/tests/perf/test_crypto.py b/testing/tests/perf/test_crypto.py index 9ce418ba..367c3b5b 100644 --- a/testing/tests/perf/test_crypto.py +++ b/testing/tests/perf/test_crypto.py @@ -5,7 +5,7 @@ SIZE_LIMT environment variable. For instance, to keep the maximum payload at 1MB: -SIZE_LIMIT=1E6 py.test -s tests/perf/test_crypto.py +SIZE_LIMIT=1E6 py.test -s tests/perf/test_crypto.py """ import pytest import os @@ -45,7 +45,7 @@ def create_doc_decryption(size): doc = SoledadDocument( doc_id=uuid4().hex, rev='rev', json=json.dumps(DOC_CONTENT)) - + encrypted_doc = yield crypto.encrypt_doc(doc) doc.set_json(encrypted_doc) @@ -74,16 +74,16 @@ def create_raw_decryption(size): # plugin. 
encryption_tests = [ - ('10k', 1E4), + ('10k', 1E4), ('100k', 1E5), ('500k', 5E5), - ('1M', 1E6), - ('10M', 1E7), - ('50M', 5E7), + ('1M', 1E6), + ('10M', 1E7), + ('50M', 5E7), ] for name, size in encryption_tests: - if size < LIMIT: + if size < LIMIT: sz = int(size) globals()['test_encrypt_doc_' + name] = create_doc_encryption(sz) globals()['test_decrypt_doc_' + name] = create_doc_decryption(sz) diff --git a/testing/tests/perf/test_sqlcipher.py b/testing/tests/perf/test_sqlcipher.py index e7a54228..39c9e3ad 100644 --- a/testing/tests/perf/test_sqlcipher.py +++ b/testing/tests/perf/test_sqlcipher.py @@ -29,10 +29,10 @@ def build_test_sqlcipher_create(amount, size): return test -test_async_create_20_500k = build_test_sqlcipher_async_create(20, 500*1000) -test_async_create_100_100k = build_test_sqlcipher_async_create(100, 100*1000) -test_async_create_1000_10k = build_test_sqlcipher_async_create(1000, 10*1000) +test_async_create_20_500k = build_test_sqlcipher_async_create(20, 500 * 1000) +test_async_create_100_100k = build_test_sqlcipher_async_create(100, 100 * 1000) +test_async_create_1000_10k = build_test_sqlcipher_async_create(1000, 10 * 1000) # synchronous -test_create_20_500k = build_test_sqlcipher_create(20, 500*1000) -test_create_100_100k = build_test_sqlcipher_create(100, 100*1000) -test_create_1000_10k = build_test_sqlcipher_create(1000, 10*1000) +test_create_20_500k = build_test_sqlcipher_create(20, 500 * 1000) +test_create_100_100k = build_test_sqlcipher_create(100, 100 * 1000) +test_create_1000_10k = build_test_sqlcipher_create(1000, 10 * 1000) diff --git a/testing/tests/perf/test_sync.py b/testing/tests/perf/test_sync.py index 7b3c4bf0..1bf6cc21 100644 --- a/testing/tests/perf/test_sync.py +++ b/testing/tests/perf/test_sync.py @@ -21,9 +21,9 @@ def create_upload(uploads, size): return test -test_upload_20_500k = create_upload(20, 500*1000) -test_upload_100_100k = create_upload(100, 100*1000) -test_upload_1000_10k = create_upload(1000, 10*1000) 
+test_upload_20_500k = create_upload(20, 500 * 1000) +test_upload_100_100k = create_upload(100, 100 * 1000) +test_upload_1000_10k = create_upload(1000, 10 * 1000) def create_download(downloads, size): @@ -46,9 +46,9 @@ def create_download(downloads, size): return test -test_download_20_500k = create_download(20, 500*1000) -test_download_100_100k = create_download(100, 100*1000) -test_download_1000_10k = create_download(1000, 10*1000) +test_download_20_500k = create_download(20, 500 * 1000) +test_download_100_100k = create_download(100, 100 * 1000) +test_download_1000_10k = create_download(1000, 10 * 1000) @pytest.inlineCallbacks diff --git a/testing/tests/sync/test_sqlcipher_sync.py b/testing/tests/sync/test_sqlcipher_sync.py index 029164eb..26f63a40 100644 --- a/testing/tests/sync/test_sqlcipher_sync.py +++ b/testing/tests/sync/test_sqlcipher_sync.py @@ -710,6 +710,7 @@ def make_local_db_and_soledad_target( source_replica_uid=source_replica_uid) return db, st + target_scenarios = [ ('leap', { 'create_db_and_target': make_local_db_and_soledad_target, diff --git a/testing/tests/sync/test_sync_mutex.py b/testing/tests/sync/test_sync_mutex.py index 2bcb3aec..432a3cd2 100644 --- a/testing/tests/sync/test_sync_mutex.py +++ b/testing/tests/sync/test_sync_mutex.py @@ -66,6 +66,7 @@ def _timed_sync(self): d.addBoth(_store_finish_time) return d + SoledadSynchronizer.sync = _timed_sync # -- end of monkey-patching diff --git a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index e32f08b3..dd69ffa1 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -412,7 +412,8 @@ class SoledadDatabaseSyncTargetTests( This test was adapted to decrypt remote content before assert. 
""" docs_by_gen = [ - ((self.make_document, ('doc-id', 'replica:1', tests.simple_doc,), {}), + ((self.make_document, + ('doc-id', 'replica:1', tests.simple_doc,), {}), 10, 'T-sid')] new_gen, trans_id = yield self.st.sync_exchange( docs_by_gen, 'replica', last_known_generation=0, @@ -519,7 +520,8 @@ class SoledadDatabaseSyncTargetTests( doc = self.db.create_doc_from_json('{}') edit_rev = 'replica:1|' + doc.rev docs_by_gen = [ - ((self.make_document, (doc.doc_id, edit_rev, None), {}), 10, 'T-sid')] + ((self.make_document, (doc.doc_id, edit_rev, None), {}), + 10, 'T-sid')] new_gen, trans_id = yield self.st.sync_exchange( docs_by_gen, 'replica', last_known_generation=0, last_known_trans_id=None, insert_doc_cb=self.receive_doc) @@ -662,8 +664,8 @@ class SoledadDatabaseSyncTargetTests( doc = self.db.create_doc_from_json(tests.simple_doc) docs_by_gen = [ ((self.make_document, ('new', 'other:1', '{}'), {}), 4, 'T-foo'), - ((self.make_document, (doc.doc_id, doc.rev, doc.get_json()), {}), 5, - 'T-bar')] + ((self.make_document, (doc.doc_id, doc.rev, doc.get_json()), {}), + 5, 'T-bar')] new_gen, _ = yield self.st.sync_exchange( docs_by_gen, 'other-replica', last_known_generation=0, last_known_trans_id=None, insert_doc_cb=self.receive_doc) @@ -746,6 +748,7 @@ class SoledadDatabaseSyncTargetTests( yield self.st.record_sync_info('replica', 0, 'T-sid') self.assertEqual(expected, called) + WAIT_STEP = 1 MAX_WAIT = 10 DBPASS = "pass" diff --git a/testing/tox.ini b/testing/tox.ini index 0eeeab9e..d84566ca 100644 --- a/testing/tox.ini +++ b/testing/tox.ini @@ -42,8 +42,8 @@ deps = pep8 flake8 commands = - pep8 client server common - flake8 --ignore=F812,E731 client server common + pep8 + flake8 [testenv:parallel] deps = -- cgit v1.2.3 From 3e74de6208c22643b1c063b70d22d352c0409703 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 17 Nov 2016 22:11:54 -0200 Subject: [test] remove unneeded setting of environment variables --- testing/test_soledad/util.py | 11 ----------- 1 file changed, 11 
deletions(-) diff --git a/testing/test_soledad/util.py b/testing/test_soledad/util.py index f44ce166..4a705396 100644 --- a/testing/test_soledad/util.py +++ b/testing/test_soledad/util.py @@ -223,14 +223,7 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest): # repeat it here because twisted.trial does not work with # setUpClass/tearDownClass. - self.old_path = os.environ['PATH'] - self.old_home = os.environ['HOME'] self.home = self.tempdir - bin_tdir = os.path.join( - self.tempdir, - 'bin') - os.environ["PATH"] = bin_tdir - os.environ["HOME"] = self.tempdir # config info self.db1_file = os.path.join(self.tempdir, "db1.u1db") @@ -257,10 +250,6 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest): self._db2.close() self._soledad.close() - # restore paths - os.environ["PATH"] = self.old_path - os.environ["HOME"] = self.old_home - def _delete_temporary_dirs(): # XXX should not access "private" attrs for f in [self._soledad.local_db_path, -- cgit v1.2.3 From 5294f5b9ae01429e6d0ee75c8dc98441ba760845 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 17 Nov 2016 22:14:51 -0200 Subject: [test] use tags for selecting benchmark tests --- testing/pytest.ini | 1 - testing/tests/perf/conftest.py | 3 --- testing/tests/perf/test_crypto.py | 3 +++ testing/tests/perf/test_misc.py | 2 ++ testing/tests/perf/test_sqlcipher.py | 2 ++ testing/tests/perf/test_sync.py | 2 ++ testing/tox.ini | 9 +++++---- 7 files changed, 14 insertions(+), 8 deletions(-) diff --git a/testing/pytest.ini b/testing/pytest.ini index 39d1e1c6..eb70b67c 100644 --- a/testing/pytest.ini +++ b/testing/pytest.ini @@ -1,4 +1,3 @@ [pytest] testpaths = tests -norecursedirs = tests/perf twisted = yes diff --git a/testing/tests/perf/conftest.py b/testing/tests/perf/conftest.py index 2964936b..d08ea61d 100644 --- a/testing/tests/perf/conftest.py +++ b/testing/tests/perf/conftest.py @@ -25,9 +25,6 @@ server.ensure_server() def pytest_addoption(parser): - parser.addoption( - "--couch-url", type="string", 
default="http://127.0.0.1:5984", - help="the url for the couch server to be used during tests") parser.addoption( "--num-docs", type="int", default=100, help="the number of documents to use in performance tests") diff --git a/testing/tests/perf/test_crypto.py b/testing/tests/perf/test_crypto.py index 367c3b5b..a438ee49 100644 --- a/testing/tests/perf/test_crypto.py +++ b/testing/tests/perf/test_crypto.py @@ -18,6 +18,9 @@ from leap.soledad.client import _crypto LIMIT = int(float(os.environ.get('SIZE_LIMIT', 50 * 1000 * 1000))) +pytestmark = pytest.mark.perf + + def create_doc_encryption(size): @pytest.mark.benchmark(group="test_crypto_encrypt_doc") def test_doc_encryption(soledad_client, benchmark, payload): diff --git a/testing/tests/perf/test_misc.py b/testing/tests/perf/test_misc.py index ead48adf..b45dc04e 100644 --- a/testing/tests/perf/test_misc.py +++ b/testing/tests/perf/test_misc.py @@ -1,5 +1,7 @@ import pytest +pytestmark = pytest.mark.perf + @pytest.mark.benchmark(group="test_instance") def test_initialization(soledad_client, benchmark): diff --git a/testing/tests/perf/test_sqlcipher.py b/testing/tests/perf/test_sqlcipher.py index 39c9e3ad..807af6e9 100644 --- a/testing/tests/perf/test_sqlcipher.py +++ b/testing/tests/perf/test_sqlcipher.py @@ -5,6 +5,8 @@ import pytest from twisted.internet.defer import gatherResults +pytestmark = pytest.mark.perf + def load_up(client, amount, payload, defer=True): results = [client.create_doc({'content': payload}) for _ in xrange(amount)] diff --git a/testing/tests/perf/test_sync.py b/testing/tests/perf/test_sync.py index 1bf6cc21..9bb20389 100644 --- a/testing/tests/perf/test_sync.py +++ b/testing/tests/perf/test_sync.py @@ -1,5 +1,7 @@ import pytest +pytestmark = pytest.mark.perf + @pytest.inlineCallbacks def load_up(client, amount, payload): diff --git a/testing/tox.ini b/testing/tox.ini index d84566ca..f6470c89 100644 --- a/testing/tox.ini +++ b/testing/tox.ini @@ -4,10 +4,11 @@ skipsdist=True [testenv] basepython 
= python2.7 -commands = py.test --cov-report=html \ +commands = py.test -m "not perf" \ + --cov-report=html \ --cov-report=term \ - --cov=leap.soledad \ - {posargs} + --cov=leap.soledad \ + {posargs} usedevelop = True deps = coverage @@ -34,7 +35,7 @@ install_command = pip install {opts} {packages} deps = {[testenv]deps} pytest-benchmark -commands = py.test tests/perf {posargs} +commands = py.test -m perf {posargs} [testenv:code-check] changedir = .. -- cgit v1.2.3 From eadc36dbc23368f8d11a8cf7c9bd5571641d5b36 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 17 Nov 2016 22:15:32 -0200 Subject: [test] move fixtures one level up --- testing/tests/conftest.py | 189 +++++++++++++++++++++++++++++++++++++++++ testing/tests/perf/conftest.py | 188 ---------------------------------------- 2 files changed, 189 insertions(+), 188 deletions(-) diff --git a/testing/tests/conftest.py b/testing/tests/conftest.py index 9e4319ac..5698c8a9 100644 --- a/testing/tests/conftest.py +++ b/testing/tests/conftest.py @@ -1,4 +1,29 @@ +import json +import os import pytest +import requests +import signal +import time + +from hashlib import sha512 +from subprocess import call +from urlparse import urljoin +from uuid import uuid4 + +from leap.soledad.common.couch import CouchDatabase +from leap.soledad.client import Soledad + + +# +# default options for all tests +# + +DEFAULT_PASSPHRASE = '123' + +DEFAULT_URL = 'http://127.0.0.1:2424' +DEFAULT_PRIVKEY = 'soledad_privkey.pem' +DEFAULT_CERTKEY = 'soledad_certkey.pem' +DEFAULT_TOKEN = 'an-auth-token' def pytest_addoption(parser): @@ -16,3 +41,167 @@ def couch_url(request): @pytest.fixture def method_tmpdir(request, tmpdir): request.instance.tempdir = tmpdir.strpath + + +# +# remote_db fixture: provides an empty database for a given user in a per +# function scope. 
+# + +class UserDatabase(object): + + def __init__(self, url, uuid): + self._remote_db_url = urljoin(url, 'user-%s' % uuid) + + def setup(self): + return CouchDatabase.open_database( + url=self._remote_db_url, create=True, replica_uid=None) + + def teardown(self): + requests.delete(self._remote_db_url) + + +@pytest.fixture() +def remote_db(request): + couch_url = request.config.option.couch_url + + def create(uuid): + db = UserDatabase(couch_url, uuid) + request.addfinalizer(db.teardown) + return db.setup() + return create + + +def get_pid(pidfile): + if not os.path.isfile(pidfile): + return 0 + try: + with open(pidfile) as f: + return int(f.read()) + except IOError: + return 0 + + +# +# soledad_server fixture: provides a running soledad server in a per module +# context (same soledad server for all tests in this module). +# + +class SoledadServer(object): + + def __init__(self, tmpdir_factory, couch_url): + tmpdir = tmpdir_factory.mktemp('soledad-server') + self._pidfile = os.path.join(tmpdir.strpath, 'soledad-server.pid') + self._logfile = os.path.join(tmpdir.strpath, 'soledad-server.log') + self._couch_url = couch_url + + def start(self): + self._create_conf_file() + # start the server + call([ + 'twistd', + '--logfile=%s' % self._logfile, + '--pidfile=%s' % self._pidfile, + 'web', + '--wsgi=leap.soledad.server.application', + '--port=2424' + ]) + + def _create_conf_file(self): + if not os.access('/etc', os.W_OK): + return + if not os.path.isdir('/etc/soledad'): + os.mkdir('/etc/soledad') + with open('/etc/soledad/soledad-server.conf', 'w') as f: + content = '[soledad-server]\ncouch_url = %s' % self._couch_url + f.write(content) + + def stop(self): + pid = get_pid(self._pidfile) + os.kill(pid, signal.SIGKILL) + + +@pytest.fixture(scope='module') +def soledad_server(tmpdir_factory, request): + couch_url = request.config.option.couch_url + server = SoledadServer(tmpdir_factory, couch_url) + server.start() + request.addfinalizer(server.stop) + return server + + +# 
+# soledad_dbs fixture: provides all databases needed by soledad server in a per +# module scope (same databases for all tests in this module). +# + +def _token_dbname(): + dbname = 'tokens_' + \ + str(int(time.time() / (30 * 24 * 3600))) + return dbname + + +class SoledadDatabases(object): + + def __init__(self, url): + self._token_db_url = urljoin(url, _token_dbname()) + self._shared_db_url = urljoin(url, 'shared') + + def setup(self, uuid): + self._create_dbs() + self._add_token(uuid) + + def _create_dbs(self): + requests.put(self._token_db_url) + requests.put(self._shared_db_url) + + def _add_token(self, uuid): + token = sha512(DEFAULT_TOKEN).hexdigest() + content = {'type': 'Token', 'user_id': uuid} + requests.put( + self._token_db_url + '/' + token, data=json.dumps(content)) + + def teardown(self): + requests.delete(self._token_db_url) + requests.delete(self._shared_db_url) + + +@pytest.fixture() +def soledad_dbs(request): + couch_url = request.config.option.couch_url + + def create(uuid): + db = SoledadDatabases(couch_url) + request.addfinalizer(db.teardown) + return db.setup(uuid) + return create + + +# +# soledad_client fixture: provides a clean soledad client for a test function. 
+# + +@pytest.fixture() +def soledad_client(tmpdir, soledad_server, remote_db, soledad_dbs, request): + passphrase = DEFAULT_PASSPHRASE + server_url = DEFAULT_URL + token = DEFAULT_TOKEN + default_uuid = uuid4().hex + remote_db(default_uuid) + soledad_dbs(default_uuid) + + # get a soledad instance + def create(): + secrets_path = os.path.join(tmpdir.strpath, '%s.secret' % default_uuid) + local_db_path = os.path.join(tmpdir.strpath, '%s.db' % default_uuid) + soledad_client = Soledad( + default_uuid, + unicode(passphrase), + secrets_path=secrets_path, + local_db_path=local_db_path, + server_url=server_url, + cert_file=None, + auth_token=token) + request.addfinalizer(soledad_client.close) + return soledad_client + return create diff --git a/testing/tests/perf/conftest.py b/testing/tests/perf/conftest.py index d08ea61d..a9cc3464 100644 --- a/testing/tests/perf/conftest.py +++ b/testing/tests/perf/conftest.py @@ -1,21 +1,9 @@ -import json -import os import pytest -import requests import random import base64 -import signal -import time -from hashlib import sha512 -from uuid import uuid4 -from subprocess import call -from urlparse import urljoin from twisted.internet import threads, reactor -from leap.soledad.client import Soledad -from leap.soledad.common.couch import CouchDatabase - # we have to manually setup the events server in order to be able to signal # events. 
This is usually done by the enclosing application using soledad @@ -30,18 +18,6 @@ def pytest_addoption(parser): help="the number of documents to use in performance tests") -# -# default options for all tests -# - -DEFAULT_PASSPHRASE = '123' - -DEFAULT_URL = 'http://127.0.0.1:2424' -DEFAULT_PRIVKEY = 'soledad_privkey.pem' -DEFAULT_CERTKEY = 'soledad_certkey.pem' -DEFAULT_TOKEN = 'an-auth-token' - - @pytest.fixture() def payload(): def generate(size): @@ -52,140 +28,6 @@ def payload(): return generate -# -# soledad_dbs fixture: provides all databases needed by soledad server in a per -# module scope (same databases for all tests in this module). -# - -def _token_dbname(): - dbname = 'tokens_' + \ - str(int(time.time() / (30 * 24 * 3600))) - return dbname - - -class SoledadDatabases(object): - - def __init__(self, url): - self._token_db_url = urljoin(url, _token_dbname()) - self._shared_db_url = urljoin(url, 'shared') - - def setup(self, uuid): - self._create_dbs() - self._add_token(uuid) - - def _create_dbs(self): - requests.put(self._token_db_url) - requests.put(self._shared_db_url) - - def _add_token(self, uuid): - token = sha512(DEFAULT_TOKEN).hexdigest() - content = {'type': 'Token', 'user_id': uuid} - requests.put( - self._token_db_url + '/' + token, data=json.dumps(content)) - - def teardown(self): - requests.delete(self._token_db_url) - requests.delete(self._shared_db_url) - - -@pytest.fixture() -def soledad_dbs(request): - couch_url = request.config.option.couch_url - - def create(uuid): - db = SoledadDatabases(couch_url) - request.addfinalizer(db.teardown) - return db.setup(uuid) - return create - - -# -# remote_db fixture: provides an empty database for a given user in a per -# function scope. 
-# - -class UserDatabase(object): - - def __init__(self, url, uuid): - self._remote_db_url = urljoin(url, 'user-%s' % uuid) - - def setup(self): - return CouchDatabase.open_database( - url=self._remote_db_url, create=True, replica_uid=None) - - def teardown(self): - requests.delete(self._remote_db_url) - - -@pytest.fixture() -def remote_db(request): - couch_url = request.config.option.couch_url - - def create(uuid): - db = UserDatabase(couch_url, uuid) - request.addfinalizer(db.teardown) - return db.setup() - return create - - -def get_pid(pidfile): - if not os.path.isfile(pidfile): - return 0 - try: - with open(pidfile) as f: - return int(f.read()) - except IOError: - return 0 - - -# -# soledad_server fixture: provides a running soledad server in a per module -# context (same soledad server for all tests in this module). -# - -class SoledadServer(object): - - def __init__(self, tmpdir_factory, couch_url): - tmpdir = tmpdir_factory.mktemp('soledad-server') - self._pidfile = os.path.join(tmpdir.strpath, 'soledad-server.pid') - self._logfile = os.path.join(tmpdir.strpath, 'soledad-server.log') - self._couch_url = couch_url - - def start(self): - self._create_conf_file() - # start the server - call([ - 'twistd', - '--logfile=%s' % self._logfile, - '--pidfile=%s' % self._pidfile, - 'web', - '--wsgi=leap.soledad.server.application.wsgi_application', - '--port=2424' - ]) - - def _create_conf_file(self): - if not os.access('/etc', os.W_OK): - return - if not os.path.isdir('/etc/soledad'): - os.mkdir('/etc/soledad') - with open('/etc/soledad/soledad-server.conf', 'w') as f: - content = '[soledad-server]\ncouch_url = %s' % self._couch_url - f.write(content) - - def stop(self): - pid = get_pid(self._pidfile) - os.kill(pid, signal.SIGKILL) - - -@pytest.fixture(scope='module') -def soledad_server(tmpdir_factory, request): - couch_url = request.config.option.couch_url - server = SoledadServer(tmpdir_factory, couch_url) - server.start() - request.addfinalizer(server.stop) - 
return server - - @pytest.fixture() def txbenchmark(benchmark): def blockOnThread(*args, **kwargs): @@ -213,33 +55,3 @@ def txbenchmark_with_setup(benchmark): rounds=4, warmup_rounds=1) return threads.deferToThread(bench) return blockOnThreadWithSetup - - -# -# soledad_client fixture: provides a clean soledad client for a test function. -# - -@pytest.fixture() -def soledad_client(tmpdir, soledad_server, remote_db, soledad_dbs, request): - passphrase = DEFAULT_PASSPHRASE - server_url = DEFAULT_URL - token = DEFAULT_TOKEN - default_uuid = uuid4().hex - remote_db(default_uuid) - soledad_dbs(default_uuid) - - # get a soledad instance - def create(): - secrets_path = os.path.join(tmpdir.strpath, '%s.secret' % uuid4().hex) - local_db_path = os.path.join(tmpdir.strpath, '%s.db' % uuid4().hex) - soledad_client = Soledad( - default_uuid, - unicode(passphrase), - secrets_path=secrets_path, - local_db_path=local_db_path, - server_url=server_url, - cert_file=None, - auth_token=token) - request.addfinalizer(soledad_client.close) - return soledad_client - return create -- cgit v1.2.3 From 87259d4210e3488b00876d7ec83a8cc21e341712 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 17 Nov 2016 22:16:28 -0200 Subject: [test] add test for deprecated crypto format update --- testing/tests/client/test_deprecated_crypto.py | 67 ++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 testing/tests/client/test_deprecated_crypto.py diff --git a/testing/tests/client/test_deprecated_crypto.py b/testing/tests/client/test_deprecated_crypto.py new file mode 100644 index 00000000..ca1b1558 --- /dev/null +++ b/testing/tests/client/test_deprecated_crypto.py @@ -0,0 +1,67 @@ +import json +import pytest + +from leap.soledad.client import crypto as old_crypto +from leap.soledad.common.couch import CouchDatabase +from leap.soledad.common import crypto as common_crypto + +from test_soledad.u1db_tests import simple_doc + + +def deprecate_client_crypto(client): + secret = 
client._crypto.secret + _crypto = old_crypto.SoledadCrypto(secret) + setattr(client._dbsyncer, '_crypto', _crypto) + return client + + +def couch_database(couch_url, uuid): + db = CouchDatabase(couch_url, "user-%s" % (uuid,)) + return db + + +@pytest.inlineCallbacks +def test_touch_updates_remote_representation( + soledad_client, request): + + client = soledad_client() + deprecated_client = deprecate_client_crypto(soledad_client()) + + couch_url = request.config.option.couch_url + remote = couch_database(couch_url, client._uuid) + + # ensure remote db is empty + gen, docs = remote.get_all_docs() + assert gen == 0 + assert len(docs) == 0 + + # create a doc with deprecated client and sync + yield deprecated_client.create_doc(json.loads(simple_doc)) + yield deprecated_client.sync() + + # check for doc in remote db + gen, docs = remote.get_all_docs() + assert gen == 1 + assert len(docs) == 1 + doc = docs.pop() + content = doc.content + assert common_crypto.ENC_JSON_KEY in content + assert common_crypto.ENC_SCHEME_KEY in content + assert common_crypto.ENC_METHOD_KEY in content + assert common_crypto.ENC_IV_KEY in content + assert common_crypto.MAC_KEY in content + assert common_crypto.MAC_METHOD_KEY in content + + # "touch" the document with a newer client and sync + _, docs = yield client.get_all_docs() + yield client.put_doc(doc) + yield client.sync() + + # check for newer representation of doc in remote db + gen, docs = remote.get_all_docs() + assert gen == 2 + assert len(docs) == 1 + doc = docs.pop() + content = doc.content + assert len(content) == 1 + assert 'raw' in content -- cgit v1.2.3 From 378a07113a713a7c25f0fb8510d18ecdae2198bd Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 17 Nov 2016 22:35:21 -0200 Subject: [test] rename benchmark tests directory and tag --- .gitlab-ci.yml | 2 +- testing/tests/benchmarks/assets/cert_default.conf | 15 ++++ testing/tests/benchmarks/conftest.py | 57 +++++++++++++ testing/tests/benchmarks/pytest.ini | 2 +
testing/tests/benchmarks/test_crypto.py | 99 +++++++++++++++++++++++ testing/tests/benchmarks/test_misc.py | 8 ++ testing/tests/benchmarks/test_sqlcipher.py | 40 +++++++++ testing/tests/benchmarks/test_sync.py | 64 +++++++++++++++ testing/tests/perf/assets/cert_default.conf | 15 ---- testing/tests/perf/conftest.py | 57 ------------- testing/tests/perf/pytest.ini | 2 - testing/tests/perf/test_crypto.py | 99 ----------------------- testing/tests/perf/test_misc.py | 8 -- testing/tests/perf/test_sqlcipher.py | 40 --------- testing/tests/perf/test_sync.py | 64 --------------- testing/tox.ini | 6 +- 16 files changed, 289 insertions(+), 289 deletions(-) create mode 100644 testing/tests/benchmarks/assets/cert_default.conf create mode 100644 testing/tests/benchmarks/conftest.py create mode 100644 testing/tests/benchmarks/pytest.ini create mode 100644 testing/tests/benchmarks/test_crypto.py create mode 100644 testing/tests/benchmarks/test_misc.py create mode 100644 testing/tests/benchmarks/test_sqlcipher.py create mode 100644 testing/tests/benchmarks/test_sync.py delete mode 100644 testing/tests/perf/assets/cert_default.conf delete mode 100644 testing/tests/perf/conftest.py delete mode 100644 testing/tests/perf/pytest.ini delete mode 100644 testing/tests/perf/test_crypto.py delete mode 100644 testing/tests/perf/test_misc.py delete mode 100644 testing/tests/perf/test_sqlcipher.py delete mode 100644 testing/tests/perf/test_sync.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dd4e4605..d11a4d1e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -30,7 +30,7 @@ benchmark: - couchdb script: - cd testing - - tox -e perf -- --couch-url http://couchdb:5984 + - tox -e benchmark -- --couch-url http://couchdb:5984 tags: - docker - benchmark diff --git a/testing/tests/benchmarks/assets/cert_default.conf b/testing/tests/benchmarks/assets/cert_default.conf new file mode 100644 index 00000000..8043cea3 --- /dev/null +++ b/testing/tests/benchmarks/assets/cert_default.conf @@ -0,0 
+1,15 @@ +[ req ] +default_bits = 1024 +default_keyfile = keyfile.pem +distinguished_name = req_distinguished_name +prompt = no +output_password = mypass + +[ req_distinguished_name ] +C = GB +ST = Test State or Province +L = Test Locality +O = Organization Name +OU = Organizational Unit Name +CN = localhost +emailAddress = test@email.address diff --git a/testing/tests/benchmarks/conftest.py b/testing/tests/benchmarks/conftest.py new file mode 100644 index 00000000..a9cc3464 --- /dev/null +++ b/testing/tests/benchmarks/conftest.py @@ -0,0 +1,57 @@ +import pytest +import random +import base64 + +from twisted.internet import threads, reactor + + +# we have to manually setup the events server in order to be able to signal +# events. This is usually done by the enclosing application using soledad +# client (i.e. bitmask client). +from leap.common.events import server +server.ensure_server() + + +def pytest_addoption(parser): + parser.addoption( + "--num-docs", type="int", default=100, + help="the number of documents to use in performance tests") + + +@pytest.fixture() +def payload(): + def generate(size): + random.seed(1337) # same seed to avoid different bench results + payload_bytes = bytearray(random.getrandbits(8) for _ in xrange(size)) + # encode as base64 to avoid ascii encode/decode errors + return base64.b64encode(payload_bytes)[:size] # remove b64 overhead + return generate + + +@pytest.fixture() +def txbenchmark(benchmark): + def blockOnThread(*args, **kwargs): + return threads.deferToThread( + benchmark, threads.blockingCallFromThread, + reactor, *args, **kwargs) + return blockOnThread + + +@pytest.fixture() +def txbenchmark_with_setup(benchmark): + def blockOnThreadWithSetup(setup, f): + def blocking_runner(*args, **kwargs): + return threads.blockingCallFromThread(reactor, f, *args, **kwargs) + + def blocking_setup(): + args = threads.blockingCallFromThread(reactor, setup) + try: + return tuple(arg for arg in args), {} + except TypeError: + return ((args,), 
{}) if args else None + + def bench(): + return benchmark.pedantic(blocking_runner, setup=blocking_setup, + rounds=4, warmup_rounds=1) + return threads.deferToThread(bench) + return blockOnThreadWithSetup diff --git a/testing/tests/benchmarks/pytest.ini b/testing/tests/benchmarks/pytest.ini new file mode 100644 index 00000000..7a0508ce --- /dev/null +++ b/testing/tests/benchmarks/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +twisted = yes diff --git a/testing/tests/benchmarks/test_crypto.py b/testing/tests/benchmarks/test_crypto.py new file mode 100644 index 00000000..ab586bea --- /dev/null +++ b/testing/tests/benchmarks/test_crypto.py @@ -0,0 +1,99 @@ +""" +Benchmarks for crypto operations. +If you don't want to stress your local machine too much, you can pass the +SIZE_LIMT environment variable. + +For instance, to keep the maximum payload at 1MB: + +SIZE_LIMIT=1E6 py.test -s tests/perf/test_crypto.py +""" +import pytest +import os +import json +from uuid import uuid4 + +from leap.soledad.common.document import SoledadDocument +from leap.soledad.client import _crypto + +LIMIT = int(float(os.environ.get('SIZE_LIMIT', 50 * 1000 * 1000))) + + +pytestmark = pytest.mark.benchmark + + +def create_doc_encryption(size): + @pytest.mark.benchmark(group="test_crypto_encrypt_doc") + def test_doc_encryption(soledad_client, benchmark, payload): + crypto = soledad_client()._crypto + + DOC_CONTENT = {'payload': payload(size)} + doc = SoledadDocument( + doc_id=uuid4().hex, rev='rev', + json=json.dumps(DOC_CONTENT)) + + benchmark(crypto.encrypt_doc, doc) + return test_doc_encryption + + +# TODO this test is really bullshit, because it's still including +# the json serialization. 
+ +def create_doc_decryption(size): + @pytest.inlineCallbacks + @pytest.mark.benchmark(group="test_crypto_decrypt_doc") + def test_doc_decryption(soledad_client, benchmark, payload): + crypto = soledad_client()._crypto + + DOC_CONTENT = {'payload': payload(size)} + doc = SoledadDocument( + doc_id=uuid4().hex, rev='rev', + json=json.dumps(DOC_CONTENT)) + + encrypted_doc = yield crypto.encrypt_doc(doc) + doc.set_json(encrypted_doc) + + benchmark(crypto.decrypt_doc, doc) + return test_doc_decryption + + +def create_raw_encryption(size): + @pytest.mark.benchmark(group="test_crypto_raw_encrypt") + def test_raw_encrypt(benchmark, payload): + key = payload(32) + benchmark(_crypto.encrypt_sym, payload(size), key) + return test_raw_encrypt + + +def create_raw_decryption(size): + @pytest.mark.benchmark(group="test_crypto_raw_decrypt") + def test_raw_decrypt(benchmark, payload): + key = payload(32) + iv, ciphertext = _crypto.encrypt_sym(payload(size), key) + benchmark(_crypto.decrypt_sym, ciphertext, key, iv) + return test_raw_decrypt + + +# Create the TESTS in the global namespace, they'll be picked by the benchmark +# plugin. 
+ +encryption_tests = [ + ('10k', 1E4), + ('100k', 1E5), + ('500k', 5E5), + ('1M', 1E6), + ('10M', 1E7), + ('50M', 5E7), +] + +for name, size in encryption_tests: + if size < LIMIT: + sz = int(size) + globals()['test_encrypt_doc_' + name] = create_doc_encryption(sz) + globals()['test_decrypt_doc_' + name] = create_doc_decryption(sz) + + +for name, size in encryption_tests: + if size < LIMIT: + sz = int(size) + globals()['test_encrypt_raw_' + name] = create_raw_encryption(sz) + globals()['test_decrypt_raw_' + name] = create_raw_decryption(sz) diff --git a/testing/tests/benchmarks/test_misc.py b/testing/tests/benchmarks/test_misc.py new file mode 100644 index 00000000..2f32ad7c --- /dev/null +++ b/testing/tests/benchmarks/test_misc.py @@ -0,0 +1,8 @@ +import pytest + +pytestmark = pytest.mark.benchmark + + +@pytest.mark.benchmark(group="test_instance") +def test_initialization(soledad_client, benchmark): + benchmark(soledad_client) diff --git a/testing/tests/benchmarks/test_sqlcipher.py b/testing/tests/benchmarks/test_sqlcipher.py new file mode 100644 index 00000000..7f8842bd --- /dev/null +++ b/testing/tests/benchmarks/test_sqlcipher.py @@ -0,0 +1,40 @@ +''' +Tests SoledadClient/SQLCipher interaction +''' +import pytest + +from twisted.internet.defer import gatherResults + +pytestmark = pytest.mark.benchmark + + +def load_up(client, amount, payload, defer=True): + results = [client.create_doc({'content': payload}) for _ in xrange(amount)] + if defer: + return gatherResults(results) + + +def build_test_sqlcipher_async_create(amount, size): + @pytest.inlineCallbacks + @pytest.mark.benchmark(group="test_sqlcipher_async_create") + def test(soledad_client, txbenchmark, payload): + client = soledad_client() + yield txbenchmark(load_up, client, amount, payload(size)) + return test + + +def build_test_sqlcipher_create(amount, size): + @pytest.mark.benchmark(group="test_sqlcipher_create") + def test(soledad_client, benchmark, payload): + client = soledad_client()._dbsyncer + 
benchmark(load_up, client, amount, payload(size), defer=False) + return test + + +test_async_create_20_500k = build_test_sqlcipher_async_create(20, 500 * 1000) +test_async_create_100_100k = build_test_sqlcipher_async_create(100, 100 * 1000) +test_async_create_1000_10k = build_test_sqlcipher_async_create(1000, 10 * 1000) +# synchronous +test_create_20_500k = build_test_sqlcipher_create(20, 500 * 1000) +test_create_100_100k = build_test_sqlcipher_create(100, 100 * 1000) +test_create_1000_10k = build_test_sqlcipher_create(1000, 10 * 1000) diff --git a/testing/tests/benchmarks/test_sync.py b/testing/tests/benchmarks/test_sync.py new file mode 100644 index 00000000..88afe9f8 --- /dev/null +++ b/testing/tests/benchmarks/test_sync.py @@ -0,0 +1,64 @@ +import pytest + +pytestmark = pytest.mark.benchmark + + +@pytest.inlineCallbacks +def load_up(client, amount, payload): + # create a bunch of local documents + for i in xrange(amount): + yield client.create_doc({'content': payload}) + + +def create_upload(uploads, size): + @pytest.inlineCallbacks + @pytest.mark.benchmark(group="test_upload") + def test(soledad_client, txbenchmark_with_setup, payload): + client = soledad_client() + + def setup(): + return load_up(client, uploads, payload(size)) + + yield txbenchmark_with_setup(setup, client.sync) + return test + + +test_upload_20_500k = create_upload(20, 500 * 1000) +test_upload_100_100k = create_upload(100, 100 * 1000) +test_upload_1000_10k = create_upload(1000, 10 * 1000) + + +def create_download(downloads, size): + @pytest.inlineCallbacks + @pytest.mark.benchmark(group="test_download") + def test(soledad_client, txbenchmark_with_setup, payload): + client = soledad_client() + + yield load_up(client, downloads, payload(size)) + yield client.sync() + # We could create them directly on couch, but sending them + # ensures we are dealing with properly encrypted docs + + def setup(): + return soledad_client() + + def sync(clean_client): + return clean_client.sync() + yield 
txbenchmark_with_setup(setup, sync) + return test + + +test_download_20_500k = create_download(20, 500 * 1000) +test_download_100_100k = create_download(100, 100 * 1000) +test_download_1000_10k = create_download(1000, 10 * 1000) + + +@pytest.inlineCallbacks +@pytest.mark.benchmark(group="test_nothing_to_sync") +def test_nothing_to_sync(soledad_client, txbenchmark_with_setup): + def setup(): + return soledad_client() + + def sync(clean_client): + return clean_client.sync() + yield txbenchmark_with_setup(setup, sync) diff --git a/testing/tests/perf/assets/cert_default.conf b/testing/tests/perf/assets/cert_default.conf deleted file mode 100644 index 8043cea3..00000000 --- a/testing/tests/perf/assets/cert_default.conf +++ /dev/null @@ -1,15 +0,0 @@ -[ req ] -default_bits = 1024 -default_keyfile = keyfile.pem -distinguished_name = req_distinguished_name -prompt = no -output_password = mypass - -[ req_distinguished_name ] -C = GB -ST = Test State or Province -L = Test Locality -O = Organization Name -OU = Organizational Unit Name -CN = localhost -emailAddress = test@email.address diff --git a/testing/tests/perf/conftest.py b/testing/tests/perf/conftest.py deleted file mode 100644 index a9cc3464..00000000 --- a/testing/tests/perf/conftest.py +++ /dev/null @@ -1,57 +0,0 @@ -import pytest -import random -import base64 - -from twisted.internet import threads, reactor - - -# we have to manually setup the events server in order to be able to signal -# events. This is usually done by the enclosing application using soledad -# client (i.e. bitmask client). 
-from leap.common.events import server -server.ensure_server() - - -def pytest_addoption(parser): - parser.addoption( - "--num-docs", type="int", default=100, - help="the number of documents to use in performance tests") - - -@pytest.fixture() -def payload(): - def generate(size): - random.seed(1337) # same seed to avoid different bench results - payload_bytes = bytearray(random.getrandbits(8) for _ in xrange(size)) - # encode as base64 to avoid ascii encode/decode errors - return base64.b64encode(payload_bytes)[:size] # remove b64 overhead - return generate - - -@pytest.fixture() -def txbenchmark(benchmark): - def blockOnThread(*args, **kwargs): - return threads.deferToThread( - benchmark, threads.blockingCallFromThread, - reactor, *args, **kwargs) - return blockOnThread - - -@pytest.fixture() -def txbenchmark_with_setup(benchmark): - def blockOnThreadWithSetup(setup, f): - def blocking_runner(*args, **kwargs): - return threads.blockingCallFromThread(reactor, f, *args, **kwargs) - - def blocking_setup(): - args = threads.blockingCallFromThread(reactor, setup) - try: - return tuple(arg for arg in args), {} - except TypeError: - return ((args,), {}) if args else None - - def bench(): - return benchmark.pedantic(blocking_runner, setup=blocking_setup, - rounds=4, warmup_rounds=1) - return threads.deferToThread(bench) - return blockOnThreadWithSetup diff --git a/testing/tests/perf/pytest.ini b/testing/tests/perf/pytest.ini deleted file mode 100644 index 7a0508ce..00000000 --- a/testing/tests/perf/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -twisted = yes diff --git a/testing/tests/perf/test_crypto.py b/testing/tests/perf/test_crypto.py deleted file mode 100644 index a438ee49..00000000 --- a/testing/tests/perf/test_crypto.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -Benchmarks for crypto operations. -If you don't want to stress your local machine too much, you can pass the -SIZE_LIMT environment variable. 
- -For instance, to keep the maximum payload at 1MB: - -SIZE_LIMIT=1E6 py.test -s tests/perf/test_crypto.py -""" -import pytest -import os -import json -from uuid import uuid4 - -from leap.soledad.common.document import SoledadDocument -from leap.soledad.client import _crypto - -LIMIT = int(float(os.environ.get('SIZE_LIMIT', 50 * 1000 * 1000))) - - -pytestmark = pytest.mark.perf - - -def create_doc_encryption(size): - @pytest.mark.benchmark(group="test_crypto_encrypt_doc") - def test_doc_encryption(soledad_client, benchmark, payload): - crypto = soledad_client()._crypto - - DOC_CONTENT = {'payload': payload(size)} - doc = SoledadDocument( - doc_id=uuid4().hex, rev='rev', - json=json.dumps(DOC_CONTENT)) - - benchmark(crypto.encrypt_doc, doc) - return test_doc_encryption - - -# TODO this test is really bullshit, because it's still including -# the json serialization. - -def create_doc_decryption(size): - @pytest.inlineCallbacks - @pytest.mark.benchmark(group="test_crypto_decrypt_doc") - def test_doc_decryption(soledad_client, benchmark, payload): - crypto = soledad_client()._crypto - - DOC_CONTENT = {'payload': payload(size)} - doc = SoledadDocument( - doc_id=uuid4().hex, rev='rev', - json=json.dumps(DOC_CONTENT)) - - encrypted_doc = yield crypto.encrypt_doc(doc) - doc.set_json(encrypted_doc) - - benchmark(crypto.decrypt_doc, doc) - return test_doc_decryption - - -def create_raw_encryption(size): - @pytest.mark.benchmark(group="test_crypto_raw_encrypt") - def test_raw_encrypt(benchmark, payload): - key = payload(32) - benchmark(_crypto.encrypt_sym, payload(size), key) - return test_raw_encrypt - - -def create_raw_decryption(size): - @pytest.mark.benchmark(group="test_crypto_raw_decrypt") - def test_raw_decrypt(benchmark, payload): - key = payload(32) - iv, ciphertext = _crypto.encrypt_sym(payload(size), key) - benchmark(_crypto.decrypt_sym, ciphertext, key, iv) - return test_raw_decrypt - - -# Create the TESTS in the global namespace, they'll be picked by the 
benchmark -# plugin. - -encryption_tests = [ - ('10k', 1E4), - ('100k', 1E5), - ('500k', 5E5), - ('1M', 1E6), - ('10M', 1E7), - ('50M', 5E7), -] - -for name, size in encryption_tests: - if size < LIMIT: - sz = int(size) - globals()['test_encrypt_doc_' + name] = create_doc_encryption(sz) - globals()['test_decrypt_doc_' + name] = create_doc_decryption(sz) - - -for name, size in encryption_tests: - if size < LIMIT: - sz = int(size) - globals()['test_encrypt_raw_' + name] = create_raw_encryption(sz) - globals()['test_decrypt_raw_' + name] = create_raw_decryption(sz) diff --git a/testing/tests/perf/test_misc.py b/testing/tests/perf/test_misc.py deleted file mode 100644 index b45dc04e..00000000 --- a/testing/tests/perf/test_misc.py +++ /dev/null @@ -1,8 +0,0 @@ -import pytest - -pytestmark = pytest.mark.perf - - -@pytest.mark.benchmark(group="test_instance") -def test_initialization(soledad_client, benchmark): - benchmark(soledad_client) diff --git a/testing/tests/perf/test_sqlcipher.py b/testing/tests/perf/test_sqlcipher.py deleted file mode 100644 index 807af6e9..00000000 --- a/testing/tests/perf/test_sqlcipher.py +++ /dev/null @@ -1,40 +0,0 @@ -''' -Tests SoledadClient/SQLCipher interaction -''' -import pytest - -from twisted.internet.defer import gatherResults - -pytestmark = pytest.mark.perf - - -def load_up(client, amount, payload, defer=True): - results = [client.create_doc({'content': payload}) for _ in xrange(amount)] - if defer: - return gatherResults(results) - - -def build_test_sqlcipher_async_create(amount, size): - @pytest.inlineCallbacks - @pytest.mark.benchmark(group="test_sqlcipher_async_create") - def test(soledad_client, txbenchmark, payload): - client = soledad_client() - yield txbenchmark(load_up, client, amount, payload(size)) - return test - - -def build_test_sqlcipher_create(amount, size): - @pytest.mark.benchmark(group="test_sqlcipher_create") - def test(soledad_client, benchmark, payload): - client = soledad_client()._dbsyncer - 
benchmark(load_up, client, amount, payload(size), defer=False) - return test - - -test_async_create_20_500k = build_test_sqlcipher_async_create(20, 500 * 1000) -test_async_create_100_100k = build_test_sqlcipher_async_create(100, 100 * 1000) -test_async_create_1000_10k = build_test_sqlcipher_async_create(1000, 10 * 1000) -# synchronous -test_create_20_500k = build_test_sqlcipher_create(20, 500 * 1000) -test_create_100_100k = build_test_sqlcipher_create(100, 100 * 1000) -test_create_1000_10k = build_test_sqlcipher_create(1000, 10 * 1000) diff --git a/testing/tests/perf/test_sync.py b/testing/tests/perf/test_sync.py deleted file mode 100644 index 9bb20389..00000000 --- a/testing/tests/perf/test_sync.py +++ /dev/null @@ -1,64 +0,0 @@ -import pytest - -pytestmark = pytest.mark.perf - - -@pytest.inlineCallbacks -def load_up(client, amount, payload): - # create a bunch of local documents - for i in xrange(amount): - yield client.create_doc({'content': payload}) - - -def create_upload(uploads, size): - @pytest.inlineCallbacks - @pytest.mark.benchmark(group="test_upload") - def test(soledad_client, txbenchmark_with_setup, payload): - client = soledad_client() - - def setup(): - return load_up(client, uploads, payload(size)) - - yield txbenchmark_with_setup(setup, client.sync) - return test - - -test_upload_20_500k = create_upload(20, 500 * 1000) -test_upload_100_100k = create_upload(100, 100 * 1000) -test_upload_1000_10k = create_upload(1000, 10 * 1000) - - -def create_download(downloads, size): - @pytest.inlineCallbacks - @pytest.mark.benchmark(group="test_download") - def test(soledad_client, txbenchmark_with_setup, payload): - client = soledad_client() - - yield load_up(client, downloads, payload(size)) - yield client.sync() - # We could create them directly on couch, but sending them - # ensures we are dealing with properly encrypted docs - - def setup(): - return soledad_client() - - def sync(clean_client): - return clean_client.sync() - yield 
txbenchmark_with_setup(setup, sync) - return test - - -test_download_20_500k = create_download(20, 500 * 1000) -test_download_100_100k = create_download(100, 100 * 1000) -test_download_1000_10k = create_download(1000, 10 * 1000) - - -@pytest.inlineCallbacks -@pytest.mark.benchmark(group="test_nothing_to_sync") -def test_nothing_to_sync(soledad_client, txbenchmark_with_setup): - def setup(): - return soledad_client() - - def sync(clean_client): - return clean_client.sync() - yield txbenchmark_with_setup(setup, sync) diff --git a/testing/tox.ini b/testing/tox.ini index f6470c89..f720d0a6 100644 --- a/testing/tox.ini +++ b/testing/tox.ini @@ -4,7 +4,7 @@ skipsdist=True [testenv] basepython = python2.7 -commands = py.test -m "not perf" \ +commands = py.test -m "not benchmark" \ --cov-report=html \ --cov-report=term \ --cov=leap.soledad \ @@ -31,11 +31,11 @@ setenv = TERM=xterm install_command = pip install {opts} {packages} -[testenv:perf] +[testenv:benchmark] deps = {[testenv]deps} pytest-benchmark -commands = py.test -m perf {posargs} +commands = py.test -m benchmark {posargs} [testenv:code-check] changedir = .. -- cgit v1.2.3 From 1804b5f74a4efa6b25c06fe353ac960fd42e4fb6 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 19 Nov 2016 04:13:52 -0300 Subject: [tests] migrate pytest to trial test_deprecated_crypto was using pytest, which unfortunately doesnt work when mixed with trial. Migrated back. Also added norecursedirs option back, as it is necessary for parallel testing mode. 
--- testing/pytest.ini | 1 + testing/test_soledad/util.py | 2 +- testing/tests/client/test_deprecated_crypto.py | 116 +++++++++++++++---------- 3 files changed, 72 insertions(+), 47 deletions(-) diff --git a/testing/pytest.ini b/testing/pytest.ini index eb70b67c..9c9fc3b7 100644 --- a/testing/pytest.ini +++ b/testing/pytest.ini @@ -1,3 +1,4 @@ [pytest] testpaths = tests twisted = yes +norecursedirs = tests/benchmarks diff --git a/testing/test_soledad/util.py b/testing/test_soledad/util.py index 4a705396..57f8199b 100644 --- a/testing/test_soledad/util.py +++ b/testing/test_soledad/util.py @@ -56,7 +56,7 @@ from leap.soledad.server.auth import SoledadTokenAuthMiddleware PASSWORD = '123456' -ADDRESS = 'leap@leap.se' +ADDRESS = 'user-1234' def make_local_db_and_target(test): diff --git a/testing/tests/client/test_deprecated_crypto.py b/testing/tests/client/test_deprecated_crypto.py index ca1b1558..8ee3735c 100644 --- a/testing/tests/client/test_deprecated_crypto.py +++ b/testing/tests/client/test_deprecated_crypto.py @@ -1,11 +1,16 @@ import json -import pytest +from twisted.internet import defer +from uuid import uuid4 +from urlparse import urljoin from leap.soledad.client import crypto as old_crypto from leap.soledad.common.couch import CouchDatabase from leap.soledad.common import crypto as common_crypto from test_soledad.u1db_tests import simple_doc +from test_soledad.util import SoledadWithCouchServerMixin +from test_soledad.util import make_token_soledad_app +from test_soledad.u1db_tests import TestCaseWithServer def deprecate_client_crypto(client): @@ -20,48 +25,67 @@ def couch_database(couch_url, uuid): return db -@pytest.inlineCallbacks -def test_touch_updates_remote_representation( - soledad_client, request): - - client = soledad_client() - deprecated_client = deprecate_client_crypto(soledad_client()) - - couch_url = request.config.option.couch_url - remote = couch_database(couch_url, client._uuid) - - # ensure remote db is empty - gen, docs = 
remote.get_all_docs() - assert gen == 0 - assert len(docs) == 0 - - # create a doc with deprecated client and sync - yield deprecated_client.create_doc(json.loads(simple_doc)) - yield deprecated_client.sync() - - # check for doc in remote db - gen, docs = remote.get_all_docs() - assert gen == 1 - assert len(docs) == 1 - doc = docs.pop() - content = doc.content - assert common_crypto.ENC_JSON_KEY in content - assert common_crypto.ENC_SCHEME_KEY in content - assert common_crypto.ENC_METHOD_KEY in content - assert common_crypto.ENC_IV_KEY in content - assert common_crypto.MAC_KEY in content - assert common_crypto.MAC_METHOD_KEY in content - - # "touch" the document with a newer client and synx - _, docs = yield client.get_all_docs() - yield client.put_doc(doc) - yield client.sync() - - # check for newer representation of doc in remote db - gen, docs = remote.get_all_docs() - assert gen == 2 - assert len(docs) == 1 - doc = docs.pop() - content = doc.content - assert len(content) == 1 - assert 'raw' in content +class DeprecatedCryptoTest(SoledadWithCouchServerMixin, TestCaseWithServer): + + def setUp(self): + SoledadWithCouchServerMixin.setUp(self) + TestCaseWithServer.setUp(self) + + def tearDown(self): + SoledadWithCouchServerMixin.tearDown(self) + TestCaseWithServer.tearDown(self) + + @staticmethod + def make_app_with_state(state): + return make_token_soledad_app(state) + + @defer.inlineCallbacks + def test_touch_updates_remote_representation(self): + self.startTwistedServer() + user = 'user-' + uuid4().hex + server_url = 'http://%s:%d' % (self.server_address) + client = self._soledad_instance(user=user, server_url=server_url) + deprecated_client = deprecate_client_crypto( + self._soledad_instance(user=user, server_url=server_url)) + + self.make_app() + remote = self.request_state._create_database(replica_uid=client._uuid) + remote = CouchDatabase.open_database( + urljoin(self.couch_url, 'user-' + user), + create=True) + + # ensure remote db is empty + gen, docs = 
remote.get_all_docs() + assert gen == 0 + assert len(docs) == 0 + + # create a doc with deprecated client and sync + yield deprecated_client.create_doc(json.loads(simple_doc)) + yield deprecated_client.sync() + + # check for doc in remote db + gen, docs = remote.get_all_docs() + assert gen == 1 + assert len(docs) == 1 + doc = docs.pop() + content = doc.content + assert common_crypto.ENC_JSON_KEY in content + assert common_crypto.ENC_SCHEME_KEY in content + assert common_crypto.ENC_METHOD_KEY in content + assert common_crypto.ENC_IV_KEY in content + assert common_crypto.MAC_KEY in content + assert common_crypto.MAC_METHOD_KEY in content + + # "touch" the document with a newer client and synx + _, docs = yield client.get_all_docs() + yield client.put_doc(doc) + yield client.sync() + + # check for newer representation of doc in remote db + gen, docs = remote.get_all_docs() + assert gen == 2 + assert len(docs) == 1 + doc = docs.pop() + content = doc.content + assert len(content) == 1 + assert 'raw' in content -- cgit v1.2.3 From c26915bde89b970311d83956aea8255b8d5998e1 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 19 Nov 2016 17:49:12 -0300 Subject: [bug] fix upload progress We need to emit zmq status during doc prepare, which is called during upload. 
--- client/src/leap/soledad/client/http_target/send.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index c15488a0..ffb5f4ba 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -73,7 +73,6 @@ class HTTPDocSender(object): entry, body, idx, total)) result = yield self._send_request(body, calls) - _emit_send_status(self.uuid, body.consumed, total) defer.returnValue(result) def _send_request(self, body, calls): @@ -92,6 +91,7 @@ class HTTPDocSender(object): id=doc.doc_id, rev=doc.rev, content=content, gen=gen, trans_id=trans_id, number_of_docs=total, doc_idx=idx) + _emit_send_status(self.uuid, body.consumed, total) @defer.inlineCallbacks def _encrypt_doc(self, get_doc_call): -- cgit v1.2.3 From 63c33b1b20e013571fb870205302bbc9e4a06e23 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 19 Nov 2016 21:52:30 -0300 Subject: [tests] use options instead of marks When we use marks the new pytests from benchmarks folder are collected and ignored, but this causes trial to fail sometimes. Using --ignore avoids it from being loaded while --benchmark-only will properly select the benchmarks for tox, as intended. 
--- testing/pytest.ini | 1 - testing/tests/benchmarks/test_crypto.py | 3 --- testing/tests/benchmarks/test_misc.py | 2 -- testing/tests/benchmarks/test_sqlcipher.py | 2 -- testing/tests/benchmarks/test_sync.py | 2 -- testing/tox.ini | 6 +++--- 6 files changed, 3 insertions(+), 13 deletions(-) diff --git a/testing/pytest.ini b/testing/pytest.ini index 9c9fc3b7..eb70b67c 100644 --- a/testing/pytest.ini +++ b/testing/pytest.ini @@ -1,4 +1,3 @@ [pytest] testpaths = tests twisted = yes -norecursedirs = tests/benchmarks diff --git a/testing/tests/benchmarks/test_crypto.py b/testing/tests/benchmarks/test_crypto.py index ab586bea..367c3b5b 100644 --- a/testing/tests/benchmarks/test_crypto.py +++ b/testing/tests/benchmarks/test_crypto.py @@ -18,9 +18,6 @@ from leap.soledad.client import _crypto LIMIT = int(float(os.environ.get('SIZE_LIMIT', 50 * 1000 * 1000))) -pytestmark = pytest.mark.benchmark - - def create_doc_encryption(size): @pytest.mark.benchmark(group="test_crypto_encrypt_doc") def test_doc_encryption(soledad_client, benchmark, payload): diff --git a/testing/tests/benchmarks/test_misc.py b/testing/tests/benchmarks/test_misc.py index 2f32ad7c..ead48adf 100644 --- a/testing/tests/benchmarks/test_misc.py +++ b/testing/tests/benchmarks/test_misc.py @@ -1,7 +1,5 @@ import pytest -pytestmark = pytest.mark.benchmark - @pytest.mark.benchmark(group="test_instance") def test_initialization(soledad_client, benchmark): diff --git a/testing/tests/benchmarks/test_sqlcipher.py b/testing/tests/benchmarks/test_sqlcipher.py index 7f8842bd..39c9e3ad 100644 --- a/testing/tests/benchmarks/test_sqlcipher.py +++ b/testing/tests/benchmarks/test_sqlcipher.py @@ -5,8 +5,6 @@ import pytest from twisted.internet.defer import gatherResults -pytestmark = pytest.mark.benchmark - def load_up(client, amount, payload, defer=True): results = [client.create_doc({'content': payload}) for _ in xrange(amount)] diff --git a/testing/tests/benchmarks/test_sync.py b/testing/tests/benchmarks/test_sync.py 
index 88afe9f8..1bf6cc21 100644 --- a/testing/tests/benchmarks/test_sync.py +++ b/testing/tests/benchmarks/test_sync.py @@ -1,7 +1,5 @@ import pytest -pytestmark = pytest.mark.benchmark - @pytest.inlineCallbacks def load_up(client, amount, payload): diff --git a/testing/tox.ini b/testing/tox.ini index f720d0a6..c46c6af1 100644 --- a/testing/tox.ini +++ b/testing/tox.ini @@ -4,7 +4,7 @@ skipsdist=True [testenv] basepython = python2.7 -commands = py.test -m "not benchmark" \ +commands = py.test --ignore=tests/benchmarks \ --cov-report=html \ --cov-report=term \ --cov=leap.soledad \ @@ -35,7 +35,7 @@ install_command = pip install {opts} {packages} deps = {[testenv]deps} pytest-benchmark -commands = py.test -m benchmark {posargs} +commands = py.test --benchmark-only {posargs} [testenv:code-check] changedir = .. @@ -51,4 +51,4 @@ deps = {[testenv]deps} pytest-xdist install_command = pip install {opts} {packages} -commands = py.test {posargs} -n 4 +commands = py.test --ignore=tests/benchmarks {posargs} -n 4 -- cgit v1.2.3 From bfe330a7eaad1c51640dbbb91be233a65d2a4bd7 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 19 Nov 2016 21:54:13 -0300 Subject: [tests] fixes test_crypto bench encrypt returns a deferred and needs the adapted benchmark runner. 
--- testing/tests/benchmarks/test_crypto.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/testing/tests/benchmarks/test_crypto.py b/testing/tests/benchmarks/test_crypto.py index 367c3b5b..75ad9a30 100644 --- a/testing/tests/benchmarks/test_crypto.py +++ b/testing/tests/benchmarks/test_crypto.py @@ -20,7 +20,8 @@ LIMIT = int(float(os.environ.get('SIZE_LIMIT', 50 * 1000 * 1000))) def create_doc_encryption(size): @pytest.mark.benchmark(group="test_crypto_encrypt_doc") - def test_doc_encryption(soledad_client, benchmark, payload): + @pytest.inlineCallbacks + def test_doc_encryption(soledad_client, txbenchmark, payload): crypto = soledad_client()._crypto DOC_CONTENT = {'payload': payload(size)} @@ -28,7 +29,7 @@ def create_doc_encryption(size): doc_id=uuid4().hex, rev='rev', json=json.dumps(DOC_CONTENT)) - benchmark(crypto.encrypt_doc, doc) + yield txbenchmark(crypto.encrypt_doc, doc) return test_doc_encryption -- cgit v1.2.3 From b17eecaa35240333d0270c1a3437e67510fe4f20 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 20 Nov 2016 15:41:21 -0300 Subject: [bug] emit last sent doc event Document sending happens after encryption, so the last sent document needs to be signalled after request end. 
--- client/src/leap/soledad/client/http_target/send.py | 1 + testing/tests/sync/test_sync_target.py | 31 ++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index ffb5f4ba..b9ca7da2 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -72,6 +72,7 @@ class HTTPDocSender(object): calls.append((self._prepare_one_doc, entry, body, idx, total)) result = yield self._send_request(body, calls) + _emit_send_status(self.uuid, body.consumed, total) defer.returnValue(result) diff --git a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index dd69ffa1..d02aba68 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -310,6 +310,37 @@ class TestSoledadSyncTarget( self.assertGetEncryptedDoc( db, 'doc-here', 'replica:1', '{"value": "here"}', False) + @defer.inlineCallbacks + def test_sync_exchange_send_events(self): + """ + Test for sync exchange's SOLEDAD_SYNC_SEND_STATUS event. 
+ """ + remote_target = self.getSyncTarget() + uuid = remote_target.uuid + events = [] + + def mocked_events(*args): + events.append((args)) + self.patch( + target.send, '_emit_send_status', mocked_events) + + doc = self.make_document('doc-here', 'replica:1', '{"value": "here"}') + doc2 = self.make_document('doc-here', 'replica:1', '{"value": "here"}') + doc3 = self.make_document('doc-here', 'replica:1', '{"value": "here"}') + get_doc = (lambda _: doc, (1,), {}) + get_doc2 = (lambda _: doc2, (1,), {}) + get_doc3 = (lambda _: doc3, (1,), {}) + docs = [(get_doc, 10, 'T-sid'), + (get_doc2, 11, 'T-sid2'), (get_doc3, 12, 'T-sid3')] + new_gen, trans_id = yield remote_target.sync_exchange( + docs, 'replica', last_known_generation=0, + last_known_trans_id=None, insert_doc_cb=lambda _: 1, + ensure_callback=lambda _: 1) + self.assertEqual(1, new_gen) + self.assertEqual(4, len(events)) + self.assertEquals([(uuid, 0, 3), (uuid, 1, 3), (uuid, 2, 3), + (uuid, 3, 3)], events) + def test_sync_exchange_in_stream_error(self): self.skipTest("bypass this test because our sync_exchange process " "does not return u1db error 503 \"unavailable\" for " -- cgit v1.2.3 From 8ebed9c686f79b6d099266d3c5440f5dc9fb383e Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 20 Nov 2016 18:11:16 -0300 Subject: [refactor] Remove dead code Batching is now decided on server side, so the code can be simplified. Also, sync_db and other parameters were used to initialize encdecpool, which is no longer supported. 
--- .../src/leap/soledad/client/http_target/__init__.py | 19 +++---------------- client/src/leap/soledad/client/http_target/send.py | 14 +++----------- client/src/leap/soledad/client/sqlcipher.py | 8 ++------ 3 files changed, 8 insertions(+), 33 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/__init__.py b/client/src/leap/soledad/client/http_target/__init__.py index 91d87f0c..0e250bf1 100644 --- a/client/src/leap/soledad/client/http_target/__init__.py +++ b/client/src/leap/soledad/client/http_target/__init__.py @@ -54,8 +54,7 @@ class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): the parsed documents that the remote send us, before being decrypted and written to the main database. """ - def __init__(self, url, source_replica_uid, creds, crypto, cert_file, - sync_db=None): + def __init__(self, url, source_replica_uid, creds, crypto, cert_file): """ Initialize the sync target. @@ -68,17 +67,11 @@ class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): :type creds: creds :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto + :type crypto: soledad._crypto.SoledadCrypto :param cert_file: Path to the certificate of the ca used to validate the SSL certificate used by the remote soledad server. :type cert_file: str - :param sync_db: Optional. handler for the db with the symmetric - encryption of the syncing documents. If - None, encryption will be done in-place, - instead of retreiving it from the dedicated - database. 
- :type sync_db: Sqlite handler """ if url.endswith("/"): url = url[:-1] @@ -90,15 +83,9 @@ class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): self._crypto = crypto # TODO: DEPRECATED CRYPTO self._deprecated_crypto = old_crypto.SoledadCrypto(crypto.secret) - self._sync_db = sync_db self._insert_doc_cb = None - # asynchronous encryption/decryption attributes - self._decryption_callback = None - self._sync_decr_pool = None - # XXX Increasing timeout of simple requests to avoid chances of hitting - # the duplicated syncing bug. This could be reduced to the 30s default - # after implementing Cancellable Sync. See #7382 + # Twisted default Agent with our own ssl context factory self._http = Agent(reactor, get_compatible_ssl_context_factory(cert_file)) diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index b9ca7da2..2b286ec5 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -33,8 +33,6 @@ class HTTPDocSender(object): They need to be encrypted and metadata prepared before sending. """ - MAX_BATCH_SIZE = 0 # disabled by now, this is being tested yet - # The uuid of the local replica. # Any class inheriting from this one should provide a meaningful attribute # if the sync status event is meant to be used somewhere else. 
@@ -63,14 +61,10 @@ class HTTPDocSender(object): @defer.inlineCallbacks def _send_batch(self, body, docs): - total = len(docs) - missing = total - body.consumed - calls = [] - for i in xrange(1, missing + 1): - idx = body.consumed + i - entry = docs[idx - 1] + total, calls = len(docs), [] + for i, entry in enumerate(docs): calls.append((self._prepare_one_doc, - entry, body, idx, total)) + entry, body, i + 1, total)) result = yield self._send_request(body, calls) _emit_send_status(self.uuid, body.consumed, total) @@ -101,8 +95,6 @@ class HTTPDocSender(object): if doc.is_tombstone(): defer.returnValue((doc, None)) else: - # TODO -- for blobs, should stream the doc raw content - # TODO -- get rid of this json encoding content = yield self._crypto.encrypt_doc(doc) defer.returnValue((doc, content)) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index b9db3674..e7057a8d 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -396,8 +396,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): """ ENCRYPT_LOOP_PERIOD = 1 - def __init__(self, opts, soledad_crypto, replica_uid, cert_file, - sync_db=None): + def __init__(self, opts, soledad_crypto, replica_uid, cert_file): self._opts = opts self._path = opts.path @@ -405,8 +404,6 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self.__replica_uid = replica_uid self._cert_file = cert_file - self._sync_db = sync_db - # storage for the documents received during a sync self.received_docs = [] @@ -494,8 +491,7 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self._replica_uid, creds=creds, crypto=self._crypto, - cert_file=self._cert_file, - sync_db=self._sync_db)) + cert_file=self._cert_file)) # # Symmetric encryption of syncing docs -- cgit v1.2.3 From 171dff213b9aacdb7ac4f86ed81e741aa965aa35 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 21 Nov 2016 01:08:49 -0300 Subject: [refactor] remove dead parameters, improve comments 
received docs makes no sense for a single request download, plus all its comments and docstrings. Also updated docstrings for other methods. The method that tests if sqlcipher is encrypted can return a db handle that can be used right away. If we ignore it and reopen we can end up with a lost open cursor. --- .../src/leap/soledad/client/http_target/fetch.py | 51 ++++++++++------------ .../soledad/client/http_target/fetch_protocol.py | 1 + client/src/leap/soledad/client/sqlcipher.py | 10 ++--- server/src/leap/soledad/server/__init__.py | 4 +- server/src/leap/soledad/server/sync.py | 17 ++------ 5 files changed, 33 insertions(+), 50 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 53650de4..7d27c06d 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -54,7 +54,6 @@ class HTTPDocFetcher(object): ensure_callback, sync_id): new_generation = last_known_generation new_transaction_id = last_known_trans_id - self._received_docs = 0 # Acts as a queue, ensuring line order on async processing # as `self._insert_doc_cb` cant be run concurrently or out of order. 
# DeferredSemaphore solves the concurrency and its implementation uses @@ -64,9 +63,8 @@ class HTTPDocFetcher(object): metadata = yield self._fetch_all( last_known_generation, last_known_trans_id, - sync_id, self._received_docs) - metadata = self._parse_metadata(metadata) - number_of_changes, ngen, ntrans = metadata + sync_id) + number_of_changes, ngen, ntrans = self._parse_metadata(metadata) # wait for pending inserts yield self.semaphore.acquire() @@ -78,16 +76,15 @@ class HTTPDocFetcher(object): defer.returnValue([new_generation, new_transaction_id]) def _fetch_all(self, last_known_generation, - last_known_trans_id, sync_id, received): + last_known_trans_id, sync_id): # add remote replica metadata to the request body = RequestBody( last_known_generation=last_known_generation, last_known_trans_id=last_known_trans_id, sync_id=sync_id, ensure=self._ensure_callback is not None) - # inform server of how many documents have already been received - body.insert_info(received=received) - # build a stream reader with doc parser callback + self._received_docs = 0 + # build a stream reader with _doc_parser as a callback body_reader = fetch_protocol.build_body_reader(self._doc_parser) # start download stream return self._http_request( @@ -100,18 +97,17 @@ class HTTPDocFetcher(object): @defer.inlineCallbacks def _doc_parser(self, doc_info, content, total): """ - Insert a received document into the local replica. - - :param response: The body and headers of the response. - :type response: tuple(str, dict) - :param idx: The index count of the current operation. - :type idx: int + Insert a received document into the local replica, decrypting + if necessary. The case where it's not decrypted is when a doc gets + inserted from Server side with a GPG encrypted content. + + :param doc_info: Dictionary representing Document information. + :type doc_info: dict + :param content: The Document's content. + :type idx: str :param total: The total number of operations. 
:type total: int """ - # If arriving content was symmetrically encrypted, we decrypt incoming - # document and insert into local database - doc = SoledadDocument(doc_info['id'], doc_info['rev'], content) if is_symmetrically_encrypted(doc): content = yield self._crypto.decrypt_doc(doc) @@ -132,26 +128,23 @@ class HTTPDocFetcher(object): def _parse_metadata(self, metadata): """ - Parse the response from the server containing the received document. + Parse the response from the server containing the sync metadata. - :param response: The body and headers of the response. - :type response: tuple(str, dict) + :param response: Metadata as string + :type response: str - :return: (new_gen, new_trans_id, number_of_changes, doc_id, rev, - content, gen, trans_id) + :return: (number_of_changes, new_gen, new_trans_id) :rtype: tuple """ try: metadata = json.loads(metadata) - new_generation = metadata['new_generation'] - new_transaction_id = metadata['new_transaction_id'] - number_of_changes = metadata['number_of_changes'] + # make sure we have replica_uid from fresh new dbs + if self._ensure_callback and 'replica_uid' in metadata: + self._ensure_callback(metadata['replica_uid']) + return (metadata['number_of_changes'], metadata['new_generation'], + metadata['new_transaction_id']) except (ValueError, KeyError): raise errors.BrokenSyncStream - # make sure we have replica_uid from fresh new dbs - if self._ensure_callback and 'replica_uid' in metadata: - self._ensure_callback(metadata['replica_uid']) - return number_of_changes, new_generation, new_transaction_id def _emit_receive_status(user_data, received_docs, total): diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py index a15991f3..dd83c4f7 100644 --- a/client/src/leap/soledad/client/http_target/fetch_protocol.py +++ b/client/src/leap/soledad/client/http_target/fetch_protocol.py @@ -46,6 +46,7 @@ class DocStreamReceiver(ReadBodyProtocol): 
self.message = response.phrase if response else None self.headers = response.headers if response else {} self.delimiter = '\r\n' + self.metadata = '' self._doc_reader = doc_reader self.reset() diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index e7057a8d..c9a9444e 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -217,10 +217,10 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ # ensure the db is encrypted if the file already exists if os.path.isfile(opts.path): - _assert_db_is_encrypted(opts) - - # connect to the sqlcipher database - self._db_handle = initialize_sqlcipher_db(opts) + self._db_handle = _assert_db_is_encrypted(opts) + else: + # connect to the sqlcipher database + self._db_handle = initialize_sqlcipher_db(opts) # TODO --------------------------------------------------- # Everything else in this initialization has to be factored @@ -565,7 +565,7 @@ def _assert_db_is_encrypted(opts): # assert that we can access it using SQLCipher with the given # key dummy_query = ('SELECT count(*) FROM sqlite_master',) - initialize_sqlcipher_db(opts, on_init=dummy_query) + return initialize_sqlcipher_db(opts, on_init=dummy_query) else: raise DatabaseIsNotEncrypted() diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index 7ba95543..039bef75 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -235,10 +235,8 @@ class HTTPInvocationByMethodWithBody( return meth_end() # handle outgoing documents elif content_type == 'application/x-soledad-sync-get': - line = body_getline() - entry = line.strip() meth_get = self._lookup('%s_get' % method) - return meth_get({}, line) + return meth_get() else: raise http_app.BadRequest() else: diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 
f505a044..b553a056 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -56,7 +56,7 @@ class SyncExchange(sync.SyncExchange): # recover sync state self._sync_state = ServerSyncState(self.source_replica_uid, sync_id) - def find_changes_to_return(self, received): + def find_changes_to_return(self): """ Find changes to return. @@ -64,10 +64,6 @@ class SyncExchange(sync.SyncExchange): order using whats_changed. It excludes documents ids that have already been considered (superseded by the sender, etc). - :param received: How many documents the source replica has already - received during the current sync process. - :type received: int - :return: the generation of this database, which the caller can consider themselves to be synchronized after processing allreturned documents, and the amount of documents to be sent @@ -252,14 +248,9 @@ class SyncResource(http_app.SyncResource): self._staging = [] self._staging_size = 0 - @http_app.http_method(received=int, content_as_args=True) - def post_get(self, received): + def post_get(self): """ - Return one syncing document to the client. - - :param received: How many documents have already been received by the - client on the current sync session. - :type received: int + Return syncing documents to the client. 
""" def send_doc(doc, gen, trans_id): entry = dict(id=doc.doc_id, rev=doc.rev, @@ -278,7 +269,7 @@ class SyncResource(http_app.SyncResource): self.responder.stream_entry('') new_gen, number_of_changes = \ - self.sync_exch.find_changes_to_return(received) + self.sync_exch.find_changes_to_return() self.responder.content_type = 'application/x-u1db-sync-response' self.responder.start_response(200) self.responder.start_stream(), -- cgit v1.2.3 From 89d5c898527c32baec8454d6e3c749935d00a313 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 21 Nov 2016 16:49:41 -0200 Subject: [refactor] separate server application into another file --- testing/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/tests/conftest.py b/testing/tests/conftest.py index 5698c8a9..1ff1cbb7 100644 --- a/testing/tests/conftest.py +++ b/testing/tests/conftest.py @@ -103,7 +103,7 @@ class SoledadServer(object): '--logfile=%s' % self._logfile, '--pidfile=%s' % self._pidfile, 'web', - '--wsgi=leap.soledad.server.application', + '--wsgi=leap.soledad.server.application.wsgi_application', '--port=2424' ]) -- cgit v1.2.3 From 8b091951e8adebadc4be4dc5b2fb4f8b63c6900e Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 24 Nov 2016 21:09:33 -0300 Subject: [docs] add docstrings for _crypto Also explaining how we are using Twisted's consumer interfaces. --- client/src/leap/soledad/client/_crypto.py | 109 +++++++++++++++++++++++++++--- 1 file changed, 101 insertions(+), 8 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index d8d37f55..4a59159c 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -75,11 +75,30 @@ docinfo = namedtuple('docinfo', 'doc_id rev') class SoledadCrypto(object): - + """ + This class provides convenient methods for document encryption and + decryption using BlobEncryptor and BlobDecryptor classes. 
+ """ def __init__(self, secret): + """ + Initialize the crypto object. + + :param secret: The Soledad remote storage secret. + :type secret: str + """ self.secret = secret def encrypt_doc(self, doc): + """ + Creates and configures a BlobEncryptor, asking it to start encryption + and wrapping the result as a simple JSON string with a "raw" key. + + :param doc: the document to be encrypted. + :type doc: SoledadDocument + :return: A deferred whose callback will be invoked with a JSON string + containing the ciphertext as the value of "raw" key. + :rtype: twisted.internet.defer.Deferred + """ def put_raw(blob): raw = blob.getvalue() @@ -95,6 +114,15 @@ class SoledadCrypto(object): return d def decrypt_doc(self, doc): + """ + Creates and configures a BlobDecryptor, asking it decrypt and returning + the decrypted cleartext content from the encrypted document. + + :param doc: the document to be decrypted. + :type doc: SoledadDocument + :return: The decrypted cleartext content of the document. + :rtype: str + """ info = docinfo(doc.doc_id, doc.rev) ciphertext = BytesIO() payload = doc.content['raw'] @@ -106,6 +134,18 @@ class SoledadCrypto(object): def encrypt_sym(data, key): + """ + Encrypt data using AES-256 cipher in CTR mode. + + :param data: The data to be encrypted. + :type data: str + :param key: The key used to encrypt data (must be 256 bits long). + :type key: str + + :return: A tuple with the initialization vector and the ciphertext, both + encoded as base64. + :rtype: (str, str) + """ iv = os.urandom(16) encryptor = AESEncryptor(key, iv) encryptor.write(data) @@ -115,6 +155,20 @@ def encrypt_sym(data, key): def decrypt_sym(data, key, iv): + """ + Decrypt data using AES-256 cipher in CTR mode. + + :param data: The data to be decrypted. + :type data: str + :param key: The symmetric key used to decrypt data (must be 256 bits + long). + :type key: str + :param iv: The base64 encoded initialization vector. + :type iv: str + + :return: The decrypted data. 
+ :rtype: str + """ _iv = base64.b64decode(str(iv)) decryptor = AESDecryptor(key, _iv) decryptor.write(data) @@ -124,11 +178,16 @@ def decrypt_sym(data, key, iv): class BlobEncryptor(object): - """ - Encrypts a payload associated with a given Document. + Produces encrypted data from the cleartext data associated with a given + SoledadDocument using AES-256 cipher in CTR mode, together with a + HMAC-SHA512 Message Authentication Code. + The production happens using a Twisted's FileBodyProducer, which uses a + Cooperator to schedule calls and can be paused/resumed. Each call takes at + most 65536 bytes from the input. + Both the production input and output are file descriptors, so they can be + applied to a stream of data. """ - def __init__(self, doc_info, content_fd, result=None, secret=None, iv=None): if iv is None: @@ -162,11 +221,25 @@ class BlobEncryptor(object): self._crypter = VerifiedEncrypter(self._aes, self._hmac) def encrypt(self): + """ + Starts producing encrypted data from the cleartext data. + + :return: A deferred which will be fired when encryption ends and whose + callback will be invoked with the resulting ciphertext. + :rtype: twisted.internet.defer.Deferred + """ d = self._producer.startProducing(self._crypter) d.addCallback(self._end_crypto_stream) return d def encrypt_whole(self): + """ + Encrypts the input data at once and returns the resulting ciphertext + wrapped into a JSON string under the "raw" key. + + :return: The resulting ciphertext JSON string. + :rtype: str + """ self._crypter.write(self._content_fd.getvalue()) self._end_crypto_stream(None) return '{"raw":"' + self.result.getvalue() + '"}' @@ -281,7 +354,10 @@ class BlobDecryptor(object): class AESEncryptor(object): - + """ + A Twisted's Consumer implementation that takes an input file descriptor and + applies AES-256 cipher in CTR mode. 
+ """ implements(interfaces.IConsumer) def __init__(self, key, iv, fd=None): @@ -311,7 +387,10 @@ class AESEncryptor(object): class HMACWriter(object): - + """ + A Twisted's Consumer implementation that takes an input file descriptor and + produces a HMAC-SHA512 Message Authentication Code. + """ implements(interfaces.IConsumer) hashtype = 'sha512' @@ -327,7 +406,10 @@ class HMACWriter(object): class VerifiedEncrypter(object): - + """ + A Twisted's Consumer implementation combining AESEncryptor and HMACWriter. + It directs the resulting ciphertext into HMAC-SHA512 processing. + """ implements(interfaces.IConsumer) def __init__(self, crypter, hmac): @@ -340,7 +422,10 @@ class VerifiedEncrypter(object): class AESDecryptor(object): - + """ + A Twisted's Consumer implementation that consumes data encrypted with + AES-256 in CTR mode from a file descriptor and generates decrypted data. + """ implements(interfaces.IConsumer) def __init__(self, key, iv, fd=None): @@ -373,6 +458,14 @@ def is_symmetrically_encrypted(doc): + """ + Return True if the document was symmetrically encrypted. + + :param doc: The document to check. + :type doc: SoledadDocument + + :rtype: bool + """ payload = doc.content if not payload or 'raw' not in payload: return False -- cgit v1.2.3 From 1c8e3359734831562fca76b529c0b1f95af565d5 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 25 Nov 2016 19:55:36 -0300 Subject: [refactor] Hide IV, simplify some calls IV was being set during tests and this required some defensive coding to avoid IV being set in production. This commit makes the test use the generated IV and "hides" it using a read-only property to make it clear this should never happen. Also refactored out some parameters that are generated automatically to reduce some lines of code and enhance readability. 
--- client/src/leap/soledad/client/_crypto.py | 45 +++++++++++-------------------- testing/tests/client/test_crypto.py | 32 +++++++++------------- 2 files changed, 29 insertions(+), 48 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index 4a59159c..109cf299 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -146,12 +146,11 @@ def encrypt_sym(data, key): encoded as base64. :rtype: (str, str) """ - iv = os.urandom(16) - encryptor = AESEncryptor(key, iv) + encryptor = AESEncryptor(key) encryptor.write(data) encryptor.end() ciphertext = encryptor.fd.getvalue() - return base64.b64encode(iv), ciphertext + return base64.b64encode(encryptor.iv), ciphertext def decrypt_sym(data, key, iv): @@ -188,13 +187,7 @@ class BlobEncryptor(object): Both the production input and output are file descriptors, so they can be applied to a stream of data. """ - def __init__(self, doc_info, content_fd, result=None, secret=None, - iv=None): - if iv is None: - iv = os.urandom(16) - else: - log.warn('Using a fixed IV. Use only for testing!') - self.iv = iv + def __init__(self, doc_info, content_fd, result=None, secret=None): if not secret: raise EncryptionDecryptionError('no secret given') @@ -206,20 +199,22 @@ class BlobEncryptor(object): self._content_fd = content_fd self._preamble = BytesIO() - if result is None: - result = BytesIO() - self.result = result + self.result = result or BytesIO() sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) self._aes_fd = BytesIO() - self._aes = AESEncryptor(sym_key, self.iv, self._aes_fd) + self._aes = AESEncryptor(sym_key, self._aes_fd) self._hmac = HMACWriter(mac_key) self._write_preamble() self._crypter = VerifiedEncrypter(self._aes, self._hmac) + @property + def iv(self): + return self._aes.iv + def encrypt(self): """ Starts producing encrypted data from the cleartext data. 
@@ -298,9 +293,7 @@ class BlobDecryptor(object): self.sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) self.mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) - if result is None: - result = BytesIO() - self.result = result + self.result = result or BytesIO() def decrypt(self): try: @@ -360,18 +353,15 @@ class AESEncryptor(object): """ implements(interfaces.IConsumer) - def __init__(self, key, iv, fd=None): + def __init__(self, key, fd=None): if len(key) != 32: raise EncryptionDecryptionError('key is not 256 bits') - if len(iv) != 16: - raise EncryptionDecryptionError('iv is not 128 bits') + self.iv = os.urandom(16) - cipher = _get_aes_ctr_cipher(key, iv) + cipher = _get_aes_ctr_cipher(key, self.iv) self.encryptor = cipher.encryptor() - if fd is None: - fd = BytesIO() - self.fd = fd + self.fd = fd or BytesIO() self.done = False @@ -429,8 +419,7 @@ class AESDecryptor(object): implements(interfaces.IConsumer) def __init__(self, key, iv, fd=None): - if iv is None: - iv = os.urandom(16) + iv = iv or os.urandom(16) if len(key) != 32: raise EncryptionDecryptionError('key is not 256 bits') if len(iv) != 16: @@ -439,9 +428,7 @@ class AESDecryptor(object): cipher = _get_aes_ctr_cipher(key, iv) self.decryptor = cipher.decryptor() - if fd is None: - fd = BytesIO() - self.fd = fd + self.fd = fd or BytesIO() self.done = False self.deferred = defer.Deferred() diff --git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index 483c7803..6d896604 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -51,10 +51,10 @@ class AESTest(unittest.TestCase): def test_chunked_encryption(self): key = 'A' * 32 - iv = 'A' * 16 fd = BytesIO() - aes = _crypto.AESEncryptor(key, iv, fd) + aes = _crypto.AESEncryptor(key, fd) + iv = aes.iv data = snowden1 block = 16 @@ -99,14 +99,11 @@ class BlobTestCase(unittest.TestCase): @defer.inlineCallbacks def test_blob_encryptor(self): - inf = BytesIO() - inf.write(snowden1) - 
inf.seek(0) - outf = BytesIO() + inf = BytesIO(snowden1) blob = _crypto.BlobEncryptor( - self.doc_info, inf, result=outf, - secret='A' * 96, iv='B' * 16) + self.doc_info, inf, + secret='A' * 96) encrypted = yield blob.encrypt() data = base64.urlsafe_b64decode(encrypted.getvalue()) @@ -117,7 +114,7 @@ class BlobTestCase(unittest.TestCase): assert sch == 1 assert meth == 1 iv = data[11:27] - assert iv == 'B' * 16 + assert iv == blob.iv doc_id = data[27:37] assert doc_id == 'D-deadbeef' @@ -127,26 +124,23 @@ class BlobTestCase(unittest.TestCase): ciphertext = data[71:-64] aes_key = _crypto._get_sym_key_for_doc( self.doc_info.doc_id, 'A' * 96) - assert ciphertext == _aes_encrypt(aes_key, 'B' * 16, snowden1) + assert ciphertext == _aes_encrypt(aes_key, blob.iv, snowden1) - decrypted = _aes_decrypt(aes_key, 'B' * 16, ciphertext) + decrypted = _aes_decrypt(aes_key, blob.iv, ciphertext) assert str(decrypted) == snowden1 @defer.inlineCallbacks def test_blob_decryptor(self): - inf = BytesIO() - inf.write(snowden1) - inf.seek(0) - outf = BytesIO() + inf = BytesIO(snowden1) blob = _crypto.BlobEncryptor( - self.doc_info, inf, result=outf, - secret='A' * 96, iv='B' * 16) - yield blob.encrypt() + self.doc_info, inf, + secret='A' * 96) + ciphertext = yield blob.encrypt() decryptor = _crypto.BlobDecryptor( - self.doc_info, outf, + self.doc_info, ciphertext, secret='A' * 96) decrypted = yield decryptor.decrypt() assert decrypted.getvalue() == snowden1 -- cgit v1.2.3 From e65cb7bfecd530252e86878dfec117c2793aa04b Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 26 Nov 2016 01:11:28 -0300 Subject: [feature] delimit preamble from ciphertext We now encode preamble and ciphertext+hmac in two distinct payloads separated by a space. This allows metadata to be extracted and used before decoding the whole document. It also introduces a single packer for packing and unpacking of data instead of reads and writes. Downside: doc_id and rev are limited to 255 chars now. 
--- client/src/leap/soledad/client/_crypto.py | 60 +++++++++++++++++-------------- testing/tests/client/test_crypto.py | 23 ++++++------ 2 files changed, 44 insertions(+), 39 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index 109cf299..a235e246 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -24,10 +24,12 @@ import base64 import hashlib import hmac import os +import re import struct import time from io import BytesIO +from itertools import imap from collections import namedtuple import six @@ -54,6 +56,8 @@ MAC_KEY_LENGTH = 64 crypto_backend = MultiBackend([OpenSSLBackend()]) +PACMAN = struct.Struct('cQbb16s255p255p') + class ENC_SCHEME: symkey = 1 @@ -247,15 +251,14 @@ class BlobEncryptor(object): current_time = int(time.time()) - write(b'\x80') - write(struct.pack( - 'Qbb', + write(PACMAN.pack( + '\x80', current_time, ENC_SCHEME.symkey, - ENC_METHOD.aes_256_ctr)) - write(self.iv) - write(str(self.doc_id)) - write(str(self.rev)) + ENC_METHOD.aes_256_ctr, + self.iv, + str(self.doc_id), + str(self.rev))) def _end_crypto_stream(self, ignored): self._aes.end() @@ -267,7 +270,10 @@ class BlobEncryptor(object): hmac = self._hmac.result.getvalue() self.result.write( - base64.urlsafe_b64encode(preamble + encrypted + hmac)) + base64.urlsafe_b64encode(preamble)) + self.result.write(' ') + self.result.write( + base64.urlsafe_b64encode(encrypted + hmac)) self._preamble.close() self._aes_fd.close() self._hmac.result.close() @@ -297,17 +303,21 @@ class BlobDecryptor(object): def decrypt(self): try: - data = base64.urlsafe_b64decode(self.ciphertext.getvalue()) + preamble, ciphertext = _split(self.ciphertext.getvalue()) + hmac, ciphertext = ciphertext[-64:], ciphertext[:-64] except (TypeError, binascii.Error): raise InvalidBlob self.ciphertext.close() - - if not data or six.indexbytes(data, 0) != 0x80: + if len(preamble) != PACMAN.size: raise InvalidBlob + 
try: - ts, sch, meth = struct.unpack("Qbb", data[1:11]) + unpacked_data = PACMAN.unpack(preamble) + pad, ts, sch, meth, iv, doc_id, rev = unpacked_data except struct.error: raise InvalidBlob + if pad != '\x80': + raise InvalidBlob # TODO check timestamp if sch != ENC_SCHEME.symkey: @@ -316,21 +326,12 @@ class BlobDecryptor(object): if meth != ENC_METHOD.aes_256_ctr: raise InvalidBlob('invalid encryption scheme') - iv = data[11:27] - docidlen = len(self.doc_id) - ciph_idx = 26 + docidlen - revlen = len(self.rev) - rev_idx = ciph_idx + 1 + revlen - rev = data[ciph_idx + 1:rev_idx] - if rev != self.rev: raise InvalidBlob('invalid revision') - ciphertext = data[rev_idx:-64] - hmac = data[-64:] - h = HMAC(self.mac_key, hashes.SHA512(), backend=crypto_backend) - h.update(data[:-64]) + h.update(preamble) + h.update(ciphertext) try: h.verify(hmac) except InvalidSignature: @@ -457,12 +458,13 @@ def is_symmetrically_encrypted(doc): if not payload or 'raw' not in payload: return False payload = str(payload['raw']) - if len(payload) < 16: + if len(payload) < PACMAN.size: return False - header = base64.urlsafe_b64decode(payload[:18] + '==') - if six.indexbytes(header, 0) != 0x80: + payload = _split(payload).next() + if six.indexbytes(payload, 0) != 0x80: return False - ts, sch, meth = struct.unpack('Qbb', header[1:11]) + unpacked = PACMAN.unpack(payload) + ts, sch, meth = unpacked[1:4] return sch == ENC_SCHEME.symkey and meth == ENC_METHOD.aes_256_ctr @@ -485,3 +487,7 @@ def _get_sym_key_for_doc(doc_id, secret): def _get_aes_ctr_cipher(key, iv): return Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) + + +def _split(base64_raw_payload): + return imap(base64.urlsafe_b64decode, re.split(' ', base64_raw_payload)) diff --git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index 6d896604..78da8d24 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -22,7 +22,6 @@ import base64 import hashlib 
import json import os -import struct from io import BytesIO @@ -106,22 +105,19 @@ class BlobTestCase(unittest.TestCase): secret='A' * 96) encrypted = yield blob.encrypt() - data = base64.urlsafe_b64decode(encrypted.getvalue()) + preamble, ciphertext = _crypto._split(encrypted.getvalue()) + ciphertext = ciphertext[:-64] - assert data[0] == '\x80' - ts, sch, meth = struct.unpack( - 'Qbb', data[1:11]) + assert len(preamble) == _crypto.PACMAN.size + unpacked_data = _crypto.PACMAN.unpack(preamble) + pad, ts, sch, meth, iv, doc_id, rev = unpacked_data + assert pad == '\x80' assert sch == 1 assert meth == 1 - iv = data[11:27] assert iv == blob.iv - doc_id = data[27:37] assert doc_id == 'D-deadbeef' - - rev = data[37:71] assert rev == self.doc_info.rev - ciphertext = data[71:-64] aes_key = _crypto._get_sym_key_for_doc( self.doc_info.doc_id, 'A' * 96) assert ciphertext == _aes_encrypt(aes_key, blob.iv, snowden1) @@ -159,6 +155,7 @@ class BlobTestCase(unittest.TestCase): assert 'raw' in encrypted doc2 = SoledadDocument('id1', '1') doc2.set_json(encrypted) + assert _crypto.is_symmetrically_encrypted(doc2) decrypted = yield crypto.decrypt_doc(doc2) assert len(decrypted) != 0 assert json.loads(decrypted) == payload @@ -174,10 +171,12 @@ class BlobTestCase(unittest.TestCase): encrypted = yield crypto.encrypt_doc(doc1) encdict = json.loads(encrypted) - raw = base64.urlsafe_b64decode(str(encdict['raw'])) + preamble, raw = _crypto._split(str(encdict['raw'])) # mess with MAC messed = raw[:-64] + '0' * 64 - newraw = base64.urlsafe_b64encode(str(messed)) + + preamble = base64.urlsafe_b64encode(preamble) + newraw = preamble + ' ' + base64.urlsafe_b64encode(str(messed)) doc2 = SoledadDocument('id1', '1') doc2.set_json(json.dumps({"raw": str(newraw)})) -- cgit v1.2.3 From 42082cfa648ec10612823086e72dc2a70a0e773c Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 26 Nov 2016 18:09:26 -0300 Subject: [feature] make _crypto stream on decryption We are already doing this on encryption, 
now we can stream also from decryption. This unblocks the reactor and will be valuable for blobs-io. --- client/src/leap/soledad/client/_crypto.py | 83 +++++++++++++++++++++++-------- testing/tests/benchmarks/test_crypto.py | 4 +- testing/tests/client/test_crypto.py | 2 +- 3 files changed, 64 insertions(+), 25 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index a235e246..163c9e4e 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -39,9 +39,6 @@ from twisted.internet import interfaces from twisted.logger import Logger from twisted.web.client import FileBodyProducer -from cryptography.exceptions import InvalidSignature -from cryptography.hazmat.primitives import hashes -from cryptography.hazmat.primitives.hmac import HMAC from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from cryptography.hazmat.backends.multibackend import MultiBackend from cryptography.hazmat.backends.openssl.backend \ @@ -133,8 +130,7 @@ class SoledadCrypto(object): del doc ciphertext.write(str(payload)) decryptor = BlobDecryptor(info, ciphertext, secret=self.secret) - buf = decryptor.decrypt() - return buf.getvalue() + return decryptor.decrypt() def encrypt_sym(data, key): @@ -291,29 +287,45 @@ class BlobDecryptor(object): def __init__(self, doc_info, ciphertext_fd, result=None, secret=None): + if not secret: + raise EncryptionDecryptionError('no secret given') + ciphertext_fd.seek(0) + self.doc_id = doc_info.doc_id self.rev = doc_info.rev - self.ciphertext = ciphertext_fd - self.sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) self.mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) + self._read_preamble(ciphertext_fd) + + self._producer = FileBodyProducer(self.ciphertext, readSize=2**16) + self._content_fd = self.ciphertext + self.result = result or BytesIO() - def decrypt(self): + self._aes_fd = BytesIO() + self._aes = 
AESDecryptor(self.sym_key, self.iv, self.result) + self._hmac = HMACWriter(self.mac_key) + self._hmac.write(self.preamble) + + self._decrypter = VerifiedDecrypter(self._aes, self._hmac) + + def _read_preamble(self, ciphertext): try: - preamble, ciphertext = _split(self.ciphertext.getvalue()) - hmac, ciphertext = ciphertext[-64:], ciphertext[:-64] + self.preamble, ciphertext = _split(ciphertext.getvalue()) + self.doc_hmac, self.ciphertext = ciphertext[-64:], ciphertext[:-64] except (TypeError, binascii.Error): raise InvalidBlob - self.ciphertext.close() - if len(preamble) != PACMAN.size: + self.ciphertext = BytesIO(self.ciphertext) + + if len(self.preamble) != PACMAN.size: raise InvalidBlob try: - unpacked_data = PACMAN.unpack(preamble) + unpacked_data = PACMAN.unpack(self.preamble) pad, ts, sch, meth, iv, doc_id, rev = unpacked_data + self.iv = iv except struct.error: raise InvalidBlob if pad != '\x80': @@ -329,18 +341,28 @@ class BlobDecryptor(object): if rev != self.rev: raise InvalidBlob('invalid revision') - h = HMAC(self.mac_key, hashes.SHA512(), backend=crypto_backend) - h.update(preamble) - h.update(ciphertext) - try: - h.verify(hmac) - except InvalidSignature: + def _check_hmac(self): + if self._hmac._hmac.digest() != self.doc_hmac: raise InvalidBlob('HMAC could not be verifed') - decryptor = _get_aes_ctr_cipher(self.sym_key, iv).decryptor() + def decrypt(self): + """ + Starts producing encrypted data from the cleartext data. + + :return: A deferred which will be fired when encryption ends and whose + callback will be invoked with the resulting ciphertext. 
+ :rtype: twisted.internet.defer.Deferred + """ + d = self._producer.startProducing(self._decrypter) + d.addCallback(lambda _: self._check_hmac()) + d.addCallback(lambda _: self.result.getvalue()) + return d - # TODO pass chunks, streaming, instead - # Use AESDecryptor below + def decrypt_whole(self): + ciphertext = self.ciphertext.getvalue() + self.hmac_obj.update(ciphertext) + self._check_hmac() + decryptor = _get_aes_ctr_cipher(self.sym_key, self.iv).decryptor() self.result.write(decryptor.update(ciphertext)) self.result.write(decryptor.finalize()) @@ -412,6 +434,23 @@ class VerifiedEncrypter(object): self.hmac.write(enc_chunk) +class VerifiedDecrypter(object): + """ + A Twisted's Consumer implementation combining AESDecryptor and HMACWriter. + It directs the resulting ciphertext into HMAC-SHA512 processing, then + decrypt. + """ + implements(interfaces.IConsumer) + + def __init__(self, decrypter, hmac): + self.decrypter = decrypter + self.hmac = hmac + + def write(self, enc_chunk): + self.hmac.write(enc_chunk) + self.decrypter.write(enc_chunk) + + class AESDecryptor(object): """ A Twisted's Consumer implementation that consumes data encrypted with diff --git a/testing/tests/benchmarks/test_crypto.py b/testing/tests/benchmarks/test_crypto.py index 75ad9a30..8ee9b899 100644 --- a/testing/tests/benchmarks/test_crypto.py +++ b/testing/tests/benchmarks/test_crypto.py @@ -39,7 +39,7 @@ def create_doc_encryption(size): def create_doc_decryption(size): @pytest.inlineCallbacks @pytest.mark.benchmark(group="test_crypto_decrypt_doc") - def test_doc_decryption(soledad_client, benchmark, payload): + def test_doc_decryption(soledad_client, txbenchmark, payload): crypto = soledad_client()._crypto DOC_CONTENT = {'payload': payload(size)} @@ -50,7 +50,7 @@ def create_doc_decryption(size): encrypted_doc = yield crypto.encrypt_doc(doc) doc.set_json(encrypted_doc) - benchmark(crypto.decrypt_doc, doc) + yield txbenchmark(crypto.decrypt_doc, doc) return test_doc_decryption diff 
--git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index 78da8d24..863873f7 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -139,7 +139,7 @@ class BlobTestCase(unittest.TestCase): self.doc_info, ciphertext, secret='A' * 96) decrypted = yield decryptor.decrypt() - assert decrypted.getvalue() == snowden1 + assert decrypted == snowden1 @defer.inlineCallbacks def test_encrypt_and_decrypt(self): -- cgit v1.2.3 From d72e3763538d1156bcf72b643626c2111a5a02cf Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 26 Nov 2016 18:11:11 -0300 Subject: [bug] make the semaphore cover all parsing Unfortunately, if a doc finishes decryption before the previous one we will still have an issue while inserting. This commits solves it by adding the parse and decrypt inside of the semaphore. --- client/src/leap/soledad/client/http_target/fetch.py | 9 +++++++-- client/src/leap/soledad/client/http_target/fetch_protocol.py | 10 +++++++++- testing/tests/sync/test_sync_target.py | 2 +- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 7d27c06d..85e2967d 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -108,6 +108,11 @@ class HTTPDocFetcher(object): :param total: The total number of operations. :type total: int """ + yield self.semaphore.run(self.__atomic_doc_parse, doc_info, content, + total) + + @defer.inlineCallbacks + def __atomic_doc_parse(self, doc_info, content, total): doc = SoledadDocument(doc_info['id'], doc_info['rev'], content) if is_symmetrically_encrypted(doc): content = yield self._crypto.decrypt_doc(doc) @@ -120,8 +125,8 @@ class HTTPDocFetcher(object): # from multiple threads is dangerous. We should bring the dbpool here # or find an alternative. 
Deferring to a thread only helps releasing # the reactor for other tasks as this is an IO intensive call. - yield self.semaphore.run(threads.deferToThread, self._insert_doc_cb, - doc, doc_info['gen'], doc_info['trans_id']) + yield threads.deferToThread(self._insert_doc_cb, + doc, doc_info['gen'], doc_info['trans_id']) self._received_docs += 1 user_data = {'uuid': self.uuid, 'userid': self.userid} _emit_receive_status(user_data, self._received_docs, total=total) diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py index dd83c4f7..3322ec70 100644 --- a/client/src/leap/soledad/client/http_target/fetch_protocol.py +++ b/client/src/leap/soledad/client/http_target/fetch_protocol.py @@ -20,9 +20,12 @@ from cStringIO import StringIO from twisted.web._newclient import ResponseDone from leap.soledad.common.l2db import errors from leap.soledad.common.l2db.remote import utils +from leap.soledad.common.log import getLogger from .support import ReadBodyProtocol from .support import readBody +logger = getLogger(__name__) + class DocStreamReceiver(ReadBodyProtocol): """ @@ -120,8 +123,13 @@ class DocStreamReceiver(ReadBodyProtocol): if 'error' in self.current_doc: raise errors.BrokenSyncStream("Error from server: %s" % line) else: - self._doc_reader( + d = self._doc_reader( self.current_doc, line.strip() or None, self.total) + d.addErrback(self._error) + + def _error(self, reason): + logger.error(reason) + self.transport.loseConnection() def finish(self): """ diff --git a/testing/tests/sync/test_sync_target.py b/testing/tests/sync/test_sync_target.py index d02aba68..6ce9a5c5 100644 --- a/testing/tests/sync/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -63,7 +63,7 @@ class TestSoledadParseReceivedDocResponse(unittest.TestCase): """ def parse(self, stream): - parser = DocStreamReceiver(None, None, lambda *_: 42) + parser = DocStreamReceiver(None, None, lambda *_: 
defer.succeed(42)) parser.dataReceived(stream) parser.finish() -- cgit v1.2.3 From fbca1644823acbe4165ad1087db5baed28a6809d Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 26 Nov 2016 18:12:36 -0300 Subject: [refactor] improve logging Some exceptions were missing a proper description and client_side_db.py script wasn't capturing logs from Twisted. --- client/src/leap/soledad/client/http_target/fetch.py | 2 +- client/src/leap/soledad/client/http_target/fetch_protocol.py | 2 +- scripts/db_access/client_side_db.py | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 85e2967d..df07a96a 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -149,7 +149,7 @@ class HTTPDocFetcher(object): return (metadata['number_of_changes'], metadata['new_generation'], metadata['new_transaction_id']) except (ValueError, KeyError): - raise errors.BrokenSyncStream + raise errors.BrokenSyncStream('Metadata parsing failed') def _emit_receive_status(user_data, received_docs, total): diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py index 3322ec70..fa6b1969 100644 --- a/client/src/leap/soledad/client/http_target/fetch_protocol.py +++ b/client/src/leap/soledad/client/http_target/fetch_protocol.py @@ -136,7 +136,7 @@ class DocStreamReceiver(ReadBodyProtocol): Checks that ']' came and stream was properly closed. 
""" if not self._properly_finished: - raise errors.BrokenSyncStream() + raise errors.BrokenSyncStream('Stream not properly closed') content = self._buffer.getvalue()[0:self._buffer.tell()] self._buffer.close() return content diff --git a/scripts/db_access/client_side_db.py b/scripts/db_access/client_side_db.py index 2acee2b5..48eec0f7 100644 --- a/scripts/db_access/client_side_db.py +++ b/scripts/db_access/client_side_db.py @@ -1,13 +1,13 @@ #!/usr/bin/python import os +import sys import argparse import tempfile import getpass import requests import srp._pysrp as srp import binascii -import logging import json import time @@ -15,6 +15,7 @@ from twisted.internet import reactor from twisted.internet.defer import inlineCallbacks from leap.soledad.client import Soledad +from leap.soledad.common.log import getLogger from leap.keymanager import KeyManager from leap.keymanager.openpgp import OpenPGPKey @@ -39,9 +40,9 @@ Use the --help option to see available options. # create a logger -logger = logging.getLogger(__name__) -LOG_FORMAT = '%(asctime)s %(message)s' -logging.basicConfig(format=LOG_FORMAT, level=logging.DEBUG) +logger = getLogger(__name__) +from twisted.python import log +log.startLogging(sys.stdout) safe_unhexlify = lambda x: binascii.unhexlify(x) if ( -- cgit v1.2.3 From bae95c183e68481db0fe36f066cd14c97bff3013 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 26 Nov 2016 21:26:23 -0300 Subject: [refactor] simplify _crypto After adding the streaming decrypt, some classes were doing almost the same thing. Unified them. Also fixed some module level variables to upper case and some class name to camel case. 
--- client/src/leap/soledad/client/_crypto.py | 228 ++++++++++++------------------ testing/tests/client/test_crypto.py | 5 +- 2 files changed, 91 insertions(+), 142 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index 163c9e4e..22335f9d 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -36,7 +36,6 @@ import six from twisted.internet import defer from twisted.internet import interfaces -from twisted.logger import Logger from twisted.web.client import FileBodyProducer from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes @@ -47,21 +46,16 @@ from cryptography.hazmat.backends.openssl.backend \ from zope.interface import implements -log = Logger() - MAC_KEY_LENGTH = 64 -crypto_backend = MultiBackend([OpenSSLBackend()]) +CRYPTO_BACKEND = MultiBackend([OpenSSLBackend()]) PACMAN = struct.Struct('cQbb16s255p255p') -class ENC_SCHEME: - symkey = 1 - - -class ENC_METHOD: - aes_256_ctr = 1 +ENC_SCHEME = namedtuple('SCHEME', 'symkey')(1) +ENC_METHOD = namedtuple('METHOD', 'aes_256_ctr')(1) +DocInfo = namedtuple('DocInfo', 'doc_id rev') class EncryptionDecryptionError(Exception): @@ -72,9 +66,6 @@ class InvalidBlob(Exception): pass -docinfo = namedtuple('docinfo', 'doc_id rev') - - class SoledadCrypto(object): """ This class provides convenient methods for document encryption and @@ -107,7 +98,7 @@ class SoledadCrypto(object): content = BytesIO() content.write(str(doc.get_json())) - info = docinfo(doc.doc_id, doc.rev) + info = DocInfo(doc.doc_id, doc.rev) del doc encryptor = BlobEncryptor(info, content, secret=self.secret) d = encryptor.encrypt() @@ -124,7 +115,7 @@ class SoledadCrypto(object): :return: The decrypted cleartext content of the document. 
:rtype: str """ - info = docinfo(doc.doc_id, doc.rev) + info = DocInfo(doc.doc_id, doc.rev) ciphertext = BytesIO() payload = doc.content['raw'] del doc @@ -146,10 +137,10 @@ def encrypt_sym(data, key): encoded as base64. :rtype: (str, str) """ - encryptor = AESEncryptor(key) + encryptor = AESConsumer(key) encryptor.write(data) encryptor.end() - ciphertext = encryptor.fd.getvalue() + ciphertext = encryptor.buffer.getvalue() return base64.b64encode(encryptor.iv), ciphertext @@ -169,10 +160,10 @@ def decrypt_sym(data, key, iv): :rtype: str """ _iv = base64.b64decode(str(iv)) - decryptor = AESDecryptor(key, _iv) + decryptor = AESConsumer(key, _iv, operation=AESConsumer.decrypt) decryptor.write(data) decryptor.end() - plaintext = decryptor.fd.getvalue() + plaintext = decryptor.buffer.getvalue() return plaintext @@ -205,15 +196,16 @@ class BlobEncryptor(object): mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) self._aes_fd = BytesIO() - self._aes = AESEncryptor(sym_key, self._aes_fd) - self._hmac = HMACWriter(mac_key) + _aes = AESConsumer(sym_key, _buffer=self._aes_fd) + self.__iv = _aes.iv + self._hmac_writer = HMACWriter(mac_key) self._write_preamble() - self._crypter = VerifiedEncrypter(self._aes, self._hmac) + self._crypter = VerifiedEncrypter(_aes, self._hmac_writer) @property def iv(self): - return self._aes.iv + return self.__iv def encrypt(self): """ @@ -224,26 +216,14 @@ class BlobEncryptor(object): :rtype: twisted.internet.defer.Deferred """ d = self._producer.startProducing(self._crypter) - d.addCallback(self._end_crypto_stream) + d.addCallback(lambda _: self._end_crypto_stream()) return d - def encrypt_whole(self): - """ - Encrypts the input data at once and returns the resulting ciphertext - wrapped into a JSON string under the "raw" key. - - :return: The resulting ciphertext JSON string. 
- :rtype: str - """ - self._crypter.write(self._content_fd.getvalue()) - self._end_crypto_stream(None) - return '{"raw":"' + self.result.getvalue() + '"}' - def _write_preamble(self): def write(data): self._preamble.write(data) - self._hmac.write(data) + self._hmac_writer.write(data) current_time = int(time.time()) @@ -256,23 +236,16 @@ class BlobEncryptor(object): str(self.doc_id), str(self.rev))) - def _end_crypto_stream(self, ignored): - self._aes.end() - self._hmac.end() - self._content_fd.close() + def _end_crypto_stream(self): + encrypted, content_hmac = self._crypter.end() preamble = self._preamble.getvalue() - encrypted = self._aes_fd.getvalue() - hmac = self._hmac.result.getvalue() self.result.write( base64.urlsafe_b64encode(preamble)) self.result.write(' ') self.result.write( - base64.urlsafe_b64encode(encrypted + hmac)) - self._preamble.close() - self._aes_fd.close() - self._hmac.result.close() + base64.urlsafe_b64encode(encrypted + content_hmac)) self.result.seek(0) return defer.succeed(self.result) @@ -289,62 +262,65 @@ class BlobDecryptor(object): secret=None): if not secret: raise EncryptionDecryptionError('no secret given') - ciphertext_fd.seek(0) self.doc_id = doc_info.doc_id self.rev = doc_info.rev - self.sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) - self.mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) - - self._read_preamble(ciphertext_fd) - - self._producer = FileBodyProducer(self.ciphertext, readSize=2**16) - self._content_fd = self.ciphertext + ciphertext_fd, preamble, iv = self._consume_preamble(ciphertext_fd) + mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) + self._current_hmac = BytesIO() + _hmac_writer = HMACWriter(mac_key, self._current_hmac) + _hmac_writer.write(preamble) self.result = result or BytesIO() + sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) + _aes = AESConsumer(sym_key, iv, self.result, + operation=AESConsumer.decrypt) + self._decrypter = VerifiedDecrypter(_aes, _hmac_writer) - 
self._aes_fd = BytesIO() - self._aes = AESDecryptor(self.sym_key, self.iv, self.result) - self._hmac = HMACWriter(self.mac_key) - self._hmac.write(self.preamble) - - self._decrypter = VerifiedDecrypter(self._aes, self._hmac) + self._producer = FileBodyProducer(ciphertext_fd, readSize=2**16) - def _read_preamble(self, ciphertext): + def _consume_preamble(self, ciphertext_fd): + ciphertext_fd.seek(0) try: - self.preamble, ciphertext = _split(ciphertext.getvalue()) - self.doc_hmac, self.ciphertext = ciphertext[-64:], ciphertext[:-64] + preamble, ciphertext = _split(ciphertext_fd.getvalue()) + self.doc_hmac, ciphertext = ciphertext[-64:], ciphertext[:-64] except (TypeError, binascii.Error): raise InvalidBlob - self.ciphertext = BytesIO(self.ciphertext) + ciphertext_fd.close() - if len(self.preamble) != PACMAN.size: + if len(preamble) != PACMAN.size: raise InvalidBlob try: - unpacked_data = PACMAN.unpack(self.preamble) + unpacked_data = PACMAN.unpack(preamble) pad, ts, sch, meth, iv, doc_id, rev = unpacked_data - self.iv = iv except struct.error: raise InvalidBlob + if pad != '\x80': raise InvalidBlob - # TODO check timestamp if sch != ENC_SCHEME.symkey: raise InvalidBlob('invalid scheme') # TODO should adapt the assymetric-gpg too, rigth? if meth != ENC_METHOD.aes_256_ctr: raise InvalidBlob('invalid encryption scheme') - if rev != self.rev: raise InvalidBlob('invalid revision') + if doc_id != self.doc_id: + raise InvalidBlob('invalid revision') + return BytesIO(ciphertext), preamble, iv def _check_hmac(self): - if self._hmac._hmac.digest() != self.doc_hmac: + if self._current_hmac.getvalue() != self.doc_hmac: raise InvalidBlob('HMAC could not be verifed') + def _end_stream(self): + self._decrypter.end() + self._check_hmac() + return self.result.getvalue() + def decrypt(self): """ Starts producing encrypted data from the cleartext data. 
@@ -354,50 +330,9 @@ class BlobDecryptor(object): :rtype: twisted.internet.defer.Deferred """ d = self._producer.startProducing(self._decrypter) - d.addCallback(lambda _: self._check_hmac()) - d.addCallback(lambda _: self.result.getvalue()) + d.addCallback(lambda _: self._end_stream()) return d - def decrypt_whole(self): - ciphertext = self.ciphertext.getvalue() - self.hmac_obj.update(ciphertext) - self._check_hmac() - decryptor = _get_aes_ctr_cipher(self.sym_key, self.iv).decryptor() - - self.result.write(decryptor.update(ciphertext)) - self.result.write(decryptor.finalize()) - return self.result - - -class AESEncryptor(object): - """ - A Twisted's Consumer implementation that takes an input file descriptor and - applies AES-256 cipher in CTR mode. - """ - implements(interfaces.IConsumer) - - def __init__(self, key, fd=None): - if len(key) != 32: - raise EncryptionDecryptionError('key is not 256 bits') - self.iv = os.urandom(16) - - cipher = _get_aes_ctr_cipher(key, self.iv) - self.encryptor = cipher.encryptor() - - self.fd = fd or BytesIO() - - self.done = False - - def write(self, data): - encrypted = self.encryptor.update(data) - self.fd.write(encrypted) - return encrypted - - def end(self): - if not self.done: - self.fd.write(self.encryptor.finalize()) - self.done = True - class HMACWriter(object): """ @@ -407,15 +342,16 @@ class HMACWriter(object): implements(interfaces.IConsumer) hashtype = 'sha512' - def __init__(self, key): + def __init__(self, key, result=None): self._hmac = hmac.new(key, '', getattr(hashlib, self.hashtype)) - self.result = BytesIO('') + self.result = result or BytesIO('') def write(self, data): self._hmac.update(data) def end(self): self.result.write(self._hmac.digest()) + return self.result.getvalue() class VerifiedEncrypter(object): @@ -425,13 +361,18 @@ class VerifiedEncrypter(object): """ implements(interfaces.IConsumer) - def __init__(self, crypter, hmac): + def __init__(self, crypter, hmac_writer): self.crypter = crypter - 
self.hmac = hmac + self.hmac_writer = hmac_writer def write(self, data): enc_chunk = self.crypter.write(data) - self.hmac.write(enc_chunk) + self.hmac_writer.write(enc_chunk) + + def end(self): + ciphertext = self.crypter.end() + content_hmac = self.hmac_writer.end() + return ciphertext, content_hmac class VerifiedDecrypter(object): @@ -442,46 +383,53 @@ class VerifiedDecrypter(object): """ implements(interfaces.IConsumer) - def __init__(self, decrypter, hmac): + def __init__(self, decrypter, hmac_writer): self.decrypter = decrypter - self.hmac = hmac + self.hmac_writer = hmac_writer def write(self, enc_chunk): - self.hmac.write(enc_chunk) + self.hmac_writer.write(enc_chunk) self.decrypter.write(enc_chunk) + def end(self): + self.decrypter.end() + self.hmac_writer.end() + -class AESDecryptor(object): +class AESConsumer(object): """ - A Twisted's Consumer implementation that consumes data encrypted with - AES-256 in CTR mode from a file descriptor and generates decrypted data. + A Twisted's Consumer implementation that takes an input file descriptor and + applies AES-256 cipher in CTR mode. 
""" implements(interfaces.IConsumer) + encrypt = 1 + decrypt = 2 - def __init__(self, key, iv, fd=None): - iv = iv or os.urandom(16) + def __init__(self, key, iv=None, _buffer=None, operation=encrypt): if len(key) != 32: raise EncryptionDecryptionError('key is not 256 bits') - if len(iv) != 16: - raise EncryptionDecryptionError('iv is not 128 bits') - - cipher = _get_aes_ctr_cipher(key, iv) - self.decryptor = cipher.decryptor() - - self.fd = fd or BytesIO() - self.done = False + self.iv = iv or os.urandom(16) + self.buffer = _buffer or BytesIO() self.deferred = defer.Deferred() + self.done = False + + cipher = _get_aes_ctr_cipher(key, self.iv) + if operation == self.encrypt: + self.operator = cipher.encryptor() + else: + self.operator = cipher.decryptor() def write(self, data): - decrypted = self.decryptor.update(data) - self.fd.write(decrypted) - return decrypted + consumed = self.operator.update(data) + self.buffer.write(consumed) + return consumed def end(self): if not self.done: - self.decryptor.finalize() - self.deferred.callback(self.fd) + self.buffer.write(self.operator.finalize()) + self.deferred.callback(self.buffer) self.done = True + return self.buffer.getvalue() def is_symmetrically_encrypted(doc): @@ -525,7 +473,7 @@ def _get_sym_key_for_doc(doc_id, secret): def _get_aes_ctr_cipher(key, iv): - return Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) + return Cipher(algorithms.AES(key), modes.CTR(iv), backend=CRYPTO_BACKEND) def _split(base64_raw_payload): diff --git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index 863873f7..7643f75d 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -52,7 +52,7 @@ class AESTest(unittest.TestCase): key = 'A' * 32 fd = BytesIO() - aes = _crypto.AESEncryptor(key, fd) + aes = _crypto.AESConsumer(key, _buffer=fd) iv = aes.iv data = snowden1 @@ -78,7 +78,8 @@ class AESTest(unittest.TestCase): ciphertext = _aes_encrypt(key, iv, data) 
fd = BytesIO() - aes = _crypto.AESDecryptor(key, iv, fd) + operation = _crypto.AESConsumer.decrypt + aes = _crypto.AESConsumer(key, iv, fd, operation) for i in range(len(ciphertext) / block): chunk = ciphertext[i * block:(i + 1) * block] -- cgit v1.2.3 From b7bf30ca644775b38473571e47cbe102a5216d19 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 27 Nov 2016 01:13:04 -0300 Subject: [refactor] adds PipeableWriter to pipe two streams VerifiedEncryptor and VerifiedDecryptor are just a pipe and a fan-out. This class provides both behaviors to two distinct writeable things. --- client/src/leap/soledad/client/_crypto.py | 45 +++++++++++-------------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index 22335f9d..aaae7b92 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -201,7 +201,7 @@ class BlobEncryptor(object): self._hmac_writer = HMACWriter(mac_key) self._write_preamble() - self._crypter = VerifiedEncrypter(_aes, self._hmac_writer) + self._crypter = PipeableWriter(_aes, self._hmac_writer) @property def iv(self): @@ -276,7 +276,7 @@ class BlobDecryptor(object): sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) _aes = AESConsumer(sym_key, iv, self.result, operation=AESConsumer.decrypt) - self._decrypter = VerifiedDecrypter(_aes, _hmac_writer) + self._decrypter = PipeableWriter(_aes, _hmac_writer, pipe=False) self._producer = FileBodyProducer(ciphertext_fd, readSize=2**16) @@ -354,48 +354,33 @@ class HMACWriter(object): return self.result.getvalue() -class VerifiedEncrypter(object): +class PipeableWriter(object): """ - A Twisted's Consumer implementation combining AESEncryptor and HMACWriter. - It directs the resulting ciphertext into HMAC-SHA512 processing. + A Twisted's Consumer implementation that flows data into two writers. + Here we can combine AESEncryptor and HMACWriter. 
+ It directs the resulting ciphertext into HMAC-SHA512 processing if + pipe=True or writes the ciphertext to both (fan out, which is the case when + decrypting). """ implements(interfaces.IConsumer) - def __init__(self, crypter, hmac_writer): - self.crypter = crypter + def __init__(self, aes_writer, hmac_writer, pipe=True): + self.pipe = pipe + self.aes_writer = aes_writer self.hmac_writer = hmac_writer def write(self, data): - enc_chunk = self.crypter.write(data) + enc_chunk = self.aes_writer.write(data) + if not self.pipe: + enc_chunk = data self.hmac_writer.write(enc_chunk) def end(self): - ciphertext = self.crypter.end() + ciphertext = self.aes_writer.end() content_hmac = self.hmac_writer.end() return ciphertext, content_hmac -class VerifiedDecrypter(object): - """ - A Twisted's Consumer implementation combining AESDecryptor and HMACWriter. - It directs the resulting ciphertext into HMAC-SHA512 processing, then - decrypt. - """ - implements(interfaces.IConsumer) - - def __init__(self, decrypter, hmac_writer): - self.decrypter = decrypter - self.hmac_writer = hmac_writer - - def write(self, enc_chunk): - self.hmac_writer.write(enc_chunk) - self.decrypter.write(enc_chunk) - - def end(self): - self.decrypter.end() - self.hmac_writer.end() - - class AESConsumer(object): """ A Twisted's Consumer implementation that takes an input file descriptor and -- cgit v1.2.3 From dc80d2b59edd14ab463dc74e5fa19d1a04c27ca1 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 27 Nov 2016 02:25:07 -0300 Subject: [refactor] introduces a GenericWriter AESWriter and HMACWriter are just applying hmac or aes into a flow of data. Abstracted the application of those operations into a super class and highlighted just the difference on each implementation. 
--- client/src/leap/soledad/client/_crypto.py | 114 +++++++++++++----------------- testing/tests/client/test_crypto.py | 5 +- 2 files changed, 52 insertions(+), 67 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index aaae7b92..f6a84b70 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -137,10 +137,9 @@ def encrypt_sym(data, key): encoded as base64. :rtype: (str, str) """ - encryptor = AESConsumer(key) + encryptor = AESWriter(key) encryptor.write(data) - encryptor.end() - ciphertext = encryptor.buffer.getvalue() + ciphertext = encryptor.end() return base64.b64encode(encryptor.iv), ciphertext @@ -160,10 +159,9 @@ def decrypt_sym(data, key, iv): :rtype: str """ _iv = base64.b64decode(str(iv)) - decryptor = AESConsumer(key, _iv, operation=AESConsumer.decrypt) + decryptor = AESWriter(key, _iv, encrypt=False) decryptor.write(data) - decryptor.end() - plaintext = decryptor.buffer.getvalue() + plaintext = decryptor.end() return plaintext @@ -196,12 +194,12 @@ class BlobEncryptor(object): mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) self._aes_fd = BytesIO() - _aes = AESConsumer(sym_key, _buffer=self._aes_fd) + _aes = AESWriter(sym_key, _buffer=self._aes_fd) self.__iv = _aes.iv self._hmac_writer = HMACWriter(mac_key) self._write_preamble() - self._crypter = PipeableWriter(_aes, self._hmac_writer) + self._crypter = VerifiedAESWriter(_aes, self._hmac_writer) @property def iv(self): @@ -274,9 +272,9 @@ class BlobDecryptor(object): self.result = result or BytesIO() sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) - _aes = AESConsumer(sym_key, iv, self.result, - operation=AESConsumer.decrypt) - self._decrypter = PipeableWriter(_aes, _hmac_writer, pipe=False) + _aes = AESWriter(sym_key, iv, self.result, + encrypt=False) + self._decrypter = VerifiedAESWriter(_aes, _hmac_writer, encrypt=False) self._producer = FileBodyProducer(ciphertext_fd, 
readSize=2**16) @@ -334,87 +332,75 @@ class BlobDecryptor(object): return d -class HMACWriter(object): +class GenericWriter(object): """ - A Twisted's Consumer implementation that takes an input file descriptor and - produces a HMAC-SHA512 Message Authentication Code. + A Twisted's Consumer implementation that can perform one opearation at the + written data and another at the end of the stream. """ implements(interfaces.IConsumer) - hashtype = 'sha512' - def __init__(self, key, result=None): - self._hmac = hmac.new(key, '', getattr(hashlib, self.hashtype)) - self.result = result or BytesIO('') + def __init__(self, operator, closer, result=None): + self.result = result or BytesIO() + self.operator, self.closer = operator, closer def write(self, data): - self._hmac.update(data) + out = self.operator(data) + if out: + self.result.write(out) + return out def end(self): - self.result.write(self._hmac.digest()) + self.result.write(self.closer()) return self.result.getvalue() -class PipeableWriter(object): +class HMACWriter(GenericWriter): """ - A Twisted's Consumer implementation that flows data into two writers. - Here we can combine AESEncryptor and HMACWriter. - It directs the resulting ciphertext into HMAC-SHA512 processing if - pipe=True or writes the ciphertext to both (fan out, which is the case when - decrypting). + A Twisted's Consumer implementation that takes an input file descriptor and + produces a HMAC-SHA512 Message Authentication Code. 
""" - implements(interfaces.IConsumer) - - def __init__(self, aes_writer, hmac_writer, pipe=True): - self.pipe = pipe - self.aes_writer = aes_writer - self.hmac_writer = hmac_writer - - def write(self, data): - enc_chunk = self.aes_writer.write(data) - if not self.pipe: - enc_chunk = data - self.hmac_writer.write(enc_chunk) + hashtype = 'sha512' - def end(self): - ciphertext = self.aes_writer.end() - content_hmac = self.hmac_writer.end() - return ciphertext, content_hmac + def __init__(self, key, result=None): + hmac_obj = hmac.new(key, '', getattr(hashlib, self.hashtype)) + GenericWriter.__init__(self, hmac_obj.update, hmac_obj.digest, result) -class AESConsumer(object): +class AESWriter(GenericWriter): """ A Twisted's Consumer implementation that takes an input file descriptor and applies AES-256 cipher in CTR mode. """ - implements(interfaces.IConsumer) - encrypt = 1 - decrypt = 2 - - def __init__(self, key, iv=None, _buffer=None, operation=encrypt): + def __init__(self, key, iv=None, _buffer=None, encrypt=True): if len(key) != 32: raise EncryptionDecryptionError('key is not 256 bits') self.iv = iv or os.urandom(16) - self.buffer = _buffer or BytesIO() - self.deferred = defer.Deferred() - self.done = False - cipher = _get_aes_ctr_cipher(key, self.iv) - if operation == self.encrypt: - self.operator = cipher.encryptor() - else: - self.operator = cipher.decryptor() + cipher = cipher.encryptor() if encrypt else cipher.decryptor() + GenericWriter.__init__(self, cipher.update, cipher.finalize, _buffer) + + +class VerifiedAESWriter(object): + """ + A Twisted's Consumer implementation that flows data into two writers. + Here we can combine AESEncryptor and HMACWriter. + It directs the resulting ciphertext into HMAC-SHA512 processing if + pipe=True or writes the ciphertext to both (fan out, which is the case when + decrypting). 
+ """ + implements(interfaces.IConsumer) + + def __init__(self, aes_writer, hmac_writer, encrypt=True): + self.encrypt = encrypt + self.aes_writer = aes_writer + self.hmac_writer = hmac_writer def write(self, data): - consumed = self.operator.update(data) - self.buffer.write(consumed) - return consumed + enc_chunk = self.aes_writer.write(data) + self.hmac_writer.write(enc_chunk if self.encrypt else data) def end(self): - if not self.done: - self.buffer.write(self.operator.finalize()) - self.deferred.callback(self.buffer) - self.done = True - return self.buffer.getvalue() + return self.aes_writer.end(), self.hmac_writer.end() def is_symmetrically_encrypted(doc): diff --git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index 7643f75d..aad588c0 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -52,7 +52,7 @@ class AESTest(unittest.TestCase): key = 'A' * 32 fd = BytesIO() - aes = _crypto.AESConsumer(key, _buffer=fd) + aes = _crypto.AESWriter(key, _buffer=fd) iv = aes.iv data = snowden1 @@ -78,8 +78,7 @@ class AESTest(unittest.TestCase): ciphertext = _aes_encrypt(key, iv, data) fd = BytesIO() - operation = _crypto.AESConsumer.decrypt - aes = _crypto.AESConsumer(key, iv, fd, operation) + aes = _crypto.AESWriter(key, iv, fd, encrypt=False) for i in range(len(ciphertext) / block): chunk = ciphertext[i * block:(i + 1) * block] -- cgit v1.2.3 From 67917656589d08a84f98ff675f6aeade809e1faf Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 29 Nov 2016 02:04:57 -0300 Subject: [feature] speed up sync benchmark setup code We aren't testing huge payloads on CI, so it doesn't make sense to insert docs one by one. 'gatherResults' can speed up bench setup. 
--- testing/tests/benchmarks/test_sync.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/testing/tests/benchmarks/test_sync.py b/testing/tests/benchmarks/test_sync.py index 1bf6cc21..1501d74b 100644 --- a/testing/tests/benchmarks/test_sync.py +++ b/testing/tests/benchmarks/test_sync.py @@ -1,11 +1,14 @@ import pytest +from twisted.internet.defer import gatherResults @pytest.inlineCallbacks def load_up(client, amount, payload): # create a bunch of local documents + deferreds = [] for i in xrange(amount): - yield client.create_doc({'content': payload}) + deferreds.append(client.create_doc({'content': payload})) + yield gatherResults(deferreds) def create_upload(uploads, size): -- cgit v1.2.3 From 5a93aeaab78c95dd707f922a4f45bb5d2eeca951 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 29 Nov 2016 02:20:03 -0300 Subject: [style] fixes from code-review Naming, interfaces and other details. --- client/src/leap/soledad/client/_crypto.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index f6a84b70..574e2b6e 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -195,7 +195,7 @@ class BlobEncryptor(object): self._aes_fd = BytesIO() _aes = AESWriter(sym_key, _buffer=self._aes_fd) - self.__iv = _aes.iv + self._iv = _aes.iv self._hmac_writer = HMACWriter(mac_key) self._write_preamble() @@ -203,7 +203,7 @@ class BlobEncryptor(object): @property def iv(self): - return self.__iv + return self._iv def encrypt(self): """ @@ -292,16 +292,15 @@ class BlobDecryptor(object): try: unpacked_data = PACMAN.unpack(preamble) - pad, ts, sch, meth, iv, doc_id, rev = unpacked_data + magic, ts, sch, meth, iv, doc_id, rev = unpacked_data except struct.error: raise InvalidBlob - if pad != '\x80': + if magic != '\x80': raise InvalidBlob # TODO check timestamp if sch != ENC_SCHEME.symkey: raise 
InvalidBlob('invalid scheme') - # TODO should adapt the assymetric-gpg too, rigth? if meth != ENC_METHOD.aes_256_ctr: raise InvalidBlob('invalid encryption scheme') if rev != self.rev: @@ -339,18 +338,18 @@ class GenericWriter(object): """ implements(interfaces.IConsumer) - def __init__(self, operator, closer, result=None): + def __init__(self, process, close, result=None): self.result = result or BytesIO() - self.operator, self.closer = operator, closer + self.process, self.close = process, close def write(self, data): - out = self.operator(data) + out = self.process(data) if out: self.result.write(out) return out def end(self): - self.result.write(self.closer()) + self.result.write(self.close()) return self.result.getvalue() @@ -359,6 +358,7 @@ class HMACWriter(GenericWriter): A Twisted's Consumer implementation that takes an input file descriptor and produces a HMAC-SHA512 Message Authentication Code. """ + implements(interfaces.IConsumer) hashtype = 'sha512' def __init__(self, key, result=None): @@ -371,6 +371,8 @@ class AESWriter(GenericWriter): A Twisted's Consumer implementation that takes an input file descriptor and applies AES-256 cipher in CTR mode. """ + implements(interfaces.IConsumer) + def __init__(self, key, iv=None, _buffer=None, encrypt=True): if len(key) != 32: raise EncryptionDecryptionError('key is not 256 bits') -- cgit v1.2.3 From 5a1827f87dafbf64cfd39dd26e1923a456f05d44 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 29 Nov 2016 02:21:02 -0300 Subject: [bug] enable batching again Something happened during rebase. This configuration is supposed to be True by default now. 
--- server/src/leap/soledad/server/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/leap/soledad/server/config.py b/server/src/leap/soledad/server/config.py index 4a791cbe..3c17ec19 100644 --- a/server/src/leap/soledad/server/config.py +++ b/server/src/leap/soledad/server/config.py @@ -24,7 +24,7 @@ CONFIG_DEFAULTS = { 'couch_url': 'http://localhost:5984', 'create_cmd': None, 'admin_netrc': '/etc/couchdb/couchdb-admin.netrc', - 'batching': False + 'batching': True }, 'database-security': { 'members': ['soledad'], -- cgit v1.2.3 From 694e5670da53e923cf809948e400cd546154162b Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 30 Nov 2016 00:07:24 -0300 Subject: [refactor] improve blob signature magic usage Our magic value wasn't being used and was represented as a string. Refactored it to a constant, increased its size to 2 bytes and optimized is_symmetrically_encrypted to look for the magic and symmetrically encrypted flag under base64 encoding. Most file types use this feature to help identify themselves, so it got refactored to serve the purpose it was created for. 
--- client/pkg/requirements.pip | 1 - client/src/leap/soledad/client/_crypto.py | 36 ++++++++-------------- .../src/leap/soledad/client/http_target/fetch.py | 2 +- testing/tests/client/test_crypto.py | 6 ++-- testing/tests/server/test_server.py | 3 +- 5 files changed, 19 insertions(+), 29 deletions(-) diff --git a/client/pkg/requirements.pip b/client/pkg/requirements.pip index a18fe124..24b168b4 100644 --- a/client/pkg/requirements.pip +++ b/client/pkg/requirements.pip @@ -2,5 +2,4 @@ pysqlcipher>2.6.3 scrypt zope.proxy twisted -six cryptography diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index 574e2b6e..b1c6b059 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -32,8 +32,6 @@ from io import BytesIO from itertools import imap from collections import namedtuple -import six - from twisted.internet import defer from twisted.internet import interfaces from twisted.web.client import FileBodyProducer @@ -50,7 +48,8 @@ MAC_KEY_LENGTH = 64 CRYPTO_BACKEND = MultiBackend([OpenSSLBackend()]) -PACMAN = struct.Struct('cQbb16s255p255p') +PACMAN = struct.Struct('2sbbQ16s255p255p') +BLOB_SIGNATURE_MAGIC = '\x13\x37' ENC_SCHEME = namedtuple('SCHEME', 'symkey')(1) @@ -226,10 +225,10 @@ class BlobEncryptor(object): current_time = int(time.time()) write(PACMAN.pack( - '\x80', - current_time, + BLOB_SIGNATURE_MAGIC, ENC_SCHEME.symkey, ENC_METHOD.aes_256_ctr, + current_time, self.iv, str(self.doc_id), str(self.rev))) @@ -292,11 +291,11 @@ class BlobDecryptor(object): try: unpacked_data = PACMAN.unpack(preamble) - magic, ts, sch, meth, iv, doc_id, rev = unpacked_data + magic, sch, meth, ts, iv, doc_id, rev = unpacked_data except struct.error: raise InvalidBlob - if magic != '\x80': + if magic != BLOB_SIGNATURE_MAGIC: raise InvalidBlob # TODO check timestamp if sch != ENC_SCHEME.symkey: @@ -405,27 +404,18 @@ class VerifiedAESWriter(object): return self.aes_writer.end(), 
self.hmac_writer.end() -def is_symmetrically_encrypted(doc): +def is_symmetrically_encrypted(content): """ - Return True if the document was symmetrically encrypted. + Returns True if the document was symmetrically encrypted. + 'EzcB' is the base64 encoding of \x13\x37 magic number and 1 (symmetrically + encrypted value for enc_scheme flag). - :param doc: The document to check. - :type doc: SoledadDocument + :param doc: The document content as string + :type doc: str :rtype: bool """ - payload = doc.content - if not payload or 'raw' not in payload: - return False - payload = str(payload['raw']) - if len(payload) < PACMAN.size: - return False - payload = _split(payload).next() - if six.indexbytes(payload, 0) != 0x80: - return False - unpacked = PACMAN.unpack(payload) - ts, sch, meth = unpacked[1:4] - return sch == ENC_SCHEME.symkey and meth == ENC_METHOD.aes_256_ctr + return content and content[:13] == '{"raw": "EzcB' # utils diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index df07a96a..8676ceed 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -114,7 +114,7 @@ class HTTPDocFetcher(object): @defer.inlineCallbacks def __atomic_doc_parse(self, doc_info, content, total): doc = SoledadDocument(doc_info['id'], doc_info['rev'], content) - if is_symmetrically_encrypted(doc): + if is_symmetrically_encrypted(content): content = yield self._crypto.decrypt_doc(doc) elif old_crypto.is_symmetrically_encrypted(doc): content = self._deprecated_crypto.decrypt_doc(doc) diff --git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index aad588c0..33a660c9 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -110,8 +110,8 @@ class BlobTestCase(unittest.TestCase): assert len(preamble) == _crypto.PACMAN.size unpacked_data = _crypto.PACMAN.unpack(preamble) - pad, ts, sch, 
meth, iv, doc_id, rev = unpacked_data - assert pad == '\x80' + magic, sch, meth, ts, iv, doc_id, rev = unpacked_data + assert magic == _crypto.BLOB_SIGNATURE_MAGIC assert sch == 1 assert meth == 1 assert iv == blob.iv @@ -155,7 +155,7 @@ class BlobTestCase(unittest.TestCase): assert 'raw' in encrypted doc2 = SoledadDocument('id1', '1') doc2.set_json(encrypted) - assert _crypto.is_symmetrically_encrypted(doc2) + assert _crypto.is_symmetrically_encrypted(encrypted) decrypted = yield crypto.decrypt_doc(doc2) assert len(decrypted) != 0 assert json.loads(decrypted) == payload diff --git a/testing/tests/server/test_server.py b/testing/tests/server/test_server.py index 2f958b29..6710caaf 100644 --- a/testing/tests/server/test_server.py +++ b/testing/tests/server/test_server.py @@ -413,7 +413,8 @@ class EncryptedSyncTestCase( self.assertEqual(soldoc.rev, couchdoc.rev) couch_content = couchdoc.content.keys() self.assertEqual(['raw'], couch_content) - self.assertTrue(_crypto.is_symmetrically_encrypted(couchdoc)) + content = couchdoc.get_json() + self.assertTrue(_crypto.is_symmetrically_encrypted(content)) d = sol1.get_all_docs() d.addCallback(_db1AssertEmptyDocList) -- cgit v1.2.3 From 349a49d2be011a428023a4ece14001fda57e65c4 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 6 Dec 2016 23:16:28 -0300 Subject: [feature] use GCM instead of CTR+HMAC Resolves: #8668 - client: substitute usage of CTR mode + HMAC by GCM cipher mode Signed-off-by: Victor Shyba --- client/src/leap/soledad/client/_crypto.py | 200 ++++++++++-------------------- client/src/leap/soledad/client/secrets.py | 2 +- testing/tests/benchmarks/test_crypto.py | 4 +- testing/tests/client/test_crypto.py | 48 +++---- 4 files changed, 95 insertions(+), 159 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index b1c6b059..d9211322 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -36,6 +36,7 @@ 
from twisted.internet import defer from twisted.internet import interfaces from twisted.web.client import FileBodyProducer +from cryptography.exceptions import InvalidTag from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from cryptography.hazmat.backends.multibackend import MultiBackend from cryptography.hazmat.backends.openssl.backend \ @@ -44,7 +45,7 @@ from cryptography.hazmat.backends.openssl.backend \ from zope.interface import implements -MAC_KEY_LENGTH = 64 +SECRET_LENGTH = 64 CRYPTO_BACKEND = MultiBackend([OpenSSLBackend()]) @@ -53,7 +54,7 @@ BLOB_SIGNATURE_MAGIC = '\x13\x37' ENC_SCHEME = namedtuple('SCHEME', 'symkey')(1) -ENC_METHOD = namedtuple('METHOD', 'aes_256_ctr')(1) +ENC_METHOD = namedtuple('METHOD', 'aes_256_gcm')(1) DocInfo = namedtuple('DocInfo', 'doc_id rev') @@ -95,8 +96,7 @@ class SoledadCrypto(object): raw = blob.getvalue() return '{"raw": "' + raw + '"}' - content = BytesIO() - content.write(str(doc.get_json())) + content = BytesIO(str(doc.get_json())) info = DocInfo(doc.doc_id, doc.rev) del doc encryptor = BlobEncryptor(info, content, secret=self.secret) @@ -125,7 +125,7 @@ class SoledadCrypto(object): def encrypt_sym(data, key): """ - Encrypt data using AES-256 cipher in CTR mode. + Encrypt data using AES-256 cipher in GCM mode. :param data: The data to be encrypted. :type data: str @@ -138,13 +138,15 @@ def encrypt_sym(data, key): """ encryptor = AESWriter(key) encryptor.write(data) - ciphertext = encryptor.end() - return base64.b64encode(encryptor.iv), ciphertext + _, ciphertext = encryptor.end() + iv = base64.b64encode(encryptor.iv) + tag = base64.b64encode(encryptor.tag) + return iv, tag, ciphertext -def decrypt_sym(data, key, iv): +def decrypt_sym(data, key, iv, tag): """ - Decrypt data using AES-256 cipher in CTR mode. + Decrypt data using AES-256 cipher in GCM mode. :param data: The data to be decrypted. 
:type data: str @@ -158,51 +160,43 @@ def decrypt_sym(data, key, iv): :rtype: str """ _iv = base64.b64decode(str(iv)) - decryptor = AESWriter(key, _iv, encrypt=False) + tag = base64.b64decode(str(tag)) + decryptor = AESWriter(key, _iv, tag=tag) decryptor.write(data) - plaintext = decryptor.end() + _, plaintext = decryptor.end() return plaintext class BlobEncryptor(object): """ Produces encrypted data from the cleartext data associated with a given - SoledadDocument using AES-256 cipher in CTR mode, together with a - HMAC-SHA512 Message Authentication Code. + SoledadDocument using AES-256 cipher in GCM mode. The production happens using a Twisted's FileBodyProducer, which uses a Cooperator to schedule calls and can be paused/resumed. Each call takes at most 65536 bytes from the input. Both the production input and output are file descriptors, so they can be applied to a stream of data. """ - def __init__(self, doc_info, content_fd, result=None, secret=None): + def __init__(self, doc_info, content_fd, secret=None): if not secret: raise EncryptionDecryptionError('no secret given') self.doc_id = doc_info.doc_id self.rev = doc_info.rev - - content_fd.seek(0) - self._producer = FileBodyProducer(content_fd, readSize=2**16) self._content_fd = content_fd - - self._preamble = BytesIO() - self.result = result or BytesIO() + self._producer = FileBodyProducer(content_fd, readSize=2**16) sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) - mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) - - self._aes_fd = BytesIO() - _aes = AESWriter(sym_key, _buffer=self._aes_fd) - self._iv = _aes.iv - self._hmac_writer = HMACWriter(mac_key) - self._write_preamble() - - self._crypter = VerifiedAESWriter(_aes, self._hmac_writer) + self._aes = AESWriter(sym_key) + self._aes.authenticate(self._make_preamble()) @property def iv(self): - return self._iv + return self._aes.iv + + @property + def tag(self): + return self._aes.tag def encrypt(self): """ @@ -212,39 +206,31 @@ class 
BlobEncryptor(object): callback will be invoked with the resulting ciphertext. :rtype: twisted.internet.defer.Deferred """ - d = self._producer.startProducing(self._crypter) + d = self._producer.startProducing(self._aes) d.addCallback(lambda _: self._end_crypto_stream()) return d - def _write_preamble(self): - - def write(data): - self._preamble.write(data) - self._hmac_writer.write(data) - + def _make_preamble(self): current_time = int(time.time()) - write(PACMAN.pack( + return PACMAN.pack( BLOB_SIGNATURE_MAGIC, ENC_SCHEME.symkey, - ENC_METHOD.aes_256_ctr, + ENC_METHOD.aes_256_gcm, current_time, self.iv, str(self.doc_id), - str(self.rev))) + str(self.rev)) def _end_crypto_stream(self): - encrypted, content_hmac = self._crypter.end() - - preamble = self._preamble.getvalue() - - self.result.write( + preamble, encrypted = self._aes.end() + result = BytesIO() + result.write( base64.urlsafe_b64encode(preamble)) - self.result.write(' ') - self.result.write( - base64.urlsafe_b64encode(encrypted + content_hmac)) - self.result.seek(0) - return defer.succeed(self.result) + result.write(' ') + result.write( + base64.urlsafe_b64encode(encrypted + self.tag)) + return defer.succeed(result) class BlobDecryptor(object): @@ -252,7 +238,7 @@ class BlobDecryptor(object): Decrypts an encrypted blob associated with a given Document. Will raise an exception if the blob doesn't have the expected structure, or - if the HMAC doesn't verify. + if the GCM tag doesn't verify. 
""" def __init__(self, doc_info, ciphertext_fd, result=None, @@ -264,16 +250,11 @@ class BlobDecryptor(object): self.rev = doc_info.rev ciphertext_fd, preamble, iv = self._consume_preamble(ciphertext_fd) - mac_key = _get_mac_key_for_doc(doc_info.doc_id, secret) - self._current_hmac = BytesIO() - _hmac_writer = HMACWriter(mac_key, self._current_hmac) - _hmac_writer.write(preamble) self.result = result or BytesIO() sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret) - _aes = AESWriter(sym_key, iv, self.result, - encrypt=False) - self._decrypter = VerifiedAESWriter(_aes, _hmac_writer, encrypt=False) + self._aes = AESWriter(sym_key, iv, self.result, tag=self.tag) + self._aes.authenticate(preamble) self._producer = FileBodyProducer(ciphertext_fd, readSize=2**16) @@ -281,7 +262,7 @@ class BlobDecryptor(object): ciphertext_fd.seek(0) try: preamble, ciphertext = _split(ciphertext_fd.getvalue()) - self.doc_hmac, ciphertext = ciphertext[-64:], ciphertext[:-64] + self.tag, ciphertext = ciphertext[-16:], ciphertext[:-16] except (TypeError, binascii.Error): raise InvalidBlob ciphertext_fd.close() @@ -300,7 +281,7 @@ class BlobDecryptor(object): # TODO check timestamp if sch != ENC_SCHEME.symkey: raise InvalidBlob('invalid scheme') - if meth != ENC_METHOD.aes_256_ctr: + if meth != ENC_METHOD.aes_256_gcm: raise InvalidBlob('invalid encryption scheme') if rev != self.rev: raise InvalidBlob('invalid revision') @@ -308,14 +289,11 @@ class BlobDecryptor(object): raise InvalidBlob('invalid revision') return BytesIO(ciphertext), preamble, iv - def _check_hmac(self): - if self._current_hmac.getvalue() != self.doc_hmac: - raise InvalidBlob('HMAC could not be verifed') - def _end_stream(self): - self._decrypter.end() - self._check_hmac() - return self.result.getvalue() + try: + return self._aes.end()[1] + except InvalidTag: + raise InvalidBlob('Invalid Tag. 
Blob authentication failed.') def decrypt(self): """ @@ -325,83 +303,41 @@ class BlobDecryptor(object): callback will be invoked with the resulting ciphertext. :rtype: twisted.internet.defer.Deferred """ - d = self._producer.startProducing(self._decrypter) + d = self._producer.startProducing(self._aes) d.addCallback(lambda _: self._end_stream()) return d -class GenericWriter(object): - """ - A Twisted's Consumer implementation that can perform one opearation at the - written data and another at the end of the stream. - """ - implements(interfaces.IConsumer) - - def __init__(self, process, close, result=None): - self.result = result or BytesIO() - self.process, self.close = process, close - - def write(self, data): - out = self.process(data) - if out: - self.result.write(out) - return out - - def end(self): - self.result.write(self.close()) - return self.result.getvalue() - - -class HMACWriter(GenericWriter): - """ - A Twisted's Consumer implementation that takes an input file descriptor and - produces a HMAC-SHA512 Message Authentication Code. - """ - implements(interfaces.IConsumer) - hashtype = 'sha512' - - def __init__(self, key, result=None): - hmac_obj = hmac.new(key, '', getattr(hashlib, self.hashtype)) - GenericWriter.__init__(self, hmac_obj.update, hmac_obj.digest, result) - - -class AESWriter(GenericWriter): +class AESWriter(object): """ A Twisted's Consumer implementation that takes an input file descriptor and - applies AES-256 cipher in CTR mode. + applies AES-256 cipher in GCM mode. 
""" implements(interfaces.IConsumer) - def __init__(self, key, iv=None, _buffer=None, encrypt=True): + def __init__(self, key, iv=None, _buffer=None, tag=None): if len(key) != 32: raise EncryptionDecryptionError('key is not 256 bits') self.iv = iv or os.urandom(16) - cipher = _get_aes_ctr_cipher(key, self.iv) - cipher = cipher.encryptor() if encrypt else cipher.decryptor() - GenericWriter.__init__(self, cipher.update, cipher.finalize, _buffer) - + self.buffer = _buffer or BytesIO() + cipher = _get_aes_gcm_cipher(key, self.iv, tag) + cipher = cipher.decryptor() if tag else cipher.encryptor() + self.cipher, self.aead = cipher, '' -class VerifiedAESWriter(object): - """ - A Twisted's Consumer implementation that flows data into two writers. - Here we can combine AESEncryptor and HMACWriter. - It directs the resulting ciphertext into HMAC-SHA512 processing if - pipe=True or writes the ciphertext to both (fan out, which is the case when - decrypting). - """ - implements(interfaces.IConsumer) + def authenticate(self, data): + self.aead += data + self.cipher.authenticate_additional_data(data) - def __init__(self, aes_writer, hmac_writer, encrypt=True): - self.encrypt = encrypt - self.aes_writer = aes_writer - self.hmac_writer = hmac_writer + @property + def tag(self): + return self.cipher.tag def write(self, data): - enc_chunk = self.aes_writer.write(data) - self.hmac_writer.write(enc_chunk if self.encrypt else data) + self.buffer.write(self.cipher.update(data)) def end(self): - return self.aes_writer.end(), self.hmac_writer.end() + self.buffer.write(self.cipher.finalize()) + return self.aead, self.buffer.getvalue() def is_symmetrically_encrypted(content): @@ -425,18 +361,14 @@ def _hmac_sha256(key, data): return hmac.new(key, data, hashlib.sha256).digest() -def _get_mac_key_for_doc(doc_id, secret): - key = secret[:MAC_KEY_LENGTH] - return _hmac_sha256(key, doc_id) - - def _get_sym_key_for_doc(doc_id, secret): - key = secret[MAC_KEY_LENGTH:] + key = secret[SECRET_LENGTH:] 
return _hmac_sha256(key, doc_id) -def _get_aes_ctr_cipher(key, iv): - return Cipher(algorithms.AES(key), modes.CTR(iv), backend=CRYPTO_BACKEND) +def _get_aes_gcm_cipher(key, iv, tag): + mode = modes.GCM(iv, tag) + return Cipher(algorithms.AES(key), mode, backend=CRYPTO_BACKEND) def _split(base64_raw_payload): diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 21c4f291..1eb6f31d 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -34,7 +34,7 @@ from leap.soledad.common import soledad_assert_type from leap.soledad.common import document from leap.soledad.common.log import getLogger from leap.soledad.client import events -from leap.soledad.client._crypto import encrypt_sym, decrypt_sym +from leap.soledad.client.crypto import encrypt_sym, decrypt_sym logger = getLogger(__name__) diff --git a/testing/tests/benchmarks/test_crypto.py b/testing/tests/benchmarks/test_crypto.py index 8ee9b899..631ac041 100644 --- a/testing/tests/benchmarks/test_crypto.py +++ b/testing/tests/benchmarks/test_crypto.py @@ -66,8 +66,8 @@ def create_raw_decryption(size): @pytest.mark.benchmark(group="test_crypto_raw_decrypt") def test_raw_decrypt(benchmark, payload): key = payload(32) - iv, ciphertext = _crypto.encrypt_sym(payload(size), key) - benchmark(_crypto.decrypt_sym, ciphertext, key, iv) + iv, tag, ciphertext = _crypto.encrypt_sym(payload(size), key) + benchmark(_crypto.decrypt_sym, ciphertext, key, iv, tag) return test_raw_decrypt diff --git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index 33a660c9..10acba56 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -29,6 +29,7 @@ import pytest from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from cryptography.hazmat.backends import default_backend +from cryptography.exceptions import InvalidTag from leap.soledad.common.document import 
SoledadDocument from test_soledad.util import BaseSoledadTest @@ -64,7 +65,7 @@ class AESTest(unittest.TestCase): aes.end() ciphertext_chunked = fd.getvalue() - ciphertext = _aes_encrypt(key, iv, data) + ciphertext, tag = _aes_encrypt(key, iv, data) assert ciphertext_chunked == ciphertext @@ -75,10 +76,10 @@ class AESTest(unittest.TestCase): data = snowden1 block = 16 - ciphertext = _aes_encrypt(key, iv, data) + ciphertext, tag = _aes_encrypt(key, iv, data) fd = BytesIO() - aes = _crypto.AESWriter(key, iv, fd, encrypt=False) + aes = _crypto.AESWriter(key, iv, fd, tag=tag) for i in range(len(ciphertext) / block): chunk = ciphertext[i * block:(i + 1) * block] @@ -106,7 +107,7 @@ class BlobTestCase(unittest.TestCase): encrypted = yield blob.encrypt() preamble, ciphertext = _crypto._split(encrypted.getvalue()) - ciphertext = ciphertext[:-64] + ciphertext = ciphertext[:-16] assert len(preamble) == _crypto.PACMAN.size unpacked_data = _crypto.PACMAN.unpack(preamble) @@ -120,9 +121,10 @@ class BlobTestCase(unittest.TestCase): aes_key = _crypto._get_sym_key_for_doc( self.doc_info.doc_id, 'A' * 96) - assert ciphertext == _aes_encrypt(aes_key, blob.iv, snowden1) + assert ciphertext == _aes_encrypt(aes_key, blob.iv, snowden1)[0] - decrypted = _aes_decrypt(aes_key, blob.iv, ciphertext) + decrypted = _aes_decrypt(aes_key, blob.iv, blob.tag, ciphertext, + preamble) assert str(decrypted) == snowden1 @defer.inlineCallbacks @@ -173,7 +175,7 @@ class BlobTestCase(unittest.TestCase): encdict = json.loads(encrypted) preamble, raw = _crypto._split(str(encdict['raw'])) # mess with MAC - messed = raw[:-64] + '0' * 64 + messed = raw[:-16] + '0' * 16 preamble = base64.urlsafe_b64encode(preamble) newraw = preamble + ' ' + base64.urlsafe_b64encode(str(messed)) @@ -275,16 +277,16 @@ class SoledadCryptoAESTestCase(BaseSoledadTest): def test_encrypt_decrypt_sym(self): # generate 256-bit key key = os.urandom(32) - iv, cyphertext = _crypto.encrypt_sym('data', key) + iv, tag, cyphertext = 
_crypto.encrypt_sym('data', key) self.assertTrue(cyphertext is not None) self.assertTrue(cyphertext != '') self.assertTrue(cyphertext != 'data') - plaintext = _crypto.decrypt_sym(cyphertext, key, iv) + plaintext = _crypto.decrypt_sym(cyphertext, key, iv, tag) self.assertEqual('data', plaintext) - def test_decrypt_with_wrong_iv_fails(self): + def test_decrypt_with_wrong_iv_raises(self): key = os.urandom(32) - iv, cyphertext = _crypto.encrypt_sym('data', key) + iv, tag, cyphertext = _crypto.encrypt_sym('data', key) self.assertTrue(cyphertext is not None) self.assertTrue(cyphertext != '') self.assertTrue(cyphertext != 'data') @@ -293,13 +295,13 @@ class SoledadCryptoAESTestCase(BaseSoledadTest): wrongiv = rawiv while wrongiv == rawiv: wrongiv = os.urandom(1) + rawiv[1:] - plaintext = _crypto.decrypt_sym( - cyphertext, key, iv=binascii.b2a_base64(wrongiv)) - self.assertNotEqual('data', plaintext) + with pytest.raises(InvalidTag): + _crypto.decrypt_sym( + cyphertext, key, iv=binascii.b2a_base64(wrongiv), tag=tag) - def test_decrypt_with_wrong_key_fails(self): + def test_decrypt_with_wrong_key_raises(self): key = os.urandom(32) - iv, cyphertext = _crypto.encrypt_sym('data', key) + iv, tag, cyphertext = _crypto.encrypt_sym('data', key) self.assertTrue(cyphertext is not None) self.assertTrue(cyphertext != '') self.assertTrue(cyphertext != 'data') @@ -307,19 +309,21 @@ class SoledadCryptoAESTestCase(BaseSoledadTest): # ensure keys are different in case we are extremely lucky while wrongkey == key: wrongkey = os.urandom(32) - plaintext = _crypto.decrypt_sym(cyphertext, wrongkey, iv) - self.assertNotEqual('data', plaintext) + with pytest.raises(InvalidTag): + _crypto.decrypt_sym(cyphertext, wrongkey, iv, tag) def _aes_encrypt(key, iv, data): backend = default_backend() - cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=backend) + cipher = Cipher(algorithms.AES(key), modes.GCM(iv), backend=backend) encryptor = cipher.encryptor() - return encryptor.update(data) + 
encryptor.finalize() + return encryptor.update(data) + encryptor.finalize(), encryptor.tag -def _aes_decrypt(key, iv, data): +def _aes_decrypt(key, iv, tag, data, aead=''): backend = default_backend() - cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=backend) + cipher = Cipher(algorithms.AES(key), modes.GCM(iv, tag), backend=backend) decryptor = cipher.decryptor() + if aead: + decryptor.authenticate_additional_data(aead) return decryptor.update(data) + decryptor.finalize() -- cgit v1.2.3 From b3fcc5c5bddc73475596c4fe74e3402f0d5c021a Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 7 Dec 2016 01:24:53 -0300 Subject: [feature] Add retro compat on secrets.py ciphers Integrated the secrets's JSON key that specifies ciphers into _crypto and added optional GCM. Also added a test to check if both cipher types can be imported. Resolves: #8680 Signed-off-by: Victor Shyba --- client/src/leap/soledad/client/_crypto.py | 41 ++++++++++++++++++++----------- client/src/leap/soledad/client/secrets.py | 35 ++++++++++++++++---------- testing/tests/benchmarks/test_crypto.py | 4 +-- testing/tests/client/test_crypto.py | 30 +++++++++++++--------- 4 files changed, 69 insertions(+), 41 deletions(-) diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py index d9211322..4bbdd044 100644 --- a/client/src/leap/soledad/client/_crypto.py +++ b/client/src/leap/soledad/client/_crypto.py @@ -54,7 +54,7 @@ BLOB_SIGNATURE_MAGIC = '\x13\x37' ENC_SCHEME = namedtuple('SCHEME', 'symkey')(1) -ENC_METHOD = namedtuple('METHOD', 'aes_256_gcm')(1) +ENC_METHOD = namedtuple('METHOD', 'aes_256_ctr aes_256_gcm')(1, 2) DocInfo = namedtuple('DocInfo', 'doc_id rev') @@ -123,9 +123,9 @@ class SoledadCrypto(object): return decryptor.decrypt() -def encrypt_sym(data, key): +def encrypt_sym(data, key, method=ENC_METHOD.aes_256_gcm): """ - Encrypt data using AES-256 cipher in GCM mode. + Encrypt data using AES-256 cipher in selected mode. 
:param data: The data to be encrypted. :type data: str @@ -136,17 +136,18 @@ def encrypt_sym(data, key): encoded as base64. :rtype: (str, str) """ - encryptor = AESWriter(key) + mode = _mode_by_method(method) + encryptor = AESWriter(key, mode=mode) encryptor.write(data) _, ciphertext = encryptor.end() iv = base64.b64encode(encryptor.iv) - tag = base64.b64encode(encryptor.tag) - return iv, tag, ciphertext + tag = encryptor.tag or '' + return iv, ciphertext + tag -def decrypt_sym(data, key, iv, tag): +def decrypt_sym(data, key, iv, method=ENC_METHOD.aes_256_gcm): """ - Decrypt data using AES-256 cipher in GCM mode. + Decrypt data using AES-256 cipher in selected mode. :param data: The data to be decrypted. :type data: str @@ -160,8 +161,11 @@ def decrypt_sym(data, key, iv, tag): :rtype: str """ _iv = base64.b64decode(str(iv)) - tag = base64.b64decode(str(tag)) - decryptor = AESWriter(key, _iv, tag=tag) + mode = _mode_by_method(method) + tag = None + if mode == modes.GCM: + data, tag = data[:-16], data[-16:] + decryptor = AESWriter(key, _iv, tag=tag, mode=mode) decryptor.write(data) _, plaintext = decryptor.end() return plaintext @@ -315,12 +319,12 @@ class AESWriter(object): """ implements(interfaces.IConsumer) - def __init__(self, key, iv=None, _buffer=None, tag=None): + def __init__(self, key, iv=None, _buffer=None, tag=None, mode=modes.GCM): if len(key) != 32: raise EncryptionDecryptionError('key is not 256 bits') self.iv = iv or os.urandom(16) self.buffer = _buffer or BytesIO() - cipher = _get_aes_gcm_cipher(key, self.iv, tag) + cipher = _get_aes_cipher(key, self.iv, tag, mode) cipher = cipher.decryptor() if tag else cipher.encryptor() self.cipher, self.aead = cipher, '' @@ -330,7 +334,7 @@ class AESWriter(object): @property def tag(self): - return self.cipher.tag + return getattr(self.cipher, 'tag', None) def write(self, data): self.buffer.write(self.cipher.update(data)) @@ -366,10 +370,17 @@ def _get_sym_key_for_doc(doc_id, secret): return _hmac_sha256(key, 
doc_id) -def _get_aes_gcm_cipher(key, iv, tag): - mode = modes.GCM(iv, tag) +def _get_aes_cipher(key, iv, tag, mode=modes.GCM): + mode = mode(iv, tag) if mode == modes.GCM else mode(iv) return Cipher(algorithms.AES(key), mode, backend=CRYPTO_BACKEND) def _split(base64_raw_payload): return imap(base64.urlsafe_b64decode, re.split(' ', base64_raw_payload)) + + +def _mode_by_method(method): + if method == ENC_METHOD.aes_256_gcm: + return modes.GCM + else: + return modes.CTR diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 1eb6f31d..06488f74 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -34,7 +34,7 @@ from leap.soledad.common import soledad_assert_type from leap.soledad.common import document from leap.soledad.common.log import getLogger from leap.soledad.client import events -from leap.soledad.client.crypto import encrypt_sym, decrypt_sym +from leap.soledad.client import _crypto logger = getLogger(__name__) @@ -126,7 +126,7 @@ class SoledadSecrets(object): instantiates Soledad. """ - IV_SEPARATOR = ":" + SEPARATOR = ":" """ A separator used for storing the encryption initial value prepended to the ciphertext. @@ -142,7 +142,8 @@ class SoledadSecrets(object): KDF_SALT_KEY = 'kdf_salt' KDF_LENGTH_KEY = 'kdf_length' KDF_SCRYPT = 'scrypt' - CIPHER_AES256 = 'aes256' + CIPHER_AES256 = 'aes256' # deprecated, AES-GCM + CIPHER_AES256_GCM = _crypto.ENC_METHOD.aes_256_gcm RECOVERY_DOC_VERSION_KEY = 'version' RECOVERY_DOC_VERSION = 1 """ @@ -343,7 +344,7 @@ class SoledadSecrets(object): '%s%s' % (self._passphrase_as_string(), self._uuid)).hexdigest() - def _export_recovery_document(self): + def _export_recovery_document(self, cipher=None): """ Export the storage secrets. @@ -364,6 +365,9 @@ class SoledadSecrets(object): Note that multiple storage secrets might be stored in one recovery document. + :param cipher: (Optional) The ciper to use. 
Defaults to AES256 + :type cipher: str + :return: The recovery document. :rtype: dict """ @@ -371,7 +375,7 @@ class SoledadSecrets(object): encrypted_secrets = {} for secret_id in self._secrets: encrypted_secrets[secret_id] = self._encrypt_storage_secret( - self._secrets[secret_id]) + self._secrets[secret_id], doc_cipher=cipher) # create the recovery document data = { self.STORAGE_SECRETS_KEY: encrypted_secrets, @@ -537,18 +541,20 @@ class SoledadSecrets(object): ) if encrypted_secret_dict[self.KDF_LENGTH_KEY] != len(key): raise SecretsException("Wrong length of decryption key.") - if encrypted_secret_dict[self.CIPHER_KEY] != self.CIPHER_AES256: + supported_ciphers = [self.CIPHER_AES256, self.CIPHER_AES256_GCM] + doc_cipher = encrypted_secret_dict[self.CIPHER_KEY] + if doc_cipher not in supported_ciphers: raise SecretsException("Unknown cipher in stored secret.") # recover the initial value and ciphertext iv, ciphertext = encrypted_secret_dict[self.SECRET_KEY].split( - self.IV_SEPARATOR, 1) + self.SEPARATOR, 1) ciphertext = binascii.a2b_base64(ciphertext) - decrypted_secret = decrypt_sym(ciphertext, key, iv) + decrypted_secret = _crypto.decrypt_sym(ciphertext, key, iv, doc_cipher) if encrypted_secret_dict[self.LENGTH_KEY] != len(decrypted_secret): raise SecretsException("Wrong length of decrypted secret.") return decrypted_secret - def _encrypt_storage_secret(self, decrypted_secret): + def _encrypt_storage_secret(self, decrypted_secret, doc_cipher=None): """ Encrypt the storage secret. @@ -567,6 +573,8 @@ class SoledadSecrets(object): :param decrypted_secret: The decrypted storage secret. :type decrypted_secret: str + :param cipher: (Optional) The ciper to use. Defaults to AES256 + :type cipher: str :return: The encrypted storage secret. 
:rtype: dict @@ -575,17 +583,18 @@ class SoledadSecrets(object): salt = os.urandom(self.SALT_LENGTH) # get a 256-bit key key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) - iv, ciphertext = encrypt_sym(decrypted_secret, key) + doc_cipher = doc_cipher or self.CIPHER_AES256 + iv, ciphertext = _crypto.encrypt_sym(decrypted_secret, key, doc_cipher) + ciphertext = binascii.b2a_base64(ciphertext) encrypted_secret_dict = { # leap.soledad.crypto submodule uses AES256 for symmetric # encryption. self.KDF_KEY: self.KDF_SCRYPT, self.KDF_SALT_KEY: binascii.b2a_base64(salt), self.KDF_LENGTH_KEY: len(key), - self.CIPHER_KEY: self.CIPHER_AES256, + self.CIPHER_KEY: doc_cipher, self.LENGTH_KEY: len(decrypted_secret), - self.SECRET_KEY: '%s%s%s' % ( - str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), + self.SECRET_KEY: self.SEPARATOR.join([str(iv), ciphertext]) } return encrypted_secret_dict diff --git a/testing/tests/benchmarks/test_crypto.py b/testing/tests/benchmarks/test_crypto.py index 631ac041..8ee9b899 100644 --- a/testing/tests/benchmarks/test_crypto.py +++ b/testing/tests/benchmarks/test_crypto.py @@ -66,8 +66,8 @@ def create_raw_decryption(size): @pytest.mark.benchmark(group="test_crypto_raw_decrypt") def test_raw_decrypt(benchmark, payload): key = payload(32) - iv, tag, ciphertext = _crypto.encrypt_sym(payload(size), key) - benchmark(_crypto.decrypt_sym, ciphertext, key, iv, tag) + iv, ciphertext = _crypto.encrypt_sym(payload(size), key) + benchmark(_crypto.decrypt_sym, ciphertext, key, iv) return test_raw_decrypt diff --git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index 10acba56..277d5430 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -114,7 +114,7 @@ class BlobTestCase(unittest.TestCase): magic, sch, meth, ts, iv, doc_id, rev = unpacked_data assert magic == _crypto.BLOB_SIGNATURE_MAGIC assert sch == 1 - assert meth == 1 + assert meth == 
_crypto.ENC_METHOD.aes_256_gcm assert iv == blob.iv assert doc_id == 'D-deadbeef' assert rev == self.doc_info.rev @@ -163,7 +163,7 @@ class BlobTestCase(unittest.TestCase): assert json.loads(decrypted) == payload @defer.inlineCallbacks - def test_decrypt_with_wrong_mac_raises(self): + def test_decrypt_with_wrong_tag_raises(self): """ Trying to decrypt a document with wrong MAC should raise. """ @@ -174,7 +174,7 @@ class BlobTestCase(unittest.TestCase): encrypted = yield crypto.encrypt_doc(doc1) encdict = json.loads(encrypted) preamble, raw = _crypto._split(str(encdict['raw'])) - # mess with MAC + # mess with tag messed = raw[:-16] + '0' * 16 preamble = base64.urlsafe_b64encode(preamble) @@ -205,8 +205,8 @@ class RecoveryDocumentTestCase(BaseSoledadTest): self.assertTrue(self._soledad.secrets.LENGTH_KEY in encrypted_secret) self.assertTrue(self._soledad.secrets.SECRET_KEY in encrypted_secret) - def test_import_recovery_document(self): - rd = self._soledad.secrets._export_recovery_document() + def test_import_recovery_document(self, cipher='aes256'): + rd = self._soledad.secrets._export_recovery_document(cipher) s = self._soledad_instance() s.secrets._import_recovery_document(rd) s.secrets.set_secret_id(self._soledad.secrets._secret_id) @@ -215,6 +215,14 @@ class RecoveryDocumentTestCase(BaseSoledadTest): 'Failed settinng secret for symmetric encryption.') s.close() + def test_import_GCM_recovery_document(self): + cipher = self._soledad.secrets.CIPHER_AES256_GCM + self.test_import_recovery_document(cipher) + + def test_import_legacy_CTR_recovery_document(self): + cipher = self._soledad.secrets.CIPHER_AES256 + self.test_import_recovery_document(cipher) + class SoledadSecretsTestCase(BaseSoledadTest): @@ -277,16 +285,16 @@ class SoledadCryptoAESTestCase(BaseSoledadTest): def test_encrypt_decrypt_sym(self): # generate 256-bit key key = os.urandom(32) - iv, tag, cyphertext = _crypto.encrypt_sym('data', key) + iv, cyphertext = _crypto.encrypt_sym('data', key) 
self.assertTrue(cyphertext is not None) self.assertTrue(cyphertext != '') self.assertTrue(cyphertext != 'data') - plaintext = _crypto.decrypt_sym(cyphertext, key, iv, tag) + plaintext = _crypto.decrypt_sym(cyphertext, key, iv) self.assertEqual('data', plaintext) def test_decrypt_with_wrong_iv_raises(self): key = os.urandom(32) - iv, tag, cyphertext = _crypto.encrypt_sym('data', key) + iv, cyphertext = _crypto.encrypt_sym('data', key) self.assertTrue(cyphertext is not None) self.assertTrue(cyphertext != '') self.assertTrue(cyphertext != 'data') @@ -297,11 +305,11 @@ class SoledadCryptoAESTestCase(BaseSoledadTest): wrongiv = os.urandom(1) + rawiv[1:] with pytest.raises(InvalidTag): _crypto.decrypt_sym( - cyphertext, key, iv=binascii.b2a_base64(wrongiv), tag=tag) + cyphertext, key, iv=binascii.b2a_base64(wrongiv)) def test_decrypt_with_wrong_key_raises(self): key = os.urandom(32) - iv, tag, cyphertext = _crypto.encrypt_sym('data', key) + iv, cyphertext = _crypto.encrypt_sym('data', key) self.assertTrue(cyphertext is not None) self.assertTrue(cyphertext != '') self.assertTrue(cyphertext != 'data') @@ -310,7 +318,7 @@ class SoledadCryptoAESTestCase(BaseSoledadTest): while wrongkey == key: wrongkey = os.urandom(32) with pytest.raises(InvalidTag): - _crypto.decrypt_sym(cyphertext, wrongkey, iv, tag) + _crypto.decrypt_sym(cyphertext, wrongkey, iv) def _aes_encrypt(key, iv, data): -- cgit v1.2.3 From 7877527fe64eaee1f7f107913a4a3dc78767a338 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 7 Dec 2016 02:03:58 -0300 Subject: [feature] Change CTR to GCM on secrets.py Current implementation can allow tampering and the CTR->GCM exchange can help to avoid it. This commits also alters a behaviour where we moved ahead after failing to decrypt a recovery document. IMHO we can't move ahead as this is a fatal error. 
Signed-off-by: Victor Shyba --- client/src/leap/soledad/client/secrets.py | 12 +++++++++--- testing/tests/client/test_aux_methods.py | 4 ++-- testing/tests/client/test_crypto.py | 5 +++-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index 06488f74..3fe98c64 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -142,7 +142,7 @@ class SoledadSecrets(object): KDF_SALT_KEY = 'kdf_salt' KDF_LENGTH_KEY = 'kdf_length' KDF_SCRYPT = 'scrypt' - CIPHER_AES256 = 'aes256' # deprecated, AES-GCM + CIPHER_AES256 = 'aes256' # deprecated, AES-CTR CIPHER_AES256_GCM = _crypto.ENC_METHOD.aes_256_gcm RECOVERY_DOC_VERSION_KEY = 'version' RECOVERY_DOC_VERSION = 1 @@ -451,6 +451,7 @@ class SoledadSecrets(object): except SecretsException as e: logger.error("failed to decrypt storage secret: %s" % str(e)) + raise e return secret_count, active_secret def _get_secrets_from_shared_db(self): @@ -549,7 +550,12 @@ class SoledadSecrets(object): iv, ciphertext = encrypted_secret_dict[self.SECRET_KEY].split( self.SEPARATOR, 1) ciphertext = binascii.a2b_base64(ciphertext) - decrypted_secret = _crypto.decrypt_sym(ciphertext, key, iv, doc_cipher) + try: + decrypted_secret = _crypto.decrypt_sym( + ciphertext, key, iv, doc_cipher) + except Exception as e: + logger.error(e) + raise SecretsException("Unable to decrypt secret.") if encrypted_secret_dict[self.LENGTH_KEY] != len(decrypted_secret): raise SecretsException("Wrong length of decrypted secret.") return decrypted_secret @@ -583,7 +589,7 @@ class SoledadSecrets(object): salt = os.urandom(self.SALT_LENGTH) # get a 256-bit key key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) - doc_cipher = doc_cipher or self.CIPHER_AES256 + doc_cipher = doc_cipher or self.CIPHER_AES256_GCM iv, ciphertext = _crypto.encrypt_sym(decrypted_secret, key, doc_cipher) ciphertext = 
binascii.b2a_base64(ciphertext) encrypted_secret_dict = { diff --git a/testing/tests/client/test_aux_methods.py b/testing/tests/client/test_aux_methods.py index c25ff8ca..9b4a175f 100644 --- a/testing/tests/client/test_aux_methods.py +++ b/testing/tests/client/test_aux_methods.py @@ -21,10 +21,10 @@ import os from twisted.internet import defer -from leap.soledad.common.errors import DatabaseAccessError from leap.soledad.client import Soledad from leap.soledad.client.adbapi import U1DBConnectionPool from leap.soledad.client.secrets import PassphraseTooShort +from leap.soledad.client.secrets import SecretsException from test_soledad.util import BaseSoledadTest @@ -108,7 +108,7 @@ class AuxMethodsTestCase(BaseSoledadTest): sol.change_passphrase(u'654321') sol.close() - with self.assertRaises(DatabaseAccessError): + with self.assertRaises(SecretsException): self._soledad_instance( 'leap@leap.se', passphrase=u'123', diff --git a/testing/tests/client/test_crypto.py b/testing/tests/client/test_crypto.py index 277d5430..49a61438 100644 --- a/testing/tests/client/test_crypto.py +++ b/testing/tests/client/test_crypto.py @@ -200,8 +200,9 @@ class RecoveryDocumentTestCase(BaseSoledadTest): encrypted_secret = rd[ self._soledad.secrets.STORAGE_SECRETS_KEY][secret_id] self.assertTrue(self._soledad.secrets.CIPHER_KEY in encrypted_secret) - self.assertTrue( - encrypted_secret[self._soledad.secrets.CIPHER_KEY] == 'aes256') + self.assertEquals( + _crypto.ENC_METHOD.aes_256_gcm, + encrypted_secret[self._soledad.secrets.CIPHER_KEY]) self.assertTrue(self._soledad.secrets.LENGTH_KEY in encrypted_secret) self.assertTrue(self._soledad.secrets.SECRET_KEY in encrypted_secret) -- cgit v1.2.3 From 086fc21058314e0a3b13b06af3905ca5c3ee311e Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 1 Dec 2016 09:25:17 -0200 Subject: [test] configure baremetal gitlab ci runner --- .gitlab-ci.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 
d11a4d1e..d93af13d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -25,12 +25,8 @@ tests: benchmark: stage: benchmark - image: leapcode/soledad:latest - services: - - couchdb script: - cd testing - - tox -e benchmark -- --couch-url http://couchdb:5984 + - tox -e benchmark tags: - - docker - benchmark -- cgit v1.2.3 From feb14a1eadb894f16fcfd09ee6d229d6dfb35569 Mon Sep 17 00:00:00 2001 From: drebs Date: Sat, 17 Dec 2016 10:56:22 -0200 Subject: [pkg] use a twisted resource as server entrypoint --- server/pkg/soledad-server | 4 +-- server/src/leap/soledad/server/resource.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 server/src/leap/soledad/server/resource.py diff --git a/server/pkg/soledad-server b/server/pkg/soledad-server index d9dab040..753a260b 100644 --- a/server/pkg/soledad-server +++ b/server/pkg/soledad-server @@ -11,7 +11,7 @@ PATH=/sbin:/bin:/usr/sbin:/usr/bin PIDFILE=/var/run/soledad.pid -OBJ=leap.soledad.server.application.wsgi_application +RESOURCE_CLASS=leap.soledad.server.resource.SoledadResource HTTPS_PORT=2424 CONFDIR=/etc/soledad CERT_PATH="${CONFDIR}/soledad-server.pem" @@ -39,7 +39,7 @@ case "${1}" in --syslog \ --prefix=soledad-server \ web \ - --wsgi=${OBJ} \ + --class=${RESOURCE_CLASS} \ --port=ssl:${HTTPS_PORT}:privateKey=${PRIVKEY_PATH}:certKey=${CERT_PATH}:sslmethod=${SSL_METHOD} echo "." ;; diff --git a/server/src/leap/soledad/server/resource.py b/server/src/leap/soledad/server/resource.py new file mode 100644 index 00000000..dbb91b0a --- /dev/null +++ b/server/src/leap/soledad/server/resource.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# resource.py +# Copyright (C) 2016 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +A twisted resource that serves the Soledad Server. +""" + +from twisted.web.resource import Resource +from twisted.web.wsgi import WSGIResource +from twisted.internet import reactor +from twisted.python import threadpool + +from leap.soledad.server.application import wsgi_application + + +__all__ = ['SoledadResource'] + + +# setup a wsgi resource with its own threadpool +pool = threadpool.ThreadPool() +reactor.callWhenRunning(pool.start) +reactor.addSystemEventTrigger('after', 'shutdown', pool.stop) +wsgi_resource = WSGIResource(reactor, pool, wsgi_application) + + +class SoledadResource(Resource): + """ + This is a dummy twisted resource, used only to allow different entry points + for the Soledad Server. + """ + + def __init__(self): + self.children = {'': wsgi_resource} + + def getChild(self, path, request): + # for now, just "rewind" the path and serve the wsgi resource for all + # requests. In the future, we might look into the request path to + # decide which child resources should serve each request. 
+ request.postpath.insert(0, request.prepath.pop()) + return self.children[''] -- cgit v1.2.3 From e28cd85b37f362a8748861da8eb846a65df39369 Mon Sep 17 00:00:00 2001 From: drebs Date: Sat, 17 Dec 2016 15:05:13 -0200 Subject: [test] add couchdb tag for tests --- .gitlab-ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d93af13d..18522cdb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -22,6 +22,8 @@ tests: script: - cd testing - tox -- --couch-url http://couchdb:5984 + tags: + - couchdb benchmark: stage: benchmark -- cgit v1.2.3 From ea501c30c1c4a7978a4f9253072081c48fd80cb2 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 19 Dec 2016 12:50:24 -0200 Subject: [test] remove benchmark from ci pipeline --- .gitlab-ci.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 18522cdb..ac2ae1f0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,7 +1,6 @@ stages: - code-check - tests - - benchmark # Cache tox envs between builds cache: @@ -24,11 +23,3 @@ tests: - tox -- --couch-url http://couchdb:5984 tags: - couchdb - -benchmark: - stage: benchmark - script: - - cd testing - - tox -e benchmark - tags: - - benchmark -- cgit v1.2.3 From e360a3a75999503cf45bfbbad69970a452cf3d32 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 22 Dec 2016 17:27:57 -0200 Subject: [pkg] update changelog for 0.9.2 --- CHANGELOG.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 12cb56ab..f47749d1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,26 @@ +0.9.2 - 22 December, 2016 ++++++++++++++++++++++++++ + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- use AES 256 GCM mode instead of CTR+HMAC. +- streaming encryption/decryption and data transfer. + +Server +~~~~~~ + +- move server to a twisted resource entrypoint. + +Client +~~~~~~ + +- use twisted http agent in the client. 
+- maintain backwards compatibility with old crypto scheme (AES 256 CTR+HMAC). + No migration for now, only in 0.10. +- remove the encryption/decryption pools, replace for inline streaming crypto. +- use sqlcipher transactions on sync. + 0.9.1 - 27 November, 2016 +++++++++++++++++++++++++ -- cgit v1.2.3