From 55f45b770a57d1c5f54a66a490aeeea7edae0184 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 1 Dec 2015 15:58:57 -0300 Subject: [bug] fire callback after reseting instance vars If we reset the vars after firing the finish callback, other thread can pick up a dirty state on due concurrency. --- client/src/leap/soledad/client/encdecpool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'client') diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index 6d3c11b9..9333578b 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -807,6 +807,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): self._finish() def _finish(self): - self._deferred.callback(None) self._processed_docs = 0 self._last_inserted_idx = 0 + self._deferred.callback(None) -- cgit v1.2.3 From 841e6712ff9ff1ce2b8a5fe92012d89c2aec7077 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 1 Dec 2015 16:06:26 -0300 Subject: [bug] concurrency bug while querying and inserting This line was missing an yield and without it we end up inserting a document that is being retrieved and bad things happen. This is the core fix from yesterday debugging session. During sequential syncs the pool was inserting and querying at the same time and sometimes repeating or failing to delete documents. --- client/src/leap/soledad/client/encdecpool.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'client') diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index 9333578b..a01d3b71 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -70,11 +70,13 @@ class SyncEncryptDecryptPool(object): self._started = False def start(self): + if self.running: + return self._create_pool() self._started = True def stop(self): - if not self._started: + if not self.running: return self._started = False self._destroy_pool() @@ -650,14 +652,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): last_idx = self._last_inserted_idx for doc_id, rev, content, gen, trans_id, encrypted, idx in \ decrypted_docs: - # XXX for some reason, a document might not have been deleted from - # the database. This is a bug. In this point, already - # processed documents should have been removed from the sync - # database and we should not have to skip them here. We need - # to find out why this is happening, fix, and remove the - # skipping below. - if (idx < last_idx + 1): - continue if (idx != last_idx + 1): break insertable.append((doc_id, rev, content, gen, trans_id, idx)) @@ -763,6 +757,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): query = "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,) return self._runOperation(query) + @defer.inlineCallbacks def _collect_async_decryption_results(self): """ Collect the results of the asynchronous doc decryptions and re-raise @@ -773,7 +768,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): async_results = self._async_results[:] for res in async_results: if res.ready(): - self._decrypt_doc_cb(res.get()) # might raise an exception! + yield self._decrypt_doc_cb(res.get()) # might raise an exception! self._async_results.remove(res) @defer.inlineCallbacks @@ -796,7 +791,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): if processed < pending: yield self._async_decrypt_received_docs() - self._collect_async_decryption_results() + yield self._collect_async_decryption_results() docs = yield self._process_decrypted_docs() yield self._delete_processed_docs(docs) # recurse -- cgit v1.2.3 From 3f327c4f472f43d281e3bd7be67aaa9ce3f7d822 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 1 Dec 2015 16:38:02 -0300 Subject: [style] fix pep8 --- client/src/leap/soledad/client/encdecpool.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'client') diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index a01d3b71..0954c1df 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -768,7 +768,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): async_results = self._async_results[:] for res in async_results: if res.ready(): - yield self._decrypt_doc_cb(res.get()) # might raise an exception! + # XXX: might raise an exception! + yield self._decrypt_doc_cb(res.get()) self._async_results.remove(res) @defer.inlineCallbacks -- cgit v1.2.3 From 6fd543b9fd9679f4978aeedee32eeece5593acc3 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 13 Nov 2015 22:10:18 -0300 Subject: [feat] Adds support to batching limited by size u1db provides batching by default. Current Soledad HTTPS Sync Target was stuck at 1 doc per request. This commit adds batching capability, limiting the size to a predefined value. Default limit size: 500kB --- client/src/leap/soledad/client/http_target/send.py | 46 +++++++++++++++------- .../src/leap/soledad/client/http_target/support.py | 13 ++++-- 2 files changed, 41 insertions(+), 18 deletions(-) (limited to 'client') diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index 80483f0d..c1252c13 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -29,6 +29,8 @@ class HTTPDocSender(object): They need to be encrypted and metadata prepared before sending. """ + MAX_BATCH_SIZE = 500 * 1000 # 500kB by default + @defer.inlineCallbacks def _send_docs(self, docs_by_generation, last_known_generation, last_known_trans_id, sync_id): @@ -43,25 +45,41 @@ class HTTPDocSender(object): sync_id=sync_id, ensure=self._ensure_callback is not None) total = len(docs_by_generation) - for idx, entry in enumerate(docs_by_generation, 1): - yield self._prepare_one_doc(entry, body, idx, total) - result = yield self._http_request( - self._url, - method='POST', - body=body.pop(1), - content_type='application/x-soledad-sync-put') - if self._defer_encryption: - self._delete_sent(idx, docs_by_generation) - _emit_send_status(idx, total) + while body.consumed < total: + result = yield self._send_batch(total, body, docs_by_generation) response_dict = json.loads(result)[0] gen_after_send = response_dict['new_generation'] trans_id_after_send = response_dict['new_transaction_id'] defer.returnValue([gen_after_send, trans_id_after_send]) - def _delete_sent(self, idx, docs_by_generation): - doc = docs_by_generation[idx - 1][0] - self._sync_enc_pool.delete_encrypted_doc( - doc.doc_id, doc.rev) + def _delete_sent(self, docs): + for doc, gen, trans_id in docs: + self._sync_enc_pool.delete_encrypted_doc( + doc.doc_id, doc.rev) + + @defer.inlineCallbacks + def _send_batch(self, total, body, docs): + sent = [] + missing = total - body.consumed + for i in xrange(1, missing + 1): + if body.pending_size > self.MAX_BATCH_SIZE: + break + idx = body.consumed + i + entry = docs[idx - 1] + sent.append(entry) + yield self._prepare_one_doc(entry, body, idx, total) + result = yield self._send_request(body.pop()) + if self._defer_encryption: + self._delete_sent(sent) + _emit_send_status(body.consumed, total) + defer.returnValue(result) + + def _send_request(self, body): + return self._http_request( + self._url, + method='POST', + body=body, + content_type='application/x-soledad-sync-put') @defer.inlineCallbacks def _prepare_one_doc(self, entry, body, idx, total): diff --git a/client/src/leap/soledad/client/http_target/support.py b/client/src/leap/soledad/client/http_target/support.py index 44cd7089..2625744c 100644 --- a/client/src/leap/soledad/client/http_target/support.py +++ b/client/src/leap/soledad/client/http_target/support.py @@ -152,6 +152,8 @@ class RequestBody(object): """ self.headers = header_dict self.entries = [] + self.consumed = 0 + self.pending_size = 0 def insert_info(self, **entry_dict): """ @@ -165,11 +167,11 @@ class RequestBody(object): """ entry = json.dumps(entry_dict) self.entries.append(entry) - return len(entry) + self.pending_size += len(entry) - def pop(self, number=1): + def pop(self): """ - Removes an amount of entries and returns it formatted and ready + Removes all entries and returns it formatted and ready to be sent. :param number: number of entries to pop and format @@ -178,7 +180,10 @@ class RequestBody(object): :return: formatted body ready to be sent :rtype: str """ - entries = [self.entries.pop(0) for i in xrange(number)] + entries = self.entries[:] + self.entries = [] + self.pending_size = 0 + self.consumed += len(entries) return self.entries_to_str(entries) def __str__(self): -- cgit v1.2.3 From 577abee147c98592753bcdc68e1693d1f4ab5a08 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 13 Nov 2015 23:02:28 -0300 Subject: [feat] prepare server to handle batches Created two methods on the backend to start and finish a batch. A dict of callbacks is available to defer actions for the last document, allowing temporary (changing often) metadata to be recorded only once. Using those methods we will also be able to put all docs in one go on the CouchDatabase implementation, but that is another step. --- client/changes/feat_send_batch | 1 + 1 file changed, 1 insertion(+) create mode 100644 client/changes/feat_send_batch (limited to 'client') diff --git a/client/changes/feat_send_batch b/client/changes/feat_send_batch new file mode 100644 index 00000000..fbfce519 --- /dev/null +++ b/client/changes/feat_send_batch @@ -0,0 +1 @@ +o Client will now send documents at a limited size batch due to changes on SyncTarget. The default limit is 500kB. -- cgit v1.2.3 From 7208d8bc5e5f23d0773533b15763f64d236489b4 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 3 Dec 2015 19:34:56 -0300 Subject: [feat] set default to False on batching for now All batching code has no effect by default with this commit. Since we know that this is a dangerous new feature we will enable them only on our test servers and check them manually before setting it as default or adding more configuration features. Use SyncTarget and server conf file to enable it for testing. --- client/src/leap/soledad/client/http_target/send.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'client') diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index c1252c13..e8abf35b 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -29,7 +29,7 @@ class HTTPDocSender(object): They need to be encrypted and metadata prepared before sending. """ - MAX_BATCH_SIZE = 500 * 1000 # 500kB by default + MAX_BATCH_SIZE = 0 # disabled by now, this is being tested yet @defer.inlineCallbacks def _send_docs(self, docs_by_generation, last_known_generation, -- cgit v1.2.3