author     Victor Shyba <victor.shyba@gmail.com>    2016-02-04 13:33:49 -0300
committer  Kali Kaneko <kali@leap.se>               2016-06-06 19:58:50 -0400
commit     498e9e1353700b61950ef87c007e6c0a84fbe201 (patch)
tree       75a49f25cc651bfc61fc89dfdf65475c4e59ce83
parent     50f90874815a85eb82e0fba2d953b680ee9eeb53 (diff)
[refactor] encdecpool queries and testing
This commit adds tests for doc ordering and for encdecpool control (start/stop). It also optimizes the pool by deleting processed documents in batch and by checking in memory for a contiguous index sequence before asking the local staging database for documents.
-rw-r--r--  client/changes/next-changelog.rst                           1
-rw-r--r--  client/src/leap/soledad/client/encdecpool.py              183
-rw-r--r--  client/src/leap/soledad/client/http_target/fetch.py         9
-rw-r--r--  client/src/leap/soledad/client/sqlcipher.py                 2
-rw-r--r--  common/src/leap/soledad/common/tests/test_encdecpool.py    47
-rw-r--r--  common/src/leap/soledad/common/tests/test_server.py         4
6 files changed, 122 insertions, 124 deletions
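
The commit message above names two optimizations: deleting processed documents in a single batched statement, and checking in memory for a contiguous run of document indexes before querying the local staging database. Below is a minimal standalone sketch of both ideas against plain sqlite3, with illustrative names only (this is not Soledad's actual API):

    import sqlite3

    def delete_received_docs(conn, table, doc_ids):
        # one DELETE with as many placeholders as ids, instead of one query per doc
        placeholders = ', '.join('?' for _ in doc_ids)
        query = "DELETE FROM %s WHERE doc_id IN (%s)" % (table, placeholders)
        conn.execute(query, list(doc_ids))
        conn.commit()

    def insertable_sequence(last_inserted_idx, decrypted_indexes):
        # walk forward from the last inserted index while the next index has
        # already been decrypted in memory; stop at the first gap
        sequence = []
        next_index = last_inserted_idx + 1
        while next_index in decrypted_indexes:
            sequence.append(next_index)
            next_index += 1
        return sequence

    if __name__ == '__main__':
        conn = sqlite3.connect(':memory:')
        conn.execute("CREATE TABLE docs (doc_id TEXT)")
        conn.executemany("INSERT INTO docs VALUES (?)", [('a',), ('b',), ('c',)])
        delete_received_docs(conn, 'docs', ['a', 'b'])
        print(insertable_sequence(0, {1, 2, 3, 5}))  # -> [1, 2, 3]

Only when such a sequence check returns a non-empty list does it make sense to hit the staging table, which mirrors what _get_insertable_docs does in the encdecpool.py diff below.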
diff --git a/client/changes/next-changelog.rst b/client/changes/next-changelog.rst
index a696fe10..c676625f 100644
--- a/client/changes/next-changelog.rst
+++ b/client/changes/next-changelog.rst
@@ -26,6 +26,7 @@ Misc
- Refactor bootstrap to remove shared db lock.
- `#1236 <https://leap.se/code/issues/1236>`_: Description of the new feature corresponding with issue #1236.
- Some change without issue number.
+- Removed multiprocessing from encdecpool with some extra refactoring.
Known Issues
~~~~~~~~~~~~
diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py
index 576b8b2c..218ebfa9 100644
--- a/client/src/leap/soledad/client/encdecpool.py
+++ b/client/src/leap/soledad/client/encdecpool.py
@@ -25,7 +25,7 @@ during synchronization.
import json
import logging
-from twisted.internet import reactor
+from twisted.internet.task import LoopingCall
from twisted.internet import threads
from twisted.internet import defer
from twisted.python import log
@@ -167,26 +167,14 @@ class SyncEncrypterPool(SyncEncryptDecryptPool):
SyncEncryptDecryptPool.stop(self)
- def enqueue_doc_for_encryption(self, doc):
+ def encrypt_doc(self, doc):
"""
- Enqueue a document for encryption.
+ Encrypt the document asynchronously, then insert it into the
+ local staging database.
:param doc: The document to be encrypted.
:type doc: SoledadDocument
"""
- self._encrypt_doc(doc)
-
- def _encrypt_doc(self, doc):
- """
- Symmetrically encrypt a document.
-
- :param doc: The document with contents to be encrypted.
- :type doc: SoledadDocument
-
- :param workers: Whether to defer the decryption to the multiprocess
- pool of workers. Useful for debugging purposes.
- :type workers: bool
- """
soledad_assert(self._crypto is not None, "need a crypto object")
docstr = doc.get_json()
key = self._crypto.doc_passphrase(doc.doc_id)
@@ -276,8 +264,8 @@ def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret,
:type doc_id: str
:param doc_rev: The document revision.
:type doc_rev: str
- :param content: The encrypted content of the document.
- :type content: str
+ :param content: The encrypted content of the document as JSON dict.
+ :type content: dict
:param gen: The generation corresponding to the modification of that
document.
:type gen: int
@@ -294,7 +282,6 @@ def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret,
:return: A tuple containing the doc id, revision and encrypted content.
:rtype: tuple(str, str, str)
"""
- content = json.loads(content) if type(content) == str else content
decrypted_content = decrypt_doc_dict(content, doc_id, doc_rev, key, secret)
return doc_id, doc_rev, decrypted_content, gen, trans_id, idx
@@ -356,14 +343,12 @@ class SyncDecrypterPool(SyncEncryptDecryptPool):
self._processed_docs = 0
self._last_inserted_idx = 0
- # a list that holds the asynchronous decryption results so they can be
- # collected when they are ready
- self._async_results = []
-
# initialize db and make sure any database operation happens after
# db initialization
self._deferred_init = self._init_db()
self._wait_init_db('_runOperation', '_runQuery')
+ self._loop = LoopingCall(self._decrypt_and_recurse)
+ self._decrypted_docs_indexes = set()
def _wait_init_db(self, *methods):
"""
@@ -408,11 +393,13 @@ class SyncDecrypterPool(SyncEncryptDecryptPool):
SyncEncryptDecryptPool.start(self)
self._docs_to_process = docs_to_process
self._deferred = defer.Deferred()
- reactor.callWhenRunning(self._launch_decrypt_and_recurse)
+ self._loop.start(self.DECRYPT_LOOP_PERIOD)
- def _launch_decrypt_and_recurse(self):
- d = self._decrypt_and_recurse()
- d.addErrback(self._errback)
+ def stop(self):
+ if self._loop.running:
+ self._loop.stop()
+ self._finish()
+ SyncEncryptDecryptPool.stop(self)
def _errback(self, failure):
log.err(failure)
@@ -431,8 +418,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool):
def insert_encrypted_received_doc(
self, doc_id, doc_rev, content, gen, trans_id, idx):
"""
- Insert a received message with encrypted content, to be decrypted later
- on.
+ Decrypt a received document and insert it into the local staging
+ area, to be processed later on.
:param doc_id: The document ID.
:type doc_id: str
@@ -447,11 +434,19 @@ class SyncDecrypterPool(SyncEncryptDecryptPool):
:param idx: The index of this document in the current sync process.
:type idx: int
- :return: A deferred that will fire when the operation in the database
- has finished.
+ :return: A deferred that will fire after the decrypted document has
+ been inserted in the sync db.
:rtype: twisted.internet.defer.Deferred
"""
- return self._async_decrypt_doc(doc_id, doc_rev, content, gen, trans_id, idx)
+ soledad_assert(self._crypto is not None, "need a crypto object")
+
+ key = self._crypto.doc_passphrase(doc_id)
+ secret = self._crypto.secret
+ args = doc_id, doc_rev, content, gen, trans_id, key, secret, idx
+ # decrypt the document inline (no longer deferred to a thread)
+ doc = decrypt_doc_task(*args)
+ # callback will insert it for later processing
+ return self._decrypt_doc_cb(doc)
def insert_received_doc(
self, doc_id, doc_rev, content, gen, trans_id, idx):
@@ -481,52 +476,26 @@ class SyncDecrypterPool(SyncEncryptDecryptPool):
content = json.dumps(content)
query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \
% self.TABLE_NAME
- return self._runOperation(
+ d = self._runOperation(
query, (doc_id, doc_rev, content, gen, trans_id, 0, idx))
+ d.addCallback(lambda _: self._decrypted_docs_indexes.add(idx))
+ return d
- def _delete_received_doc(self, doc_id):
+ def _delete_received_docs(self, doc_ids):
"""
- Delete a received doc after it was inserted into the local db.
+ Delete a list of received docs after they have been inserted into the db.
- :param doc_id: Document ID.
- :type doc_id: str
+ :param doc_ids: A list of document IDs.
+ :type doc_ids: list
:return: A deferred that will fire when the operation in the database
has finished.
:rtype: twisted.internet.defer.Deferred
"""
- query = "DELETE FROM '%s' WHERE doc_id=?" \
- % self.TABLE_NAME
- return self._runOperation(query, (doc_id,))
-
- def _async_decrypt_doc(self, doc_id, rev, content, gen, trans_id, idx):
- """
- Dispatch an asynchronous document decrypting routine and save the
- result object.
-
- :param doc_id: The ID for the document with contents to be encrypted.
- :type doc: str
- :param rev: The revision of the document.
- :type rev: str
- :param content: The serialized content of the document.
- :type content: str
- :param gen: The generation corresponding to the modification of that
- document.
- :type gen: int
- :param trans_id: The transaction id corresponding to the modification
- of that document.
- :type trans_id: str
- :param idx: The index of this document in the current sync process.
- :type idx: int
- """
- soledad_assert(self._crypto is not None, "need a crypto object")
-
- key = self._crypto.doc_passphrase(doc_id)
- secret = self._crypto.secret
- args = doc_id, rev, content, gen, trans_id, key, secret, idx
- # decrypt asynchronously
- d = threads.deferToThread(decrypt_doc_task, *args)
- d.addCallback(self._decrypt_doc_cb)
+ placeholders = ', '.join('?' for _ in doc_ids)
+ query = "DELETE FROM '%s' WHERE doc_id in (%s)" \
+ % (self.TABLE_NAME, placeholders)
+ return self._runOperation(query, (doc_ids))
def _decrypt_doc_cb(self, result):
"""
@@ -547,7 +516,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool):
return self.insert_received_doc(
doc_id, rev, content, gen, trans_id, idx)
- def _get_docs(self, encrypted=None, order_by='idx', order='ASC'):
+ def _get_docs(self, encrypted=None, order_by='idx', order='ASC',
+ sequence=None):
"""
Get documents from the received docs table in the sync db.
@@ -565,8 +535,13 @@ class SyncDecrypterPool(SyncEncryptDecryptPool):
"""
query = "SELECT doc_id, rev, content, gen, trans_id, encrypted, " \
"idx FROM %s" % self.TABLE_NAME
- if encrypted is not None:
- query += " WHERE encrypted = %d" % int(encrypted)
+ if encrypted or sequence:
+ query += " WHERE"
+ if encrypted:
+ query += " encrypted = %d" % int(encrypted)
+ if sequence:
+ query += " idx in (" + ', '.join(sequence) + ")"
+
query += " ORDER BY %s %s" % (order_by, order)
return self._runQuery(query)
@@ -579,18 +554,19 @@ class SyncDecrypterPool(SyncEncryptDecryptPool):
documents.
:rtype: twisted.internet.defer.Deferred
"""
- # here, we fetch the list of decrypted documents and compare with the
- # index of the last succesfully processed document.
- decrypted_docs = yield self._get_docs(encrypted=False)
- insertable = []
- last_idx = self._last_inserted_idx
- for doc_id, rev, content, gen, trans_id, encrypted, idx in \
- decrypted_docs:
- if (idx != last_idx + 1):
- break
- insertable.append((doc_id, rev, content, gen, trans_id, idx))
- last_idx += 1
- defer.returnValue(insertable)
+ # Here, check in memory which insertable indexes form a contiguous
+ # sequence starting right after the last inserted index
+ sequence = []
+ insertable_docs = []
+ next_index = self._last_inserted_idx + 1
+ while next_index in self._decrypted_docs_indexes:
+ sequence.append(str(next_index))
+ next_index += 1
+ # Then fetch all the ones ready for insertion.
+ if sequence:
+ insertable_docs = yield self._get_docs(encrypted=False,
+ sequence=sequence)
+ defer.returnValue(insertable_docs)
@defer.inlineCallbacks
def _process_decrypted_docs(self):
@@ -603,36 +579,18 @@ class SyncDecrypterPool(SyncEncryptDecryptPool):
:rtype: twisted.internet.defer.Deferred
"""
insertable = yield self._get_insertable_docs()
+ processed_docs_ids = []
for doc_fields in insertable:
method = self._insert_decrypted_local_doc
# FIXME: This is used only because SQLCipherU1DBSync is synchronous
# When adbapi is used there is no need for an external thread
# Without this the reactor can freeze and fail docs download
yield threads.deferToThread(method, *doc_fields)
- defer.returnValue(insertable)
-
- def _delete_processed_docs(self, inserted):
- """
- Delete from the sync db documents that have been processed.
-
- :param inserted: List of documents inserted in the previous process
- step.
- :type inserted: list
-
- :return: A list of deferreds that will fire when each operation in the
- database has finished.
- :rtype: twisted.internet.defer.DeferredList
- """
- deferreds = []
- for doc_id, doc_rev, _, _, _, _ in inserted:
- deferreds.append(
- self._delete_received_doc(doc_id))
- if not deferreds:
- return defer.succeed(None)
- return defer.gatherResults(deferreds)
+ processed_docs_ids.append(doc_fields[0])
+ yield self._delete_received_docs(processed_docs_ids)
def _insert_decrypted_local_doc(self, doc_id, doc_rev, content,
- gen, trans_id, idx):
+ gen, trans_id, encrypted, idx):
"""
Insert the decrypted document into the local replica.
@@ -693,20 +651,19 @@ class SyncDecrypterPool(SyncEncryptDecryptPool):
delete operations have been executed.
:rtype: twisted.internet.defer.Deferred
"""
+ if not self.running:
+ defer.returnValue(None)
processed = self._processed_docs
pending = self._docs_to_process
if processed < pending:
- docs = yield self._process_decrypted_docs()
- yield self._delete_processed_docs(docs)
- # recurse
- self._delayed_call = reactor.callLater(
- self.DECRYPT_LOOP_PERIOD,
- self._launch_decrypt_and_recurse)
+ yield self._process_decrypted_docs()
else:
self._finish()
def _finish(self):
self._processed_docs = 0
self._last_inserted_idx = 0
- self._deferred.callback(None)
+ self._decrypted_docs_indexes = set()
+ if not self._deferred.called:
+ self._deferred.callback(None)
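
The encdecpool.py hunks above replace the reactor.callWhenRunning / reactor.callLater recursion with a twisted LoopingCall that periodically drives the decryption step, plus an explicit stop() that halts the loop and fires the completion deferred. The following is a minimal sketch of that start/stop pattern, using placeholder names rather than the real pool:

    from twisted.internet import defer
    from twisted.internet.task import LoopingCall

    class PeriodicWorker(object):

        PERIOD = 0.5  # seconds between ticks; the pool uses DECRYPT_LOOP_PERIOD

        def __init__(self):
            self._loop = LoopingCall(self._tick)
            self.deferred = defer.Deferred()

        def start(self):
            # runs _tick immediately, then again every PERIOD seconds
            self._loop.start(self.PERIOD)

        def stop(self):
            if self._loop.running:
                self._loop.stop()
            if not self.deferred.called:
                self.deferred.callback(None)

        def _tick(self):
            # one unit of work per iteration
            pass

Guarding the callback with deferred.called mirrors the guard added in _finish(), so that stopping a pool whose deferred has already fired does not raise AlreadyCalledError.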
diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py
index fda90909..9f7a4193 100644
--- a/client/src/leap/soledad/client/http_target/fetch.py
+++ b/client/src/leap/soledad/client/http_target/fetch.py
@@ -19,7 +19,6 @@ import json
from u1db import errors
from u1db.remote import utils
from twisted.internet import defer
-from twisted.internet import threads
from leap.soledad.common.document import SoledadDocument
from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS
from leap.soledad.client.events import emit_async
@@ -76,7 +75,7 @@ class HTTPDocFetcher(object):
last_known_generation, last_known_trans_id,
sync_id, 0)
self._received_docs = 0
- number_of_changes, ngen, ntrans = yield self._insert_received_doc(doc, 1, 1)
+ number_of_changes, ngen, ntrans = self._insert_received_doc(doc, 1, 1)
if ngen:
new_generation = ngen
@@ -138,7 +137,6 @@ class HTTPDocFetcher(object):
body=str(body),
content_type='application/x-soledad-sync-get')
- @defer.inlineCallbacks
def _insert_received_doc(self, response, idx, total):
"""
Insert a received document into the local replica.
@@ -152,8 +150,7 @@ class HTTPDocFetcher(object):
"""
new_generation, new_transaction_id, number_of_changes, doc_id, \
rev, content, gen, trans_id = \
- (yield threads.deferToThread(self._parse_received_doc_response,
- response))
+ self._parse_received_doc_response(response)
if doc_id is not None:
# decrypt incoming document and insert into local database
# -------------------------------------------------------------
@@ -188,7 +185,7 @@ class HTTPDocFetcher(object):
self._received_docs += 1
user_data = {'uuid': self.uuid, 'userid': self.userid}
_emit_receive_status(user_data, self._received_docs, total)
- defer.returnValue((number_of_changes, new_generation, new_transaction_id))
+ return number_of_changes, new_generation, new_transaction_id
def _parse_received_doc_response(self, response):
"""
diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py
index 22ddc87d..cdc7255c 100644
--- a/client/src/leap/soledad/client/sqlcipher.py
+++ b/client/src/leap/soledad/client/sqlcipher.py
@@ -278,7 +278,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(self, doc)
if self.defer_encryption:
# TODO move to api?
- self._sync_enc_pool.enqueue_doc_for_encryption(doc)
+ self._sync_enc_pool.encrypt_doc(doc)
return doc_rev
#
diff --git a/common/src/leap/soledad/common/tests/test_encdecpool.py b/common/src/leap/soledad/common/tests/test_encdecpool.py
index 694eb7ad..6676c298 100644
--- a/common/src/leap/soledad/common/tests/test_encdecpool.py
+++ b/common/src/leap/soledad/common/tests/test_encdecpool.py
@@ -57,13 +57,13 @@ class TestSyncEncrypterPool(BaseSoledadTest):
self.assertIsNone(doc)
@inlineCallbacks
- def test_enqueue_doc_for_encryption_and_get_encrypted_doc(self):
+ def test_encrypt_doc_and_get_it_back(self):
"""
Test that the pool actually encrypts a document added to the queue.
"""
doc = SoledadDocument(
doc_id=DOC_ID, rev=DOC_REV, json=json.dumps(DOC_CONTENT))
- self._pool.enqueue_doc_for_encryption(doc)
+ self._pool.encrypt_doc(doc)
# exhaustively attempt to get the encrypted document
encrypted = None
@@ -117,6 +117,16 @@ class TestSyncDecrypterPool(BaseSoledadTest):
self._pool.deferred.addCallback(_assert_doc_was_inserted)
return self._pool.deferred
+ def test_looping_control(self):
+ """
+ Start and stop cleanly.
+ """
+ self._pool.start(10)
+ self.assertTrue(self._pool.running)
+ self._pool.stop()
+ self.assertFalse(self._pool.running)
+ self.assertTrue(self._pool.deferred.called)
+
def test_insert_received_doc_many(self):
"""
Test that many documents added to the pool are inserted using the
@@ -179,6 +189,38 @@ class TestSyncDecrypterPool(BaseSoledadTest):
_assert_doc_was_decrypted_and_inserted)
return self._pool.deferred
+ @inlineCallbacks
+ def test_processing_order(self):
+ """
+ This test ensures that processing of documents only occurs if there is
+ a sequence in place.
+ """
+ crypto = self._soledad._crypto
+ docs = []
+ for i in xrange(1, 10):
+ i = str(i)
+ doc = SoledadDocument(
+ doc_id=DOC_ID + i, rev=DOC_REV + i,
+ json=json.dumps(DOC_CONTENT))
+ encrypted_content = json.loads(crypto.encrypt_doc(doc))
+ docs.append((doc, encrypted_content))
+
+ # insert the encrypted document in the pool
+ self._pool.start(10) # pool is expecting to process 10 docs
+ # the first three arrive, forming a sequence
+ for i, (doc, encrypted_content) in enumerate(docs[:3]):
+ gen = idx = i + 1
+ yield self._pool.insert_encrypted_received_doc(
+ doc.doc_id, doc.rev, encrypted_content, gen, "trans_id", idx)
+ # last one arrives alone, so it can't be processed
+ doc, encrypted_content = docs[-1]
+ yield self._pool.insert_encrypted_received_doc(
+ doc.doc_id, doc.rev, encrypted_content, 10, "trans_id", 10)
+
+ yield self._pool._decrypt_and_recurse()
+
+ self.assertEqual(3, self._pool._processed_docs)
+
def test_insert_encrypted_received_doc_many(self, many=100):
"""
Test that many encrypted documents added to the pool are decrypted and
@@ -241,3 +283,4 @@ class TestSyncDecrypterPool(BaseSoledadTest):
decrypted_docs = yield self._pool._get_docs(encrypted=False)
# check that decrypted docs staging is clean
self.assertEquals([], decrypted_docs)
+ self._pool.stop()
diff --git a/common/src/leap/soledad/common/tests/test_server.py b/common/src/leap/soledad/common/tests/test_server.py
index bf6c1515..ba7edfe3 100644
--- a/common/src/leap/soledad/common/tests/test_server.py
+++ b/common/src/leap/soledad/common/tests/test_server.py
@@ -481,8 +481,8 @@ class EncryptedSyncTestCase(
Test if Soledad can sync very large files.
"""
self.skipTest(
- "Work in progress. For reference, see: "
- "https://leap.se/code/issues/7370")
+ "Work in progress. For reference, see: "
+ "https://leap.se/code/issues/7370")
length = 100 * (10 ** 6) # 100 MB
return self._test_encrypted_sym_sync(doc_size=length, number_of_docs=1)