152 files changed, 9305 insertions, 2854 deletions
@@ -12,6 +12,8 @@ MANIFEST *.log *.*~ *.csv +.cache +.tox .eggs _trial_temp .DS_Store diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..647cc43c --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,3 @@ +trial: + script: + - cd testing; tox diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ed60b5ec..24c20641 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,40 @@ +0.8.1 - 14 July, 2016 ++++++++++++++++++++++ + +Client +====== + +Features +~~~~~~~~ +- Add recovery document format version for future migrations. +- Use DeferredLock instead of its locking cousin. +- Use DeferredSemaphore instead of its locking cousin. + +Bugfixes +~~~~~~~~ +- `#8180 <https://leap.se/code/issues/8180>`_: Initialize OpenSSL context just once. +- Remove document content conversion to unicode. Users of API are responsible + for only passing valid JSON to Soledad for storage. + +Misc +~~~~ +- Add ability to get information about sync phases for profiling purposes. +- Add script for setting up develop environment. +- Refactor bootstrap to remove shared db lock. +- Removed multiprocessing from encdecpool with some extra refactoring. +- Remove user_id argument from Soledad init. + +Common +====== + +Features +~~~~~~~~ +- Embed l2db, forking u1db. + +Misc +~~~~ +- Toxify tests. + 0.8.0 - 18 Apr, 2016 ++++++++++++++++++++ @@ -47,11 +47,10 @@ Compatibility Tests ----- -Client and server tests are both included in leap.soledad.common. If you want -to run tests in development mode you must do the following:: +Soledad tests use tox, and they live in the testing folder:: - scripts/develop_mode.sh - scripts/run_tests.sh + cd testing + tox Note that to run CouchDB tests, be sure you have `CouchDB`_ installed on your system. diff --git a/client/changes/next-changelog.rst b/client/changes/next-changelog.rst index bdc9f893..6c1c2a49 100644 --- a/client/changes/next-changelog.rst +++ b/client/changes/next-changelog.rst @@ -1,4 +1,4 @@ -0.8.1 - ... +0.8.2 - ... ++++++++++++++++++++ Please add lines to this file, they will be moved to the CHANGELOG.rst during @@ -11,17 +11,14 @@ I've added a new category `Misc` so we can track doc/style/packaging stuff. Features ~~~~~~~~ - `#1234 <https://leap.se/code/issues/1234>`_: Description of the new feature corresponding with issue #1234. -- New feature without related issue number. Bugfixes ~~~~~~~~ - `#1235 <https://leap.se/code/issues/1235>`_: Description for the fixed stuff corresponding with issue #1235. -- Bugfix without related issue number. Misc ~~~~ - `#1236 <https://leap.se/code/issues/1236>`_: Description of the new feature corresponding with issue #1236. -- Some change without issue number. 
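The "DeferredLock/DeferredSemaphore instead of its locking cousin" bullets in the changelog above trade blocking threading primitives for Twisted's deferred-aware ones. A minimal, self-contained sketch of the pattern (illustrative only, not code from this commit)::

    from __future__ import print_function

    from twisted.internet import defer, reactor

    lock = defer.DeferredLock()

    def guarded():
        # run() acquires the lock, calls the function, and releases the
        # lock when the returned deferred fires -- without ever blocking
        # the reactor thread the way threading.Lock would.
        return lock.run(lambda: 'done')

    guarded().addCallback(print)
    reactor.callLater(0.1, reactor.stop)
    reactor.run()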
Known Issues ~~~~~~~~~~~~ diff --git a/client/pkg/generate_wheels.sh b/client/pkg/generate_wheels.sh index e29c327e..a13e2c7a 100755 --- a/client/pkg/generate_wheels.sh +++ b/client/pkg/generate_wheels.sh @@ -7,7 +7,7 @@ if [ "$WHEELHOUSE" = "" ]; then fi pip wheel --wheel-dir $WHEELHOUSE pip -pip wheel --wheel-dir $WHEELHOUSE --allow-external u1db --allow-unverified u1db --allow-external dirspec --allow-unverified dirspec -r pkg/requirements.pip +pip wheel --wheel-dir $WHEELHOUSE -r pkg/requirements.pip if [ -f pkg/requirements-testing.pip ]; then pip wheel --wheel-dir $WHEELHOUSE -r pkg/requirements-testing.pip fi diff --git a/client/pkg/pip_install_requirements.sh b/client/pkg/pip_install_requirements.sh index d0479365..f4b5f67a 100755 --- a/client/pkg/pip_install_requirements.sh +++ b/client/pkg/pip_install_requirements.sh @@ -4,7 +4,7 @@ # Use at your own risk. # See $usage for help -insecure_packages="u1db dirspec" +insecure_packages="" leap_wheelhouse=https://lizard.leap.se/wheels show_help() { @@ -80,5 +80,5 @@ insecure_flags=`return_insecure_flags` packages=`return_packages` pip install -U wheel -pip install $install_options pip +pip install -U pip pip install $install_options $insecure_flags $packages diff --git a/client/pkg/requirements-latest.pip b/client/pkg/requirements-latest.pip index a629aa57..46a7ccba 100644 --- a/client/pkg/requirements-latest.pip +++ b/client/pkg/requirements-latest.pip @@ -1,8 +1,5 @@ --index-url https://pypi.python.org/simple/ ---allow-external u1db --allow-unverified u1db ---allow-external dirspec --allow-unverified dirspec - -e 'git+https://github.com/pixelated-project/leap_pycommon.git@develop#egg=leap.common' -e '../common' -e . diff --git a/client/pkg/requirements.pip b/client/pkg/requirements.pip index 2f658d76..2ae844e1 100644 --- a/client/pkg/requirements.pip +++ b/client/pkg/requirements.pip @@ -1,11 +1,4 @@ pysqlcipher>2.6.3 -u1db scrypt -cchardet zope.proxy twisted - -# XXX -- fix me! -# oauth is not strictly needed by us, but we need it until u1db adds it to its -# release as a dep. -oauth diff --git a/client/setup.py b/client/setup.py index 4480e247..90986dde 100644 --- a/client/setup.py +++ b/client/setup.py @@ -68,14 +68,20 @@ class freeze_debianver(Command): # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. -version_version = '{version}' -full_revisionid = '{full_revisionid}' -""" - templatefun = r""" - -def get_versions(default={}, verbose=False): - return {'version': version_version, - 'full-revisionid': full_revisionid} +import json +import sys + +version_json = ''' +{ + "dirty": false, + "error": null, + "full-revisionid": "FULL_REVISIONID", + "version": "VERSION_STRING" +} +''' # END VERSION_JSON + +def get_versions(): + return json.loads(version_json) """ def initialize_options(self): @@ -90,9 +96,9 @@ def get_versions(default={}, verbose=False): if proceed != "y": print("He. You scared. 
Aborting.") return - subst_template = self.template.format( - version=VERSION_SHORT, - full_revisionid=VERSION_REVISION) + self.templatefun + subst_template = self.template.replace( + 'VERSION_STRING', VERSION_SHORT).replace( + 'FULL_REVISIONID', VERSION_REVISION) versioneer_cfg = versioneer.get_config_from_root('.') with open(versioneer_cfg.versionfile_source, 'w') as f: f.write(subst_template) diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py index 77822247..ef0f9066 100644 --- a/client/src/leap/soledad/client/adbapi.py +++ b/client/src/leap/soledad/client/adbapi.py @@ -24,13 +24,12 @@ import sys import logging from functools import partial -from threading import BoundedSemaphore from twisted.enterprise import adbapi +from twisted.internet.defer import DeferredSemaphore from twisted.python import log from zope.proxy import ProxyBase, setProxiedObject -from pysqlcipher.dbapi2 import OperationalError -from pysqlcipher.dbapi2 import DatabaseError +from pysqlcipher import dbapi2 from leap.soledad.common.errors import DatabaseAccessError @@ -49,7 +48,7 @@ if DEBUG_SQL: How long the SQLCipher connection should wait for the lock to go away until raising an exception. """ -SQLCIPHER_CONNECTION_TIMEOUT = 10 +SQLCIPHER_CONNECTION_TIMEOUT = 5 """ How many times a SQLCipher query should be retried in case of timeout. @@ -79,9 +78,11 @@ def getConnectionPool(opts, openfun=None, driver="pysqlcipher", if openfun is None and driver == "pysqlcipher": openfun = partial(set_init_pragmas, opts=opts) return U1DBConnectionPool( - "%s.dbapi2" % driver, opts=opts, sync_enc_pool=sync_enc_pool, - database=opts.path, check_same_thread=False, cp_openfun=openfun, - timeout=SQLCIPHER_CONNECTION_TIMEOUT) + opts, sync_enc_pool, + # the following params are relayed "as is" to twisted's + # ConnectionPool. + "%s.dbapi2" % driver, opts.path, timeout=SQLCIPHER_CONNECTION_TIMEOUT, + check_same_thread=False, cp_openfun=openfun) class U1DBConnection(adbapi.Connection): @@ -105,8 +106,10 @@ class U1DBConnection(adbapi.Connection): self._sync_enc_pool = sync_enc_pool try: adbapi.Connection.__init__(self, pool) - except DatabaseError: - raise DatabaseAccessError('Could not open sqlcipher database') + except dbapi2.DatabaseError as e: + raise DatabaseAccessError( + 'Error initializing connection to sqlcipher database: %s' + % str(e)) def reconnect(self): """ @@ -165,17 +168,17 @@ class U1DBConnectionPool(adbapi.ConnectionPool): connectionFactory = U1DBConnection transactionFactory = U1DBTransaction - def __init__(self, *args, **kwargs): + def __init__(self, opts, sync_enc_pool, *args, **kwargs): """ Initialize the connection pool. """ - # extract soledad-specific objects from keyword arguments - self.opts = kwargs.pop("opts") - self._sync_enc_pool = kwargs.pop("sync_enc_pool") + self.opts = opts + self._sync_enc_pool = sync_enc_pool try: adbapi.ConnectionPool.__init__(self, *args, **kwargs) - except DatabaseError: - raise DatabaseAccessError('Could not open sqlcipher database') + except dbapi2.DatabaseError as e: + raise DatabaseAccessError( + 'Error initializing u1db connection pool: %s' % str(e)) # all u1db connections, hashed by thread-id self._u1dbconnections = {} @@ -183,10 +186,15 @@ class U1DBConnectionPool(adbapi.ConnectionPool): # The replica uid, primed by the connections on init. 
self.replica_uid = ProxyBase(None) - conn = self.connectionFactory( - self, self._sync_enc_pool, init_u1db=True) - replica_uid = conn._u1db._real_replica_uid - setProxiedObject(self.replica_uid, replica_uid) + try: + conn = self.connectionFactory( + self, self._sync_enc_pool, init_u1db=True) + replica_uid = conn._u1db._real_replica_uid + setProxiedObject(self.replica_uid, replica_uid) + except DatabaseAccessError as e: + self.threadpool.stop() + raise DatabaseAccessError( + "Error initializing connection factory: %s" % str(e)) def runU1DBQuery(self, meth, *args, **kw): """ @@ -204,16 +212,17 @@ class U1DBConnectionPool(adbapi.ConnectionPool): :rtype: twisted.internet.defer.Deferred """ meth = "u1db_%s" % meth - semaphore = BoundedSemaphore(SQLCIPHER_MAX_RETRIES - 1) + semaphore = DeferredSemaphore(SQLCIPHER_MAX_RETRIES) def _run_interaction(): return self.runInteraction( self._runU1DBQuery, meth, *args, **kw) def _errback(failure): - failure.trap(OperationalError) + failure.trap(dbapi2.OperationalError) if failure.getErrorMessage() == "database is locked": - should_retry = semaphore.acquire(False) + logger.warning("Database operation timed out.") + should_retry = semaphore.acquire() if should_retry: logger.warning( "Database operation timed out while waiting for " diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index e657c939..1bfbed8a 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -35,16 +35,11 @@ import ssl import uuid import urlparse -try: - import cchardet as chardet -except ImportError: - import chardet from itertools import chain from StringIO import StringIO from collections import defaultdict -from u1db.remote import http_client -from u1db.remote.ssl_match_hostname import match_hostname + from twisted.internet.defer import DeferredLock, returnValue, inlineCallbacks from zope.interface import implements @@ -54,6 +49,9 @@ from leap.common.plugins import collect_plugins from leap.soledad.common import SHARED_DB_NAME from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type +from leap.soledad.common.l2db.remote import http_client +from leap.soledad.common.l2db.remote.ssl_match_hostname import match_hostname +from leap.soledad.common.errors import DatabaseAccessError from leap.soledad.client import adbapi from leap.soledad.client import events as soledad_events @@ -66,6 +64,13 @@ from leap.soledad.client import encdecpool logger = logging.getLogger(name=__name__) + +# we may want to collect statistics from the sync process +DO_STATS = False +if os.environ.get('SOLEDAD_STATS'): + DO_STATS = True + + # # Constants # @@ -126,8 +131,7 @@ class Soledad(object): def __init__(self, uuid, passphrase, secrets_path, local_db_path, server_url, cert_file, shared_db=None, - auth_token=None, defer_encryption=False, syncable=True, - userid=None): + auth_token=None, defer_encryption=False, syncable=True): """ Initialize configuration, cryptographic keys and dbs. @@ -181,7 +185,6 @@ class Soledad(object): """ # store config params self._uuid = uuid - self._userid = userid self._passphrase = passphrase self._local_db_path = local_db_path self._server_url = server_url @@ -211,10 +214,22 @@ class Soledad(object): self._init_secrets() self._crypto = SoledadCrypto(self._secrets.remote_storage_secret) - self._init_u1db_sqlcipher_backend() - if syncable: - self._init_u1db_syncer() + try: + # initialize database access, trap any problems so we can shutdown + # smoothly. 
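The retry logic above keeps a DeferredSemaphore as a non-blocking retry budget: each "database is locked" failure consumes a token. The sketch below shows the core DeferredSemaphore semantics in isolation, with a two-token budget and toy jobs (not the retry code itself)::

    from __future__ import print_function

    from twisted.internet import defer, reactor, task

    # at most two jobs hold a token at once; the rest wait as queued
    # deferreds instead of parking a thread, which is the point of
    # swapping threading.BoundedSemaphore for DeferredSemaphore
    sem = defer.DeferredSemaphore(2)

    def job(n):
        # sem.run() acquires a token, runs the callable, and releases
        # the token when the returned deferred fires
        return sem.run(task.deferLater, reactor, 0.1, lambda: n)

    d = defer.gatherResults([job(i) for i in range(5)])
    d.addCallback(print)
    d.addBoth(lambda _: reactor.stop())
    reactor.run()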
+ self._init_u1db_sqlcipher_backend() + if syncable: + self._init_u1db_syncer() + except DatabaseAccessError: + # oops! something went wrong with backend initialization. We + # have to close any thread-related stuff we have already opened + # here, otherwise there might be zombie threads that may clog the + # reactor. + self._sync_db.close() + if hasattr(self, '_dbpool'): + self._dbpool.close() + raise # # initialization/destruction methods @@ -255,7 +270,7 @@ class Soledad(object): """ self._secrets = SoledadSecrets( self.uuid, self._passphrase, self._secrets_path, - self.shared_db, userid=self._userid) + self.shared_db, userid=self.userid) self._secrets.bootstrap() def _init_u1db_sqlcipher_backend(self): @@ -303,6 +318,17 @@ class Soledad(object): sync_db=self._sync_db, sync_enc_pool=self._sync_enc_pool) + def sync_stats(self): + sync_phase = 0 + if getattr(self._dbsyncer, 'sync_phase', None): + sync_phase = self._dbsyncer.sync_phase[0] + sync_exchange_phase = 0 + if getattr(self._dbsyncer, 'syncer', None): + if getattr(self._dbsyncer.syncer, 'sync_exchange_phase', None): + _p = self._dbsyncer.syncer.sync_exchange_phase[0] + sync_exchange_phase = _p + return sync_phase, sync_exchange_phase + # # Closing methods # @@ -359,7 +385,6 @@ class Soledad(object): also be updated. :rtype: twisted.internet.defer.Deferred """ - doc.content = _convert_to_unicode(doc.content) return self._defer("put_doc", doc) def delete_doc(self, doc): @@ -454,8 +479,7 @@ class Soledad(object): # create_doc (and probably to put_doc too). There are cases (mail # payloads for example) in which we already have the encoding in the # headers, so we don't need to guess it. - return self._defer( - "create_doc", _convert_to_unicode(content), doc_id=doc_id) + return self._defer("create_doc", content, doc_id=doc_id) def create_doc_from_json(self, json, doc_id=None): """ @@ -655,7 +679,7 @@ class Soledad(object): @property def userid(self): - return self._userid + return self.uuid # # ISyncableStorage @@ -976,44 +1000,6 @@ class Soledad(object): return self.create_doc(doc) -def _convert_to_unicode(content): - """ - Convert content to unicode (or all the strings in content). - - NOTE: Even though this method supports any type, it will - currently ignore contents of lists, tuple or any other - iterable than dict. We don't need support for these at the - moment - - :param content: content to convert - :type content: object - - :rtype: object - """ - # Chardet doesn't guess very well with some smallish payloads. - # This parameter might need some empirical tweaking. - CUTOFF_CONFIDENCE = 0.90 - - if isinstance(content, unicode): - return content - elif isinstance(content, str): - encoding = "utf-8" - result = chardet.detect(content) - if result["confidence"] > CUTOFF_CONFIDENCE: - encoding = result["encoding"] - try: - content = content.decode(encoding) - except UnicodeError as e: - logger.error("Unicode error: {0!r}. Using 'replace'".format(e)) - content = content.decode(encoding, 'replace') - return content - else: - if isinstance(content, dict): - for key in content.keys(): - content[key] = _convert_to_unicode(content[key]) - return content - - def create_path_if_not_exists(path): try: if not os.path.isdir(path): diff --git a/client/src/leap/soledad/client/auth.py b/client/src/leap/soledad/client/auth.py index 6dfabeb4..78e9bf1b 100644 --- a/client/src/leap/soledad/client/auth.py +++ b/client/src/leap/soledad/client/auth.py @@ -22,7 +22,7 @@ they can do token-based auth requests to the Soledad server. 
""" import base64 -from u1db import errors +from leap.soledad.common.l2db import errors class TokenBasedAuth(object): diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 363d71b9..f7d92372 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -26,7 +26,8 @@ import logging from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from cryptography.hazmat.backends.multibackend import MultiBackend -from cryptography.hazmat.backends.openssl.backend import Backend as OpenSSLBackend +from cryptography.hazmat.backends.openssl.backend \ + import Backend as OpenSSLBackend from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type @@ -38,6 +39,8 @@ logger = logging.getLogger(__name__) MAC_KEY_LENGTH = 64 +crypto_backend = MultiBackend([OpenSSLBackend()]) + def encrypt_sym(data, key): """ @@ -58,8 +61,7 @@ def encrypt_sym(data, key): (len(key) * 8)) iv = os.urandom(16) - backend = MultiBackend([OpenSSLBackend()]) - cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=backend) + cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) encryptor = cipher.encryptor() ciphertext = encryptor.update(data) + encryptor.finalize() @@ -86,9 +88,8 @@ def decrypt_sym(data, key, iv): soledad_assert( len(key) == 32, # 32 x 8 = 256 bits. 'Wrong key size: %s (must be 256 bits long).' % len(key)) - backend = MultiBackend([OpenSSLBackend()]) iv = binascii.a2b_base64(iv) - cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=backend) + cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend) decryptor = cipher.decryptor() return decryptor.update(data) + decryptor.finalize() diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py index 34667a1e..a6d49b21 100644 --- a/client/src/leap/soledad/client/encdecpool.py +++ b/client/src/leap/soledad/client/encdecpool.py @@ -22,12 +22,11 @@ during synchronization. """ -import multiprocessing -import Queue import json import logging +from uuid import uuid4 -from twisted.internet import reactor +from twisted.internet.task import LoopingCall from twisted.internet import threads from twisted.internet import defer from twisted.python import log @@ -51,9 +50,6 @@ class SyncEncryptDecryptPool(object): Base class for encrypter/decrypter pools. """ - # TODO implement throttling to reduce cpu usage?? - WORKERS = multiprocessing.cpu_count() - def __init__(self, crypto, sync_db): """ Initialize the pool of encryption-workers. @@ -66,21 +62,14 @@ class SyncEncryptDecryptPool(object): """ self._crypto = crypto self._sync_db = sync_db - self._pool = None self._delayed_call = None self._started = False def start(self): - if self.running: - return - self._create_pool() self._started = True def stop(self): - if not self.running: - return self._started = False - self._destroy_pool() # maybe cancel the next delayed call if self._delayed_call \ and not self._delayed_call.called: @@ -90,27 +79,6 @@ class SyncEncryptDecryptPool(object): def running(self): return self._started - def _create_pool(self): - self._pool = multiprocessing.Pool(self.WORKERS) - - def _destroy_pool(self): - """ - Cleanly close the pool of workers. - """ - logger.debug("Closing %s" % (self.__class__.__name__,)) - self._pool.close() - try: - self._pool.join() - except Exception: - pass - - def terminate(self): - """ - Terminate the pool of workers. 
- """ - logger.debug("Terminating %s" % (self.__class__.__name__,)) - self._pool.terminate() - def _runOperation(self, query, *args): """ Run an operation on the sync db. @@ -180,7 +148,6 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): Initialize the sync encrypter pool. """ SyncEncryptDecryptPool.__init__(self, *args, **kwargs) - self._encr_queue = defer.DeferredQueue() # TODO delete already synced files from database def start(self): @@ -189,73 +156,33 @@ class SyncEncrypterPool(SyncEncryptDecryptPool): """ SyncEncryptDecryptPool.start(self) logger.debug("Starting the encryption loop...") - reactor.callWhenRunning(self._maybe_encrypt_and_recurse) def stop(self): """ Stop the encrypter pool. """ - # close the sync queue - if self._encr_queue: - q = self._encr_queue - for d in q.pending: - d.cancel() - del q - self._encr_queue = None SyncEncryptDecryptPool.stop(self) - def enqueue_doc_for_encryption(self, doc): + def encrypt_doc(self, doc): """ - Enqueue a document for encryption. + Encrypt document asynchronously then insert it on + local staging database. :param doc: The document to be encrypted. :type doc: SoledadDocument """ - try: - self._encr_queue.put(doc) - except Queue.Full: - # do not asynchronously encrypt this file if the queue is full - pass - - @defer.inlineCallbacks - def _maybe_encrypt_and_recurse(self): - """ - Process one document from the encryption queue. - - Asynchronously encrypt a document that will then be stored in the sync - db. Processed documents will be read by the SoledadSyncTarget during - the sync_exchange. - """ - try: - while self.running: - doc = yield self._encr_queue.get() - self._encrypt_doc(doc) - except defer.QueueUnderflow: - self._delayed_call = reactor.callLater( - self.ENCRYPT_LOOP_PERIOD, - self._maybe_encrypt_and_recurse) - - def _encrypt_doc(self, doc): - """ - Symmetrically encrypt a document. - - :param doc: The document with contents to be encrypted. - :type doc: SoledadDocument - - :param workers: Whether to defer the decryption to the multiprocess - pool of workers. Useful for debugging purposes. - :type workers: bool - """ soledad_assert(self._crypto is not None, "need a crypto object") docstr = doc.get_json() key = self._crypto.doc_passphrase(doc.doc_id) secret = self._crypto.secret args = doc.doc_id, doc.rev, docstr, key, secret # encrypt asynchronously - self._pool.apply_async( - encrypt_doc_task, args, - callback=self._encrypt_doc_cb) + # TODO use dedicated threadpool / move to ampoule + d = threads.deferToThread( + encrypt_doc_task, *args) + d.addCallback(self._encrypt_doc_cb) + return d def _encrypt_doc_cb(self, result): """ @@ -336,8 +263,8 @@ def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret, :type doc_id: str :param doc_rev: The document revision. :type doc_rev: str - :param content: The encrypted content of the document. - :type content: str + :param content: The encrypted content of the document as JSON dict. + :type content: dict :param gen: The generation corresponding to the modification of that document. :type gen: int @@ -384,7 +311,7 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ TABLE_NAME = "docs_received" FIELD_NAMES = "doc_id PRIMARY KEY, rev, content, gen, " \ - "trans_id, encrypted, idx" + "trans_id, encrypted, idx, sync_id" """ Period of recurrence of the periodic decrypting task, in seconds. 
@@ -414,46 +341,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): self._docs_to_process = None self._processed_docs = 0 self._last_inserted_idx = 0 - self._decrypting_docs = [] - - # a list that holds the asynchronous decryption results so they can be - # collected when they are ready - self._async_results = [] - # initialize db and make sure any database operation happens after - # db initialization - self._deferred_init = self._init_db() - self._wait_init_db('_runOperation', '_runQuery') - - def _wait_init_db(self, *methods): - """ - Methods that need to wait for db initialization. - - :param methods: methods that need to wait for initialization - :type methods: tuple(str) - """ - self._waiting = [] - self._stored = {} - - def _restore(_): - for method in self._stored: - setattr(self, method, self._stored[method]) - for d in self._waiting: - d.callback(None) - - def _makeWrapper(method): - def wrapper(*args, **kw): - d = defer.Deferred() - d.addCallback(lambda _: self._stored[method](*args, **kw)) - self._waiting.append(d) - return d - return wrapper - - for method in methods: - self._stored[method] = getattr(self, method) - setattr(self, method, _makeWrapper(method)) - - self._deferred_init.addCallback(_restore) + self._loop = LoopingCall(self._decrypt_and_recurse) def start(self, docs_to_process): """ @@ -466,13 +355,39 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :type docs_to_process: int """ SyncEncryptDecryptPool.start(self) + self._decrypted_docs_indexes = set() + self._sync_id = uuid4().hex self._docs_to_process = docs_to_process self._deferred = defer.Deferred() - reactor.callWhenRunning(self._launch_decrypt_and_recurse) + d = self._init_db() + d.addCallback(lambda _: self._loop.start(self.DECRYPT_LOOP_PERIOD)) + return d + + def stop(self): + if self._loop.running: + self._loop.stop() + self._finish() + SyncEncryptDecryptPool.stop(self) + + def _init_db(self): + """ + Ensure sync_id column is present then + Empty the received docs table of the sync database. - def _launch_decrypt_and_recurse(self): - d = self._decrypt_and_recurse() - d.addErrback(self._errback) + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + ensure_sync_id_column = ("ALTER TABLE %s ADD COLUMN sync_id" % + self.TABLE_NAME) + d = self._runQuery(ensure_sync_id_column) + + def empty_received_docs(_): + query = "DELETE FROM %s WHERE sync_id <> ?" % (self.TABLE_NAME,) + return self._runOperation(query, (self._sync_id,)) + + d.addCallbacks(empty_received_docs, empty_received_docs) + return d def _errback(self, failure): log.err(failure) @@ -491,8 +406,8 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): def insert_encrypted_received_doc( self, doc_id, doc_rev, content, gen, trans_id, idx): """ - Insert a received message with encrypted content, to be decrypted later - on. + Decrypt and insert a received document into local staging area to be + processed later on. :param doc_id: The document ID. :type doc_id: str @@ -507,15 +422,22 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :param idx: The index of this document in the current sync process. :type idx: int - :return: A deferred that will fire when the operation in the database - has finished. + :return: A deferred that will fire after the decrypted document has + been inserted in the sync db. 
        :rtype: twisted.internet.defer.Deferred
        """
-        docstr = json.dumps(content)
-        query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \
-            % self.TABLE_NAME
-        return self._runOperation(
-            query, (doc_id, doc_rev, docstr, gen, trans_id, 1, idx))
+        soledad_assert(self._crypto is not None, "need a crypto object")
+
+        key = self._crypto.doc_passphrase(doc_id)
+        secret = self._crypto.secret
+        args = doc_id, doc_rev, content, gen, trans_id, key, secret, idx
+        # decrypt asynchronously
+        # TODO use dedicated threadpool / move to ampoule
+        d = threads.deferToThread(
+            decrypt_doc_task, *args)
+        # callback will insert it for later processing
+        d.addCallback(self._decrypt_doc_cb)
+        return d

     def insert_received_doc(
             self, doc_id, doc_rev, content, gen, trans_id, idx):
@@ -543,56 +465,29 @@
         """
         if not isinstance(content, str):
             content = json.dumps(content)
-        query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \
+        query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?, ?)" \
             % self.TABLE_NAME
-        return self._runOperation(
-            query, (doc_id, doc_rev, content, gen, trans_id, 0, idx))
+        d = self._runOperation(
+            query, (doc_id, doc_rev, content, gen, trans_id, 0,
+                    idx, self._sync_id))
+        d.addCallback(lambda _: self._decrypted_docs_indexes.add(idx))
+        return d

-    def _delete_received_doc(self, doc_id):
+    def _delete_received_docs(self, doc_ids):
         """
-        Delete a received doc after it was inserted into the local db.
+        Delete a list of received docs after they have been inserted into
+        the local db.

-        :param doc_id: Document ID.
-        :type doc_id: str
+        :param doc_ids: The list of document IDs.
+        :type doc_ids: list

         :return: A deferred that will fire when the operation in the database
                  has finished.
         :rtype: twisted.internet.defer.Deferred
         """
-        query = "DELETE FROM '%s' WHERE doc_id=?" \
-            % self.TABLE_NAME
-        return self._runOperation(query, (doc_id,))
-
-    def _async_decrypt_doc(self, doc_id, rev, content, gen, trans_id, idx):
-        """
-        Dispatch an asynchronous document decrypting routine and save the
-        result object.
-
-        :param doc_id: The ID for the document with contents to be encrypted.
-        :type doc_id: str
-        :param rev: The revision of the document.
-        :type rev: str
-        :param content: The serialized content of the document.
-        :type content: str
-        :param gen: The generation corresponding to the modification of that
-                    document.
-        :type gen: int
-        :param trans_id: The transaction id corresponding to the modification
-                         of that document.
-        :type trans_id: str
-        :param idx: The index of this document in the current sync process.
-        :type idx: int
-        """
-        soledad_assert(self._crypto is not None, "need a crypto object")
-
-        content = json.loads(content)
-        key = self._crypto.doc_passphrase(doc_id)
-        secret = self._crypto.secret
-        args = doc_id, rev, content, gen, trans_id, key, secret, idx
-        # decrypt asynchronously
-        self._async_results.append(
-            self._pool.apply_async(
-                decrypt_doc_task, args))
+        placeholders = ', '.join('?'
for _ in doc_ids) + query = "DELETE FROM '%s' WHERE doc_id in (%s)" \ + % (self.TABLE_NAME, placeholders) + return self._runOperation(query, (doc_ids)) def _decrypt_doc_cb(self, result): """ @@ -610,11 +505,10 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): doc_id, rev, content, gen, trans_id, idx = result logger.debug("Sync decrypter pool: decrypted doc %s: %s %s %s" % (doc_id, rev, gen, trans_id)) - self._decrypting_docs.remove((doc_id, rev)) return self.insert_received_doc( doc_id, rev, content, gen, trans_id, idx) - def _get_docs(self, encrypted=None, order_by='idx', order='ASC'): + def _get_docs(self, encrypted=None, sequence=None): """ Get documents from the received docs table in the sync db. @@ -622,9 +516,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): field equal to given parameter. :type encrypted: bool or None :param order_by: The name of the field to order results. - :type order_by: str - :param order: Whether the order should be ASC or DESC. - :type order: str :return: A deferred that will fire with the results of the database query. @@ -632,10 +523,18 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): """ query = "SELECT doc_id, rev, content, gen, trans_id, encrypted, " \ "idx FROM %s" % self.TABLE_NAME - if encrypted is not None: - query += " WHERE encrypted = %d" % int(encrypted) - query += " ORDER BY %s %s" % (order_by, order) - return self._runQuery(query) + parameters = [] + if encrypted or sequence: + query += " WHERE sync_id = ? and" + parameters += [self._sync_id] + if encrypted: + query += " encrypted = ?" + parameters += [int(encrypted)] + if sequence: + query += " idx in (" + ', '.join('?' * len(sequence)) + ")" + parameters += [int(i) for i in sequence] + query += " ORDER BY idx ASC" + return self._runQuery(query, parameters) @defer.inlineCallbacks def _get_insertable_docs(self): @@ -646,35 +545,19 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): documents. :rtype: twisted.internet.defer.Deferred """ - # here, we fetch the list of decrypted documents and compare with the - # index of the last succesfully processed document. - decrypted_docs = yield self._get_docs(encrypted=False) - insertable = [] - last_idx = self._last_inserted_idx - for doc_id, rev, content, gen, trans_id, encrypted, idx in \ - decrypted_docs: - if (idx != last_idx + 1): - break - insertable.append((doc_id, rev, content, gen, trans_id, idx)) - last_idx += 1 - defer.returnValue(insertable) - - @defer.inlineCallbacks - def _async_decrypt_received_docs(self): - """ - Get all the encrypted documents from the sync database and dispatch a - decrypt worker to decrypt each one of them. - - :return: A deferred that will fire after all documents have been - decrypted and inserted back in the sync db. - :rtype: twisted.internet.defer.Deferred - """ - docs = yield self._get_docs(encrypted=True) - for doc_id, rev, content, gen, trans_id, _, idx in docs: - if (doc_id, rev) not in self._decrypting_docs: - self._decrypting_docs.append((doc_id, rev)) - self._async_decrypt_doc( - doc_id, rev, content, gen, trans_id, idx) + # Here, check in memory what are the insertable indexes that can + # form a sequence starting from the last inserted index + sequence = [] + insertable_docs = [] + next_index = self._last_inserted_idx + 1 + while next_index in self._decrypted_docs_indexes: + sequence.append(str(next_index)) + next_index += 1 + # Then fetch all the ones ready for insertion. 
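Two invariants are at work in this hunk: documents only become insertable while their indexes extend an unbroken run past the last inserted one, and deletion binds doc ids through ?-placeholders rather than interpolating values. Both isolated below (the helper name is made up)::

    def contiguous_run(last_inserted_idx, decrypted_indexes):
        run = []
        nxt = last_inserted_idx + 1
        while nxt in decrypted_indexes:
            run.append(nxt)
            nxt += 1
        return run

    # doc 5 must wait until doc 4 has been decrypted
    assert contiguous_run(0, {1, 2, 3, 5}) == [1, 2, 3]

    # parameterized IN clause, as in _delete_received_docs above
    doc_ids = ['a', 'b', 'c']
    placeholders = ', '.join('?' for _ in doc_ids)
    query = "DELETE FROM 'docs_received' WHERE doc_id in (%s)" % placeholders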
+ if sequence: + insertable_docs = yield self._get_docs(encrypted=False, + sequence=sequence) + defer.returnValue(insertable_docs) @defer.inlineCallbacks def _process_decrypted_docs(self): @@ -687,36 +570,18 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): :rtype: twisted.internet.defer.Deferred """ insertable = yield self._get_insertable_docs() + processed_docs_ids = [] for doc_fields in insertable: method = self._insert_decrypted_local_doc # FIXME: This is used only because SQLCipherU1DBSync is synchronous # When adbapi is used there is no need for an external thread # Without this the reactor can freeze and fail docs download yield threads.deferToThread(method, *doc_fields) - defer.returnValue(insertable) - - def _delete_processed_docs(self, inserted): - """ - Delete from the sync db documents that have been processed. - - :param inserted: List of documents inserted in the previous process - step. - :type inserted: list - - :return: A list of deferreds that will fire when each operation in the - database has finished. - :rtype: twisted.internet.defer.DeferredList - """ - deferreds = [] - for doc_id, doc_rev, _, _, _, _ in inserted: - deferreds.append( - self._delete_received_doc(doc_id)) - if not deferreds: - return defer.succeed(None) - return defer.gatherResults(deferreds) + processed_docs_ids.append(doc_fields[0]) + yield self._delete_received_docs(processed_docs_ids) def _insert_decrypted_local_doc(self, doc_id, doc_rev, content, - gen, trans_id, idx): + gen, trans_id, encrypted, idx): """ Insert the decrypted document into the local replica. @@ -751,32 +616,6 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): self._last_inserted_idx = idx self._processed_docs += 1 - def _init_db(self): - """ - Empty the received docs table of the sync database. - - :return: A deferred that will fire when the operation in the database - has finished. - :rtype: twisted.internet.defer.Deferred - """ - query = "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,) - return self._runOperation(query) - - @defer.inlineCallbacks - def _collect_async_decryption_results(self): - """ - Collect the results of the asynchronous doc decryptions and re-raise - any exception raised by a multiprocessing async decryption call. - - :raise Exception: Raised if an async call has raised an exception. - """ - async_results = self._async_results[:] - for res in async_results: - if res.ready(): - # XXX: might raise an exception! - yield self._decrypt_doc_cb(res.get()) - self._async_results.remove(res) - @defer.inlineCallbacks def _decrypt_and_recurse(self): """ @@ -792,22 +631,19 @@ class SyncDecrypterPool(SyncEncryptDecryptPool): delete operations have been executed. 
:rtype: twisted.internet.defer.Deferred """ + if not self.running: + defer.returnValue(None) processed = self._processed_docs pending = self._docs_to_process if processed < pending: - yield self._async_decrypt_received_docs() - yield self._collect_async_decryption_results() - docs = yield self._process_decrypted_docs() - yield self._delete_processed_docs(docs) - # recurse - self._delayed_call = reactor.callLater( - self.DECRYPT_LOOP_PERIOD, - self._launch_decrypt_and_recurse) + yield self._process_decrypted_docs() else: self._finish() def _finish(self): self._processed_docs = 0 self._last_inserted_idx = 0 - self._deferred.callback(None) + self._decrypted_docs_indexes = set() + if not self._deferred.called: + self._deferred.callback(None) diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py index 08775580..4fc91d9d 100644 --- a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py +++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py @@ -24,9 +24,9 @@ import hashlib import os import sys -import u1db from twisted.internet import defer, reactor +from leap.soledad.common import l2db from leap.soledad.client import adbapi from leap.soledad.client.sqlcipher import SQLCipherOptions @@ -135,7 +135,7 @@ def countDocs(_): def printResult(r, **kwargs): if kwargs: debug(*kwargs.values()) - elif isinstance(r, u1db.Document): + elif isinstance(r, l2db.Document): debug(r.doc_id, r.content['number']) else: len_results = len(r[1]) diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py index 9deba136..38ea18a3 100644 --- a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py +++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py @@ -24,11 +24,11 @@ import hashlib import os import sys -import u1db from twisted.internet import defer, reactor from leap.soledad.client import adbapi from leap.soledad.client.sqlcipher import SQLCipherOptions +from leap.soledad.common import l2db folder = os.environ.get("TMPDIR", "tmp") @@ -135,7 +135,7 @@ def countDocs(_): def printResult(r, **kwargs): if kwargs: debug(*kwargs.values()) - elif isinstance(r, u1db.Document): + elif isinstance(r, l2db.Document): debug(r.doc_id, r.content['number']) else: len_results = len(r[1]) diff --git a/client/src/leap/soledad/client/examples/use_adbapi.py b/client/src/leap/soledad/client/examples/use_adbapi.py index d7bd21f2..a2683836 100644 --- a/client/src/leap/soledad/client/examples/use_adbapi.py +++ b/client/src/leap/soledad/client/examples/use_adbapi.py @@ -21,11 +21,11 @@ from __future__ import print_function import datetime import os -import u1db from twisted.internet import defer, reactor from leap.soledad.client import adbapi from leap.soledad.client.sqlcipher import SQLCipherOptions +from leap.soledad.common import l2db folder = os.environ.get("TMPDIR", "tmp") @@ -68,7 +68,7 @@ def countDocs(_): def printResult(r): - if isinstance(r, u1db.Document): + if isinstance(r, l2db.Document): debug(r.doc_id, r.content['number']) else: len_results = len(r[1]) diff --git a/client/src/leap/soledad/client/http_target/__init__.py b/client/src/leap/soledad/client/http_target/__init__.py index a16531ef..b7e54aa4 100644 --- a/client/src/leap/soledad/client/http_target/__init__.py +++ 
b/client/src/leap/soledad/client/http_target/__init__.py @@ -22,6 +22,7 @@ after receiving. """ +import os import logging from leap.common.http import HTTPClient @@ -33,6 +34,12 @@ from leap.soledad.client.http_target.fetch import HTTPDocFetcher logger = logging.getLogger(__name__) +# we may want to collect statistics from the sync process +DO_STATS = False +if os.environ.get('SOLEDAD_STATS'): + DO_STATS = True + + class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): """ @@ -93,3 +100,6 @@ class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher): # the duplicated syncing bug. This could be reduced to the 30s default # after implementing Cancellable Sync. See #7382 self._http = HTTPClient(cert_file, timeout=90) + + if DO_STATS: + self.sync_exchange_phase = [0] diff --git a/client/src/leap/soledad/client/http_target/api.py b/client/src/leap/soledad/client/http_target/api.py index 94354092..f8de9a15 100644 --- a/client/src/leap/soledad/client/http_target/api.py +++ b/client/src/leap/soledad/client/http_target/api.py @@ -14,17 +14,25 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +import os +import time import json import base64 from uuid import uuid4 -from u1db import SyncTarget from twisted.web.error import Error from twisted.internet import defer -from leap.soledad.common.errors import InvalidAuthTokenError from leap.soledad.client.http_target.support import readBody +from leap.soledad.common.errors import InvalidAuthTokenError +from leap.soledad.common.l2db import SyncTarget + + +# we may want to collect statistics from the sync process +DO_STATS = False +if os.environ.get('SOLEDAD_STATS'): + DO_STATS = True class SyncTargetAPI(SyncTarget): @@ -187,6 +195,10 @@ class SyncTargetAPI(SyncTarget): transaction id of the target replica. :rtype: twisted.internet.defer.Deferred """ + # ---------- phase 1: send docs to server ---------------------------- + if DO_STATS: + self.sync_exchange_phase[0] += 1 + # -------------------------------------------------------------------- self._ensure_callback = ensure_callback @@ -203,6 +215,11 @@ class SyncTargetAPI(SyncTarget): last_known_trans_id, sync_id) + # ---------- phase 2: receive docs ----------------------------------- + if DO_STATS: + self.sync_exchange_phase[0] += 1 + # -------------------------------------------------------------------- + cur_target_gen, cur_target_trans_id = yield self._receive_docs( last_known_generation, last_known_trans_id, ensure_callback, sync_id, @@ -214,6 +231,11 @@ class SyncTargetAPI(SyncTarget): cur_target_gen = gen_after_send cur_target_trans_id = trans_id_after_send + # ---------- phase 3: sync exchange is over -------------------------- + if DO_STATS: + self.sync_exchange_phase[0] += 1 + # -------------------------------------------------------------------- + defer.returnValue([cur_target_gen, cur_target_trans_id]) diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py index 9f7a4193..a3f70b02 100644 --- a/client/src/leap/soledad/client/http_target/fetch.py +++ b/client/src/leap/soledad/client/http_target/fetch.py @@ -16,15 +16,17 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. 
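SOLEDAD_STATS shows up as the same three-line guard in several modules of this commit, and the phase counters are single-element lists so that the syncer and whatever samples sync_stats() share one mutable cell. A condensed sketch of the idiom (the class and method names here are invented)::

    import os

    DO_STATS = False
    if os.environ.get('SOLEDAD_STATS'):
        DO_STATS = True

    class PhaseTracker(object):

        def __init__(self):
            # a one-element list: observers holding a reference to it
            # see increments in place, with no callback machinery
            self.sync_phase = [0] if DO_STATS else None

        def advance(self):
            if DO_STATS:
                self.sync_phase[0] += 1

        def stats(self):
            return self.sync_phase[0] if DO_STATS else 0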
import logging import json -from u1db import errors -from u1db.remote import utils + from twisted.internet import defer -from leap.soledad.common.document import SoledadDocument + from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS from leap.soledad.client.events import emit_async from leap.soledad.client.crypto import is_symmetrically_encrypted from leap.soledad.client.encdecpool import SyncDecrypterPool from leap.soledad.client.http_target.support import RequestBody +from leap.soledad.common.document import SoledadDocument +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db.remote import utils logger = logging.getLogger(__name__) @@ -81,9 +83,6 @@ class HTTPDocFetcher(object): new_generation = ngen new_transaction_id = ntrans - if defer_decryption: - self._sync_decr_pool.start(number_of_changes) - # --------------------------------------------------------------------- # maybe receive the rest of the documents # --------------------------------------------------------------------- @@ -151,6 +150,10 @@ class HTTPDocFetcher(object): new_generation, new_transaction_id, number_of_changes, doc_id, \ rev, content, gen, trans_id = \ self._parse_received_doc_response(response) + + if self._sync_decr_pool and not self._sync_decr_pool.running: + self._sync_decr_pool.start(number_of_changes) + if doc_id is not None: # decrypt incoming document and insert into local database # ------------------------------------------------------------- diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py index 89288779..13218acf 100644 --- a/client/src/leap/soledad/client/http_target/send.py +++ b/client/src/leap/soledad/client/http_target/send.py @@ -16,10 +16,13 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. import json import logging + from twisted.internet import defer + from leap.soledad.client.events import emit_async from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS from leap.soledad.client.http_target.support import RequestBody + logger = logging.getLogger(__name__) diff --git a/client/src/leap/soledad/client/http_target/support.py b/client/src/leap/soledad/client/http_target/support.py index 2625744c..6ec98ed4 100644 --- a/client/src/leap/soledad/client/http_target/support.py +++ b/client/src/leap/soledad/client/http_target/support.py @@ -16,20 +16,22 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. import warnings import json -from u1db import errors -from u1db.remote import http_errors + from twisted.internet import defer from twisted.web.client import _ReadBodyProtocol from twisted.web.client import PartialDownloadError from twisted.web._newclient import ResponseDone from twisted.web._newclient import PotentialDataLoss +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db.remote import http_errors # we want to make sure that HTTP errors will raise appropriate u1db errors, # that is, fire errbacks with the appropriate failures, in the context of # twisted. Because of that, we redefine the http body reader used by the HTTP # client below. 
+ class ReadBodyProtocol(_ReadBodyProtocol): """ From original Twisted implementation, focused on adding our error diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py index e2a5a1d7..3547a711 100644 --- a/client/src/leap/soledad/client/secrets.py +++ b/client/src/leap/soledad/client/secrets.py @@ -33,7 +33,6 @@ from hashlib import sha256 from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type from leap.soledad.common import document -from leap.soledad.common import errors from leap.soledad.client import events from leap.soledad.client.crypto import encrypt_sym, decrypt_sym @@ -81,6 +80,7 @@ class BootstrapSequenceError(SecretsException): # Secrets handler # + class SoledadSecrets(object): """ @@ -143,6 +143,8 @@ class SoledadSecrets(object): KDF_LENGTH_KEY = 'kdf_length' KDF_SCRYPT = 'scrypt' CIPHER_AES256 = 'aes256' + RECOVERY_DOC_VERSION_KEY = 'version' + RECOVERY_DOC_VERSION = 1 """ Keys used to access storage secrets in recovery documents. """ @@ -162,17 +164,12 @@ class SoledadSecrets(object): :param shared_db: The shared database that stores user secrets. :type shared_db: leap.soledad.client.shared_db.SoledadSharedDatabase """ - # XXX removed since not in use - # We will pick the first secret available. - # param secret_id: The id of the storage secret to be used. - self._uuid = uuid self._userid = userid self._passphrase = passphrase self._secrets_path = secrets_path self._shared_db = shared_db self._secrets = {} - self._secret_id = None def bootstrap(self): @@ -195,49 +192,44 @@ class SoledadSecrets(object): storage on server sequence has failed for some reason. """ # STAGE 1 - verify if secrets exist locally - if not self._has_secret(): # try to load from local storage. - - # STAGE 2 - there are no secrets in local storage, so try to fetch - # encrypted secrets from server. - logger.info( - 'Trying to fetch cryptographic secrets from shared recovery ' - 'database...') - - # --- start of atomic operation in shared db --- + try: + logger.info("Trying to load secrets from local storage...") + version = self._load_secrets_from_local_file() + # eventually migrate local and remote stored documents from old + # format version + if version < self.RECOVERY_DOC_VERSION: + self._store_secrets() + self._upload_crypto_secrets() + logger.info("Found secrets in local storage.") + return - # obtain lock on shared db - token = timeout = None - try: - token, timeout = self._shared_db.lock() - except errors.AlreadyLockedError: - raise BootstrapSequenceError('Database is already locked.') - except errors.LockTimedOutError: - raise BootstrapSequenceError('Lock operation timed out.') + except NoStorageSecret: + logger.info("Could not find secrets in local storage.") - self._get_or_gen_crypto_secrets() + # STAGE 2 - there are no secrets in local storage and this is the + # first time we are running soledad with the specified + # secrets_path. Try to fetch encrypted secrets from + # server. 
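The bootstrap rewrite in this hunk is now a straight three-stage cascade, with recovery-document format migration folded into the first two stages. A heavily condensed model of the control flow, using dict-backed stores (all names here are hypothetical, not the real API)::

    RECOVERY_DOC_VERSION = 1

    def bootstrap(local, remote, generate):
        # stage 1: local storage; re-upload if the stored format is old
        if 'secrets' in local:
            if local.get('version', 1) < RECOVERY_DOC_VERSION:
                remote.update(local)
            return local['secrets']
        # stage 2: remote storage; cache locally what was fetched
        if 'secrets' in remote:
            local.update(remote)
            if remote.get('version', 1) < RECOVERY_DOC_VERSION:
                remote['version'] = RECOVERY_DOC_VERSION
            return remote['secrets']
        # stage 3: nothing anywhere, so generate and upload
        local['secrets'] = remote['secrets'] = generate()
        return local['secrets']

    local, shared = {}, {'secrets': 's3cr3t', 'version': 1}
    assert bootstrap(local, shared, lambda: 'fresh') == 's3cr3t'
    assert local['secrets'] == 's3cr3t'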
+ try: + logger.info('Trying to fetch secrets from remote storage...') + version = self._download_crypto_secrets() + self._store_secrets() + # eventually migrate remote stored document from old format + # version + if version < self.RECOVERY_DOC_VERSION: + self._upload_crypto_secrets() + logger.info('Found secrets in remote storage.') + return + except NoStorageSecret: + logger.info("Could not find secrets in remote storage.") - # release the lock on shared db - try: - self._shared_db.unlock(token) - self._shared_db.close() - except errors.NotLockedError: - # for some reason the lock expired. Despite that, secret - # loading or generation/storage must have been executed - # successfully, so we pass. - pass - except errors.InvalidTokenError: - # here, our lock has not only expired but also some other - # client application has obtained a new lock and is currently - # doing its thing in the shared database. Using the same - # reasoning as above, we assume everything went smooth and - # pass. - pass - except Exception as e: - logger.error("Unhandled exception when unlocking shared " - "database.") - logger.exception(e) - - # --- end of atomic operation in shared db --- + # STAGE 3 - there are no secrets in server also, so we want to + # generate the secrets and store them in the remote + # db. + logger.info("Generating secrets...") + self._gen_crypto_secrets() + logger.info("Uploading secrets...") + self._upload_crypto_secrets() def _has_secret(self): """ @@ -246,21 +238,7 @@ class SoledadSecrets(object): :return: Whether there's a storage secret for symmetric encryption. :rtype: bool """ - logger.info("Checking if there's a secret in local storage...") - if (self._secret_id is None or self._secret_id not in self._secrets) \ - and os.path.isfile(self._secrets_path): - try: - self._load_secrets() # try to load from disk - except IOError as e: - logger.warning( - 'IOError while loading secrets from disk: %s' % str(e)) - - if self.storage_secret is not None: - logger.info("Found a secret in local storage.") - return True - - logger.info("Could not find a secret in local storage.") - return False + return self.storage_secret is not None def _maybe_set_active_secret(self, active_secret): """ @@ -272,73 +250,82 @@ class SoledadSecrets(object): active_secret = self._secrets.items()[0][0] self.set_secret_id(active_secret) - def _load_secrets(self): + def _load_secrets_from_local_file(self): """ Load storage secrets from local file. + + :return version: The version of the locally stored recovery document. + + :raise NoStorageSecret: Raised if there are no secrets available in + local storage. 
""" + # check if secrets file exists and we can read it + if not os.path.isfile(self._secrets_path): + raise NoStorageSecret + # read storage secrets from file content = None with open(self._secrets_path, 'r') as f: content = json.loads(f.read()) - _, active_secret = self._import_recovery_document(content) + _, active_secret, version = self._import_recovery_document(content) + self._maybe_set_active_secret(active_secret) - # enlarge secret if needed - enlarged = False - if len(self._secrets[self._secret_id]) < self.GEN_SECRET_LENGTH: - gen_len = self.GEN_SECRET_LENGTH \ - - len(self._secrets[self._secret_id]) - new_piece = os.urandom(gen_len) - self._secrets[self._secret_id] += new_piece - enlarged = True - # store and save in shared db if needed - if enlarged: - self._store_secrets() - self._put_secrets_in_shared_db() - def _get_or_gen_crypto_secrets(self): + return version + + def _download_crypto_secrets(self): + """ + Download crypto secrets. + + :return version: The version of the remotelly stored recovery document. + + :raise NoStorageSecret: Raised if there are no secrets available in + remote storage. + """ + doc = None + if self._shared_db.syncable: + doc = self._get_secrets_from_shared_db() + + if doc is None: + raise NoStorageSecret + + _, active_secret, version = self._import_recovery_document(doc.content) + self._maybe_set_active_secret(active_secret) + + return version + + def _gen_crypto_secrets(self): + """ + Generate the crypto secrets. + """ + logger.info('No cryptographic secrets found, creating new secrets...') + secret_id = self._gen_secret() + self.set_secret_id(secret_id) + + def _upload_crypto_secrets(self): """ - Retrieves or generates the crypto secrets. + Send crypto secrets to shared db. :raises BootstrapSequenceError: Raised when unable to store secrets in shared database. """ if self._shared_db.syncable: - doc = self._get_secrets_from_shared_db() - else: - doc = None - - if doc is not None: - logger.info( - 'Found cryptographic secrets in shared recovery ' - 'database.') - _, active_secret = self._import_recovery_document(doc.content) - self._maybe_set_active_secret(active_secret) - self._store_secrets() # save new secrets in local file - else: - # STAGE 3 - there are no secrets in server also, so - # generate a secret and store it in remote db. - logger.info( - 'No cryptographic secrets found, creating new ' - ' secrets...') - self.set_secret_id(self._gen_secret()) - - if self._shared_db.syncable: + try: + self._put_secrets_in_shared_db() + except Exception as ex: + # storing generated secret in shared db failed for + # some reason, so we erase the generated secret and + # raise. try: - self._put_secrets_in_shared_db() - except Exception as ex: - # storing generated secret in shared db failed for - # some reason, so we erase the generated secret and - # raise. - try: - os.unlink(self._secrets_path) - except OSError as e: - if e.errno != errno.ENOENT: - # no such file or directory - logger.exception(e) - logger.exception(ex) - raise BootstrapSequenceError( - 'Could not store generated secret in the shared ' - 'database, bailing out...') + os.unlink(self._secrets_path) + except OSError as e: + if e.errno != errno.ENOENT: + # no such file or directory + logger.exception(e) + logger.exception(ex) + raise BootstrapSequenceError( + 'Could not store generated secret in the shared ' + 'database, bailing out...') # # Shared DB related methods @@ -360,7 +347,7 @@ class SoledadSecrets(object): """ Export the storage secrets. 
- A recovery document has the following structure: + Current format of recovery document has the following structure: { 'storage_secrets': { @@ -371,6 +358,7 @@ class SoledadSecrets(object): }, }, 'active_secret': '<secret_id>', + 'version': '<recovery document format version>', } Note that multiple storage secrets might be stored in one recovery @@ -388,13 +376,14 @@ class SoledadSecrets(object): data = { self.STORAGE_SECRETS_KEY: encrypted_secrets, self.ACTIVE_SECRET_KEY: self._secret_id, + self.RECOVERY_DOC_VERSION_KEY: self.RECOVERY_DOC_VERSION, } return data def _import_recovery_document(self, data): """ - Import storage secrets for symmetric encryption and uuid (if present) - from a recovery document. + Import storage secrets for symmetric encryption from a recovery + document. Note that this method does not store the imported data on disk. For that, use C{self._store_secrets()}. @@ -402,11 +391,44 @@ class SoledadSecrets(object): :param data: The recovery document. :type data: dict - :return: A tuple containing the number of imported secrets and the - secret_id of the last active secret. - :rtype: (int, str) + :return: A tuple containing the number of imported secrets, the + secret_id of the last active secret, and the recovery + document format version. + :rtype: (int, str, int) """ soledad_assert(self.STORAGE_SECRETS_KEY in data) + version = data.get(self.RECOVERY_DOC_VERSION_KEY, 1) + meth = getattr(self, '_import_recovery_document_version_%d' % version) + secret_count, active_secret = meth(data) + return secret_count, active_secret, version + + def _import_recovery_document_version_1(self, data): + """ + Import storage secrets for symmetric encryption from a recovery + document with format version 1. + + Version 1 of recovery document has the following structure: + + { + 'storage_secrets': { + '<storage_secret id>': { + 'cipher': 'aes256', + 'length': <secret length>, + 'secret': '<encrypted storage_secret>', + }, + }, + 'active_secret': '<secret_id>', + 'version': '<recovery document format version>', + } + + :param data: The recovery document. + :type data: dict + + :return: A tuple containing the number of imported secrets, the + secret_id of the last active secret, and the recovery + document format version. + :rtype: (int, str, int) + """ # include secrets in the secret pool. secret_count = 0 secrets = data[self.STORAGE_SECRETS_KEY].items() @@ -419,7 +441,8 @@ class SoledadSecrets(object): if secret_id not in self._secrets: try: self._secrets[secret_id] = \ - self._decrypt_storage_secret(encrypted_secret) + self._decrypt_storage_secret_version_1( + encrypted_secret) secret_count += 1 except SecretsException as e: logger.error("Failed to decrypt storage secret: %s" @@ -478,13 +501,21 @@ class SoledadSecrets(object): # Management of secret for symmetric encryption. # - def _decrypt_storage_secret(self, encrypted_secret_dict): + def _decrypt_storage_secret_version_1(self, encrypted_secret_dict): """ Decrypt the storage secret. Storage secret is encrypted before being stored. This method decrypts and returns the decrypted storage secret. + Version 1 of storage secret format has the following structure: + + '<storage_secret id>': { + 'cipher': 'aes256', + 'length': <secret length>, + 'secret': '<encrypted storage_secret>', + }, + :param encrypted_secret_dict: The encrypted storage secret. 
:type encrypted_secret_dict: dict diff --git a/client/src/leap/soledad/client/shared_db.py b/client/src/leap/soledad/client/shared_db.py index 6abf8ea3..d43db045 100644 --- a/client/src/leap/soledad/client/shared_db.py +++ b/client/src/leap/soledad/client/shared_db.py @@ -17,7 +17,7 @@ """ A shared database for storing/retrieving encrypted key material. """ -from u1db.remote import http_database +from leap.soledad.common.l2db.remote import http_database from leap.soledad.client.auth import TokenBasedAuth @@ -151,33 +151,3 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): http_database.HTTPDatabase.__init__(self, url, document_factory, creds) self._uuid = uuid - - def lock(self): - """ - Obtain a lock on document with id C{doc_id}. - - :return: A tuple containing the token to unlock and the timeout until - lock expiration. - :rtype: (str, float) - - :raise HTTPError: Raised if any HTTP error occurs. - """ - if self.syncable: - res, headers = self._request_json( - 'PUT', ['lock', self._uuid], body={}) - return res['token'], res['timeout'] - else: - return None, None - - def unlock(self, token): - """ - Release the lock on shared database. - - :param token: The token returned by a previous call to lock(). - :type token: str - - :raise HTTPError: - """ - if self.syncable: - _, _ = self._request_json( - 'DELETE', ['lock', self._uuid], params={'token': token}) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 22ddc87d..166c0783 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -44,10 +44,6 @@ handled by Soledad should be created by SQLCipher >= 2.0. import logging import os import json -import u1db - -from u1db import errors as u1db_errors -from u1db.backends import sqlite_backend from hashlib import sha256 from functools import partial @@ -58,11 +54,15 @@ from twisted.internet import reactor from twisted.internet import defer from twisted.enterprise import adbapi +from leap.soledad.common.document import SoledadDocument +from leap.soledad.common import l2db +from leap.soledad.common.l2db import errors as u1db_errors +from leap.soledad.common.l2db.backends import sqlite_backend +from leap.soledad.common.errors import DatabaseAccessError + from leap.soledad.client.http_target import SoledadHTTPSyncTarget from leap.soledad.client.sync import SoledadSynchronizer - from leap.soledad.client import pragmas -from leap.soledad.common.document import SoledadDocument logger = logging.getLogger(__name__) @@ -72,6 +72,12 @@ logger = logging.getLogger(__name__) sqlite_backend.dbapi2 = sqlcipher_dbapi2 +# we may want to collect statistics from the sync process +DO_STATS = False +if os.environ.get('SOLEDAD_STATS'): + DO_STATS = True + + def initialize_sqlcipher_db(opts, on_init=None, check_same_thread=True): """ Initialize a SQLCipher database. @@ -278,7 +284,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(self, doc) if self.defer_encryption: # TODO move to api? 
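With lock() and unlock() removed from SoledadSharedDatabase above, clients no longer serialize bootstrap through an HTTP lock resource on the shared database. One in-process way to get the same mutual exclusion, sketched here only as an illustration (this diff does not show what the client actually uses), is Twisted's DeferredLock::

    from twisted.internet import defer

    _bootstrap_lock = defer.DeferredLock()

    def serialized(f, *args, **kwargs):
        # run() acquires the lock, calls f, and releases the lock when
        # the returned Deferred fires
        return _bootstrap_lock.run(f, *args, **kwargs)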
- self._sync_enc_pool.enqueue_doc_for_encryption(doc) + self._sync_enc_pool.encrypt_doc(doc) return doc_rev # @@ -437,21 +443,21 @@ class SQLCipherU1DBSync(SQLCipherDatabase): # format is the following: # # self._syncers = {'<url>': ('<auth_hash>', syncer), ...} - self._syncers = {} - - # Storage for the documents received during a sync + # storage for the documents received during a sync self.received_docs = [] self.running = False + self.shutdownID = None + self._db_handle = None + # initialize the main db before scheduling a start + self._initialize_main_db() self._reactor = reactor self._reactor.callWhenRunning(self._start) - self._db_handle = None - self._initialize_main_db() - - self.shutdownID = None + if DO_STATS: + self.sync_phase = None @property def _replica_uid(self): @@ -464,11 +470,14 @@ class SQLCipherU1DBSync(SQLCipherDatabase): self.running = True def _initialize_main_db(self): - self._db_handle = initialize_sqlcipher_db( - self._opts, check_same_thread=False) - self._real_replica_uid = None - self._ensure_schema() - self.set_document_factory(soledad_doc_factory) + try: + self._db_handle = initialize_sqlcipher_db( + self._opts, check_same_thread=False) + self._real_replica_uid = None + self._ensure_schema() + self.set_document_factory(soledad_doc_factory) + except sqlcipher_dbapi2.DatabaseError as e: + raise DatabaseAccessError(str(e)) @defer.inlineCallbacks def sync(self, url, creds=None, defer_decryption=True): @@ -497,6 +506,10 @@ class SQLCipherU1DBSync(SQLCipherDatabase): :rtype: Deferred """ syncer = self._get_syncer(url, creds=creds) + if DO_STATS: + self.sync_phase = syncer.sync_phase + self.syncer = syncer + self.sync_exchange_phase = syncer.sync_exchange_phase local_gen_before_sync = yield syncer.sync( defer_decryption=defer_decryption) self.received_docs = syncer.received_docs @@ -582,7 +595,7 @@ class U1DBSQLiteBackend(sqlite_backend.SQLitePartialExpandDatabase): self._db_handle = conn self._real_replica_uid = None self._ensure_schema() - self._factory = u1db.Document + self._factory = l2db.Document class SoledadSQLCipherWrapper(SQLCipherDatabase): diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 1879031f..2656a150 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -17,18 +17,26 @@ """ Soledad synchronization utilities. """ +import os +import time import logging from twisted.internet import defer -from u1db import errors +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db.sync import Synchronizer from leap.soledad.common.errors import BackendNotReadyError -from u1db.sync import Synchronizer logger = logging.getLogger(__name__) +# we may want to collect statistics from the sync process +DO_STATS = False +if os.environ.get('SOLEDAD_STATS'): + DO_STATS = True + + class SoledadSynchronizer(Synchronizer): """ Collect the state around synchronizing 2 U1DB replicas. @@ -42,6 +50,12 @@ class SoledadSynchronizer(Synchronizer): """ received_docs = [] + def __init__(self, *args, **kwargs): + Synchronizer.__init__(self, *args, **kwargs) + if DO_STATS: + self.sync_phase = [0] + self.sync_exchange_phase = None + @defer.inlineCallbacks def sync(self, defer_decryption=True): """ @@ -64,9 +78,16 @@ class SoledadSynchronizer(Synchronizer): the local generation before the synchronization was performed. 
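Note that DO_STATS is computed once, at import time, so SOLEDAD_STATS has to be set before the module is imported; any non-empty value turns it on::

    import os
    os.environ['SOLEDAD_STATS'] = '1'          # before the import below

    from leap.soledad.client import sqlcipher  # DO_STATS is now True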
:rtype: twisted.internet.defer.Deferred """ + sync_target = self.sync_target self.received_docs = [] + # ---------- phase 1: get sync info from server ---------------------- + if DO_STATS: + self.sync_phase[0] += 1 + self.sync_exchange_phase = self.sync_target.sync_exchange_phase + # -------------------------------------------------------------------- + # get target identifier, its current generation, # and its last-seen database generation for this source ensure_callback = None @@ -106,6 +127,11 @@ class SoledadSynchronizer(Synchronizer): self.source.validate_gen_and_trans_id( target_my_gen, target_my_trans_id) + # ---------- phase 2: what's changed --------------------------------- + if DO_STATS: + self.sync_phase[0] += 1 + # -------------------------------------------------------------------- + # what's changed since that generation and this current gen my_gen, _, changes = self.source.whats_changed(target_my_gen) logger.debug("Soledad sync: there are %d documents to send." @@ -130,6 +156,11 @@ class SoledadSynchronizer(Synchronizer): raise errors.InvalidTransactionId defer.returnValue(my_gen) + # ---------- phase 3: sync exchange ---------------------------------- + if DO_STATS: + self.sync_phase[0] += 1 + # -------------------------------------------------------------------- + # prepare to send all the changed docs changed_doc_ids = [doc_id for doc_id, _, _ in changes] docs_to_send = self.source.get_docs( @@ -162,6 +193,12 @@ class SoledadSynchronizer(Synchronizer): "my_gen": my_gen } self._syncing_info = info + + # ---------- phase 4: complete sync ---------------------------------- + if DO_STATS: + self.sync_phase[0] += 1 + # -------------------------------------------------------------------- + yield self.complete_sync() _, _, changes = self.source.whats_changed(target_my_gen) @@ -170,6 +207,11 @@ class SoledadSynchronizer(Synchronizer): just_received = list(set(changed_doc_ids) - set(ids_sent)) self.received_docs = just_received + # ---------- phase 5: sync is over ----------------------------------- + if DO_STATS: + self.sync_phase[0] += 1 + # -------------------------------------------------------------------- + defer.returnValue(my_gen) def complete_sync(self): diff --git a/common/changes/next-changelog.rst b/common/changes/next-changelog.rst index c0974384..64162b7b 100644 --- a/common/changes/next-changelog.rst +++ b/common/changes/next-changelog.rst @@ -1,4 +1,4 @@ -0.8.0 - ... +0.8.2 - ... +++++++++++++++++++++++++++++++ Please add lines to this file, they will be moved to the CHANGELOG.rst during diff --git a/common/pkg/generate_wheels.sh b/common/pkg/generate_wheels.sh index e29c327e..a13e2c7a 100755 --- a/common/pkg/generate_wheels.sh +++ b/common/pkg/generate_wheels.sh @@ -7,7 +7,7 @@ if [ "$WHEELHOUSE" = "" ]; then fi pip wheel --wheel-dir $WHEELHOUSE pip -pip wheel --wheel-dir $WHEELHOUSE --allow-external u1db --allow-unverified u1db --allow-external dirspec --allow-unverified dirspec -r pkg/requirements.pip +pip wheel --wheel-dir $WHEELHOUSE -r pkg/requirements.pip if [ -f pkg/requirements-testing.pip ]; then pip wheel --wheel-dir $WHEELHOUSE -r pkg/requirements-testing.pip fi diff --git a/common/pkg/pip_install_requirements.sh b/common/pkg/pip_install_requirements.sh index d0479365..f4b5f67a 100755 --- a/common/pkg/pip_install_requirements.sh +++ b/common/pkg/pip_install_requirements.sh @@ -4,7 +4,7 @@ # Use at your own risk. 
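Since sync_phase is a shared one-element list, code holding a reference to it sees each of the five increments above as the sync progresses. A minimal polling sketch, assuming SOLEDAD_STATS is set::

    def report_progress(db):
        # sync_phase stays None until the first sync is scheduled
        if getattr(db, 'sync_phase', None):
            print('sync is at phase %d of 5' % db.sync_phase[0])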
# See $usage for help -insecure_packages="u1db dirspec" +insecure_packages="" leap_wheelhouse=https://lizard.leap.se/wheels show_help() { @@ -80,5 +80,5 @@ insecure_flags=`return_insecure_flags` packages=`return_packages` pip install -U wheel -pip install $install_options pip +pip install -U pip pip install $install_options $insecure_flags $packages diff --git a/common/pkg/requirements-latest.pip b/common/pkg/requirements-latest.pip index 9de17382..396d77f1 100644 --- a/common/pkg/requirements-latest.pip +++ b/common/pkg/requirements-latest.pip @@ -1,6 +1,4 @@ --index-url https://pypi.python.org/simple/ ---allow-external u1db --allow-unverified u1db ---allow-external dirspec --allow-unverified dirspec -e 'git+https://github.com/pixelated-project/leap_pycommon.git@develop#egg=leap.common' -e . diff --git a/common/pkg/requirements.pip b/common/pkg/requirements.pip index a1238707..d3ed2b50 100644 --- a/common/pkg/requirements.pip +++ b/common/pkg/requirements.pip @@ -1,6 +1,2 @@ -u1db - -# XXX -- fix me! -# oauth is not strictly needed by us, but we need it until u1db adds it to its -# release as a dep. -oauth +paste +routes diff --git a/common/setup.py b/common/setup.py index 8d9c4d6e..7191fa00 100644 --- a/common/setup.py +++ b/common/setup.py @@ -75,14 +75,20 @@ class freeze_debianver(Command): # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. -version_version = '{version}' -full_revisionid = '{full_revisionid}' -""" - templatefun = r""" - -def get_versions(default={}, verbose=False): - return {'version': version_version, - 'full-revisionid': full_revisionid} +import json +import sys + +version_json = ''' +{ + "dirty": false, + "error": null, + "full-revisionid": "FULL_REVISIONID", + "version": "VERSION_STRING" +} +''' # END VERSION_JSON + +def get_versions(): + return json.loads(version_json) """ def initialize_options(self): @@ -97,14 +103,13 @@ def get_versions(default={}, verbose=False): if proceed != "y": print("He. You scared. Aborting.") return - subst_template = self.template.format( - version=VERSION_SHORT, - full_revisionid=VERSION_REVISION) + self.templatefun + subst_template = self.template.replace( + 'VERSION_STRING', VERSION_SHORT).replace( + 'FULL_REVISIONID', VERSION_REVISION) versioneer_cfg = versioneer.get_config_from_root('.') with open(versioneer_cfg.versionfile_source, 'w') as f: f.write(subst_template) - cmdclass = versioneer.get_cmdclass() # @@ -279,11 +284,9 @@ setup( namespace_packages=["leap", "leap.soledad"], packages=find_packages('src', exclude=['*.tests', '*.tests.*']), package_dir={'': 'src'}, + package_data={'': ["*.sql"]}, test_suite='leap.soledad.common.tests', install_requires=requirements, tests_require=utils.parse_requirements( reqfiles=['pkg/requirements-testing.pip']), - extras_require={ - 'couchdb': ['couchdb'], - }, ) diff --git a/common/src/leap/soledad/common/README.txt b/common/src/leap/soledad/common/README.txt index 106efb5e..38b9858e 100644 --- a/common/src/leap/soledad/common/README.txt +++ b/common/src/leap/soledad/common/README.txt @@ -3,10 +3,10 @@ Soledad common package This package contains Soledad bits used by both server and client. -Couch U1DB Backend +Couch L2DB Backend ------------------ -U1DB backends rely on some atomic operations that modify documents contents +L2DB backends rely on some atomic operations that modify documents contents and metadata (conflicts, transaction ids and indexes). 
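The freeze_debianver change above trades str.format for plain str.replace, presumably because the new template embeds a JSON object whose braces str.format would try to parse as replacement fields::

    template = '{ "version": "VERSION_STRING" }'
    print(template.replace('VERSION_STRING', '0.8.1'))
    # prints: { "version": "0.8.1" }

    # template.format(version='0.8.1') would raise KeyError instead,
    # because the braces look like a format field to str.format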
The only atomic operation in Couch is a document put, so every u1db atomic operation has to be mapped to a couch document put. diff --git a/common/src/leap/soledad/common/__init__.py b/common/src/leap/soledad/common/__init__.py index d7f6929c..1ba6ab89 100644 --- a/common/src/leap/soledad/common/__init__.py +++ b/common/src/leap/soledad/common/__init__.py @@ -47,7 +47,3 @@ __all__ = [ "soledad_assert_type", "__version__", ] - -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions diff --git a/common/src/leap/soledad/common/backend.py b/common/src/leap/soledad/common/backend.py index 53426fb5..f4f48f86 100644 --- a/common/src/leap/soledad/common/backend.py +++ b/common/src/leap/soledad/common/backend.py @@ -16,27 +16,28 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. -"""A U1DB generic backend.""" +"""A L2DB generic backend.""" +import functools -from u1db import vectorclock -from u1db.errors import ( +from leap.soledad.common.document import ServerDocument +from leap.soledad.common.l2db import vectorclock +from leap.soledad.common.l2db.errors import ( RevisionConflict, InvalidDocId, ConflictedDoc, DocumentDoesNotExist, DocumentAlreadyDeleted, ) -from u1db.backends import CommonBackend -from u1db.backends import CommonSyncTarget -from leap.soledad.common.document import ServerDocument +from leap.soledad.common.l2db.backends import CommonBackend +from leap.soledad.common.l2db.backends import CommonSyncTarget class SoledadBackend(CommonBackend): BATCH_SUPPORT = False """ - A U1DB backend implementation. + A L2DB backend implementation. """ def __init__(self, database, replica_uid=None): @@ -438,9 +439,8 @@ class SoledadBackend(CommonBackend): generation. :type other_transaction_id: str """ - function = self._set_replica_gen_and_trans_id args = [other_replica_uid, other_generation, other_transaction_id] - callback = lambda: function(*args) + callback = functools.partial(self._set_replica_gen_and_trans_id, *args) if self.batching: self.after_batch_callbacks['set_source_info'] = callback else: diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 18ed8a19..523a50a0 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -24,6 +24,7 @@ import re import uuid import binascii import time +import functools from StringIO import StringIO @@ -41,12 +42,12 @@ from couchdb.http import ( urljoin as couch_urljoin, Resource, ) -from u1db.errors import ( +from leap.soledad.common.l2db.errors import ( DatabaseDoesNotExist, InvalidGeneration, RevisionConflict, ) -from u1db.remote import http_app +from leap.soledad.common.l2db.remote import http_app from leap.soledad.common import ddocs @@ -340,7 +341,8 @@ class CouchDatabase(object): # This will not be needed when/if we switch from python-couchdb to # paisley. 
time.strptime('Mar 8 1917', '%b %d %Y') - get_one = lambda doc_id: self.get_doc(doc_id, check_for_conflicts) + get_one = functools.partial( + self.get_doc, check_for_conflicts=check_for_conflicts) docs = [THREAD_POOL.apply_async(get_one, [doc_id]) for doc_id in doc_ids] for doc in docs: diff --git a/common/src/leap/soledad/common/couch/state.py b/common/src/leap/soledad/common/couch/state.py index 4f07c105..9b40a264 100644 --- a/common/src/leap/soledad/common/couch/state.py +++ b/common/src/leap/soledad/common/couch/state.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # state.py -# Copyright (C) 2015 LEAP +# Copyright (C) 2015,2016 LEAP # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,17 +17,17 @@ """ Server state using CouchDatabase as backend. """ -import re import logging +import re import time from urlparse import urljoin from hashlib import sha512 -from u1db.remote.server_state import ServerState -from leap.soledad.common.command import exec_validated_cmd from leap.soledad.common.couch import CouchDatabase from leap.soledad.common.couch import couch_server -from u1db.errors import Unauthorized +from leap.soledad.common.command import exec_validated_cmd +from leap.soledad.common.l2db.remote.server_state import ServerState +from leap.soledad.common.l2db.errors import Unauthorized logger = logging.getLogger(__name__) diff --git a/common/src/leap/soledad/common/document.py b/common/src/leap/soledad/common/document.py index 9e0c0976..6c26a29f 100644 --- a/common/src/leap/soledad/common/document.py +++ b/common/src/leap/soledad/common/document.py @@ -17,11 +17,11 @@ """ -A Soledad Document is an u1db.Document with lasers. +A Soledad Document is an l2db.Document with lasers. """ -from u1db import Document +from .l2db import Document # diff --git a/common/src/leap/soledad/common/errors.py b/common/src/leap/soledad/common/errors.py index 0b6bb4e6..dec871c9 100644 --- a/common/src/leap/soledad/common/errors.py +++ b/common/src/leap/soledad/common/errors.py @@ -20,9 +20,8 @@ Soledad errors. """ - -from u1db import errors -from u1db.remote import http_errors +from .l2db import errors +from .l2db.remote import http_errors def register_exception(cls): @@ -71,67 +70,6 @@ class InvalidAuthTokenError(errors.Unauthorized): # -# LockResource errors -# - -@register_exception -class InvalidTokenError(SoledadError): - - """ - Exception raised when trying to unlock shared database with invalid token. - """ - - wire_description = "unlock unauthorized" - status = 401 - - -@register_exception -class NotLockedError(SoledadError): - - """ - Exception raised when trying to unlock shared database when it is not - locked. - """ - - wire_description = "lock not found" - status = 404 - - -@register_exception -class AlreadyLockedError(SoledadError): - - """ - Exception raised when trying to lock shared database but it is already - locked. - """ - - wire_description = "lock is locked" - status = 403 - - -@register_exception -class LockTimedOutError(SoledadError): - - """ - Exception raised when timing out while trying to lock the shared database. - """ - - wire_description = "lock timed out" - status = 408 - - -@register_exception -class CouldNotObtainLockError(SoledadError): - - """ - Exception raised when timing out while trying to lock the shared database. 
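The two lambda-to-functools.partial conversions above (the set_source_info callback in SoledadBackend and get_one in the Couch get_docs thread pool) are not only a style fix: partial binds its argument values at creation time, whereas an assigned lambda closes over variables and would observe later rebindings. A small illustration::

    import functools

    def set_info(uid, gen, trans_id):
        print('%s is at generation %d (%s)' % (uid, gen, trans_id))

    args = ['replica-1', 42, 'T-abc']
    callback = functools.partial(set_info, *args)
    args = ['replica-2', 0, 'T-xyz']  # rebinding does not affect callback
    callback()                        # replica-1 is at generation 42 (T-abc)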
- """ - - wire_description = "error obtaining lock" - status = 500 - - -# # SoledadBackend errors # u1db error statuses also have to be updated http_errors.ERROR_STATUSES = set( diff --git a/common/src/leap/soledad/common/l2db/__init__.py b/common/src/leap/soledad/common/l2db/__init__.py new file mode 100644 index 00000000..c0bd15fe --- /dev/null +++ b/common/src/leap/soledad/common/l2db/__init__.py @@ -0,0 +1,697 @@ +# Copyright 2011 Canonical Ltd. +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. + +"""L2DB""" + +try: + import simplejson as json +except ImportError: + import json # noqa + +from leap.soledad.common.l2db.errors import InvalidJSON, InvalidContent + +__version_info__ = (13, 9) +__version__ = '.'.join(map(lambda x: '%02d' % x, __version_info__)) + + +def open(path, create, document_factory=None): + """Open a database at the given location. + + Will raise u1db.errors.DatabaseDoesNotExist if create=False and the + database does not already exist. + + :param path: The filesystem path for the database to open. + :param create: True/False, should the database be created if it doesn't + already exist? + :param document_factory: A function that will be called with the same + parameters as Document.__init__. + :return: An instance of Database. + """ + from leap.soledad.common.l2db.backends import sqlite_backend + return sqlite_backend.SQLiteDatabase.open_database( + path, create=create, document_factory=document_factory) + + +# constraints on database names (relevant for remote access, as regex) +DBNAME_CONSTRAINTS = r"[a-zA-Z0-9][a-zA-Z0-9.-]*" + +# constraints on doc ids (as regex) +# (no slashes, and no characters outside the ascii range) +DOC_ID_CONSTRAINTS = r"[a-zA-Z0-9.%_-]+" + + +class Database(object): + """A JSON Document data store. + + This data store can be synchronized with other u1db.Database instances. + """ + + def set_document_factory(self, factory): + """Set the document factory that will be used to create objects to be + returned as documents by the database. + + :param factory: A function that returns an object which at minimum must + satisfy the same interface as does the class DocumentBase. + Subclassing that class is the easiest way to create such + a function. + """ + raise NotImplementedError(self.set_document_factory) + + def set_document_size_limit(self, limit): + """Set the maximum allowed document size for this database. + + :param limit: Maximum allowed document size in bytes. + """ + raise NotImplementedError(self.set_document_size_limit) + + def whats_changed(self, old_generation=0): + """Return a list of documents that have changed since old_generation. + This allows APPS to only store a db generation before going + 'offline', and then when coming back online they can use this + data to update whatever extra data they are storing. + + :param old_generation: The generation of the database in the old + state. 
+        :return: (generation, trans_id, [(doc_id, generation, trans_id),...])
+            The current generation of the database, its associated transaction
+            id, and a list of changed documents since old_generation,
+            represented by tuples with, for each document, its doc_id and the
+            generation and transaction id corresponding to the last intervening
+            change and sorted by generation (old changes first)
+        """
+        raise NotImplementedError(self.whats_changed)
+
+    def get_doc(self, doc_id, include_deleted=False):
+        """Get the JSON string for the given document.
+
+        :param doc_id: The unique document identifier
+        :param include_deleted: If set to True, deleted documents will be
+            returned with empty content. Otherwise asking for a deleted
+            document will return None.
+        :return: a Document object.
+        """
+        raise NotImplementedError(self.get_doc)
+
+    def get_docs(self, doc_ids, check_for_conflicts=True,
+                 include_deleted=False):
+        """Get the JSON content for many documents.
+
+        :param doc_ids: A list of document identifiers.
+        :param check_for_conflicts: If set to False, then the conflict check
+            will be skipped, and 'None' will be returned instead of True/False.
+        :param include_deleted: If set to True, deleted documents will be
+            returned with empty content. Otherwise deleted documents will not
+            be included in the results.
+        :return: iterable giving the Document object for each document id
+            in matching doc_ids order.
+        """
+        raise NotImplementedError(self.get_docs)
+
+    def get_all_docs(self, include_deleted=False):
+        """Get the JSON content for all documents in the database.
+
+        :param include_deleted: If set to True, deleted documents will be
+            returned with empty content. Otherwise deleted documents will not
+            be included in the results.
+        :return: (generation, [Document])
+            The current generation of the database, followed by a list of all
+            the documents in the database.
+        """
+        raise NotImplementedError(self.get_all_docs)
+
+    def create_doc(self, content, doc_id=None):
+        """Create a new document.
+
+        You can optionally specify the document identifier, but the document
+        must not already exist. See 'put_doc' if you want to override an
+        existing document.
+        If the database specifies a maximum document size and the document
+        exceeds it, create will fail and raise a DocumentTooBig exception.
+
+        :param content: A Python dictionary.
+        :param doc_id: An optional identifier specifying the document id.
+        :return: Document
+        """
+        raise NotImplementedError(self.create_doc)
+
+    def create_doc_from_json(self, json, doc_id=None):
+        """Create a new document.
+
+        You can optionally specify the document identifier, but the document
+        must not already exist. See 'put_doc' if you want to override an
+        existing document.
+        If the database specifies a maximum document size and the document
+        exceeds it, create will fail and raise a DocumentTooBig exception.
+
+        :param json: The JSON document string
+        :param doc_id: An optional identifier specifying the document id.
+        :return: Document
+        """
+        raise NotImplementedError(self.create_doc_from_json)
+
+    def put_doc(self, doc):
+        """Update a document.
+        If the document currently has conflicts, put will fail.
+        If the database specifies a maximum document size and the document
+        exceeds it, put will fail and raise a DocumentTooBig exception.
+
+        :param doc: A Document with new content.
+        :return: new_doc_rev - The new revision identifier for the document.
+            The Document object will also be updated.
+ """ + raise NotImplementedError(self.put_doc) + + def delete_doc(self, doc): + """Mark a document as deleted. + Will abort if the current revision doesn't match doc.rev. + This will also set doc.content to None. + """ + raise NotImplementedError(self.delete_doc) + + def create_index(self, index_name, *index_expressions): + """Create an named index, which can then be queried for future lookups. + Creating an index which already exists is not an error, and is cheap. + Creating an index which does not match the index_expressions of the + existing index is an error. + Creating an index will block until the expressions have been evaluated + and the index generated. + + :param index_name: A unique name which can be used as a key prefix + :param index_expressions: index expressions defining the index + information. + + Examples: + + "fieldname", or "fieldname.subfieldname" to index alphabetically + sorted on the contents of a field. + + "number(fieldname, width)", "lower(fieldname)" + """ + raise NotImplementedError(self.create_index) + + def delete_index(self, index_name): + """Remove a named index. + + :param index_name: The name of the index we are removing + """ + raise NotImplementedError(self.delete_index) + + def list_indexes(self): + """List the definitions of all known indexes. + + :return: A list of [('index-name', ['field', 'field2'])] definitions. + """ + raise NotImplementedError(self.list_indexes) + + def get_from_index(self, index_name, *key_values): + """Return documents that match the keys supplied. + + You must supply exactly the same number of values as have been defined + in the index. It is possible to do a prefix match by using '*' to + indicate a wildcard match. You can only supply '*' to trailing entries, + (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) + It is also possible to append a '*' to the last supplied value (eg + 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :return: List of [Document] + """ + raise NotImplementedError(self.get_from_index) + + def get_range_from_index(self, index_name, start_value, end_value): + """Return documents that fall within the specified range. + + Both ends of the range are inclusive. For both start_value and + end_value, one must supply exactly the same number of values as have + been defined in the index, or pass None. In case of a single column + index, a string is accepted as an alternative for a tuple with a single + value. It is possible to do a prefix match by using '*' to indicate + a wildcard match. You can only supply '*' to trailing entries, (eg + 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also + possible to append a '*' to the last supplied value (eg 'val*', '*', + '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :param start_values: tuples of values that define the lower bound of + the range. eg, if you have an index with 3 fields then you would + have: (val1, val2, val3) + :param end_values: tuples of values that define the upper bound of the + range. 
eg, if you have an index with 3 fields then you would have: + (val1, val2, val3) + :return: List of [Document] + """ + raise NotImplementedError(self.get_range_from_index) + + def get_index_keys(self, index_name): + """Return all keys under which documents are indexed in this index. + + :param index_name: The index to query + :return: [] A list of tuples of indexed keys. + """ + raise NotImplementedError(self.get_index_keys) + + def get_doc_conflicts(self, doc_id): + """Get the list of conflicts for the given document. + + The order of the conflicts is such that the first entry is the value + that would be returned by "get_doc". + + :return: [doc] A list of the Document entries that are conflicted. + """ + raise NotImplementedError(self.get_doc_conflicts) + + def resolve_doc(self, doc, conflicted_doc_revs): + """Mark a document as no longer conflicted. + + We take the list of revisions that the client knows about that it is + superseding. This may be a different list from the actual current + conflicts, in which case only those are removed as conflicted. This + may fail if the conflict list is significantly different from the + supplied information. (sync could have happened in the background from + the time you GET_DOC_CONFLICTS until the point where you RESOLVE) + + :param doc: A Document with the new content to be inserted. + :param conflicted_doc_revs: A list of revisions that the new content + supersedes. + """ + raise NotImplementedError(self.resolve_doc) + + def get_sync_target(self): + """Return a SyncTarget object, for another u1db to synchronize with. + + :return: An instance of SyncTarget. + """ + raise NotImplementedError(self.get_sync_target) + + def close(self): + """Release any resources associated with this database.""" + raise NotImplementedError(self.close) + + def sync(self, url, creds=None, autocreate=True): + """Synchronize documents with remote replica exposed at url. + + :param url: the url of the target replica to sync with. + :param creds: optional dictionary giving credentials + to authorize the operation with the server. For using OAuth + the form of creds is: + {'oauth': { + 'consumer_key': ..., + 'consumer_secret': ..., + 'token_key': ..., + 'token_secret': ... + }} + :param autocreate: ask the target to create the db if non-existent. + :return: local_gen_before_sync The local generation before the + synchronisation was performed. This is useful to pass into + whatschanged, if an application wants to know which documents were + affected by a synchronisation. + """ + from u1db.sync import Synchronizer + from u1db.remote.http_target import HTTPSyncTarget + return Synchronizer(self, HTTPSyncTarget(url, creds=creds)).sync( + autocreate=autocreate) + + def _get_replica_gen_and_trans_id(self, other_replica_uid): + """Return the last known generation and transaction id for the other db + replica. + + When you do a synchronization with another replica, the Database keeps + track of what generation the other database replica was at, and what + the associated transaction id was. This is used to determine what data + needs to be sent, and if two databases are claiming to be the same + replica. + + :param other_replica_uid: The identifier for the other replica. + :return: (gen, trans_id) The generation and transaction id we + encountered during synchronization. If we've never synchronized + with the replica, this is (0, ''). 
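The creds dictionary accepted by sync() follows the OAuth shape given in its docstring. Note, though, that as embedded here the method body still imports Synchronizer and HTTPSyncTarget from u1db rather than from the relocated l2db modules, so this high-level entry point only works where u1db itself is importable. The shape of a call (target URL hypothetical)::

    local_gen = db.sync(
        'https://example.org/user-db',
        creds={'oauth': {
            'consumer_key': 'ck', 'consumer_secret': 'cs',
            'token_key': 'tk', 'token_secret': 'ts'}})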
+ """ + raise NotImplementedError(self._get_replica_gen_and_trans_id) + + def _set_replica_gen_and_trans_id(self, other_replica_uid, + other_generation, other_transaction_id): + """Set the last-known generation and transaction id for the other + database replica. + + We have just performed some synchronization, and we want to track what + generation the other replica was at. See also + _get_replica_gen_and_trans_id. + :param other_replica_uid: The U1DB identifier for the other replica. + :param other_generation: The generation number for the other replica. + :param other_transaction_id: The transaction id associated with the + generation. + """ + raise NotImplementedError(self._set_replica_gen_and_trans_id) + + def _put_doc_if_newer(self, doc, save_conflict, replica_uid, replica_gen, + replica_trans_id=''): + """Insert/update document into the database with a given revision. + + This api is used during synchronization operations. + + If a document would conflict and save_conflict is set to True, the + content will be selected as the 'current' content for doc.doc_id, + even though doc.rev doesn't supersede the currently stored revision. + The currently stored document will be added to the list of conflict + alternatives for the given doc_id. + + This forces the new content to be 'current' so that we get convergence + after synchronizing, even if people don't resolve conflicts. Users can + then notice that their content is out of date, update it, and + synchronize again. (The alternative is that users could synchronize and + think the data has propagated, but their local copy looks fine, and the + remote copy is never updated again.) + + :param doc: A Document object + :param save_conflict: If this document is a conflict, do you want to + save it as a conflict, or just ignore it. + :param replica_uid: A unique replica identifier. + :param replica_gen: The generation of the replica corresponding to the + this document. The replica arguments are optional, but are used + during synchronization. + :param replica_trans_id: The transaction_id associated with the + generation. + :return: (state, at_gen) - If we don't have doc_id already, + or if doc_rev supersedes the existing document revision, + then the content will be inserted, and state is 'inserted'. + If doc_rev is less than or equal to the existing revision, + then the put is ignored and state is respecitvely 'superseded' + or 'converged'. + If doc_rev is not strictly superseded or supersedes, then + state is 'conflicted'. The document will not be inserted if + save_conflict is False. + For 'inserted' or 'converged', at_gen is the insertion/current + generation. + """ + raise NotImplementedError(self._put_doc_if_newer) + + +class DocumentBase(object): + """Container for handling a single document. + + :ivar doc_id: Unique identifier for this document. + :ivar rev: The revision identifier of the document. + :ivar json_string: The JSON string for this document. 
+ :ivar has_conflicts: Boolean indicating if this document has conflicts + """ + + def __init__(self, doc_id, rev, json_string, has_conflicts=False): + self.doc_id = doc_id + self.rev = rev + if json_string is not None: + try: + value = json.loads(json_string) + except ValueError: + raise InvalidJSON + if not isinstance(value, dict): + raise InvalidJSON + self._json = json_string + self.has_conflicts = has_conflicts + + def same_content_as(self, other): + """Compare the content of two documents.""" + if self._json: + c1 = json.loads(self._json) + else: + c1 = None + if other._json: + c2 = json.loads(other._json) + else: + c2 = None + return c1 == c2 + + def __repr__(self): + if self.has_conflicts: + extra = ', conflicted' + else: + extra = '' + return '%s(%s, %s%s, %r)' % (self.__class__.__name__, self.doc_id, + self.rev, extra, self.get_json()) + + def __hash__(self): + raise NotImplementedError(self.__hash__) + + def __eq__(self, other): + if not isinstance(other, Document): + return NotImplemented + return ( + self.doc_id == other.doc_id and self.rev == other.rev and + self.same_content_as(other) and self.has_conflicts == + other.has_conflicts) + + def __lt__(self, other): + """This is meant for testing, not part of the official api. + + It is implemented so that sorted([Document, Document]) can be used. + It doesn't imply that users would want their documents to be sorted in + this order. + """ + # Since this is just for testing, we don't worry about comparing + # against things that aren't a Document. + return ((self.doc_id, self.rev, self.get_json()) < + (other.doc_id, other.rev, other.get_json())) + + def get_json(self): + """Get the json serialization of this document.""" + if self._json is not None: + return self._json + return None + + def get_size(self): + """Calculate the total size of the document.""" + size = 0 + json = self.get_json() + if json: + size += len(json) + if self.rev: + size += len(self.rev) + if self.doc_id: + size += len(self.doc_id) + return size + + def set_json(self, json_string): + """Set the json serialization of this document.""" + if json_string is not None: + try: + value = json.loads(json_string) + except ValueError: + raise InvalidJSON + if not isinstance(value, dict): + raise InvalidJSON + self._json = json_string + + def make_tombstone(self): + """Make this document into a tombstone.""" + self._json = None + + def is_tombstone(self): + """Return True if the document is a tombstone, False otherwise.""" + if self._json is not None: + return False + return True + + +class Document(DocumentBase): + """Container for handling a single document. + + :ivar doc_id: Unique identifier for this document. + :ivar rev: The revision identifier of the document. + :ivar json: The JSON string for this document. + :ivar has_conflicts: Boolean indicating if this document has conflicts + """ + + # The following part of the API is optional: no implementation is forced to + # have it but if the language supports dictionaries/hashtables, it makes + # Documents a lot more user friendly. + + def __init__(self, doc_id=None, rev=None, json='{}', has_conflicts=False): + # TODO: We convert the json in the superclass to check its validity so + # we might as well set _content here directly since the price is + # already being paid. 
+ super(Document, self).__init__(doc_id, rev, json, has_conflicts) + self._content = None + + def same_content_as(self, other): + """Compare the content of two documents.""" + if self._json: + c1 = json.loads(self._json) + else: + c1 = self._content + if other._json: + c2 = json.loads(other._json) + else: + c2 = other._content + return c1 == c2 + + def get_json(self): + """Get the json serialization of this document.""" + json_string = super(Document, self).get_json() + if json_string is not None: + return json_string + if self._content is not None: + return json.dumps(self._content) + return None + + def set_json(self, json): + """Set the json serialization of this document.""" + self._content = None + super(Document, self).set_json(json) + + def make_tombstone(self): + """Make this document into a tombstone.""" + self._content = None + super(Document, self).make_tombstone() + + def is_tombstone(self): + """Return True if the document is a tombstone, False otherwise.""" + if self._content is not None: + return False + return super(Document, self).is_tombstone() + + def _get_content(self): + """Get the dictionary representing this document.""" + if self._json is not None: + self._content = json.loads(self._json) + self._json = None + if self._content is not None: + return self._content + return None + + def _set_content(self, content): + """Set the dictionary representing this document.""" + try: + tmp = json.dumps(content) + except TypeError: + raise InvalidContent( + "Can not be converted to JSON: %r" % (content,)) + if not tmp.startswith('{'): + raise InvalidContent( + "Can not be converted to a JSON object: %r." % (content,)) + # We might as well store the JSON at this point since we did the work + # of encoding it, and it doesn't lose any information. + self._json = tmp + self._content = None + + content = property( + _get_content, _set_content, doc="Content of the Document.") + + # End of optional part. + + +class SyncTarget(object): + """Functionality for using a Database as a synchronization target.""" + + def get_sync_info(self, source_replica_uid): + """Return information about known state. + + Return the replica_uid and the current database generation of this + database, and the last-seen database generation for source_replica_uid + + :param source_replica_uid: Another replica which we might have + synchronized with in the past. + :return: (target_replica_uid, target_replica_generation, + target_trans_id, source_replica_last_known_generation, + source_replica_last_known_transaction_id) + """ + raise NotImplementedError(self.get_sync_info) + + def record_sync_info(self, source_replica_uid, source_replica_generation, + source_replica_transaction_id): + """Record tip information for another replica. + + After sync_exchange has been processed, the caller will have + received new content from this replica. This call allows the + source replica instigating the sync to inform us what their + generation became after applying the documents we returned. + + This is used to allow future sync operations to not need to repeat data + that we just talked about. It also means that if this is called at the + wrong time, there can be database records that will never be + synchronized. + + :param source_replica_uid: The identifier for the source replica. + :param source_replica_generation: + The database generation for the source replica. + :param source_replica_transaction_id: The transaction id associated + with the source replica generation. 
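Document keeps only one of its two representations alive at a time: reading content parses the JSON string and discards it, while assigning content re-serializes eagerly and drops the dict. A round trip::

    from leap.soledad.common.l2db import Document

    doc = Document(doc_id='D-1', json='{"n": 1}')
    print(doc.content['n'])   # 1 -- JSON parsed lazily, then discarded
    doc.content = {'n': 2}    # serialized back to JSON immediately
    print(doc.get_json())     # {"n": 2}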
+ """ + raise NotImplementedError(self.record_sync_info) + + def sync_exchange(self, docs_by_generation, source_replica_uid, + last_known_generation, last_known_trans_id, + return_doc_cb, ensure_callback=None): + """Incorporate the documents sent from the source replica. + + This is not meant to be called by client code directly, but is used as + part of sync(). + + This adds docs to the local store, and determines documents that need + to be returned to the source replica. + + Documents must be supplied in docs_by_generation paired with + the generation of their latest change in order from the oldest + change to the newest, that means from the oldest generation to + the newest. + + Documents are also returned paired with the generation of + their latest change in order from the oldest change to the + newest. + + :param docs_by_generation: A list of [(Document, generation, + transaction_id)] tuples indicating documents which should be + updated on this replica paired with the generation and transaction + id of their latest change. + :param source_replica_uid: The source replica's identifier + :param last_known_generation: The last generation that the source + replica knows about this target replica + :param last_known_trans_id: The last transaction id that the source + replica knows about this target replica + :param: return_doc_cb(doc, gen): is a callback + used to return documents to the source replica, it will + be invoked in turn with Documents that have changed since + last_known_generation together with the generation of + their last change. + :param: ensure_callback(replica_uid): if set the target may create + the target db if not yet existent, the callback can then + be used to inform of the created db replica uid. + :return: new_generation - After applying docs_by_generation, this is + the current generation for this replica + """ + raise NotImplementedError(self.sync_exchange) + + def _set_trace_hook(self, cb): + """Set a callback that will be invoked to trace database actions. + + The callback will be passed a string indicating the current state, and + the sync target object. Implementations do not have to implement this + api, it is used by the test suite. + + :param cb: A callable that takes cb(state) + """ + raise NotImplementedError(self._set_trace_hook) + + def _set_trace_hook_shallow(self, cb): + """Set a callback that will be invoked to trace database actions. + + Similar to _set_trace_hook, for implementations that don't offer + state changes from the inner working of sync_exchange(). + + :param cb: A callable that takes cb(state) + """ + self._set_trace_hook(cb) diff --git a/common/src/leap/soledad/common/l2db/backends/__init__.py b/common/src/leap/soledad/common/l2db/backends/__init__.py new file mode 100644 index 00000000..922daafd --- /dev/null +++ b/common/src/leap/soledad/common/l2db/backends/__init__.py @@ -0,0 +1,207 @@ +# Copyright 2011 Canonical Ltd. +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. 
+ +"""Abstract classes and common implementations for the backends.""" + +import re +try: + import simplejson as json +except ImportError: + import json # noqa +import uuid + +from leap.soledad.common import l2db +from leap.soledad.common.l2db import sync as l2db_sync +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db.vectorclock import VectorClockRev + + +check_doc_id_re = re.compile("^" + l2db.DOC_ID_CONSTRAINTS + "$", re.UNICODE) + + +class CommonSyncTarget(l2db_sync.LocalSyncTarget): + pass + + +class CommonBackend(l2db.Database): + + document_size_limit = 0 + + def _allocate_doc_id(self): + """Generate a unique identifier for this document.""" + return 'D-' + uuid.uuid4().hex # 'D-' stands for document + + def _allocate_transaction_id(self): + return 'T-' + uuid.uuid4().hex # 'T-' stands for transaction + + def _allocate_doc_rev(self, old_doc_rev): + vcr = VectorClockRev(old_doc_rev) + vcr.increment(self._replica_uid) + return vcr.as_str() + + def _check_doc_id(self, doc_id): + if not check_doc_id_re.match(doc_id): + raise errors.InvalidDocId() + + def _check_doc_size(self, doc): + if not self.document_size_limit: + return + if doc.get_size() > self.document_size_limit: + raise errors.DocumentTooBig + + def _get_generation(self): + """Return the current generation. + + """ + raise NotImplementedError(self._get_generation) + + def _get_generation_info(self): + """Return the current generation and transaction id. + + """ + raise NotImplementedError(self._get_generation_info) + + def _get_doc(self, doc_id, check_for_conflicts=False): + """Extract the document from storage. + + This can return None if the document doesn't exist. + """ + raise NotImplementedError(self._get_doc) + + def _has_conflicts(self, doc_id): + """Return True if the doc has conflicts, False otherwise.""" + raise NotImplementedError(self._has_conflicts) + + def create_doc(self, content, doc_id=None): + if not isinstance(content, dict): + raise errors.InvalidContent + json_string = json.dumps(content) + return self.create_doc_from_json(json_string, doc_id) + + def create_doc_from_json(self, json, doc_id=None): + if doc_id is None: + doc_id = self._allocate_doc_id() + doc = self._factory(doc_id, None, json) + self.put_doc(doc) + return doc + + def _get_transaction_log(self): + """This is only for the test suite, it is not part of the api.""" + raise NotImplementedError(self._get_transaction_log) + + def _put_and_update_indexes(self, doc_id, old_doc, new_rev, content): + raise NotImplementedError(self._put_and_update_indexes) + + def get_docs(self, doc_ids, check_for_conflicts=True, + include_deleted=False): + for doc_id in doc_ids: + doc = self._get_doc( + doc_id, check_for_conflicts=check_for_conflicts) + if doc.is_tombstone() and not include_deleted: + continue + yield doc + + def _get_trans_id_for_gen(self, generation): + """Get the transaction id corresponding to a particular generation. + + Raises an InvalidGeneration when the generation does not exist. + + """ + raise NotImplementedError(self._get_trans_id_for_gen) + + def validate_gen_and_trans_id(self, generation, trans_id): + """Validate the generation and transaction id. + + Raises an InvalidGeneration when the generation does not exist, and an + InvalidTransactionId when it does but with a different transaction id. 
+ + """ + if generation == 0: + return + known_trans_id = self._get_trans_id_for_gen(generation) + if known_trans_id != trans_id: + raise errors.InvalidTransactionId + + def _validate_source(self, other_replica_uid, other_generation, + other_transaction_id): + """Validate the new generation and transaction id. + + other_generation must be greater than what we have stored for this + replica, *or* it must be the same and the transaction_id must be the + same as well. + """ + (old_generation, + old_transaction_id) = self._get_replica_gen_and_trans_id( + other_replica_uid) + if other_generation < old_generation: + raise errors.InvalidGeneration + if other_generation > old_generation: + return + if other_transaction_id == old_transaction_id: + return + raise errors.InvalidTransactionId + + def _put_doc_if_newer(self, doc, save_conflict, replica_uid, replica_gen, + replica_trans_id=''): + cur_doc = self._get_doc(doc.doc_id) + doc_vcr = VectorClockRev(doc.rev) + if cur_doc is None: + cur_vcr = VectorClockRev(None) + else: + cur_vcr = VectorClockRev(cur_doc.rev) + self._validate_source(replica_uid, replica_gen, replica_trans_id) + if doc_vcr.is_newer(cur_vcr): + rev = doc.rev + self._prune_conflicts(doc, doc_vcr) + if doc.rev != rev: + # conflicts have been autoresolved + state = 'superseded' + else: + state = 'inserted' + self._put_and_update_indexes(cur_doc, doc) + elif doc.rev == cur_doc.rev: + # magical convergence + state = 'converged' + elif cur_vcr.is_newer(doc_vcr): + # Don't add this to seen_ids, because we have something newer, + # so we should send it back, and we should not generate a + # conflict + state = 'superseded' + elif cur_doc.same_content_as(doc): + # the documents have been edited to the same thing at both ends + doc_vcr.maximize(cur_vcr) + doc_vcr.increment(self._replica_uid) + doc.rev = doc_vcr.as_str() + self._put_and_update_indexes(cur_doc, doc) + state = 'superseded' + else: + state = 'conflicted' + if save_conflict: + self._force_doc_sync_conflict(doc) + if replica_uid is not None and replica_gen is not None: + self._do_set_replica_gen_and_trans_id( + replica_uid, replica_gen, replica_trans_id) + return state, self._get_generation() + + def _ensure_maximal_rev(self, cur_rev, extra_revs): + vcr = VectorClockRev(cur_rev) + for rev in extra_revs: + vcr.maximize(VectorClockRev(rev)) + vcr.increment(self._replica_uid) + return vcr.as_str() + + def set_document_size_limit(self, limit): + self.document_size_limit = limit diff --git a/common/src/leap/soledad/common/l2db/backends/dbschema.sql b/common/src/leap/soledad/common/l2db/backends/dbschema.sql new file mode 100644 index 00000000..ae027fc5 --- /dev/null +++ b/common/src/leap/soledad/common/l2db/backends/dbschema.sql @@ -0,0 +1,42 @@ +-- Database schema +CREATE TABLE transaction_log ( + generation INTEGER PRIMARY KEY AUTOINCREMENT, + doc_id TEXT NOT NULL, + transaction_id TEXT NOT NULL +); +CREATE TABLE document ( + doc_id TEXT PRIMARY KEY, + doc_rev TEXT NOT NULL, + content TEXT +); +CREATE TABLE document_fields ( + doc_id TEXT NOT NULL, + field_name TEXT NOT NULL, + value TEXT +); +CREATE INDEX document_fields_field_value_doc_idx + ON document_fields(field_name, value, doc_id); + +CREATE TABLE sync_log ( + replica_uid TEXT PRIMARY KEY, + known_generation INTEGER, + known_transaction_id TEXT +); +CREATE TABLE conflicts ( + doc_id TEXT, + doc_rev TEXT, + content TEXT, + CONSTRAINT conflicts_pkey PRIMARY KEY (doc_id, doc_rev) +); +CREATE TABLE index_definitions ( + name TEXT, + offset INT, + field TEXT, + CONSTRAINT 
index_definitions_pkey PRIMARY KEY (name, offset) +); +create index index_definitions_field on index_definitions(field); +CREATE TABLE u1db_config ( + name TEXT PRIMARY KEY, + value TEXT +); +INSERT INTO u1db_config VALUES ('sql_schema', '0'); diff --git a/common/src/leap/soledad/common/l2db/backends/inmemory.py b/common/src/leap/soledad/common/l2db/backends/inmemory.py new file mode 100644 index 00000000..06a934a6 --- /dev/null +++ b/common/src/leap/soledad/common/l2db/backends/inmemory.py @@ -0,0 +1,469 @@ +# Copyright 2011 Canonical Ltd. +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. + +"""The in-memory Database class for U1DB.""" + +try: + import simplejson as json +except ImportError: + import json # noqa + +from leap.soledad.common.l2db import ( + Document, errors, + query_parser, vectorclock) +from leap.soledad.common.l2db.backends import CommonBackend, CommonSyncTarget + + +def get_prefix(value): + key_prefix = '\x01'.join(value) + return key_prefix.rstrip('*') + + +class InMemoryDatabase(CommonBackend): + """A database that only stores the data internally.""" + + def __init__(self, replica_uid, document_factory=None): + self._transaction_log = [] + self._docs = {} + # Map from doc_id => [(doc_rev, doc)] conflicts beyond 'winner' + self._conflicts = {} + self._other_generations = {} + self._indexes = {} + self._replica_uid = replica_uid + self._factory = document_factory or Document + + def _set_replica_uid(self, replica_uid): + """Force the replica_uid to be set.""" + self._replica_uid = replica_uid + + def set_document_factory(self, factory): + self._factory = factory + + def close(self): + # This is a no-op, We don't want to free the data because one client + # may be closing it, while another wants to inspect the results. + pass + + def _get_replica_gen_and_trans_id(self, other_replica_uid): + return self._other_generations.get(other_replica_uid, (0, '')) + + def _set_replica_gen_and_trans_id(self, other_replica_uid, + other_generation, other_transaction_id): + self._do_set_replica_gen_and_trans_id( + other_replica_uid, other_generation, other_transaction_id) + + def _do_set_replica_gen_and_trans_id(self, other_replica_uid, + other_generation, + other_transaction_id): + # TODO: to handle race conditions, we may want to check if the current + # value is greater than this new value. + self._other_generations[other_replica_uid] = (other_generation, + other_transaction_id) + + def get_sync_target(self): + return InMemorySyncTarget(self) + + def _get_transaction_log(self): + # snapshot! 
+ return self._transaction_log[:] + + def _get_generation(self): + return len(self._transaction_log) + + def _get_generation_info(self): + if not self._transaction_log: + return 0, '' + return len(self._transaction_log), self._transaction_log[-1][1] + + def _get_trans_id_for_gen(self, generation): + if generation == 0: + return '' + if generation > len(self._transaction_log): + raise errors.InvalidGeneration + return self._transaction_log[generation - 1][1] + + def put_doc(self, doc): + if doc.doc_id is None: + raise errors.InvalidDocId() + self._check_doc_id(doc.doc_id) + self._check_doc_size(doc) + old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) + if old_doc and old_doc.has_conflicts: + raise errors.ConflictedDoc() + if old_doc and doc.rev is None and old_doc.is_tombstone(): + new_rev = self._allocate_doc_rev(old_doc.rev) + else: + if old_doc is not None: + if old_doc.rev != doc.rev: + raise errors.RevisionConflict() + else: + if doc.rev is not None: + raise errors.RevisionConflict() + new_rev = self._allocate_doc_rev(doc.rev) + doc.rev = new_rev + self._put_and_update_indexes(old_doc, doc) + return new_rev + + def _put_and_update_indexes(self, old_doc, doc): + for index in self._indexes.itervalues(): + if old_doc is not None and not old_doc.is_tombstone(): + index.remove_json(old_doc.doc_id, old_doc.get_json()) + if not doc.is_tombstone(): + index.add_json(doc.doc_id, doc.get_json()) + trans_id = self._allocate_transaction_id() + self._docs[doc.doc_id] = (doc.rev, doc.get_json()) + self._transaction_log.append((doc.doc_id, trans_id)) + + def _get_doc(self, doc_id, check_for_conflicts=False): + try: + doc_rev, content = self._docs[doc_id] + except KeyError: + return None + doc = self._factory(doc_id, doc_rev, content) + if check_for_conflicts: + doc.has_conflicts = (doc.doc_id in self._conflicts) + return doc + + def _has_conflicts(self, doc_id): + return doc_id in self._conflicts + + def get_doc(self, doc_id, include_deleted=False): + doc = self._get_doc(doc_id, check_for_conflicts=True) + if doc is None: + return None + if doc.is_tombstone() and not include_deleted: + return None + return doc + + def get_all_docs(self, include_deleted=False): + """Return all documents in the database.""" + generation = self._get_generation() + results = [] + for doc_id, (doc_rev, content) in self._docs.items(): + if content is None and not include_deleted: + continue + doc = self._factory(doc_id, doc_rev, content) + doc.has_conflicts = self._has_conflicts(doc_id) + results.append(doc) + return (generation, results) + + def get_doc_conflicts(self, doc_id): + if doc_id not in self._conflicts: + return [] + result = [self._get_doc(doc_id)] + result[0].has_conflicts = True + result.extend([self._factory(doc_id, rev, content) + for rev, content in self._conflicts[doc_id]]) + return result + + def _replace_conflicts(self, doc, conflicts): + if not conflicts: + del self._conflicts[doc.doc_id] + else: + self._conflicts[doc.doc_id] = conflicts + doc.has_conflicts = bool(conflicts) + + def _prune_conflicts(self, doc, doc_vcr): + if self._has_conflicts(doc.doc_id): + autoresolved = False + remaining_conflicts = [] + cur_conflicts = self._conflicts[doc.doc_id] + for c_rev, c_doc in cur_conflicts: + c_vcr = vectorclock.VectorClockRev(c_rev) + if doc_vcr.is_newer(c_vcr): + continue + if doc.same_content_as(Document(doc.doc_id, c_rev, c_doc)): + doc_vcr.maximize(c_vcr) + autoresolved = True + continue + remaining_conflicts.append((c_rev, c_doc)) + if autoresolved: + doc_vcr.increment(self._replica_uid) 
+ doc.rev = doc_vcr.as_str() + self._replace_conflicts(doc, remaining_conflicts) + + def resolve_doc(self, doc, conflicted_doc_revs): + cur_doc = self._get_doc(doc.doc_id) + if cur_doc is None: + cur_rev = None + else: + cur_rev = cur_doc.rev + new_rev = self._ensure_maximal_rev(cur_rev, conflicted_doc_revs) + superseded_revs = set(conflicted_doc_revs) + remaining_conflicts = [] + cur_conflicts = self._conflicts[doc.doc_id] + for c_rev, c_doc in cur_conflicts: + if c_rev in superseded_revs: + continue + remaining_conflicts.append((c_rev, c_doc)) + doc.rev = new_rev + if cur_rev in superseded_revs: + self._put_and_update_indexes(cur_doc, doc) + else: + remaining_conflicts.append((new_rev, doc.get_json())) + self._replace_conflicts(doc, remaining_conflicts) + + def delete_doc(self, doc): + if doc.doc_id not in self._docs: + raise errors.DocumentDoesNotExist + if self._docs[doc.doc_id][1] in ('null', None): + raise errors.DocumentAlreadyDeleted + doc.make_tombstone() + self.put_doc(doc) + + def create_index(self, index_name, *index_expressions): + if index_name in self._indexes: + if self._indexes[index_name]._definition == list( + index_expressions): + return + raise errors.IndexNameTakenError + index = InMemoryIndex(index_name, list(index_expressions)) + for doc_id, (doc_rev, doc) in self._docs.iteritems(): + if doc is not None: + index.add_json(doc_id, doc) + self._indexes[index_name] = index + + def delete_index(self, index_name): + try: + del self._indexes[index_name] + except KeyError: + pass + + def list_indexes(self): + definitions = [] + for idx in self._indexes.itervalues(): + definitions.append((idx._name, idx._definition)) + return definitions + + def get_from_index(self, index_name, *key_values): + try: + index = self._indexes[index_name] + except KeyError: + raise errors.IndexDoesNotExist + doc_ids = index.lookup(key_values) + result = [] + for doc_id in doc_ids: + result.append(self._get_doc(doc_id, check_for_conflicts=True)) + return result + + def get_range_from_index(self, index_name, start_value=None, + end_value=None): + """Return all documents with key values in the specified range.""" + try: + index = self._indexes[index_name] + except KeyError: + raise errors.IndexDoesNotExist + if isinstance(start_value, basestring): + start_value = (start_value,) + if isinstance(end_value, basestring): + end_value = (end_value,) + doc_ids = index.lookup_range(start_value, end_value) + result = [] + for doc_id in doc_ids: + result.append(self._get_doc(doc_id, check_for_conflicts=True)) + return result + + def get_index_keys(self, index_name): + try: + index = self._indexes[index_name] + except KeyError: + raise errors.IndexDoesNotExist + keys = index.keys() + # XXX inefficiency warning + return list(set([tuple(key.split('\x01')) for key in keys])) + + def whats_changed(self, old_generation=0): + changes = [] + relevant_tail = self._transaction_log[old_generation:] + # We don't use len(self._transaction_log) because _transaction_log may + # get mutated by a concurrent operation. 
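+        # The generation is derived from the same tail snapshot so that
+        # the count and the ids below cannot drift apart mid-computation.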
+ cur_generation = old_generation + len(relevant_tail) + last_trans_id = '' + if relevant_tail: + last_trans_id = relevant_tail[-1][1] + elif self._transaction_log: + last_trans_id = self._transaction_log[-1][1] + seen = set() + generation = cur_generation + for doc_id, trans_id in reversed(relevant_tail): + if doc_id not in seen: + changes.append((doc_id, generation, trans_id)) + seen.add(doc_id) + generation -= 1 + changes.reverse() + return (cur_generation, last_trans_id, changes) + + def _force_doc_sync_conflict(self, doc): + my_doc = self._get_doc(doc.doc_id) + self._prune_conflicts(doc, vectorclock.VectorClockRev(doc.rev)) + self._conflicts.setdefault(doc.doc_id, []).append( + (my_doc.rev, my_doc.get_json())) + doc.has_conflicts = True + self._put_and_update_indexes(my_doc, doc) + + +class InMemoryIndex(object): + """Interface for managing an Index.""" + + def __init__(self, index_name, index_definition): + self._name = index_name + self._definition = index_definition + self._values = {} + parser = query_parser.Parser() + self._getters = parser.parse_all(self._definition) + + def evaluate_json(self, doc): + """Determine the 'key' after applying this index to the doc.""" + raw = json.loads(doc) + return self.evaluate(raw) + + def evaluate(self, obj): + """Evaluate a dict object, applying this definition.""" + all_rows = [[]] + for getter in self._getters: + new_rows = [] + keys = getter.get(obj) + if not keys: + return [] + for key in keys: + new_rows.extend([row + [key] for row in all_rows]) + all_rows = new_rows + all_rows = ['\x01'.join(row) for row in all_rows] + return all_rows + + def add_json(self, doc_id, doc): + """Add this json doc to the index.""" + keys = self.evaluate_json(doc) + if not keys: + return + for key in keys: + self._values.setdefault(key, []).append(doc_id) + + def remove_json(self, doc_id, doc): + """Remove this json doc from the index.""" + keys = self.evaluate_json(doc) + if keys: + for key in keys: + doc_ids = self._values[key] + doc_ids.remove(doc_id) + if not doc_ids: + del self._values[key] + + def _find_non_wildcards(self, values): + """Check if this should be a wildcard match. + + Further, this will raise an exception if the syntax is improperly + defined. + + :return: The offset of the last value we need to match against. + """ + if len(values) != len(self._definition): + raise errors.InvalidValueForIndex() + is_wildcard = False + last = 0 + for idx, val in enumerate(values): + if val.endswith('*'): + if val != '*': + # We have an 'x*' style wildcard + if is_wildcard: + # We were already in wildcard mode, so this is invalid + raise errors.InvalidGlobbing + last = idx + 1 + is_wildcard = True + else: + if is_wildcard: + # We were in wildcard mode, we can't follow that with + # non-wildcard + raise errors.InvalidGlobbing + last = idx + 1 + if not is_wildcard: + return -1 + return last + + def lookup(self, values): + """Find docs that match the values.""" + last = self._find_non_wildcards(values) + if last == -1: + return self._lookup_exact(values) + else: + return self._lookup_prefix(values[:last]) + + def lookup_range(self, start_values, end_values): + """Find docs within the range.""" + # TODO: Wildly inefficient, which is unlikely to be a problem for the + # inmemory implementation. 
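+        # Normalize both bounds to key prefixes, then walk all keys in
+        # sorted order; `exact` records whether the end bound is a plain
+        # value (stop once we pass it) or a glob (keep matching by prefix).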
+ if start_values: + self._find_non_wildcards(start_values) + start_values = get_prefix(start_values) + if end_values: + if self._find_non_wildcards(end_values) == -1: + exact = True + else: + exact = False + end_values = get_prefix(end_values) + found = [] + for key, doc_ids in sorted(self._values.iteritems()): + if start_values and start_values > key: + continue + if end_values and end_values < key: + if exact: + break + else: + if not key.startswith(end_values): + break + found.extend(doc_ids) + return found + + def keys(self): + """Find the indexed keys.""" + return self._values.keys() + + def _lookup_prefix(self, value): + """Find docs that match the prefix string in values.""" + # TODO: We need a different data structure to make prefix style fast, + # some sort of sorted list would work, but a plain dict doesn't. + key_prefix = get_prefix(value) + all_doc_ids = [] + for key, doc_ids in sorted(self._values.iteritems()): + if key.startswith(key_prefix): + all_doc_ids.extend(doc_ids) + return all_doc_ids + + def _lookup_exact(self, value): + """Find docs that match exactly.""" + key = '\x01'.join(value) + if key in self._values: + return self._values[key] + return () + + +class InMemorySyncTarget(CommonSyncTarget): + + def get_sync_info(self, source_replica_uid): + source_gen, source_trans_id = self._db._get_replica_gen_and_trans_id( + source_replica_uid) + my_gen, my_trans_id = self._db._get_generation_info() + return ( + self._db._replica_uid, my_gen, my_trans_id, source_gen, + source_trans_id) + + def record_sync_info(self, source_replica_uid, source_replica_generation, + source_transaction_id): + if self._trace_hook: + self._trace_hook('record_sync_info') + self._db._set_replica_gen_and_trans_id( + source_replica_uid, source_replica_generation, + source_transaction_id) diff --git a/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py b/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py new file mode 100644 index 00000000..ba273039 --- /dev/null +++ b/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py @@ -0,0 +1,930 @@ +# Copyright 2011 Canonical Ltd. +# Copyright 2016 LEAP Encryption Access Project +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. + +""" +A L2DB implementation that uses SQLite as its persistence layer. 
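+
+Documents are stored as JSON text in a `document` table; the fields
+referenced by created indexes are additionally expanded into a
+`document_fields` table, so index lookups can be answered with plain SQL
+joins (see SQLitePartialExpandDatabase below).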
+""" + +import errno +import os +try: + import simplejson as json +except ImportError: + import json # noqa +from sqlite3 import dbapi2 +import sys +import time +import uuid + +import pkg_resources + +from leap.soledad.common.l2db.backends import CommonBackend, CommonSyncTarget +from leap.soledad.common.l2db import ( + Document, errors, + query_parser, vectorclock) + + +class SQLiteDatabase(CommonBackend): + """A U1DB implementation that uses SQLite as its persistence layer.""" + + _sqlite_registry = {} + + def __init__(self, sqlite_file, document_factory=None): + """Create a new sqlite file.""" + self._db_handle = dbapi2.connect(sqlite_file) + self._real_replica_uid = None + self._ensure_schema() + self._factory = document_factory or Document + + def set_document_factory(self, factory): + self._factory = factory + + def get_sync_target(self): + return SQLiteSyncTarget(self) + + @classmethod + def _which_index_storage(cls, c): + try: + c.execute("SELECT value FROM u1db_config" + " WHERE name = 'index_storage'") + except dbapi2.OperationalError, e: + # The table does not exist yet + return None, e + else: + return c.fetchone()[0], None + + WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL = 0.5 + + @classmethod + def _open_database(cls, sqlite_file, document_factory=None): + if not os.path.isfile(sqlite_file): + raise errors.DatabaseDoesNotExist() + tries = 2 + while True: + # Note: There seems to be a bug in sqlite 3.5.9 (with python2.6) + # where without re-opening the database on Windows, it + # doesn't see the transaction that was just committed + db_handle = dbapi2.connect(sqlite_file) + c = db_handle.cursor() + v, err = cls._which_index_storage(c) + db_handle.close() + if v is not None: + break + # possibly another process is initializing it, wait for it to be + # done + if tries == 0: + raise err # go for the richest error? + tries -= 1 + time.sleep(cls.WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL) + return SQLiteDatabase._sqlite_registry[v]( + sqlite_file, document_factory=document_factory) + + @classmethod + def open_database(cls, sqlite_file, create, backend_cls=None, + document_factory=None): + try: + return cls._open_database( + sqlite_file, document_factory=document_factory) + except errors.DatabaseDoesNotExist: + if not create: + raise + if backend_cls is None: + # default is SQLitePartialExpandDatabase + backend_cls = SQLitePartialExpandDatabase + return backend_cls(sqlite_file, document_factory=document_factory) + + @staticmethod + def delete_database(sqlite_file): + try: + os.unlink(sqlite_file) + except OSError as ex: + if ex.errno == errno.ENOENT: + raise errors.DatabaseDoesNotExist() + raise + + @staticmethod + def register_implementation(klass): + """Register that we implement an SQLiteDatabase. + + The attribute _index_storage_value will be used as the lookup key. + """ + SQLiteDatabase._sqlite_registry[klass._index_storage_value] = klass + + def _get_sqlite_handle(self): + """Get access to the underlying sqlite database. + + This should only be used by the test suite, etc, for examining the + state of the underlying database. 
+ """ + return self._db_handle + + def _close_sqlite_handle(self): + """Release access to the underlying sqlite database.""" + self._db_handle.close() + + def close(self): + self._close_sqlite_handle() + + def _is_initialized(self, c): + """Check if this database has been initialized.""" + c.execute("PRAGMA case_sensitive_like=ON") + try: + c.execute("SELECT value FROM u1db_config" + " WHERE name = 'sql_schema'") + except dbapi2.OperationalError: + # The table does not exist yet + val = None + else: + val = c.fetchone() + if val is not None: + return True + return False + + def _initialize(self, c): + """Create the schema in the database.""" + # read the script with sql commands + # TODO: Change how we set up the dependency. Most likely use something + # like lp:dirspec to grab the file from a common resource + # directory. Doesn't specifically need to be handled until we get + # to the point of packaging this. + schema_content = pkg_resources.resource_string( + __name__, 'dbschema.sql') + # Note: We'd like to use c.executescript() here, but it seems that + # executescript always commits, even if you set + # isolation_level = None, so if we want to properly handle + # exclusive locking and rollbacks between processes, we need + # to execute it line-by-line + for line in schema_content.split(';'): + if not line: + continue + c.execute(line) + # add extra fields + self._extra_schema_init(c) + # A unique identifier should be set for this replica. Implementations + # don't have to strictly use uuid here, but we do want the uid to be + # unique amongst all databases that will sync with each other. + # We might extend this to using something with hostname for easier + # debugging. + self._set_replica_uid_in_transaction(uuid.uuid4().hex) + c.execute("INSERT INTO u1db_config VALUES" " ('index_storage', ?)", + (self._index_storage_value,)) + + def _ensure_schema(self): + """Ensure that the database schema has been created.""" + old_isolation_level = self._db_handle.isolation_level + c = self._db_handle.cursor() + if self._is_initialized(c): + return + try: + # autocommit/own mgmt of transactions + self._db_handle.isolation_level = None + with self._db_handle: + # only one execution path should initialize the db + c.execute("begin exclusive") + if self._is_initialized(c): + return + self._initialize(c) + finally: + self._db_handle.isolation_level = old_isolation_level + + def _extra_schema_init(self, c): + """Add any extra fields, etc to the basic table definitions.""" + + def _parse_index_definition(self, index_field): + """Parse a field definition for an index, returning a Getter.""" + # Note: We may want to keep a Parser object around, and cache the + # Getter objects for a greater length of time. Specifically, if + # you create a bunch of indexes, and then insert 50k docs, you'll + # re-parse the indexes between puts. The time to insert the docs + # is still likely to dominate put_doc time, though. + parser = query_parser.Parser() + getter = parser.parse(index_field) + return getter + + def _update_indexes(self, doc_id, raw_doc, getters, db_cursor): + """Update document_fields for a single document. + + :param doc_id: Identifier for this document + :param raw_doc: The python dict representation of the document. + :param getters: A list of [(field_name, Getter)]. Getter.get will be + called to evaluate the index definition for this document, and the + results will be inserted into the db. + :param db_cursor: An sqlite Cursor. 
+ :return: None + """ + values = [] + for field_name, getter in getters: + for idx_value in getter.get(raw_doc): + values.append((doc_id, field_name, idx_value)) + if values: + db_cursor.executemany( + "INSERT INTO document_fields VALUES (?, ?, ?)", values) + + def _set_replica_uid(self, replica_uid): + """Force the replica_uid to be set.""" + with self._db_handle: + self._set_replica_uid_in_transaction(replica_uid) + + def _set_replica_uid_in_transaction(self, replica_uid): + """Set the replica_uid. A transaction should already be held.""" + c = self._db_handle.cursor() + c.execute("INSERT OR REPLACE INTO u1db_config" + " VALUES ('replica_uid', ?)", + (replica_uid,)) + self._real_replica_uid = replica_uid + + def _get_replica_uid(self): + if self._real_replica_uid is not None: + return self._real_replica_uid + c = self._db_handle.cursor() + c.execute("SELECT value FROM u1db_config WHERE name = 'replica_uid'") + val = c.fetchone() + if val is None: + return None + self._real_replica_uid = val[0] + return self._real_replica_uid + + _replica_uid = property(_get_replica_uid) + + def _get_generation(self): + c = self._db_handle.cursor() + c.execute('SELECT max(generation) FROM transaction_log') + val = c.fetchone()[0] + if val is None: + return 0 + return val + + def _get_generation_info(self): + c = self._db_handle.cursor() + c.execute( + 'SELECT max(generation), transaction_id FROM transaction_log ') + val = c.fetchone() + if val[0] is None: + return(0, '') + return val + + def _get_trans_id_for_gen(self, generation): + if generation == 0: + return '' + c = self._db_handle.cursor() + c.execute( + 'SELECT transaction_id FROM transaction_log WHERE generation = ?', + (generation,)) + val = c.fetchone() + if val is None: + raise errors.InvalidGeneration + return val[0] + + def _get_transaction_log(self): + c = self._db_handle.cursor() + c.execute("SELECT doc_id, transaction_id FROM transaction_log" + " ORDER BY generation") + return c.fetchall() + + def _get_doc(self, doc_id, check_for_conflicts=False): + """Get just the document content, without fancy handling.""" + c = self._db_handle.cursor() + if check_for_conflicts: + c.execute( + "SELECT document.doc_rev, document.content, " + "count(conflicts.doc_rev) FROM document LEFT OUTER JOIN " + "conflicts ON conflicts.doc_id = document.doc_id WHERE " + "document.doc_id = ? GROUP BY document.doc_id, " + "document.doc_rev, document.content;", (doc_id,)) + else: + c.execute( + "SELECT doc_rev, content, 0 FROM document WHERE doc_id = ?", + (doc_id,)) + val = c.fetchone() + if val is None: + return None + doc_rev, content, conflicts = val + doc = self._factory(doc_id, doc_rev, content) + doc.has_conflicts = conflicts > 0 + return doc + + def _has_conflicts(self, doc_id): + c = self._db_handle.cursor() + c.execute("SELECT 1 FROM conflicts WHERE doc_id = ? 
LIMIT 1", + (doc_id,)) + val = c.fetchone() + if val is None: + return False + else: + return True + + def get_doc(self, doc_id, include_deleted=False): + doc = self._get_doc(doc_id, check_for_conflicts=True) + if doc is None: + return None + if doc.is_tombstone() and not include_deleted: + return None + return doc + + def get_all_docs(self, include_deleted=False): + """Get all documents from the database.""" + generation = self._get_generation() + results = [] + c = self._db_handle.cursor() + c.execute( + "SELECT document.doc_id, document.doc_rev, document.content, " + "count(conflicts.doc_rev) FROM document LEFT OUTER JOIN conflicts " + "ON conflicts.doc_id = document.doc_id GROUP BY document.doc_id, " + "document.doc_rev, document.content;") + rows = c.fetchall() + for doc_id, doc_rev, content, conflicts in rows: + if content is None and not include_deleted: + continue + doc = self._factory(doc_id, doc_rev, content) + doc.has_conflicts = conflicts > 0 + results.append(doc) + return (generation, results) + + def put_doc(self, doc): + if doc.doc_id is None: + raise errors.InvalidDocId() + self._check_doc_id(doc.doc_id) + self._check_doc_size(doc) + with self._db_handle: + old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) + if old_doc and old_doc.has_conflicts: + raise errors.ConflictedDoc() + if old_doc and doc.rev is None and old_doc.is_tombstone(): + new_rev = self._allocate_doc_rev(old_doc.rev) + else: + if old_doc is not None: + if old_doc.rev != doc.rev: + raise errors.RevisionConflict() + else: + if doc.rev is not None: + raise errors.RevisionConflict() + new_rev = self._allocate_doc_rev(doc.rev) + doc.rev = new_rev + self._put_and_update_indexes(old_doc, doc) + return new_rev + + def _expand_to_fields(self, doc_id, base_field, raw_doc, save_none): + """Convert a dict representation into named fields. + + So something like: {'key1': 'val1', 'key2': 'val2'} + gets converted into: [(doc_id, 'key1', 'val1', 0) + (doc_id, 'key2', 'val2', 0)] + :param doc_id: Just added to every record. + :param base_field: if set, these are nested keys, so each field should + be appropriately prefixed. + :param raw_doc: The python dictionary. + """ + # TODO: Handle lists + values = [] + for field_name, value in raw_doc.iteritems(): + if value is None and not save_none: + continue + if base_field: + full_name = base_field + '.' + field_name + else: + full_name = field_name + if value is None or isinstance(value, (int, float, basestring)): + values.append((doc_id, full_name, value, len(values))) + else: + subvalues = self._expand_to_fields(doc_id, full_name, value, + save_none) + for _, subfield_name, val, _ in subvalues: + values.append((doc_id, subfield_name, val, len(values))) + return values + + def _put_and_update_indexes(self, old_doc, doc): + """Actually insert a document into the database. + + This both updates the existing documents content, and any indexes that + refer to this document. + """ + raise NotImplementedError(self._put_and_update_indexes) + + def whats_changed(self, old_generation=0): + c = self._db_handle.cursor() + c.execute("SELECT generation, doc_id, transaction_id" + " FROM transaction_log" + " WHERE generation > ? 
ORDER BY generation DESC", + (old_generation,)) + results = c.fetchall() + cur_gen = old_generation + seen = set() + changes = [] + newest_trans_id = '' + for generation, doc_id, trans_id in results: + if doc_id not in seen: + changes.append((doc_id, generation, trans_id)) + seen.add(doc_id) + if changes: + cur_gen = changes[0][1] # max generation + newest_trans_id = changes[0][2] + changes.reverse() + else: + c.execute("SELECT generation, transaction_id" + " FROM transaction_log ORDER BY generation DESC LIMIT 1") + results = c.fetchone() + if not results: + cur_gen = 0 + newest_trans_id = '' + else: + cur_gen, newest_trans_id = results + + return cur_gen, newest_trans_id, changes + + def delete_doc(self, doc): + with self._db_handle: + old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) + if old_doc is None: + raise errors.DocumentDoesNotExist + if old_doc.rev != doc.rev: + raise errors.RevisionConflict() + if old_doc.is_tombstone(): + raise errors.DocumentAlreadyDeleted + if old_doc.has_conflicts: + raise errors.ConflictedDoc() + new_rev = self._allocate_doc_rev(doc.rev) + doc.rev = new_rev + doc.make_tombstone() + self._put_and_update_indexes(old_doc, doc) + return new_rev + + def _get_conflicts(self, doc_id): + c = self._db_handle.cursor() + c.execute("SELECT doc_rev, content FROM conflicts WHERE doc_id = ?", + (doc_id,)) + return [self._factory(doc_id, doc_rev, content) + for doc_rev, content in c.fetchall()] + + def get_doc_conflicts(self, doc_id): + with self._db_handle: + conflict_docs = self._get_conflicts(doc_id) + if not conflict_docs: + return [] + this_doc = self._get_doc(doc_id) + this_doc.has_conflicts = True + return [this_doc] + conflict_docs + + def _get_replica_gen_and_trans_id(self, other_replica_uid): + c = self._db_handle.cursor() + c.execute("SELECT known_generation, known_transaction_id FROM sync_log" + " WHERE replica_uid = ?", + (other_replica_uid,)) + val = c.fetchone() + if val is None: + other_gen = 0 + trans_id = '' + else: + other_gen = val[0] + trans_id = val[1] + return other_gen, trans_id + + def _set_replica_gen_and_trans_id(self, other_replica_uid, + other_generation, other_transaction_id): + with self._db_handle: + self._do_set_replica_gen_and_trans_id( + other_replica_uid, other_generation, other_transaction_id) + + def _do_set_replica_gen_and_trans_id(self, other_replica_uid, + other_generation, + other_transaction_id): + c = self._db_handle.cursor() + c.execute("INSERT OR REPLACE INTO sync_log VALUES (?, ?, ?)", + (other_replica_uid, other_generation, + other_transaction_id)) + + def _put_doc_if_newer(self, doc, save_conflict, replica_uid=None, + replica_gen=None, replica_trans_id=None): + with self._db_handle: + return super(SQLiteDatabase, self)._put_doc_if_newer( + doc, + save_conflict=save_conflict, + replica_uid=replica_uid, replica_gen=replica_gen, + replica_trans_id=replica_trans_id) + + def _add_conflict(self, c, doc_id, my_doc_rev, my_content): + c.execute("INSERT INTO conflicts VALUES (?, ?, ?)", + (doc_id, my_doc_rev, my_content)) + + def _delete_conflicts(self, c, doc, conflict_revs): + deleting = [(doc.doc_id, c_rev) for c_rev in conflict_revs] + c.executemany("DELETE FROM conflicts" + " WHERE doc_id=? 
AND doc_rev=?", deleting) + doc.has_conflicts = self._has_conflicts(doc.doc_id) + + def _prune_conflicts(self, doc, doc_vcr): + if self._has_conflicts(doc.doc_id): + autoresolved = False + c_revs_to_prune = [] + for c_doc in self._get_conflicts(doc.doc_id): + c_vcr = vectorclock.VectorClockRev(c_doc.rev) + if doc_vcr.is_newer(c_vcr): + c_revs_to_prune.append(c_doc.rev) + elif doc.same_content_as(c_doc): + c_revs_to_prune.append(c_doc.rev) + doc_vcr.maximize(c_vcr) + autoresolved = True + if autoresolved: + doc_vcr.increment(self._replica_uid) + doc.rev = doc_vcr.as_str() + c = self._db_handle.cursor() + self._delete_conflicts(c, doc, c_revs_to_prune) + + def _force_doc_sync_conflict(self, doc): + my_doc = self._get_doc(doc.doc_id) + c = self._db_handle.cursor() + self._prune_conflicts(doc, vectorclock.VectorClockRev(doc.rev)) + self._add_conflict(c, doc.doc_id, my_doc.rev, my_doc.get_json()) + doc.has_conflicts = True + self._put_and_update_indexes(my_doc, doc) + + def resolve_doc(self, doc, conflicted_doc_revs): + with self._db_handle: + cur_doc = self._get_doc(doc.doc_id) + # TODO: https://bugs.launchpad.net/u1db/+bug/928274 + # I think we have a logic bug in resolve_doc + # Specifically, cur_doc.rev is always in the final vector + # clock of revisions that we supersede, even if it wasn't in + # conflicted_doc_revs. We still add it as a conflict, but the + # fact that _put_doc_if_newer propagates resolutions means I + # think that conflict could accidentally be resolved. We need + # to add a test for this case first. (create a rev, create a + # conflict, create another conflict, resolve the first rev + # and first conflict, then make sure that the resolved + # rev doesn't supersede the second conflict rev.) It *might* + # not matter, because the superseding rev is in as a + # conflict, but it does seem incorrect + new_rev = self._ensure_maximal_rev(cur_doc.rev, + conflicted_doc_revs) + superseded_revs = set(conflicted_doc_revs) + c = self._db_handle.cursor() + doc.rev = new_rev + if cur_doc.rev in superseded_revs: + self._put_and_update_indexes(cur_doc, doc) + else: + self._add_conflict(c, doc.doc_id, new_rev, doc.get_json()) + # TODO: Is there some way that we could construct a rev that would + # end up in superseded_revs, such that we add a conflict, and + # then immediately delete it? + self._delete_conflicts(c, doc, superseded_revs) + + def list_indexes(self): + """Return the list of indexes and their definitions.""" + c = self._db_handle.cursor() + # TODO: How do we test the ordering? + c.execute("SELECT name, field FROM index_definitions" + " ORDER BY name, offset") + definitions = [] + cur_name = None + for name, field in c.fetchall(): + if cur_name != name: + definitions.append((name, [])) + cur_name = name + definitions[-1][-1].append(field) + return definitions + + def _get_index_definition(self, index_name): + """Return the stored definition for a given index_name.""" + c = self._db_handle.cursor() + c.execute("SELECT field FROM index_definitions" + " WHERE name = ? ORDER BY offset", (index_name,)) + fields = [x[0] for x in c.fetchall()] + if not fields: + raise errors.IndexDoesNotExist + return fields + + @staticmethod + def _strip_glob(value): + """Remove the trailing * from a value.""" + assert value[-1] == '*' + return value[:-1] + + def _format_query(self, definition, key_values): + # First, build the definition. We join the document_fields table + # against itself, as many times as the 'width' of our definition. + # We then do a query for each key_value, one-at-a-time. 
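+        # For example, a two-field index queried with key values
+        # ('a', 'b*') produces roughly:
+        #   SELECT d.doc_id, d.doc_rev, d.content, count(c.doc_rev)
+        #   FROM document d, document_fields d0, document_fields d1
+        #   LEFT OUTER JOIN conflicts c ON c.doc_id = d.doc_id
+        #   WHERE d.doc_id = d0.doc_id AND d0.field_name = ?
+        #         AND d0.value = ?
+        #     AND d.doc_id = d1.doc_id AND d1.field_name = ?
+        #         AND d1.value GLOB ?
+        #   GROUP BY d.doc_id, d.doc_rev, d.content
+        #   ORDER BY d0.value, d1.value;
+        # with args (field0, 'a', field1, 'b*').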
+ # Note: All of these strings are static, we could cache them, etc. + tables = ["document_fields d%d" % i for i in range(len(definition))] + novalue_where = ["d.doc_id = d%d.doc_id" + " AND d%d.field_name = ?" + % (i, i) for i in range(len(definition))] + wildcard_where = [novalue_where[i] + + (" AND d%d.value NOT NULL" % (i,)) + for i in range(len(definition))] + exact_where = [novalue_where[i] + + (" AND d%d.value = ?" % (i,)) + for i in range(len(definition))] + like_where = [novalue_where[i] + + (" AND d%d.value GLOB ?" % (i,)) + for i in range(len(definition))] + is_wildcard = False + # Merge the lists together, so that: + # [field1, field2, field3], [val1, val2, val3] + # Becomes: + # (field1, val1, field2, val2, field3, val3) + args = [] + where = [] + for idx, (field, value) in enumerate(zip(definition, key_values)): + args.append(field) + if value.endswith('*'): + if value == '*': + where.append(wildcard_where[idx]) + else: + # This is a glob match + if is_wildcard: + # We can't have a partial wildcard following + # another wildcard + raise errors.InvalidGlobbing + where.append(like_where[idx]) + args.append(value) + is_wildcard = True + else: + if is_wildcard: + raise errors.InvalidGlobbing + where.append(exact_where[idx]) + args.append(value) + statement = ( + "SELECT d.doc_id, d.doc_rev, d.content, count(c.doc_rev) FROM " + "document d, %s LEFT OUTER JOIN conflicts c ON c.doc_id = " + "d.doc_id WHERE %s GROUP BY d.doc_id, d.doc_rev, d.content ORDER " + "BY %s;" % (', '.join(tables), ' AND '.join(where), ', '.join( + ['d%d.value' % i for i in range(len(definition))]))) + return statement, args + + def get_from_index(self, index_name, *key_values): + definition = self._get_index_definition(index_name) + if len(key_values) != len(definition): + raise errors.InvalidValueForIndex() + statement, args = self._format_query(definition, key_values) + c = self._db_handle.cursor() + try: + c.execute(statement, tuple(args)) + except dbapi2.OperationalError, e: + raise dbapi2.OperationalError( + str(e) + + '\nstatement: %s\nargs: %s\n' % (statement, args)) + res = c.fetchall() + results = [] + for row in res: + doc = self._factory(row[0], row[1], row[2]) + doc.has_conflicts = row[3] > 0 + results.append(doc) + return results + + def _format_range_query(self, definition, start_value, end_value): + tables = ["document_fields d%d" % i for i in range(len(definition))] + novalue_where = [ + "d.doc_id = d%d.doc_id AND d%d.field_name = ?" % (i, i) for i in + range(len(definition))] + wildcard_where = [ + novalue_where[i] + (" AND d%d.value NOT NULL" % (i,)) for i in + range(len(definition))] + like_where = [ + novalue_where[i] + ( + " AND (d%d.value < ? OR d%d.value GLOB ?)" % (i, i)) for i in + range(len(definition))] + range_where_lower = [ + novalue_where[i] + (" AND d%d.value >= ?" % (i,)) for i in + range(len(definition))] + range_where_upper = [ + novalue_where[i] + (" AND d%d.value <= ?" 
% (i,)) for i in + range(len(definition))] + args = [] + where = [] + if start_value: + if isinstance(start_value, basestring): + start_value = (start_value,) + if len(start_value) != len(definition): + raise errors.InvalidValueForIndex() + is_wildcard = False + for idx, (field, value) in enumerate(zip(definition, start_value)): + args.append(field) + if value.endswith('*'): + if value == '*': + where.append(wildcard_where[idx]) + else: + # This is a glob match + if is_wildcard: + # We can't have a partial wildcard following + # another wildcard + raise errors.InvalidGlobbing + where.append(range_where_lower[idx]) + args.append(self._strip_glob(value)) + is_wildcard = True + else: + if is_wildcard: + raise errors.InvalidGlobbing + where.append(range_where_lower[idx]) + args.append(value) + if end_value: + if isinstance(end_value, basestring): + end_value = (end_value,) + if len(end_value) != len(definition): + raise errors.InvalidValueForIndex() + is_wildcard = False + for idx, (field, value) in enumerate(zip(definition, end_value)): + args.append(field) + if value.endswith('*'): + if value == '*': + where.append(wildcard_where[idx]) + else: + # This is a glob match + if is_wildcard: + # We can't have a partial wildcard following + # another wildcard + raise errors.InvalidGlobbing + where.append(like_where[idx]) + args.append(self._strip_glob(value)) + args.append(value) + is_wildcard = True + else: + if is_wildcard: + raise errors.InvalidGlobbing + where.append(range_where_upper[idx]) + args.append(value) + statement = ( + "SELECT d.doc_id, d.doc_rev, d.content, count(c.doc_rev) FROM " + "document d, %s LEFT OUTER JOIN conflicts c ON c.doc_id = " + "d.doc_id WHERE %s GROUP BY d.doc_id, d.doc_rev, d.content ORDER " + "BY %s;" % (', '.join(tables), ' AND '.join(where), ', '.join( + ['d%d.value' % i for i in range(len(definition))]))) + return statement, args + + def get_range_from_index(self, index_name, start_value=None, + end_value=None): + """Return all documents with key values in the specified range.""" + definition = self._get_index_definition(index_name) + statement, args = self._format_range_query( + definition, start_value, end_value) + c = self._db_handle.cursor() + try: + c.execute(statement, tuple(args)) + except dbapi2.OperationalError, e: + raise dbapi2.OperationalError( + str(e) + + '\nstatement: %s\nargs: %s\n' % (statement, args)) + res = c.fetchall() + results = [] + for row in res: + doc = self._factory(row[0], row[1], row[2]) + doc.has_conflicts = row[3] > 0 + results.append(doc) + return results + + def get_index_keys(self, index_name): + c = self._db_handle.cursor() + definition = self._get_index_definition(index_name) + value_fields = ', '.join([ + 'd%d.value' % i for i in range(len(definition))]) + tables = ["document_fields d%d" % i for i in range(len(definition))] + novalue_where = [ + "d.doc_id = d%d.doc_id AND d%d.field_name = ?" 
% (i, i) for i in + range(len(definition))] + where = [ + novalue_where[i] + (" AND d%d.value NOT NULL" % (i,)) for i in + range(len(definition))] + statement = ( + "SELECT %s FROM document d, %s WHERE %s GROUP BY %s;" % ( + value_fields, ', '.join(tables), ' AND '.join(where), + value_fields)) + try: + c.execute(statement, tuple(definition)) + except dbapi2.OperationalError, e: + raise dbapi2.OperationalError( + str(e) + + '\nstatement: %s\nargs: %s\n' % (statement, tuple(definition))) + return c.fetchall() + + def delete_index(self, index_name): + with self._db_handle: + c = self._db_handle.cursor() + c.execute("DELETE FROM index_definitions WHERE name = ?", + (index_name,)) + c.execute( + "DELETE FROM document_fields WHERE document_fields.field_name " + " NOT IN (SELECT field from index_definitions)") + + +class SQLiteSyncTarget(CommonSyncTarget): + + def get_sync_info(self, source_replica_uid): + source_gen, source_trans_id = self._db._get_replica_gen_and_trans_id( + source_replica_uid) + my_gen, my_trans_id = self._db._get_generation_info() + return ( + self._db._replica_uid, my_gen, my_trans_id, source_gen, + source_trans_id) + + def record_sync_info(self, source_replica_uid, source_replica_generation, + source_replica_transaction_id): + if self._trace_hook: + self._trace_hook('record_sync_info') + self._db._set_replica_gen_and_trans_id( + source_replica_uid, source_replica_generation, + source_replica_transaction_id) + + +class SQLitePartialExpandDatabase(SQLiteDatabase): + """An SQLite Backend that expands documents into a document_field table. + + It stores the original document text in document.doc. For fields that are + indexed, the data goes into document_fields. + """ + + _index_storage_value = 'expand referenced' + + def _get_indexed_fields(self): + """Determine what fields are indexed.""" + c = self._db_handle.cursor() + c.execute("SELECT field FROM index_definitions") + return set([x[0] for x in c.fetchall()]) + + def _evaluate_index(self, raw_doc, field): + parser = query_parser.Parser() + getter = parser.parse(field) + return getter.get(raw_doc) + + def _put_and_update_indexes(self, old_doc, doc): + c = self._db_handle.cursor() + if doc and not doc.is_tombstone(): + raw_doc = json.loads(doc.get_json()) + else: + raw_doc = {} + if old_doc is not None: + c.execute("UPDATE document SET doc_rev=?, content=?" 
+ " WHERE doc_id = ?", + (doc.rev, doc.get_json(), doc.doc_id)) + c.execute("DELETE FROM document_fields WHERE doc_id = ?", + (doc.doc_id,)) + else: + c.execute("INSERT INTO document (doc_id, doc_rev, content)" + " VALUES (?, ?, ?)", + (doc.doc_id, doc.rev, doc.get_json())) + indexed_fields = self._get_indexed_fields() + if indexed_fields: + # It is expected that len(indexed_fields) is shorter than + # len(raw_doc) + getters = [(field, self._parse_index_definition(field)) + for field in indexed_fields] + self._update_indexes(doc.doc_id, raw_doc, getters, c) + trans_id = self._allocate_transaction_id() + c.execute("INSERT INTO transaction_log(doc_id, transaction_id)" + " VALUES (?, ?)", (doc.doc_id, trans_id)) + + def create_index(self, index_name, *index_expressions): + with self._db_handle: + c = self._db_handle.cursor() + cur_fields = self._get_indexed_fields() + definition = [(index_name, idx, field) + for idx, field in enumerate(index_expressions)] + try: + c.executemany("INSERT INTO index_definitions VALUES (?, ?, ?)", + definition) + except dbapi2.IntegrityError as e: + stored_def = self._get_index_definition(index_name) + if stored_def == [x[-1] for x in definition]: + return + raise errors.IndexNameTakenError, e, sys.exc_info()[2] + new_fields = set( + [f for f in index_expressions if f not in cur_fields]) + if new_fields: + self._update_all_indexes(new_fields) + + def _iter_all_docs(self): + c = self._db_handle.cursor() + c.execute("SELECT doc_id, content FROM document") + while True: + next_rows = c.fetchmany() + if not next_rows: + break + for row in next_rows: + yield row + + def _update_all_indexes(self, new_fields): + """Iterate all the documents, and add content to document_fields. + + :param new_fields: The index definitions that need to be added. + """ + getters = [(field, self._parse_index_definition(field)) + for field in new_fields] + c = self._db_handle.cursor() + for doc_id, doc in self._iter_all_docs(): + if doc is None: + continue + raw_doc = json.loads(doc) + self._update_indexes(doc_id, raw_doc, getters, c) + +SQLiteDatabase.register_implementation(SQLitePartialExpandDatabase) diff --git a/common/src/leap/soledad/common/l2db/errors.py b/common/src/leap/soledad/common/l2db/errors.py new file mode 100644 index 00000000..b502fc2d --- /dev/null +++ b/common/src/leap/soledad/common/l2db/errors.py @@ -0,0 +1,194 @@ +# Copyright 2011-2012 Canonical Ltd. +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. + +"""A list of errors that u1db can raise.""" + + +class U1DBError(Exception): + """Generic base class for U1DB errors.""" + + # description/tag for identifying the error during transmission (http,...) 
+    wire_description = "error"
+
+    def __init__(self, message=None):
+        self.message = message
+
+
+class RevisionConflict(U1DBError):
+    """The document revision supplied does not match the current version."""
+
+    wire_description = "revision conflict"
+
+
+class InvalidJSON(U1DBError):
+    """Content was not valid json."""
+
+
+class InvalidContent(U1DBError):
+    """Content was not a python dictionary."""
+
+
+class InvalidDocId(U1DBError):
+    """A document was requested with an invalid document identifier."""
+
+    wire_description = "invalid document id"
+
+
+class MissingDocIds(U1DBError):
+    """Needs document ids."""
+
+    wire_description = "missing document ids"
+
+
+class DocumentTooBig(U1DBError):
+    """Document exceeds the maximum document size for this database."""
+
+    wire_description = "document too big"
+
+
+class UserQuotaExceeded(U1DBError):
+    """The user's storage quota for this database has been exceeded."""
+
+    wire_description = "user quota exceeded"
+
+
+class SubscriptionNeeded(U1DBError):
+    """User needs a subscription to be able to use this replica."""
+
+    wire_description = "user needs subscription"
+
+
+class InvalidTransactionId(U1DBError):
+    """Invalid transaction for generation."""
+
+    wire_description = "invalid transaction id"
+
+
+class InvalidGeneration(U1DBError):
+    """Generation was previously synced with a different transaction id."""
+
+    wire_description = "invalid generation"
+
+
+class InvalidReplicaUID(U1DBError):
+    """Attempting to sync a database with itself."""
+
+    wire_description = "invalid replica uid"
+
+
+class ConflictedDoc(U1DBError):
+    """The document is conflicted; you must call resolve before put()."""
+
+
+class InvalidValueForIndex(U1DBError):
+    """The values supplied do not match the index definition."""
+
+
+class InvalidGlobbing(U1DBError):
+    """Raised if wildcard matches are not strictly at the tail of the request.
+ """ + + +class DocumentDoesNotExist(U1DBError): + """The document does not exist.""" + + wire_description = "document does not exist" + + +class DocumentAlreadyDeleted(U1DBError): + """The document was already deleted.""" + + wire_description = "document already deleted" + + +class DatabaseDoesNotExist(U1DBError): + """The database does not exist.""" + + wire_description = "database does not exist" + + +class IndexNameTakenError(U1DBError): + """The given index name is already taken.""" + + +class IndexDefinitionParseError(U1DBError): + """The index definition cannot be parsed.""" + + +class IndexDoesNotExist(U1DBError): + """No index of that name exists.""" + + +class Unauthorized(U1DBError): + """Request wasn't authorized properly.""" + + wire_description = "unauthorized" + + +class HTTPError(U1DBError): + """Unspecific HTTP errror.""" + + wire_description = None + + def __init__(self, status, message=None, headers={}): + self.status = status + self.message = message + self.headers = headers + + def __str__(self): + if not self.message: + return "HTTPError(%d)" % self.status + else: + return "HTTPError(%d, %r)" % (self.status, self.message) + + +class Unavailable(HTTPError): + """Server not available not serve request.""" + + wire_description = "unavailable" + + def __init__(self, message=None, headers={}): + super(Unavailable, self).__init__(503, message, headers) + + def __str__(self): + if not self.message: + return "Unavailable()" + else: + return "Unavailable(%r)" % self.message + + +class BrokenSyncStream(U1DBError): + """Unterminated or otherwise broken sync exchange stream.""" + + wire_description = None + + +class UnknownAuthMethod(U1DBError): + """Unknown auhorization method.""" + + wire_description = None + + +# mapping wire (transimission) descriptions/tags for errors to the exceptions +wire_description_to_exc = dict( + (x.wire_description, x) for x in globals().values() + if getattr(x, 'wire_description', None) not in (None, "error")) +wire_description_to_exc["error"] = U1DBError + + +# +# wire error descriptions not corresponding to an exception +DOCUMENT_DELETED = "document deleted" diff --git a/common/src/leap/soledad/common/l2db/query_parser.py b/common/src/leap/soledad/common/l2db/query_parser.py new file mode 100644 index 00000000..dd35b12a --- /dev/null +++ b/common/src/leap/soledad/common/l2db/query_parser.py @@ -0,0 +1,371 @@ +# Copyright 2011 Canonical Ltd. +# Copyright 2016 LEAP Encryption Access Project +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. +""" +Code for parsing Index definitions. +""" + +import re + +from leap.soledad.common.l2db import errors + + +class Getter(object): + """Get values from a document based on a specification.""" + + def get(self, raw_doc): + """Get a value from the document. + + :param raw_doc: a python dictionary to get the value from. + :return: A list of values that match the description. 
+ """ + raise NotImplementedError(self.get) + + +class StaticGetter(Getter): + """A getter that returns a defined value (independent of the doc).""" + + def __init__(self, value): + """Create a StaticGetter. + + :param value: the value to return when get is called. + """ + if value is None: + self.value = [] + elif isinstance(value, list): + self.value = value + else: + self.value = [value] + + def get(self, raw_doc): + return self.value + + +def extract_field(raw_doc, subfields, index=0): + if not isinstance(raw_doc, dict): + return [] + val = raw_doc.get(subfields[index]) + if val is None: + return [] + if index < len(subfields) - 1: + if isinstance(val, list): + results = [] + for item in val: + results.extend(extract_field(item, subfields, index + 1)) + return results + if isinstance(val, dict): + return extract_field(val, subfields, index + 1) + return [] + if isinstance(val, dict): + return [] + if isinstance(val, list): + # Strip anything in the list that isn't a simple type + return [v for v in val if not isinstance(v, (dict, list))] + return [val] + + +class ExtractField(Getter): + """Extract a field from the document.""" + + def __init__(self, field): + """Create an ExtractField object. + + When a document is passed to get() this will return a value + from the document based on the field specifier passed to + the constructor. + + None will be returned if the field is nonexistant, or refers to an + object, rather than a simple type or list of simple types. + + :param field: a specifier for the field to return. + This is either a field name, or a dotted field name. + """ + self.field = field.split('.') + + def get(self, raw_doc): + return extract_field(raw_doc, self.field) + + +class Transformation(Getter): + """A transformation on a value from another Getter.""" + + name = None + arity = 1 + args = ['expression'] + + def __init__(self, inner): + """Create a transformation. + + :param inner: the argument(s) to the transformation. + """ + self.inner = inner + + def get(self, raw_doc): + inner_values = self.inner.get(raw_doc) + assert isinstance(inner_values, list),\ + 'get() should always return a list' + return self.transform(inner_values) + + def transform(self, values): + """Transform the values. + + This should be implemented by subclasses to transform the + value when get() is called. + + :param values: the values from the other Getter + :return: the transformed values. + """ + raise NotImplementedError(self.transform) + + +class Lower(Transformation): + """Lowercase a string. + + This transformation will return None for non-string inputs. However, + it will lowercase any strings in a list, dropping any elements + that are not strings. + """ + + name = "lower" + + def _can_transform(self, val): + return isinstance(val, basestring) + + def transform(self, values): + if not values: + return [] + return [val.lower() for val in values if self._can_transform(val)] + + +class Number(Transformation): + """Convert an integer to a zero padded string. + + This transformation will return None for non-integer inputs. However, it + will transform any integers in a list, dropping any elements that are not + integers. 
+ """ + + name = 'number' + arity = 2 + args = ['expression', int] + + def __init__(self, inner, number): + super(Number, self).__init__(inner) + self.padding = "%%0%sd" % number + + def _can_transform(self, val): + return isinstance(val, int) and not isinstance(val, bool) + + def transform(self, values): + """Transform any integers in values into zero padded strings.""" + if not values: + return [] + return [self.padding % (v,) for v in values if self._can_transform(v)] + + +class Bool(Transformation): + """Convert bool to string.""" + + name = "bool" + args = ['expression'] + + def _can_transform(self, val): + return isinstance(val, bool) + + def transform(self, values): + """Transform any booleans in values into strings.""" + if not values: + return [] + return [('1' if v else '0') for v in values if self._can_transform(v)] + + +class SplitWords(Transformation): + """Split a string on whitespace. + + This Getter will return [] for non-string inputs. It will however + split any strings in an input list, discarding any elements that + are not strings. + """ + + name = "split_words" + + def _can_transform(self, val): + return isinstance(val, basestring) + + def transform(self, values): + if not values: + return [] + result = set() + for value in values: + if self._can_transform(value): + for word in value.split(): + result.add(word) + return list(result) + + +class Combine(Transformation): + """Combine multiple expressions into a single index.""" + + name = "combine" + # variable number of args + arity = -1 + + def __init__(self, *inner): + super(Combine, self).__init__(inner) + + def get(self, raw_doc): + inner_values = [] + for inner in self.inner: + inner_values.extend(inner.get(raw_doc)) + return self.transform(inner_values) + + def transform(self, values): + return values + + +class IsNull(Transformation): + """Indicate whether the input is None. + + This Getter returns a bool indicating whether the input is nil. 
+ """ + + name = "is_null" + + def transform(self, values): + return [len(values) == 0] + + +def check_fieldname(fieldname): + if fieldname.endswith('.'): + raise errors.IndexDefinitionParseError( + "Fieldname cannot end in '.':%s^" % (fieldname,)) + + +class Parser(object): + """Parse an index expression into a sequence of transformations.""" + + _transformations = {} + _delimiters = re.compile("\(|\)|,") + + def __init__(self): + self._tokens = [] + + def _set_expression(self, expression): + self._open_parens = 0 + self._tokens = [] + expression = expression.strip() + while expression: + delimiter = self._delimiters.search(expression) + if delimiter: + idx = delimiter.start() + if idx == 0: + result, expression = (expression[:1], expression[1:]) + self._tokens.append(result) + else: + result, expression = (expression[:idx], expression[idx:]) + result = result.strip() + if result: + self._tokens.append(result) + else: + expression = expression.strip() + if expression: + self._tokens.append(expression) + expression = None + + def _get_token(self): + if self._tokens: + return self._tokens.pop(0) + + def _peek_token(self): + if self._tokens: + return self._tokens[0] + + @staticmethod + def _to_getter(term): + if isinstance(term, Getter): + return term + check_fieldname(term) + return ExtractField(term) + + def _parse_op(self, op_name): + self._get_token() # '(' + op = self._transformations.get(op_name, None) + if op is None: + raise errors.IndexDefinitionParseError( + "Unknown operation: %s" % op_name) + args = [] + while True: + args.append(self._parse_term()) + sep = self._get_token() + if sep == ')': + break + if sep != ',': + raise errors.IndexDefinitionParseError( + "Unexpected token '%s' in parentheses." % (sep,)) + parsed = [] + for i, arg in enumerate(args): + arg_type = op.args[i % len(op.args)] + if arg_type == 'expression': + inner = self._to_getter(arg) + else: + try: + inner = arg_type(arg) + except ValueError, e: + raise errors.IndexDefinitionParseError( + "Invalid value %r for argument type %r " + "(%r)." % (arg, arg_type, e)) + parsed.append(inner) + return op(*parsed) + + def _parse_term(self): + term = self._get_token() + if term is None: + raise errors.IndexDefinitionParseError( + "Unexpected end of index definition.") + if term in (',', ')', '('): + raise errors.IndexDefinitionParseError( + "Unexpected token '%s' at start of expression." % (term,)) + next_token = self._peek_token() + if next_token == '(': + return self._parse_op(term) + return term + + def parse(self, expression): + self._set_expression(expression) + term = self._to_getter(self._parse_term()) + if self._peek_token(): + raise errors.IndexDefinitionParseError( + "Unexpected token '%s' after end of expression." 
+                % (self._peek_token(),))
+        return term
+
+    def parse_all(self, fields):
+        return [self.parse(field) for field in fields]
+
+    @classmethod
+    def register_transformation(cls, transform):
+        assert transform.name not in cls._transformations, (
+            "Transform %s already registered for %s"
+            % (transform.name, cls._transformations[transform.name]))
+        cls._transformations[transform.name] = transform
+
+
+Parser.register_transformation(SplitWords)
+Parser.register_transformation(Lower)
+Parser.register_transformation(Number)
+Parser.register_transformation(Bool)
+Parser.register_transformation(IsNull)
+Parser.register_transformation(Combine)
diff --git a/common/src/leap/soledad/common/l2db/remote/__init__.py b/common/src/leap/soledad/common/l2db/remote/__init__.py
new file mode 100644
index 00000000..3f32e381
--- /dev/null
+++ b/common/src/leap/soledad/common/l2db/remote/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2011 Canonical Ltd.
+#
+# This file is part of u1db.
+#
+# u1db is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# as published by the Free Software Foundation.
+#
+# u1db is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with u1db. If not, see <http://www.gnu.org/licenses/>.
diff --git a/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py b/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py
new file mode 100644
index 00000000..a2cbff62
--- /dev/null
+++ b/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py
@@ -0,0 +1,68 @@
+# Copyright 2012 Canonical Ltd.
+#
+# This file is part of u1db.
+#
+# u1db is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# as published by the Free Software Foundation.
+#
+# u1db is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with u1db. If not, see <http://www.gnu.org/licenses/>.
+"""U1DB Basic Auth authorisation WSGI middleware.""" +import httplib +try: + import simplejson as json +except ImportError: + import json # noqa +from wsgiref.util import shift_path_info + + +class Unauthorized(Exception): + """User authorization failed.""" + + +class BasicAuthMiddleware(object): + """U1DB Basic Auth Authorisation WSGI middleware.""" + + def __init__(self, app, prefix): + self.app = app + self.prefix = prefix + + def _error(self, start_response, status, description, message=None): + start_response("%d %s" % (status, httplib.responses[status]), + [('content-type', 'application/json')]) + err = {"error": description} + if message: + err['message'] = message + return [json.dumps(err)] + + def __call__(self, environ, start_response): + if self.prefix and not environ['PATH_INFO'].startswith(self.prefix): + return self._error(start_response, 400, "bad request") + auth = environ.get('HTTP_AUTHORIZATION') + if not auth: + return self._error(start_response, 401, "unauthorized", + "Missing Basic Authentication.") + scheme, encoded = auth.split(None, 1) + if scheme.lower() != 'basic': + return self._error( + start_response, 401, "unauthorized", + "Missing Basic Authentication") + user, password = encoded.decode('base64').split(':', 1) + try: + self.verify_user(environ, user, password) + except Unauthorized: + return self._error( + start_response, 401, "unauthorized", + "Incorrect password or login.") + del environ['HTTP_AUTHORIZATION'] + shift_path_info(environ) + return self.app(environ, start_response) + + def verify_user(self, environ, username, password): + raise NotImplementedError(self.verify_user) diff --git a/common/src/leap/soledad/common/l2db/remote/http_app.py b/common/src/leap/soledad/common/l2db/remote/http_app.py new file mode 100644 index 00000000..65277bd1 --- /dev/null +++ b/common/src/leap/soledad/common/l2db/remote/http_app.py @@ -0,0 +1,660 @@ +# Copyright 2011-2012 Canonical Ltd. +# Copyright 2016 LEAP Encryption Access Project +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. + +""" +HTTP Application exposing U1DB. +""" +# TODO -- deprecate, use twisted/txaio. 
+
+import functools
+import httplib
+import inspect
+try:
+    import simplejson as json
+except ImportError:
+    import json  # noqa
+import sys
+import urlparse
+
+import routes.mapper
+
+from leap.soledad.common.l2db import (
+    __version__ as _u1db_version,
+    DBNAME_CONSTRAINTS, Document,
+    errors, sync)
+from leap.soledad.common.l2db.remote import http_errors, utils
+
+
+def parse_bool(expression):
+    """Parse boolean querystring parameter."""
+    if expression == 'true':
+        return True
+    return False
+
+
+def parse_list(expression):
+    if not expression:
+        return []
+    return [t.strip() for t in expression.split(',')]
+
+
+def none_or_str(expression):
+    if expression is None:
+        return None
+    return str(expression)
+
+
+class BadRequest(Exception):
+    """Bad request."""
+
+
+class _FencedReader(object):
+    """Read and get lines from a file but not past a given length."""
+
+    MAXCHUNK = 8192
+
+    def __init__(self, rfile, total, max_entry_size):
+        self.rfile = rfile
+        self.remaining = total
+        self.max_entry_size = max_entry_size
+        self._kept = None
+
+    def read_chunk(self, atmost):
+        if self._kept is not None:
+            # ignore atmost, kept data should be a subchunk anyway
+            kept, self._kept = self._kept, None
+            return kept
+        if self.remaining == 0:
+            return ''
+        data = self.rfile.read(min(self.remaining, atmost))
+        self.remaining -= len(data)
+        return data
+
+    def getline(self):
+        line_parts = []
+        size = 0
+        while True:
+            chunk = self.read_chunk(self.MAXCHUNK)
+            if chunk == '':
+                break
+            nl = chunk.find("\n")
+            if nl != -1:
+                size += nl + 1
+                if size > self.max_entry_size:
+                    raise BadRequest
+                line_parts.append(chunk[:nl + 1])
+                rest = chunk[nl + 1:]
+                self._kept = rest or None
+                break
+            else:
+                size += len(chunk)
+                if size > self.max_entry_size:
+                    raise BadRequest
+                line_parts.append(chunk)
+        return ''.join(line_parts)
+
+
+def http_method(**control):
+    """Decorator for handling query arguments and content for an HTTP
+    method.
+
+    args and content here are the query arguments and body of the incoming
+    HTTP requests.
+
+    Match query arguments to Python method arguments:
+       w = http_method()(f)
+       w(self, args, content) => args["content"]=content;
+                                 f(self, **args)
+
+    JSON deserialize content to arguments:
+       w = http_method(content_as_args=True,...)(f)
+       w(self, args, content) => args.update(json.loads(content));
+                                 f(self, **args)
+
+    Support conversions (e.g. int):
+       w = http_method(Arg=Conv,...)(f)
+       w(self, args, content) => args["Arg"]=Conv(args["Arg"]);
+                                 f(self, **args)
+
+    Enforce no use of query arguments:
+       w = http_method(no_query=True,...)(f)
+       w(self, args, content) raises BadRequest if args is not empty
+
+    Argument mismatches and deserialisation failures produce BadRequest.
+ """ + content_as_args = control.pop('content_as_args', False) + no_query = control.pop('no_query', False) + conversions = control.items() + + def wrap(f): + argspec = inspect.getargspec(f) + assert argspec.args[0] == "self" + nargs = len(argspec.args) + ndefaults = len(argspec.defaults or ()) + required_args = set(argspec.args[1:nargs - ndefaults]) + all_args = set(argspec.args) + + @functools.wraps(f) + def wrapper(self, args, content): + if no_query and args: + raise BadRequest() + if content is not None: + if content_as_args: + try: + args.update(json.loads(content)) + except ValueError: + raise BadRequest() + else: + args["content"] = content + if not (required_args <= set(args) <= all_args): + raise BadRequest("Missing required arguments.") + for name, conv in conversions: + if name not in args: + continue + try: + args[name] = conv(args[name]) + except ValueError: + raise BadRequest() + return f(self, **args) + + return wrapper + + return wrap + + +class URLToResource(object): + """Mappings from URLs to resources.""" + + def __init__(self): + self._map = routes.mapper.Mapper(controller_scan=None) + + def register(self, resource_cls): + # register + self._map.connect(None, resource_cls.url_pattern, + resource_cls=resource_cls, + requirements={"dbname": DBNAME_CONSTRAINTS}) + self._map.create_regs() + return resource_cls + + def match(self, path): + params = self._map.match(path) + if params is None: + return None, None + resource_cls = params.pop('resource_cls') + return resource_cls, params + +url_to_resource = URLToResource() + + +@url_to_resource.register +class GlobalResource(object): + """Global (root) resource.""" + + url_pattern = "/" + + def __init__(self, state, responder): + self.state = state + self.responder = responder + + @http_method() + def get(self): + info = self.state.global_info() + info['version'] = _u1db_version + self.responder.send_response_json(**info) + + +@url_to_resource.register +class DatabaseResource(object): + """Database resource.""" + + url_pattern = "/{dbname}" + + def __init__(self, dbname, state, responder): + self.dbname = dbname + self.state = state + self.responder = responder + + @http_method() + def get(self): + self.state.check_database(self.dbname) + self.responder.send_response_json(200) + + @http_method(content_as_args=True) + def put(self): + self.state.ensure_database(self.dbname) + self.responder.send_response_json(200, ok=True) + + @http_method() + def delete(self): + self.state.delete_database(self.dbname) + self.responder.send_response_json(200, ok=True) + + +@url_to_resource.register +class DocsResource(object): + """Documents resource.""" + + url_pattern = "/{dbname}/docs" + + def __init__(self, dbname, state, responder): + self.responder = responder + self.db = state.open_database(dbname) + + @http_method(doc_ids=parse_list, check_for_conflicts=parse_bool, + include_deleted=parse_bool) + def get(self, doc_ids=None, check_for_conflicts=True, + include_deleted=False): + if doc_ids is None: + raise errors.MissingDocIds + docs = self.db.get_docs(doc_ids, include_deleted=include_deleted) + self.responder.content_type = 'application/json' + self.responder.start_response(200) + self.responder.start_stream(), + for doc in docs: + entry = dict( + doc_id=doc.doc_id, doc_rev=doc.rev, content=doc.get_json(), + has_conflicts=doc.has_conflicts) + self.responder.stream_entry(entry) + self.responder.end_stream() + self.responder.finish_response() + + +@url_to_resource.register +class AllDocsResource(object): + """All Documents resource.""" + + 
url_pattern = "/{dbname}/all-docs" + + def __init__(self, dbname, state, responder): + self.responder = responder + self.db = state.open_database(dbname) + + @http_method(include_deleted=parse_bool) + def get(self, include_deleted=False): + gen, docs = self.db.get_all_docs(include_deleted=include_deleted) + self.responder.content_type = 'application/json' + # returning a x-u1db-generation header is optional + # HTTPDatabase will fallback to return -1 if it's missing + self.responder.start_response(200, + headers={'x-u1db-generation': str(gen)}) + self.responder.start_stream(), + for doc in docs: + entry = dict( + doc_id=doc.doc_id, doc_rev=doc.rev, content=doc.get_json(), + has_conflicts=doc.has_conflicts) + self.responder.stream_entry(entry) + self.responder.end_stream() + self.responder.finish_response() + + +@url_to_resource.register +class DocResource(object): + """Document resource.""" + + url_pattern = "/{dbname}/doc/{id:.*}" + + def __init__(self, dbname, id, state, responder): + self.id = id + self.responder = responder + self.db = state.open_database(dbname) + + @http_method(old_rev=str) + def put(self, content, old_rev=None): + doc = Document(self.id, old_rev, content) + doc_rev = self.db.put_doc(doc) + if old_rev is None: + status = 201 # created + else: + status = 200 + self.responder.send_response_json(status, rev=doc_rev) + + @http_method(old_rev=str) + def delete(self, old_rev=None): + doc = Document(self.id, old_rev, None) + self.db.delete_doc(doc) + self.responder.send_response_json(200, rev=doc.rev) + + @http_method(include_deleted=parse_bool) + def get(self, include_deleted=False): + doc = self.db.get_doc(self.id, include_deleted=include_deleted) + if doc is None: + wire_descr = errors.DocumentDoesNotExist.wire_description + self.responder.send_response_json( + http_errors.wire_description_to_status[wire_descr], + error=wire_descr, + headers={ + 'x-u1db-rev': '', + 'x-u1db-has-conflicts': 'false' + }) + return + headers = { + 'x-u1db-rev': doc.rev, + 'x-u1db-has-conflicts': json.dumps(doc.has_conflicts) + } + if doc.is_tombstone(): + self.responder.send_response_json( + http_errors.wire_description_to_status[ + errors.DOCUMENT_DELETED], + error=errors.DOCUMENT_DELETED, + headers=headers) + else: + self.responder.send_response_content( + doc.get_json(), headers=headers) + + +@url_to_resource.register +class SyncResource(object): + """Sync endpoint resource.""" + + # maximum allowed request body size + max_request_size = 15 * 1024 * 1024 # 15Mb + # maximum allowed entry/line size in request body + max_entry_size = 10 * 1024 * 1024 # 10Mb + + url_pattern = "/{dbname}/sync-from/{source_replica_uid}" + + # pluggable + sync_exchange_class = sync.SyncExchange + + def __init__(self, dbname, source_replica_uid, state, responder): + self.source_replica_uid = source_replica_uid + self.responder = responder + self.state = state + self.dbname = dbname + self.replica_uid = None + + def get_target(self): + return self.state.open_database(self.dbname).get_sync_target() + + @http_method() + def get(self): + result = self.get_target().get_sync_info(self.source_replica_uid) + self.responder.send_response_json( + target_replica_uid=result[0], target_replica_generation=result[1], + target_replica_transaction_id=result[2], + source_replica_uid=self.source_replica_uid, + source_replica_generation=result[3], + source_transaction_id=result[4]) + + @http_method(generation=int, + content_as_args=True, no_query=True) + def put(self, generation, transaction_id): + 
self.get_target().record_sync_info(self.source_replica_uid, + generation, + transaction_id) + self.responder.send_response_json(ok=True) + + # Implements the same logic as LocalSyncTarget.sync_exchange + + @http_method(last_known_generation=int, last_known_trans_id=none_or_str, + content_as_args=True) + def post_args(self, last_known_generation, last_known_trans_id=None, + ensure=False): + if ensure: + db, self.replica_uid = self.state.ensure_database(self.dbname) + else: + db = self.state.open_database(self.dbname) + db.validate_gen_and_trans_id( + last_known_generation, last_known_trans_id) + self.sync_exch = self.sync_exchange_class( + db, self.source_replica_uid, last_known_generation) + + @http_method(content_as_args=True) + def post_stream_entry(self, id, rev, content, gen, trans_id): + doc = Document(id, rev, content) + self.sync_exch.insert_doc_from_source(doc, gen, trans_id) + + def post_end(self): + + def send_doc(doc, gen, trans_id): + entry = dict(id=doc.doc_id, rev=doc.rev, content=doc.get_json(), + gen=gen, trans_id=trans_id) + self.responder.stream_entry(entry) + + new_gen = self.sync_exch.find_changes_to_return() + self.responder.content_type = 'application/x-u1db-sync-stream' + self.responder.start_response(200) + self.responder.start_stream(), + header = {"new_generation": new_gen, + "new_transaction_id": self.sync_exch.new_trans_id} + if self.replica_uid is not None: + header['replica_uid'] = self.replica_uid + self.responder.stream_entry(header) + self.sync_exch.return_docs(send_doc) + self.responder.end_stream() + self.responder.finish_response() + + +class HTTPResponder(object): + """Encode responses from the server back to the client.""" + + # a multi document response will put args and documents + # each on one line of the response body + + def __init__(self, start_response): + self._started = False + self._stream_state = -1 + self._no_initial_obj = True + self.sent_response = False + self._start_response = start_response + self._write = None + self.content_type = 'application/json' + self.content = [] + + def start_response(self, status, obj_dic=None, headers={}): + """start sending response with optional first json object.""" + if self._started: + return + self._started = True + status_text = httplib.responses[status] + self._write = self._start_response( + '%d %s' % (status, status_text), + [('content-type', self.content_type), + ('cache-control', 'no-cache')] + + headers.items()) + # xxx version in headers + if obj_dic is not None: + self._no_initial_obj = False + self._write(json.dumps(obj_dic) + "\r\n") + + def finish_response(self): + """finish sending response.""" + self.sent_response = True + + def send_response_json(self, status=200, headers={}, **kwargs): + """send and finish response with json object body from keyword args.""" + content = json.dumps(kwargs) + "\r\n" + self.send_response_content(content, headers=headers, status=status) + + def send_response_content(self, content, status=200, headers={}): + """send and finish response with content""" + headers['content-length'] = str(len(content)) + self.start_response(status, headers=headers) + if self._stream_state == 1: + self.content = [',\r\n', content] + else: + self.content = [content] + self.finish_response() + + def start_stream(self): + "start stream (array) as part of the response." + assert self._started and self._no_initial_obj + self._stream_state = 0 + self._write("[") + + def stream_entry(self, entry): + "send stream entry as part of the response." 
+        assert self._stream_state != -1
+        if self._stream_state == 0:
+            self._stream_state = 1
+            self._write('\r\n')
+        else:
+            self._write(',\r\n')
+        self._write(json.dumps(entry))
+
+    def end_stream(self):
+        "end stream (array)."
+        assert self._stream_state != -1
+        self._write("\r\n]\r\n")
+
+
+class HTTPInvocationByMethodWithBody(object):
+    """Invoke methods on a resource."""
+
+    def __init__(self, resource, environ, parameters):
+        self.resource = resource
+        self.environ = environ
+        self.max_request_size = getattr(
+            resource, 'max_request_size', parameters.max_request_size)
+        self.max_entry_size = getattr(
+            resource, 'max_entry_size', parameters.max_entry_size)
+
+    def _lookup(self, method):
+        try:
+            return getattr(self.resource, method)
+        except AttributeError:
+            raise BadRequest()
+
+    def __call__(self):
+        args = urlparse.parse_qsl(self.environ['QUERY_STRING'],
+                                  strict_parsing=False)
+        try:
+            args = dict(
+                (k.decode('utf-8'), v.decode('utf-8')) for k, v in args)
+        except ValueError:
+            raise BadRequest()
+        method = self.environ['REQUEST_METHOD'].lower()
+        if method in ('get', 'delete'):
+            meth = self._lookup(method)
+            return meth(args, None)
+        else:
+            # we expect content-length > 0, reconsider if we move
+            # to support chunked encoding
+            try:
+                content_length = int(self.environ['CONTENT_LENGTH'])
+            except (ValueError, KeyError):
+                raise BadRequest
+            if content_length <= 0:
+                raise BadRequest
+            if content_length > self.max_request_size:
+                raise BadRequest
+            reader = _FencedReader(self.environ['wsgi.input'], content_length,
+                                   self.max_entry_size)
+            content_type = self.environ.get('CONTENT_TYPE', '')
+            content_type = content_type.split(';', 1)[0].strip()
+            if content_type == 'application/json':
+                meth = self._lookup(method)
+                body = reader.read_chunk(sys.maxint)
+                return meth(args, body)
+            elif content_type == 'application/x-u1db-sync-stream':
+                meth_args = self._lookup('%s_args' % method)
+                meth_entry = self._lookup('%s_stream_entry' % method)
+                meth_end = self._lookup('%s_end' % method)
+                body_getline = reader.getline
+                if body_getline().strip() != '[':
+                    raise BadRequest()
+                line = body_getline()
+                line, comma = utils.check_and_strip_comma(line.strip())
+                meth_args(args, line)
+                while True:
+                    line = body_getline()
+                    entry = line.strip()
+                    if entry == ']':
+                        break
+                    if not entry or not comma:  # empty or no preceding comma
+                        raise BadRequest
+                    entry, comma = utils.check_and_strip_comma(entry)
+                    meth_entry({}, entry)
+                if comma or body_getline():  # extra comma or data
+                    raise BadRequest
+                return meth_end()
+            else:
+                raise BadRequest()
+
+
+class HTTPApp(object):
+
+    # maximum allowed request body size
+    max_request_size = 15 * 1024 * 1024  # 15Mb
+    # maximum allowed entry/line size in request body
+    max_entry_size = 10 * 1024 * 1024  # 10Mb
+
+    def __init__(self, state):
+        self.state = state
+
+    def _lookup_resource(self, environ, responder):
+        resource_cls, params = url_to_resource.match(environ['PATH_INFO'])
+        if resource_cls is None:
+            raise BadRequest  # 404 instead?
+ resource = resource_cls( + state=self.state, responder=responder, **params) + return resource + + def __call__(self, environ, start_response): + responder = HTTPResponder(start_response) + self.request_begin(environ) + try: + resource = self._lookup_resource(environ, responder) + HTTPInvocationByMethodWithBody(resource, environ, self)() + except errors.U1DBError, e: + self.request_u1db_error(environ, e) + status = http_errors.wire_description_to_status.get( + e.wire_description, 500) + responder.send_response_json(status, error=e.wire_description) + except BadRequest: + self.request_bad_request(environ) + responder.send_response_json(400, error="bad request") + except KeyboardInterrupt: + raise + except: + self.request_failed(environ) + raise + else: + self.request_done(environ) + return responder.content + + # hooks for tracing requests + + def request_begin(self, environ): + """Hook called at the beginning of processing a request.""" + pass + + def request_done(self, environ): + """Hook called when done processing a request.""" + pass + + def request_u1db_error(self, environ, exc): + """Hook called when processing a request resulted in a U1DBError. + + U1DBError passed as exc. + """ + pass + + def request_bad_request(self, environ): + """Hook called when processing a bad request. + + No actual processing was done. + """ + pass + + def request_failed(self, environ): + """Hook called when processing a request failed unexpectedly. + + Invoked from an except block, so there's interpreter exception + information available. + """ + pass diff --git a/common/src/leap/soledad/common/l2db/remote/http_client.py b/common/src/leap/soledad/common/l2db/remote/http_client.py new file mode 100644 index 00000000..a65264b6 --- /dev/null +++ b/common/src/leap/soledad/common/l2db/remote/http_client.py @@ -0,0 +1,182 @@ +# Copyright 2011-2012 Canonical Ltd. +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. + +"""Base class to make requests to a remote HTTP server.""" + +import httplib +try: + import simplejson as json +except ImportError: + import json # noqa +import socket +import ssl +import sys +import urlparse +import urllib + +from time import sleep +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db.remote import http_errors + +from leap.soledad.common.l2db.remote.ssl_match_hostname import match_hostname + +# Ubuntu/debian +# XXX other... +CA_CERTS = "/etc/ssl/certs/ca-certificates.crt" + + +def _encode_query_parameter(value): + """Encode query parameter.""" + if isinstance(value, bool): + if value: + value = 'true' + else: + value = 'false' + return unicode(value).encode('utf-8') + + +class _VerifiedHTTPSConnection(httplib.HTTPSConnection): + """HTTPSConnection verifying server side certificates.""" + # derived from httplib.py + + def connect(self): + "Connect to a host on a given (SSL) port." 
+
+        sock = socket.create_connection((self.host, self.port),
+                                        self.timeout, self.source_address)
+        if self._tunnel_host:
+            self.sock = sock
+            self._tunnel()
+        if sys.platform.startswith('linux'):
+            cert_opts = {
+                'cert_reqs': ssl.CERT_REQUIRED,
+                'ca_certs': CA_CERTS
+            }
+        else:
+            # XXX no cert verification implemented elsewhere for now
+            cert_opts = {}
+        self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
+                                    ssl_version=ssl.PROTOCOL_SSLv3,
+                                    **cert_opts
+                                    )
+        if cert_opts:
+            match_hostname(self.sock.getpeercert(), self.host)
+
+
+class HTTPClientBase(object):
+    """Base class to make requests to a remote HTTP server."""
+
+    # Will use these delays to retry on 503 before finally giving up. The
+    # final 0 is there to not wait after the final try fails.
+    _delays = (1, 1, 2, 4, 0)
+
+    def __init__(self, url, creds=None):
+        self._url = urlparse.urlsplit(url)
+        self._conn = None
+        self._creds = {}
+        if creds is not None:
+            if len(creds) != 1:
+                raise errors.UnknownAuthMethod()
+            auth_meth, credentials = creds.items()[0]
+            try:
+                set_creds = getattr(self, 'set_%s_credentials' % auth_meth)
+            except AttributeError:
+                raise errors.UnknownAuthMethod(auth_meth)
+            set_creds(**credentials)
+
+    def _ensure_connection(self):
+        if self._conn is not None:
+            return
+        if self._url.scheme == 'https':
+            connClass = _VerifiedHTTPSConnection
+        else:
+            connClass = httplib.HTTPConnection
+        self._conn = connClass(self._url.hostname, self._url.port)
+
+    def close(self):
+        if self._conn:
+            self._conn.close()
+            self._conn = None
+
+    # xxx retry mechanism?
+
+    def _error(self, respdic):
+        descr = respdic.get("error")
+        exc_cls = errors.wire_description_to_exc.get(descr)
+        if exc_cls is not None:
+            message = respdic.get("message")
+            raise exc_cls(message)
+
+    def _response(self):
+        resp = self._conn.getresponse()
+        body = resp.read()
+        headers = dict(resp.getheaders())
+        if resp.status in (200, 201):
+            return body, headers
+        elif resp.status in http_errors.ERROR_STATUSES:
+            try:
+                respdic = json.loads(body)
+            except ValueError:
+                pass
+            else:
+                self._error(respdic)
+        # special case
+        if resp.status == 503:
+            raise errors.Unavailable(body, headers)
+        raise errors.HTTPError(resp.status, body, headers)
+
+    def _sign_request(self, method, url_query, params):
+        raise NotImplementedError
+
+    def _request(self, method, url_parts, params=None, body=None,
+                 content_type=None):
+        self._ensure_connection()
+        unquoted_url = url_query = self._url.path
+        if url_parts:
+            if not url_query.endswith('/'):
+                url_query += '/'
+                unquoted_url = url_query
+            url_query += '/'.join(urllib.quote(part, safe='')
+                                  for part in url_parts)
+            # oauth performs its own quoting
+            unquoted_url += '/'.join(url_parts)
+        encoded_params = {}
+        if params:
+            for key, value in params.items():
+                key = unicode(key).encode('utf-8')
+                encoded_params[key] = _encode_query_parameter(value)
+            url_query += ('?'
+ urllib.urlencode(encoded_params)) + if body is not None and not isinstance(body, basestring): + body = json.dumps(body) + content_type = 'application/json' + headers = {} + if content_type: + headers['content-type'] = content_type + headers.update( + self._sign_request(method, unquoted_url, encoded_params)) + for delay in self._delays: + try: + self._conn.request(method, url_query, body, headers) + return self._response() + except errors.Unavailable, e: + sleep(delay) + raise e + + def _request_json(self, method, url_parts, params=None, body=None, + content_type=None): + res, headers = self._request(method, url_parts, params, body, + content_type) + return json.loads(res), headers diff --git a/common/src/leap/soledad/common/l2db/remote/http_database.py b/common/src/leap/soledad/common/l2db/remote/http_database.py new file mode 100644 index 00000000..b2b48dee --- /dev/null +++ b/common/src/leap/soledad/common/l2db/remote/http_database.py @@ -0,0 +1,161 @@ +# Copyright 2011 Canonical Ltd. +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. + +"""HTTPDatabase to access a remote db over the HTTP API.""" + +try: + import simplejson as json +except ImportError: + import json # noqa +import uuid + +from leap.soledad.common.l2db import ( + Database, + Document, + errors) +from leap.soledad.common.l2db.remote import ( + http_client, + http_errors, + http_target) + + +DOCUMENT_DELETED_STATUS = http_errors.wire_description_to_status[ + errors.DOCUMENT_DELETED] + + +class HTTPDatabase(http_client.HTTPClientBase, Database): + """Implement the Database API to a remote HTTP server.""" + + def __init__(self, url, document_factory=None, creds=None): + super(HTTPDatabase, self).__init__(url, creds=creds) + self._factory = document_factory or Document + + def set_document_factory(self, factory): + self._factory = factory + + @staticmethod + def open_database(url, create): + db = HTTPDatabase(url) + db.open(create) + return db + + @staticmethod + def delete_database(url): + db = HTTPDatabase(url) + db._delete() + db.close() + + def open(self, create): + if create: + self._ensure() + else: + self._check() + + def _check(self): + return self._request_json('GET', [])[0] + + def _ensure(self): + self._request_json('PUT', [], {}, {}) + + def _delete(self): + self._request_json('DELETE', [], {}, {}) + + def put_doc(self, doc): + if doc.doc_id is None: + raise errors.InvalidDocId() + params = {} + if doc.rev is not None: + params['old_rev'] = doc.rev + res, headers = self._request_json('PUT', ['doc', doc.doc_id], params, + doc.get_json(), 'application/json') + doc.rev = res['rev'] + return res['rev'] + + def get_doc(self, doc_id, include_deleted=False): + try: + res, headers = self._request( + 'GET', ['doc', doc_id], {"include_deleted": include_deleted}) + except errors.DocumentDoesNotExist: + return None + except errors.HTTPError, e: + if (e.status == DOCUMENT_DELETED_STATUS and + 'x-u1db-rev' in e.headers): + res = None + headers = e.headers + else: + raise + 
doc_rev = headers['x-u1db-rev'] + has_conflicts = json.loads(headers['x-u1db-has-conflicts']) + doc = self._factory(doc_id, doc_rev, res) + doc.has_conflicts = has_conflicts + return doc + + def _build_docs(self, res): + for doc_dict in json.loads(res): + doc = self._factory( + doc_dict['doc_id'], doc_dict['doc_rev'], doc_dict['content']) + doc.has_conflicts = doc_dict['has_conflicts'] + yield doc + + def get_docs(self, doc_ids, check_for_conflicts=True, + include_deleted=False): + if not doc_ids: + return [] + doc_ids = ','.join(doc_ids) + res, headers = self._request( + 'GET', ['docs'], { + "doc_ids": doc_ids, "include_deleted": include_deleted, + "check_for_conflicts": check_for_conflicts}) + return self._build_docs(res) + + def get_all_docs(self, include_deleted=False): + res, headers = self._request( + 'GET', ['all-docs'], {"include_deleted": include_deleted}) + gen = -1 + if 'x-u1db-generation' in headers: + gen = int(headers['x-u1db-generation']) + return gen, list(self._build_docs(res)) + + def _allocate_doc_id(self): + return 'D-%s' % (uuid.uuid4().hex,) + + def create_doc(self, content, doc_id=None): + if not isinstance(content, dict): + raise errors.InvalidContent + json_string = json.dumps(content) + return self.create_doc_from_json(json_string, doc_id) + + def create_doc_from_json(self, content, doc_id=None): + if doc_id is None: + doc_id = self._allocate_doc_id() + res, headers = self._request_json('PUT', ['doc', doc_id], {}, + content, 'application/json') + new_doc = self._factory(doc_id, res['rev'], content) + return new_doc + + def delete_doc(self, doc): + if doc.doc_id is None: + raise errors.InvalidDocId() + params = {'old_rev': doc.rev} + res, headers = self._request_json( + 'DELETE', ['doc', doc.doc_id], params) + doc.make_tombstone() + doc.rev = res['rev'] + + def get_sync_target(self): + st = http_target.HTTPSyncTarget(self._url.geturl()) + st._creds = self._creds + return st diff --git a/common/src/leap/soledad/common/l2db/remote/http_errors.py b/common/src/leap/soledad/common/l2db/remote/http_errors.py new file mode 100644 index 00000000..ee4cfefa --- /dev/null +++ b/common/src/leap/soledad/common/l2db/remote/http_errors.py @@ -0,0 +1,48 @@ +# Copyright 2011-2012 Canonical Ltd. +# Copyright 2016 LEAP Encryption Access Project +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. + +""" +Information about the encoding of errors over HTTP. 
+""" + +from leap.soledad.common.l2db import errors + + +# error wire descriptions mapping to HTTP status codes +wire_description_to_status = dict([ + (errors.InvalidDocId.wire_description, 400), + (errors.MissingDocIds.wire_description, 400), + (errors.Unauthorized.wire_description, 401), + (errors.DocumentTooBig.wire_description, 403), + (errors.UserQuotaExceeded.wire_description, 403), + (errors.SubscriptionNeeded.wire_description, 403), + (errors.DatabaseDoesNotExist.wire_description, 404), + (errors.DocumentDoesNotExist.wire_description, 404), + (errors.DocumentAlreadyDeleted.wire_description, 404), + (errors.RevisionConflict.wire_description, 409), + (errors.InvalidGeneration.wire_description, 409), + (errors.InvalidReplicaUID.wire_description, 409), + (errors.InvalidTransactionId.wire_description, 409), + (errors.Unavailable.wire_description, 503), + # without matching exception + (errors.DOCUMENT_DELETED, 404) +]) + + +ERROR_STATUSES = set(wire_description_to_status.values()) +# 400 included explicitly for tests +ERROR_STATUSES.add(400) diff --git a/common/src/leap/soledad/common/l2db/remote/http_target.py b/common/src/leap/soledad/common/l2db/remote/http_target.py new file mode 100644 index 00000000..7e7f366f --- /dev/null +++ b/common/src/leap/soledad/common/l2db/remote/http_target.py @@ -0,0 +1,128 @@ +# Copyright 2011-2012 Canonical Ltd. +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. 
+ +"""SyncTarget API implementation to a remote HTTP server.""" + +try: + import simplejson as json +except ImportError: + import json # noqa + +from leap.soledad.common.l2db import Document, SyncTarget +from leap.soledad.common.l2db.errors import BrokenSyncStream +from leap.soledad.common.l2db.remote import ( + http_client, utils) + + +class HTTPSyncTarget(http_client.HTTPClientBase, SyncTarget): + """Implement the SyncTarget api to a remote HTTP server.""" + + @staticmethod + def connect(url): + return HTTPSyncTarget(url) + + def get_sync_info(self, source_replica_uid): + self._ensure_connection() + res, _ = self._request_json('GET', ['sync-from', source_replica_uid]) + return (res['target_replica_uid'], res['target_replica_generation'], + res['target_replica_transaction_id'], + res['source_replica_generation'], res['source_transaction_id']) + + def record_sync_info(self, source_replica_uid, source_replica_generation, + source_transaction_id): + self._ensure_connection() + if self._trace_hook: # for tests + self._trace_hook('record_sync_info') + self._request_json('PUT', ['sync-from', source_replica_uid], {}, + {'generation': source_replica_generation, + 'transaction_id': source_transaction_id}) + + def _parse_sync_stream(self, data, return_doc_cb, ensure_callback=None): + parts = data.splitlines() # one at a time + if not parts or parts[0] != '[': + raise BrokenSyncStream + data = parts[1:-1] + comma = False + if data: + line, comma = utils.check_and_strip_comma(data[0]) + res = json.loads(line) + if ensure_callback and 'replica_uid' in res: + ensure_callback(res['replica_uid']) + for entry in data[1:]: + if not comma: # missing in between comma + raise BrokenSyncStream + line, comma = utils.check_and_strip_comma(entry) + entry = json.loads(line) + doc = Document(entry['id'], entry['rev'], entry['content']) + return_doc_cb(doc, entry['gen'], entry['trans_id']) + if parts[-1] != ']': + try: + partdic = json.loads(parts[-1]) + except ValueError: + pass + else: + if isinstance(partdic, dict): + self._error(partdic) + raise BrokenSyncStream + if not data or comma: # no entries or bad extra comma + raise BrokenSyncStream + return res + + def sync_exchange(self, docs_by_generations, source_replica_uid, + last_known_generation, last_known_trans_id, + return_doc_cb, ensure_callback=None): + self._ensure_connection() + if self._trace_hook: # for tests + self._trace_hook('sync_exchange') + url = '%s/sync-from/%s' % (self._url.path, source_replica_uid) + self._conn.putrequest('POST', url) + self._conn.putheader('content-type', 'application/x-u1db-sync-stream') + for header_name, header_value in self._sign_request('POST', url, {}): + self._conn.putheader(header_name, header_value) + entries = ['['] + size = 1 + + def prepare(**dic): + entry = comma + '\r\n' + json.dumps(dic) + entries.append(entry) + return len(entry) + + comma = '' + size += prepare( + last_known_generation=last_known_generation, + last_known_trans_id=last_known_trans_id, + ensure=ensure_callback is not None) + comma = ',' + for doc, gen, trans_id in docs_by_generations: + size += prepare(id=doc.doc_id, rev=doc.rev, content=doc.get_json(), + gen=gen, trans_id=trans_id) + entries.append('\r\n]') + size += len(entries[-1]) + self._conn.putheader('content-length', str(size)) + self._conn.endheaders() + for entry in entries: + self._conn.send(entry) + entries = None + data, _ = self._response() + res = self._parse_sync_stream(data, return_doc_cb, ensure_callback) + data = None + return res['new_generation'], res['new_transaction_id'] 
+ + # for tests + _trace_hook = None + + def _set_trace_hook_shallow(self, cb): + self._trace_hook = cb diff --git a/common/src/leap/soledad/common/l2db/remote/server_state.py b/common/src/leap/soledad/common/l2db/remote/server_state.py new file mode 100644 index 00000000..f131e09e --- /dev/null +++ b/common/src/leap/soledad/common/l2db/remote/server_state.py @@ -0,0 +1,72 @@ +# Copyright 2011 Canonical Ltd. +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. + +"""State for servers exposing a set of U1DB databases.""" +import os +import errno + + +class ServerState(object): + """Passed to a Request when it is instantiated. + + This is used to track server-side state, such as working-directory, open + databases, etc. + """ + + def __init__(self): + self._workingdir = None + + def set_workingdir(self, path): + self._workingdir = path + + def global_info(self): + """Return global information about the server.""" + return {} + + def _relpath(self, relpath): + # Note: We don't want to allow absolute paths here, because we + # don't want to expose the filesystem. We should also check that + # relpath doesn't have '..' in it, etc. + return self._workingdir + '/' + relpath + + def open_database(self, path): + """Open a database at the given location.""" + from u1db.backends import sqlite_backend + full_path = self._relpath(path) + return sqlite_backend.SQLiteDatabase.open_database(full_path, + create=False) + + def check_database(self, path): + """Check if the database at the given location exists. + + Simply returns if it does or raises DatabaseDoesNotExist. + """ + db = self.open_database(path) + db.close() + + def ensure_database(self, path): + """Ensure database at the given location.""" + from u1db.backends import sqlite_backend + full_path = self._relpath(path) + db = sqlite_backend.SQLiteDatabase.open_database(full_path, + create=True) + return db, db._replica_uid + + def delete_database(self, path): + """Delete database at the given location.""" + from u1db.backends import sqlite_backend + full_path = self._relpath(path) + sqlite_backend.SQLiteDatabase.delete_database(full_path) diff --git a/common/src/leap/soledad/common/l2db/remote/ssl_match_hostname.py b/common/src/leap/soledad/common/l2db/remote/ssl_match_hostname.py new file mode 100644 index 00000000..ce82f1b2 --- /dev/null +++ b/common/src/leap/soledad/common/l2db/remote/ssl_match_hostname.py @@ -0,0 +1,65 @@ +"""The match_hostname() function from Python 3.2, essential when using SSL.""" +# XXX put it here until it's packaged + +import re + +__version__ = '3.2a3' + + +class CertificateError(ValueError): + pass + + +def _dnsname_to_pat(dn): + pats = [] + for frag in dn.split(r'.'): + if frag == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + else: + # Otherwise, '*' matches any dotless fragment. 
+            frag = re.escape(frag)
+            pats.append(frag.replace(r'\*', '[^.]*'))
+    return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
+
+
+def match_hostname(cert, hostname):
+    """Verify that *cert* (in decoded format as returned by
+    SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules
+    are mostly followed, but IP addresses are not accepted for *hostname*.
+
+    CertificateError is raised on failure. On success, the function
+    returns nothing.
+    """
+    if not cert:
+        raise ValueError("empty or no certificate")
+    dnsnames = []
+    san = cert.get('subjectAltName', ())
+    for key, value in san:
+        if key == 'DNS':
+            if _dnsname_to_pat(value).match(hostname):
+                return
+            dnsnames.append(value)
+    if not san:
+        # The subject is only checked when subjectAltName is empty
+        for sub in cert.get('subject', ()):
+            for key, value in sub:
+                # XXX according to RFC 2818, the most specific Common Name
+                # must be used.
+                if key == 'commonName':
+                    if _dnsname_to_pat(value).match(hostname):
+                        return
+                    dnsnames.append(value)
+    if len(dnsnames) > 1:
+        raise CertificateError(
+            "hostname %r doesn't match either of %s"
+            % (hostname, ', '.join(map(repr, dnsnames))))
+    elif len(dnsnames) == 1:
+        raise CertificateError(
+            "hostname %r doesn't match %r"
+            % (hostname, dnsnames[0]))
+    else:
+        raise CertificateError(
+            "no appropriate commonName or "
+            "subjectAltName fields were found")
diff --git a/common/src/leap/soledad/common/l2db/remote/utils.py b/common/src/leap/soledad/common/l2db/remote/utils.py
new file mode 100644
index 00000000..14cedea9
--- /dev/null
+++ b/common/src/leap/soledad/common/l2db/remote/utils.py
@@ -0,0 +1,23 @@
+# Copyright 2012 Canonical Ltd.
+#
+# This file is part of u1db.
+#
+# u1db is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# as published by the Free Software Foundation.
+#
+# u1db is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with u1db. If not, see <http://www.gnu.org/licenses/>.
+
+"""Utilities for details of the protocol."""
+
+
+def check_and_strip_comma(line):
+    if line and line[-1] == ',':
+        return line[:-1], True
+    return line, False
diff --git a/common/src/leap/soledad/common/l2db/sync.py b/common/src/leap/soledad/common/l2db/sync.py
new file mode 100644
index 00000000..c612629f
--- /dev/null
+++ b/common/src/leap/soledad/common/l2db/sync.py
@@ -0,0 +1,311 @@
+# Copyright 2011 Canonical Ltd.
+#
+# This file is part of u1db.
+#
+# u1db is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# as published by the Free Software Foundation.
+#
+# u1db is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with u1db. If not, see <http://www.gnu.org/licenses/>.
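`check_and_strip_comma` above is the small helper both stream parsers rely on to validate the comma-terminated entries of a sync stream. Its behaviour, illustrated on hypothetical inputs::

    from leap.soledad.common.l2db.remote.utils import check_and_strip_comma

    check_and_strip_comma('{"gen": 1},')  # -> ('{"gen": 1}', True)
    check_and_strip_comma('{"gen": 1}')   # -> ('{"gen": 1}', False)
    check_and_strip_comma('')             # -> ('', False)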
+ +"""The synchronization utilities for U1DB.""" +from itertools import izip + +from leap.soledad.common import l2db +from leap.soledad.common.l2db import errors + + +class Synchronizer(object): + """Collect the state around synchronizing 2 U1DB replicas. + + Synchronization is bi-directional, in that new items in the source are sent + to the target, and new items in the target are returned to the source. + However, it still recognizes that one side is initiating the request. Also, + at the moment, conflicts are only created in the source. + """ + + def __init__(self, source, sync_target): + """Create a new Synchronization object. + + :param source: A Database + :param sync_target: A SyncTarget + """ + self.source = source + self.sync_target = sync_target + self.target_replica_uid = None + self.num_inserted = 0 + + def _insert_doc_from_target(self, doc, replica_gen, trans_id): + """Try to insert synced document from target. + + Implements TAKE OTHER semantics: any document from the target + that is in conflict will be taken as the new official value, + while the current conflicting value will be stored alongside + as a conflict. In the process indexes will be updated etc. + + :return: None + """ + # Increases self.num_inserted depending whether the document + # was effectively inserted. + state, _ = self.source._put_doc_if_newer( + doc, save_conflict=True, + replica_uid=self.target_replica_uid, replica_gen=replica_gen, + replica_trans_id=trans_id) + if state == 'inserted': + self.num_inserted += 1 + elif state == 'converged': + # magical convergence + pass + elif state == 'superseded': + # we have something newer, will be taken care of at the next sync + pass + else: + assert state == 'conflicted' + # The doc was saved as a conflict, so the database was updated + self.num_inserted += 1 + + def _record_sync_info_with_the_target(self, start_generation): + """Record our new after sync generation with the target if gapless. + + Any documents received from the target will cause the local + database to increment its generation. We do not want to send + them back to the target in a future sync. However, there could + also be concurrent updates from another process doing eg + 'put_doc' while the sync was running. And we do want to + synchronize those documents. We can tell if there was a + concurrent update by comparing our new generation number + versus the generation we started, and how many documents we + inserted from the target. If it matches exactly, then we can + record with the target that they are fully up to date with our + new generation. 
+ """ + cur_gen, trans_id = self.source._get_generation_info() + last_gen = start_generation + self.num_inserted + if (cur_gen == last_gen and self.num_inserted > 0): + self.sync_target.record_sync_info( + self.source._replica_uid, cur_gen, trans_id) + + def sync(self, callback=None, autocreate=False): + """Synchronize documents between source and target.""" + sync_target = self.sync_target + # get target identifier, its current generation, + # and its last-seen database generation for this source + try: + (self.target_replica_uid, target_gen, target_trans_id, + target_my_gen, target_my_trans_id) = sync_target.get_sync_info( + self.source._replica_uid) + except errors.DatabaseDoesNotExist: + if not autocreate: + raise + # will try to ask sync_exchange() to create the db + self.target_replica_uid = None + target_gen, target_trans_id = 0, '' + target_my_gen, target_my_trans_id = 0, '' + + def ensure_callback(replica_uid): + self.target_replica_uid = replica_uid + + else: + ensure_callback = None + if self.target_replica_uid == self.source._replica_uid: + raise errors.InvalidReplicaUID + # validate the generation and transaction id the target knows about us + self.source.validate_gen_and_trans_id( + target_my_gen, target_my_trans_id) + # what's changed since that generation and this current gen + my_gen, _, changes = self.source.whats_changed(target_my_gen) + + # this source last-seen database generation for the target + if self.target_replica_uid is None: + target_last_known_gen, target_last_known_trans_id = 0, '' + else: + target_last_known_gen, target_last_known_trans_id = ( + self.source._get_replica_gen_and_trans_id( # nopep8 + self.target_replica_uid)) + if not changes and target_last_known_gen == target_gen: + if target_trans_id != target_last_known_trans_id: + raise errors.InvalidTransactionId + return my_gen + changed_doc_ids = [doc_id for doc_id, _, _ in changes] + # prepare to send all the changed docs + docs_to_send = self.source.get_docs( + changed_doc_ids, + check_for_conflicts=False, include_deleted=True) + # TODO: there must be a way to not iterate twice + docs_by_generation = zip( + docs_to_send, (gen for _, gen, _ in changes), + (trans for _, _, trans in changes)) + + # exchange documents and try to insert the returned ones with + # the target, return target synced-up-to gen + new_gen, new_trans_id = sync_target.sync_exchange( + docs_by_generation, self.source._replica_uid, + target_last_known_gen, target_last_known_trans_id, + self._insert_doc_from_target, ensure_callback=ensure_callback) + # record target synced-up-to generation including applying what we sent + self.source._set_replica_gen_and_trans_id( + self.target_replica_uid, new_gen, new_trans_id) + + # if gapless record current reached generation with target + self._record_sync_info_with_the_target(my_gen) + + return my_gen + + +class SyncExchange(object): + """Steps and state for carrying through a sync exchange on a target.""" + + def __init__(self, db, source_replica_uid, last_known_generation): + self._db = db + self.source_replica_uid = source_replica_uid + self.source_last_known_generation = last_known_generation + self.seen_ids = {} # incoming ids not superseded + self.changes_to_return = None + self.new_gen = None + self.new_trans_id = None + # for tests + self._incoming_trace = [] + self._trace_hook = None + self._db._last_exchange_log = { + 'receive': {'docs': self._incoming_trace}, + 'return': None + } + + def _set_trace_hook(self, cb): + self._trace_hook = cb + + def _trace(self, state): + if not 
self._trace_hook: + return + self._trace_hook(state) + + def insert_doc_from_source(self, doc, source_gen, trans_id): + """Try to insert synced document from source. + + Conflicting documents are not inserted but will be sent over + to the sync source. + + It keeps track of progress by storing the document source + generation as well. + + The 1st step of a sync exchange is to call this repeatedly to + try insert all incoming documents from the source. + + :param doc: A Document object. + :param source_gen: The source generation of doc. + :return: None + """ + state, at_gen = self._db._put_doc_if_newer( + doc, save_conflict=False, + replica_uid=self.source_replica_uid, replica_gen=source_gen, + replica_trans_id=trans_id) + if state == 'inserted': + self.seen_ids[doc.doc_id] = at_gen + elif state == 'converged': + # magical convergence + self.seen_ids[doc.doc_id] = at_gen + elif state == 'superseded': + # we have something newer that we will return + pass + else: + # conflict that we will returne + assert state == 'conflicted' + # for tests + self._incoming_trace.append((doc.doc_id, doc.rev)) + self._db._last_exchange_log['receive'].update({ + 'source_uid': self.source_replica_uid, + 'source_gen': source_gen + }) + + def find_changes_to_return(self): + """Find changes to return. + + Find changes since last_known_generation in db generation + order using whats_changed. It excludes documents ids that have + already been considered (superseded by the sender, etc). + + :return: new_generation - the generation of this database + which the caller can consider themselves to be synchronized after + processing the returned documents. + """ + self._db._last_exchange_log['receive'].update({ # for tests + 'last_known_gen': self.source_last_known_generation + }) + self._trace('before whats_changed') + gen, trans_id, changes = self._db.whats_changed( + self.source_last_known_generation) + self._trace('after whats_changed') + self.new_gen = gen + self.new_trans_id = trans_id + seen_ids = self.seen_ids + # changed docs that weren't superseded by or converged with + self.changes_to_return = [ + (doc_id, gen, trans_id) for (doc_id, gen, trans_id) in changes if + # there was a subsequent update + doc_id not in seen_ids or seen_ids.get(doc_id) < gen] + return self.new_gen + + def return_docs(self, return_doc_cb): + """Return the changed documents and their last change generation + repeatedly invoking the callback return_doc_cb. + + The final step of a sync exchange. + + :param: return_doc_cb(doc, gen, trans_id): is a callback + used to return the documents with their last change generation + to the target replica. 
+ :return: None + """ + changes_to_return = self.changes_to_return + # return docs, including conflicts + changed_doc_ids = [doc_id for doc_id, _, _ in changes_to_return] + self._trace('before get_docs') + docs = self._db.get_docs( + changed_doc_ids, check_for_conflicts=False, include_deleted=True) + + docs_by_gen = izip( + docs, (gen for _, gen, _ in changes_to_return), + (trans_id for _, _, trans_id in changes_to_return)) + _outgoing_trace = [] # for tests + for doc, gen, trans_id in docs_by_gen: + return_doc_cb(doc, gen, trans_id) + _outgoing_trace.append((doc.doc_id, doc.rev)) + # for tests + self._db._last_exchange_log['return'] = { + 'docs': _outgoing_trace, + 'last_gen': self.new_gen} + + +class LocalSyncTarget(l2db.SyncTarget): + """Common sync target implementation logic for all local sync targets.""" + + def __init__(self, db): + self._db = db + self._trace_hook = None + + def sync_exchange(self, docs_by_generations, source_replica_uid, + last_known_generation, last_known_trans_id, + return_doc_cb, ensure_callback=None): + self._db.validate_gen_and_trans_id( + last_known_generation, last_known_trans_id) + sync_exch = SyncExchange( + self._db, source_replica_uid, last_known_generation) + if self._trace_hook: + sync_exch._set_trace_hook(self._trace_hook) + # 1st step: try to insert incoming docs and record progress + for doc, doc_gen, trans_id in docs_by_generations: + sync_exch.insert_doc_from_source(doc, doc_gen, trans_id) + # 2nd step: find changed documents (including conflicts) to return + new_gen = sync_exch.find_changes_to_return() + # final step: return docs and record source replica sync point + sync_exch.return_docs(return_doc_cb) + return new_gen, sync_exch.new_trans_id + + def _set_trace_hook(self, cb): + self._trace_hook = cb diff --git a/common/src/leap/soledad/common/l2db/vectorclock.py b/common/src/leap/soledad/common/l2db/vectorclock.py new file mode 100644 index 00000000..42bceaa8 --- /dev/null +++ b/common/src/leap/soledad/common/l2db/vectorclock.py @@ -0,0 +1,89 @@ +# Copyright 2011 Canonical Ltd. +# +# This file is part of u1db. +# +# u1db is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# as published by the Free Software Foundation. +# +# u1db is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with u1db. If not, see <http://www.gnu.org/licenses/>. + +"""VectorClockRev helper class.""" + + +class VectorClockRev(object): + """Track vector clocks for multiple replica ids. + + This allows simple comparison to determine if one VectorClockRev is + newer/older/in-conflict-with another VectorClockRev without having to + examine history. Every replica has a strictly increasing revision. When + creating a new revision, they include all revisions for all other replicas + which the new revision dominates, and increment their own revision to + something greater than the current value. 
+ """ + + def __init__(self, value): + self._values = self._expand(value) + + def __repr__(self): + s = self.as_str() + return '%s(%s)' % (self.__class__.__name__, s) + + def as_str(self): + s = '|'.join(['%s:%d' % (m, r) for m, r + in sorted(self._values.items())]) + return s + + def _expand(self, value): + result = {} + if value is None: + return result + for replica_info in value.split('|'): + replica_uid, counter = replica_info.split(':') + counter = int(counter) + result[replica_uid] = counter + return result + + def is_newer(self, other): + """Is this VectorClockRev strictly newer than other. + """ + if not self._values: + return False + if not other._values: + return True + this_is_newer = False + other_expand = dict(other._values) + for key, value in self._values.iteritems(): + if key in other_expand: + other_value = other_expand.pop(key) + if other_value > value: + return False + elif other_value < value: + this_is_newer = True + else: + this_is_newer = True + if other_expand: + return False + return this_is_newer + + def increment(self, replica_uid): + """Increase the 'replica_uid' section of this vector clock. + + :return: A string representing the new vector clock value + """ + self._values[replica_uid] = self._values.get(replica_uid, 0) + 1 + + def maximize(self, other_vcr): + for replica_uid, counter in other_vcr._values.iteritems(): + if replica_uid not in self._values: + self._values[replica_uid] = counter + else: + this_counter = self._values[replica_uid] + if this_counter < counter: + self._values[replica_uid] = counter diff --git a/common/src/leap/soledad/common/tests/server_state.py b/common/src/leap/soledad/common/tests/server_state.py deleted file mode 100644 index 2fe9472f..00000000 --- a/common/src/leap/soledad/common/tests/server_state.py +++ /dev/null @@ -1,81 +0,0 @@ -# -*- coding: utf-8 -*- -# server_state.py -# Copyright (C) 2013 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - - -""" -State for servers to be used in tests. -""" - - -import os -import errno -import tempfile - - -from u1db.remote.server_state import ServerState -from leap.soledad.common.tests.util import ( - copy_sqlcipher_database_for_test, -) - - -class ServerStateForTests(ServerState): - - """Passed to a Request when it is instantiated. - - This is used to track server-side state, such as working-directory, open - databases, etc. 
- """ - - def __init__(self): - self._workingdir = tempfile.mkdtemp() - - def _relpath(self, relpath): - return os.path.join(self._workingdir, relpath) - - def open_database(self, path): - """Open a database at the given location.""" - from leap.soledad.client.sqlcipher import SQLCipherDatabase - return SQLCipherDatabase.open_database(path, '123', False) - - def create_database(self, path): - """Create a database at the given location.""" - from leap.soledad.client.sqlcipher import SQLCipherDatabase - return SQLCipherDatabase.open_database(path, '123', True) - - def check_database(self, path): - """Check if the database at the given location exists. - - Simply returns if it does or raises DatabaseDoesNotExist. - """ - db = self.open_database(path) - db.close() - - def ensure_database(self, path): - """Ensure database at the given location.""" - from leap.soledad.client.sqlcipher import SQLCipherDatabase - full_path = self._relpath(path) - db = SQLCipherDatabase.open_database(full_path, '123', False) - return db, db._replica_uid - - def delete_database(self, path): - """Delete database at the given location.""" - from leap.u1db.backends import sqlite_backend - full_path = self._relpath(path) - sqlite_backend.SQLiteDatabase.delete_database(full_path) - - def _copy_database(self, db): - return copy_sqlcipher_database_for_test(None, db) diff --git a/common/src/leap/soledad/common/tests/test_command.py b/common/src/leap/soledad/common/tests/test_command.py index c386bdd2..2136bb8f 100644 --- a/common/src/leap/soledad/common/tests/test_command.py +++ b/common/src/leap/soledad/common/tests/test_command.py @@ -21,10 +21,13 @@ from twisted.trial import unittest from leap.soledad.common.command import exec_validated_cmd +def validator(arg): + return True if arg is 'valid' else False + + class ExecuteValidatedCommandTest(unittest.TestCase): def test_argument_validation(self): - validator = lambda arg: True if arg is 'valid' else False status, out = exec_validated_cmd("command", "invalid arg", validator) self.assertEquals(status, 1) self.assertEquals(out, "invalid argument") diff --git a/common/src/leap/soledad/common/tests/test_soledad.py b/common/src/leap/soledad/common/tests/test_soledad.py deleted file mode 100644 index 36c4003c..00000000 --- a/common/src/leap/soledad/common/tests/test_soledad.py +++ /dev/null @@ -1,372 +0,0 @@ -# -*- coding: utf-8 -*- -# test_soledad.py -# Copyright (C) 2013 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -""" -Tests for general Soledad functionality. 
-""" -import os - -from mock import Mock - -from twisted.internet import defer - -from leap.common.events import catalog -from leap.soledad.common.tests.util import ( - BaseSoledadTest, - ADDRESS, -) -from leap import soledad -from leap.soledad.common.document import SoledadDocument -from leap.soledad.common.errors import DatabaseAccessError -from leap.soledad.client import Soledad -from leap.soledad.client.adbapi import U1DBConnectionPool -from leap.soledad.client.secrets import PassphraseTooShort -from leap.soledad.client.shared_db import SoledadSharedDatabase - - -class AuxMethodsTestCase(BaseSoledadTest): - - def test__init_dirs(self): - sol = self._soledad_instance(prefix='_init_dirs') - local_db_dir = os.path.dirname(sol.local_db_path) - secrets_path = os.path.dirname(sol.secrets.secrets_path) - self.assertTrue(os.path.isdir(local_db_dir)) - self.assertTrue(os.path.isdir(secrets_path)) - - def _close_soledad(results): - sol.close() - - d = sol.create_doc({}) - d.addCallback(_close_soledad) - return d - - def test__init_u1db_sqlcipher_backend(self): - sol = self._soledad_instance(prefix='_init_db') - self.assertIsInstance(sol._dbpool, U1DBConnectionPool) - self.assertTrue(os.path.isfile(sol.local_db_path)) - sol.close() - - def test__init_config_with_defaults(self): - """ - Test if configuration defaults point to the correct place. - """ - - class SoledadMock(Soledad): - - def __init__(self): - pass - - # instantiate without initializing so we just test - # _init_config_with_defaults() - sol = SoledadMock() - sol._passphrase = u'' - sol._server_url = '' - sol._init_config_with_defaults() - # assert value of local_db_path - self.assertEquals( - os.path.join(sol.default_prefix, 'soledad.u1db'), - sol.local_db_path) - - def test__init_config_from_params(self): - """ - Test if configuration is correctly read from file. - """ - sol = self._soledad_instance( - 'leap@leap.se', - passphrase=u'123', - secrets_path='value_3', - local_db_path='value_2', - server_url='value_1', - cert_file=None) - self.assertEqual( - os.path.join(self.tempdir, 'value_3'), - sol.secrets.secrets_path) - self.assertEqual( - os.path.join(self.tempdir, 'value_2'), - sol.local_db_path) - self.assertEqual('value_1', sol._server_url) - sol.close() - - def test_change_passphrase(self): - """ - Test if passphrase can be changed. - """ - prefix = '_change_passphrase' - sol = self._soledad_instance( - 'leap@leap.se', - passphrase=u'123', - prefix=prefix, - ) - - def _change_passphrase(doc1): - self._doc1 = doc1 - sol.change_passphrase(u'654321') - sol.close() - - def _assert_wrong_password_raises(results): - with self.assertRaises(DatabaseAccessError): - self._soledad_instance( - 'leap@leap.se', - passphrase=u'123', - prefix=prefix) - - def _instantiate_with_new_passphrase(results): - sol2 = self._soledad_instance( - 'leap@leap.se', - passphrase=u'654321', - prefix=prefix) - self._sol2 = sol2 - return sol2.get_doc(self._doc1.doc_id) - - def _assert_docs_are_equal(doc2): - self.assertEqual(self._doc1, doc2) - self._sol2.close() - - d = sol.create_doc({'simple': 'doc'}) - d.addCallback(_change_passphrase) - d.addCallback(_assert_wrong_password_raises) - d.addCallback(_instantiate_with_new_passphrase) - d.addCallback(_assert_docs_are_equal) - d.addCallback(lambda _: sol.close()) - - return d - - def test_change_passphrase_with_short_passphrase_raises(self): - """ - Test if attempt to change passphrase passing a short passphrase - raises. 
- """ - sol = self._soledad_instance( - 'leap@leap.se', - passphrase=u'123') - # check that soledad complains about new passphrase length - self.assertRaises( - PassphraseTooShort, - sol.change_passphrase, u'54321') - sol.close() - - def test_get_passphrase(self): - """ - Assert passphrase getter works fine. - """ - sol = self._soledad_instance() - self.assertEqual('123', sol._passphrase) - sol.close() - - -class SoledadSharedDBTestCase(BaseSoledadTest): - - """ - These tests ensure the functionalities of the shared recovery database. - """ - - def setUp(self): - BaseSoledadTest.setUp(self) - self._shared_db = SoledadSharedDatabase( - 'https://provider/', ADDRESS, document_factory=SoledadDocument, - creds=None) - - def tearDown(self): - BaseSoledadTest.tearDown(self) - - def test__get_secrets_from_shared_db(self): - """ - Ensure the shared db is queried with the correct doc_id. - """ - doc_id = self._soledad.secrets._shared_db_doc_id() - self._soledad.secrets._get_secrets_from_shared_db() - self.assertTrue( - self._soledad.shared_db.get_doc.assert_called_with( - doc_id) is None, - 'Wrong doc_id when fetching recovery document.') - - def test__put_secrets_in_shared_db(self): - """ - Ensure recovery document is put into shared recover db. - """ - doc_id = self._soledad.secrets._shared_db_doc_id() - self._soledad.secrets._put_secrets_in_shared_db() - self.assertTrue( - self._soledad.shared_db.get_doc.assert_called_with( - doc_id) is None, - 'Wrong doc_id when fetching recovery document.') - self.assertTrue( - self._soledad.shared_db.put_doc.assert_called_with( - self._doc_put) is None, - 'Wrong document when putting recovery document.') - self.assertTrue( - self._doc_put.doc_id == doc_id, - 'Wrong doc_id when putting recovery document.') - - -class SoledadSignalingTestCase(BaseSoledadTest): - - """ - These tests ensure signals are correctly emmited by Soledad. - """ - - EVENTS_SERVER_PORT = 8090 - - def setUp(self): - # mock signaling - soledad.client.signal = Mock() - soledad.client.secrets.events.emit_async = Mock() - # run parent's setUp - BaseSoledadTest.setUp(self) - - def tearDown(self): - BaseSoledadTest.tearDown(self) - - def _pop_mock_call(self, mocked): - mocked.call_args_list.pop() - mocked.mock_calls.pop() - mocked.call_args = mocked.call_args_list[-1] - - def test_stage3_bootstrap_signals(self): - """ - Test that a fresh soledad emits all bootstrap signals. - - Signals are: - - downloading keys / done downloading keys. - - creating keys / done creating keys. - - downloading keys / done downloading keys. - - uploading keys / done uploading keys. 
- """ - soledad.client.secrets.events.emit_async.reset_mock() - # get a fresh instance so it emits all bootstrap signals - sol = self._soledad_instance( - secrets_path='alternative_stage3.json', - local_db_path='alternative_stage3.u1db', - userid=ADDRESS) - # reverse call order so we can verify in the order the signals were - # expected - soledad.client.secrets.events.emit_async.mock_calls.reverse() - soledad.client.secrets.events.emit_async.call_args = \ - soledad.client.secrets.events.emit_async.call_args_list[0] - soledad.client.secrets.events.emit_async.call_args_list.reverse() - - user_data = {'userid': ADDRESS, 'uuid': ADDRESS} - - # downloading keys signals - soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DOWNLOADING_KEYS, user_data - ) - self._pop_mock_call(soledad.client.secrets.events.emit_async) - soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DONE_DOWNLOADING_KEYS, user_data - ) - # creating keys signals - self._pop_mock_call(soledad.client.secrets.events.emit_async) - soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_CREATING_KEYS, user_data - ) - self._pop_mock_call(soledad.client.secrets.events.emit_async) - soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DONE_CREATING_KEYS, user_data - ) - # downloading once more (inside _put_keys_in_shared_db) - self._pop_mock_call(soledad.client.secrets.events.emit_async) - soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DOWNLOADING_KEYS, user_data - ) - self._pop_mock_call(soledad.client.secrets.events.emit_async) - soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DONE_DOWNLOADING_KEYS, user_data - ) - # uploading keys signals - self._pop_mock_call(soledad.client.secrets.events.emit_async) - soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_UPLOADING_KEYS, user_data - ) - self._pop_mock_call(soledad.client.secrets.events.emit_async) - soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DONE_UPLOADING_KEYS, user_data - ) - # assert db was locked and unlocked - sol.shared_db.lock.assert_called_with() - sol.shared_db.unlock.assert_called_with('atoken') - sol.close() - - def test_stage2_bootstrap_signals(self): - """ - Test that if there are keys in server, soledad will download them and - emit corresponding signals. 
- """ - # get existing instance so we have access to keys - sol = self._soledad_instance() - # create a document with secrets - doc = SoledadDocument(doc_id=sol.secrets._shared_db_doc_id()) - doc.content = sol.secrets._export_recovery_document() - sol.close() - # reset mock - soledad.client.secrets.events.emit_async.reset_mock() - # get a fresh instance so it emits all bootstrap signals - shared_db = self.get_default_shared_mock(get_doc_return_value=doc) - sol = self._soledad_instance( - secrets_path='alternative_stage2.json', - local_db_path='alternative_stage2.u1db', - shared_db_class=shared_db) - # reverse call order so we can verify in the order the signals were - # expected - soledad.client.secrets.events.emit_async.mock_calls.reverse() - soledad.client.secrets.events.emit_async.call_args = \ - soledad.client.secrets.events.emit_async.call_args_list[0] - soledad.client.secrets.events.emit_async.call_args_list.reverse() - # assert download keys signals - soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DOWNLOADING_KEYS, - {'userid': ADDRESS, 'uuid': ADDRESS} - ) - self._pop_mock_call(soledad.client.secrets.events.emit_async) - soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DONE_DOWNLOADING_KEYS, - {'userid': ADDRESS, 'uuid': ADDRESS}, - ) - sol.close() - - def test_stage1_bootstrap_signals(self): - """ - Test that if soledad already has a local secret, it emits no signals. - """ - soledad.client.signal.reset_mock() - # get an existent instance so it emits only some of bootstrap signals - sol = self._soledad_instance() - self.assertEqual([], soledad.client.signal.mock_calls) - sol.close() - - @defer.inlineCallbacks - def test_sync_signals(self): - """ - Test Soledad emits SOLEDAD_CREATING_KEYS signal. - """ - # get a fresh instance so it emits all bootstrap signals - sol = self._soledad_instance() - soledad.client.signal.reset_mock() - - # mock the actual db sync so soledad does not try to connect to the - # server - d = defer.Deferred() - d.callback(None) - sol._dbsyncer.sync = Mock(return_value=d) - - yield sol.sync() - - # assert the signal has been emitted - soledad.client.events.emit_async.assert_called_with( - catalog.SOLEDAD_DONE_DATA_SYNC, - {'userid': ADDRESS, 'uuid': ADDRESS}, - ) - sol.close() diff --git a/docs/server-token.txt b/docs/server-token.txt new file mode 100644 index 00000000..89e4d69f --- /dev/null +++ b/docs/server-token.txt @@ -0,0 +1,8 @@ +Requests to the soledad server use a slightly different format than bonafide: + +<pre> +Authentication: 'Token <[base64-encoded]uid:token>' +</pre> + +where @<...>@ is a base64-encoded string that concatenates the user id and the +token. 
diff --git a/scripts/db_access/client_side_db.py b/scripts/db_access/client_side_db.py
index 25eebfbe..11d72791 100644
--- a/scripts/db_access/client_side_db.py
+++ b/scripts/db_access/client_side_db.py
@@ -55,7 +55,7 @@ def _fail(reason):
 
 def _get_api_info(provider):
     info = requests.get(
-        'https://'+provider+'/provider.json', verify=False).json()
+        'https://' + provider + '/provider.json', verify=False).json()
     return info['api_uri'], info['api_version']
 
 
diff --git a/scripts/docker/Dockerfile b/scripts/docker/Dockerfile
new file mode 100644
index 00000000..915508ea
--- /dev/null
+++ b/scripts/docker/Dockerfile
@@ -0,0 +1,51 @@
+# start with a fresh debian image
+FROM debian
+
+# expose soledad server port in case we want to run a server container
+EXPOSE 2424
+
+# install dependencies from debian repos
+COPY files/apt/leap.list /etc/apt/sources.list.d/
+
+RUN apt-get update
+RUN apt-get -y --force-yes install leap-archive-keyring
+
+RUN apt-get update
+
+RUN apt-get -y install git
+RUN apt-get -y install vim
+RUN apt-get -y install python-ipdb
+
+# install python deps
+RUN apt-get -y install libpython2.7-dev
+RUN apt-get -y install libffi-dev
+RUN apt-get -y install libssl-dev
+RUN apt-get -y install libzmq3-dev
+RUN apt-get -y install python-pip
+RUN apt-get -y install couchdb
+RUN apt-get -y install python-srp
+RUN apt-get -y install python-scrypt
+RUN apt-get -y install leap-keymanager
+RUN apt-get -y install python-tz
+
+RUN pip install -U pip
+RUN pip install psutil
+
+# install soledad-perf deps
+RUN pip install klein
+RUN apt-get -y install curl
+RUN apt-get -y install httperf
+
+# clone repositories
+ENV BASEURL "https://github.com/leapcode"
+ENV VARDIR "/var/local"
+ENV REPOS "soledad leap_pycommon soledad-perf"
+RUN for repo in ${REPOS}; do git clone ${BASEURL}/${repo}.git /var/local/${repo}; done
+
+# copy over files to help setup the environment and run soledad
+RUN mkdir -p /usr/local/soledad
+
+COPY files/build/install-deps-from-repos.sh /usr/local/soledad/
+RUN /usr/local/soledad/install-deps-from-repos.sh
+
+COPY files/bin/ /usr/local/soledad/
diff --git a/scripts/docker/Makefile b/scripts/docker/Makefile
new file mode 100644
index 00000000..4fa2e264
--- /dev/null
+++ b/scripts/docker/Makefile
@@ -0,0 +1,134 @@
+#/usr/bin/env
+
+# This makefile is intended to aid in running soledad docker images for
+# specific purposes, such as running a server, a client, or tests.
+#
+# In order to communicate the IP address of one container to another, we make
+# use of a file containing the container id. You have to explicitly pass the
+# CONTAINER_ID_FILE variable when invoking some of the targets below.
+#
+# Example usage:
+#
+#   make run-server CONTAINER_ID_FILE=/tmp/container-id.txt
+#   make run-client-perf CONTAINER_ID_FILE=/tmp/container-id.txt
+
+#####################################################################
+# Some configurations you might override when calling this makefile #
+#####################################################################
+
+IMAGE_NAME ?= leap/soledad:1.0
+SOLEDAD_REMOTE ?= https://0xacab.org/leap/soledad.git
+SOLEDAD_BRANCH ?= develop
+SOLEDAD_PRELOAD_NUM ?= 100
+SOLEDAD_PRELOAD_SIZE ?= 500
+MEMORY ?= 512m
+
+##############################################
+# Docker image generation (main make target) #
+##############################################
+
+all: image
+
+image:
+	docker build -t $(IMAGE_NAME) .
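+
+# Note: several targets below call helper/get-container-ip.sh to discover a
+# running container's IP address. That helper is not shown in this diff; a
+# minimal sketch of what such a helper could look like (an assumption, not
+# necessarily the actual script) is a one-liner around docker inspect:
+#
+#   docker inspect --format '{{ .NetworkSettings.IPAddress }}' "${container_id}"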
+ +################################################## +# Run a Soledad Server inside a docker container # +################################################## + +run-server: + @if [ -z "$(CONTAINER_ID_FILE)" ]; then \ + echo "Error: you have to pass a value to CONTAINER_ID_FILE."; \ + exit 2; \ + fi + docker run \ + --memory="$(MEMORY)" \ + --cpuset-cpus=0 \ + --env="SOLEDAD_REMOTE=$(SOLEDAD_REMOTE)" \ + --env="SOLEDAD_BRANCH=$(SOLEDAD_BRANCH)" \ + --env="SOLEDAD_PRELOAD_NUM=$(SOLEDAD_PRELOAD_NUM)" \ + --env="SOLEDAD_PRELOAD_SIZE=$(SOLEDAD_PRELOAD_SIZE)" \ + --cidfile=$(CONTAINER_ID_FILE) \ + --detach \ + $(IMAGE_NAME) \ + /usr/local/soledad/run-server.sh # --drop-to-shell + +run-client-bootstrap: + @if [ -z "$(CONTAINER_ID_FILE)" ]; then \ + echo "Error: you have to pass a value to CONTAINER_ID_FILE."; \ + exit 2; \ + fi + container_id=`cat $(CONTAINER_ID_FILE)`; \ + server_ip=`./helper/get-container-ip.sh $${container_id}`; \ + docker run -t -i \ + --memory="$(MEMORY)" \ + --env="SOLEDAD_REMOTE=$(SOLEDAD_REMOTE)" \ + --env="SOLEDAD_BRANCH=$(SOLEDAD_BRANCH)" \ + --env="SOLEDAD_SERVER_URL=http://$${server_ip}:2424" \ + $(IMAGE_NAME) \ + /usr/local/soledad/run-client-bootstrap.sh + +################################################# +# Run all trial tests inside a docker container # +################################################# + +run-trial: + docker run -t -i \ + --memory="$(MEMORY)" \ + --env="SOLEDAD_REMOTE=$(SOLEDAD_REMOTE)" \ + --env="SOLEDAD_BRANCH=$(SOLEDAD_BRANCH)" \ + $(IMAGE_NAME) \ + /usr/local/soledad/run-trial.sh + +############################################ +# Performance tests and graphic generation # +############################################ + +run-perf-test: + helper/run-test.sh perf + +run-client-perf: + @if [ -z "$(CONTAINER_ID_FILE)" ]; then \ + echo "Error: you have to pass a value to CONTAINER_ID_FILE."; \ + exit 2; \ + fi + container_id=`cat $(CONTAINER_ID_FILE)`; \ + server_ip=`./helper/get-container-ip.sh $${container_id}`; \ + docker run -t -i \ + --memory="$(MEMORY)" \ + --cpuset-cpus=1 \ + --cidfile=$(CONTAINER_ID_FILE)-perf \ + --env="SOLEDAD_REMOTE=$(SOLEDAD_REMOTE)" \ + --env="SOLEDAD_BRANCH=$(SOLEDAD_BRANCH)" \ + --env="SOLEDAD_PERF_REMOTE=https://0xacab.org/drebs/soledad-perf.git" \ + --env="SOLEDAD_PERF_BRANCH=bug/ensure-events-server" \ + --env="SOLEDAD_PRELOAD_NUM=$(SOLEDAD_PRELOAD_NUM)" \ + --env="SOLEDAD_PRELOAD_SIZE=$(SOLEDAD_PRELOAD_SIZE)" \ + --env="SOLEDAD_STATS=1" \ + --env="SOLEDAD_SERVER_URL=http://$${server_ip}:2424" \ + --env="SOLEDAD_LOG=1" \ + $(IMAGE_NAME) \ + /usr/local/soledad/run-client-perf.sh # --drop-to-shell + +cp-perf-result: + @if [ -z "$(CONTAINER_ID_FILE)" ]; then \ + echo "Error: you have to pass a value to CONTAINER_ID_FILE."; \ + exit 2; \ + fi + perf_id=`cat $(CONTAINER_ID_FILE)-perf`; \ + docker cp $${perf_id}:/var/local/soledad-perf/out/sync-stats.png /tmp/; \ + docker cp $${perf_id}:/var/local/soledad-perf/out/series.log /tmp/ + +######################## +# Other helper targets # +######################## + +run-shell: image + docker run -t -i \ + --memory="$(MEMORY)" \ + $(IMAGE_NAME) \ + /bin/bash + +rm-all-containers: + containers=`docker ps -a | cut -d" " -f 1 | tail -n +2 | xargs`; \ + if [ ! 
-z "$${containers}" ]; then docker rm -f $${containers}; fi diff --git a/scripts/docker/README.md b/scripts/docker/README.md new file mode 100644 index 00000000..c4d7ac94 --- /dev/null +++ b/scripts/docker/README.md @@ -0,0 +1,49 @@ +Soledad Docker Images +===================== + +The files in this directory help create a docker image that is usable for +running soledad server and client in an isolated docker context. This is +especially useful for testing purposes as you can limit/reserve a certain +amount of resources for the soledad process, and thus provide a baseline for +comparison of time and resource consumption between distinct runs. + +Check the `Dockerfile` for the steps for creating the docker image. + +Check the `Makefile` for the rules for running containers. + +Check the `helper/` directory for scripts that help running tests. + + +Environment variables for docker containers +------------------------------------------- + +Different environment variables can be set for docker containers and will +cause the scripts to behave differently: + + SOLEDAD_REMOTE - a git url for a remote repository that is added at run time + to the local soledad git repository. + + SOLEDAD_BRANCH - the name of a branch to be checked out from the configured + remote repository. + + SOLEDAD_PRELOAD_NUM - The number of documents to be preloaded in the + container database (either client or server). + + SOLEDAD_PRELOAD_SIZE - The size of the payload of the documents to be + prelaoded in the container database (either client or + server). + + SOLEDAD_SERVER_URL - The URL of the soledad server to be used during the + test. + +Check the Makefile for examples on how to use these and maybe even other +variables not documented here. + + +Communication between client and server containers +-------------------------------------------------- + +A CONTAINER_ID_FILE variable can be passed to the Makefile target so that the +container id is recorded in a file for further use. This makes it possible to +extract a container's IP and pass it to another container so they can +communicate. diff --git a/scripts/docker/TODO b/scripts/docker/TODO new file mode 100644 index 00000000..5185d754 --- /dev/null +++ b/scripts/docker/TODO @@ -0,0 +1 @@ +- limit resources of containers (mem and cpu) diff --git a/scripts/docker/files/apt/leap.list b/scripts/docker/files/apt/leap.list new file mode 100644 index 00000000..7eb474d8 --- /dev/null +++ b/scripts/docker/files/apt/leap.list @@ -0,0 +1,4 @@ +# This file is meant to be copied into the `/etc/apt/sources.list.d` directory +# inside a docker image to provide a source for leap-specific packages. + +deb http://deb.leap.se/0.8 jessie main diff --git a/scripts/docker/files/bin/client_side_db.py b/scripts/docker/files/bin/client_side_db.py new file mode 100644 index 00000000..4be33d13 --- /dev/null +++ b/scripts/docker/files/bin/client_side_db.py @@ -0,0 +1,322 @@ +#!/usr/bin/python + +import os +import argparse +import tempfile +import getpass +import requests +import srp._pysrp as srp +import binascii +import logging +import json +import time + +from twisted.internet import reactor +from twisted.internet.defer import inlineCallbacks + +from leap.soledad.client import Soledad +from leap.keymanager import KeyManager +from leap.keymanager.openpgp import OpenPGPKey + +from leap.common.events import server +server.ensure_server() + +from util import ValidateUserHandle + + +""" +Script to give access to client-side Soledad database. 
+
+This is mainly used for tests, but can also be used to recover data from a
+Soledad database (public/private keys, exported documents, etc.).
+
+To speed up testing/debugging, this script can dump the auth data after
+logging in. Use the --export-auth-data option to export auth data to a file.
+The contents of the file are a JSON dictionary containing the uuid, server_url,
+cert_file and token, which is enough info to instantiate a soledad client
+without having to interact with the webapp again. Use the --use-auth-data
+option to use the auth data stored in a file.
+
+Use the --help option to see available options.
+"""
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.DEBUG)
+
+
+safe_unhexlify = lambda x: binascii.unhexlify(x) if (
+    len(x) % 2 == 0) else binascii.unhexlify('0' + x)
+
+
+def _fail(reason):
+    logger.error('Fail: ' + reason)
+    exit(2)
+
+
+def _get_api_info(provider):
+    info = requests.get(
+        'https://' + provider + '/provider.json', verify=False).json()
+    return info['api_uri'], info['api_version']
+
+
+def _login(username, passphrase, provider, api_uri, api_version):
+    usr = srp.User(username, passphrase, srp.SHA256, srp.NG_1024)
+    auth = None
+    try:
+        auth = _authenticate(api_uri, api_version, usr).json()
+    except requests.exceptions.ConnectionError:
+        _fail('Could not connect to server.')
+    if 'errors' in auth:
+        _fail(str(auth['errors']))
+    return api_uri, api_version, auth
+
+
+def _authenticate(api_uri, api_version, usr):
+    api_url = "%s/%s" % (api_uri, api_version)
+    session = requests.session()
+    uname, A = usr.start_authentication()
+    params = {'login': uname, 'A': binascii.hexlify(A)}
+    init = session.post(
+        api_url + '/sessions', data=params, verify=False).json()
+    if 'errors' in init:
+        _fail('test user not found')
+    M = usr.process_challenge(
+        safe_unhexlify(init['salt']), safe_unhexlify(init['B']))
+    return session.put(api_url + '/sessions/' + uname, verify=False,
+                       data={'client_auth': binascii.hexlify(M)})
+
+
+def _get_soledad_info(username, provider, passphrase, basedir):
+    api_uri, api_version = _get_api_info(provider)
+    auth = _login(username, passphrase, provider, api_uri, api_version)
+    # get soledad server url
+    service_url = '%s/%s/config/soledad-service.json' % \
+                  (api_uri, api_version)
+    soledad_hosts = requests.get(service_url, verify=False).json()['hosts']
+    hostnames = soledad_hosts.keys()
+    # allow for choosing the host
+    host = hostnames[0]
+    if len(hostnames) > 1:
+        i = 1
+        print "There are many available hosts:"
+        for h in hostnames:
+            print "  (%d) %s.%s" % (i, h, provider)
+            i += 1
+        choice = raw_input("Choose a host to use (default: 1): ")
+        if choice != '':
+            host = hostnames[int(choice) - 1]
+    server_url = 'https://%s:%d/user-%s' % \
+                 (soledad_hosts[host]['hostname'],
+                  soledad_hosts[host]['port'],
+                  auth[2]['id'])
+    # get provider ca certificate
+    ca_cert = requests.get('https://%s/ca.crt' % provider, verify=False).text
+    cert_file = os.path.join(basedir, 'ca.crt')
+    with open(cert_file, 'w') as f:
+        f.write(ca_cert)
+    return auth[2]['id'], server_url, cert_file, auth[2]['token']
+
+
+def _get_soledad_instance(uuid, passphrase, basedir, server_url, cert_file,
+                          token):
+    # setup soledad info
+    logger.info('UUID is %s' % uuid)
+    logger.info('Server URL is %s' % server_url)
+    secrets_path = os.path.join(
+        basedir, '%s.secret' % uuid)
+    local_db_path = os.path.join(
+        basedir, '%s.db' % uuid)
+    # instantiate soledad
+    return Soledad(
+        uuid,
+        unicode(passphrase),
+        secrets_path=secrets_path,
+        local_db_path=local_db_path,
+        server_url=server_url,
+        cert_file=cert_file,
+        auth_token=token,
+        defer_encryption=True)
+
+
+def _get_keymanager_instance(username, provider, soledad, token,
+                             ca_cert_path=None, api_uri=None,
+                             api_version=None, uid=None, gpgbinary=None):
+    return KeyManager(
+        "{username}@{provider}".format(username=username, provider=provider),
+        "http://uri",
+        soledad,
+        token=token,
+        ca_cert_path=ca_cert_path,
+        api_uri=api_uri,
+        api_version=api_version,
+        uid=uid,
+        gpgbinary=gpgbinary)
+
+
+def _parse_args():
+    # parse command line
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'user@provider', action=ValidateUserHandle, help='the user handle')
+    parser.add_argument(
+        '--basedir', '-b', default=None,
+        help='soledad base directory')
+    parser.add_argument(
+        '--passphrase', '-p', default=None,
+        help='the user passphrase')
+    parser.add_argument(
+        '--get-all-docs', '-a', action='store_true',
+        help='get all documents from the local database')
+    parser.add_argument(
+        '--create-docs', '-c', default=0, type=int,
+        help='create a number of documents')
+    parser.add_argument(
+        '--sync', '-s', action='store_true',
+        help='synchronize with the server replica')
+    parser.add_argument(
+        '--repeat-sync', '-r', action='store_true',
+        help='repeat synchronization until no new data is received')
+    parser.add_argument(
+        '--export-public-key', help="export the public key to a file")
+    parser.add_argument(
+        '--export-private-key', help="export the private key to a file")
+    parser.add_argument(
+        '--export-incoming-messages',
+        help="export incoming messages to a directory")
+    parser.add_argument(
+        '--export-auth-data',
+        help="export authentication data to a file")
+    parser.add_argument(
+        '--use-auth-data',
+        help="use authentication data from a file")
+    return parser.parse_args()
+
+
+def _get_passphrase(args):
+    passphrase = args.passphrase
+    if passphrase is None:
+        passphrase = getpass.getpass(
+            'Password for %s@%s: ' % (args.username, args.provider))
+    return passphrase
+
+
+def _get_basedir(args):
+    basedir = args.basedir
+    if basedir is None:
+        basedir = tempfile.mkdtemp()
+    elif not os.path.isdir(basedir):
+        os.mkdir(basedir)
+    logger.info('Using %s as base directory.' % basedir)
+    return basedir
+
+
+@inlineCallbacks
+def _export_key(args, km, fname, private=False):
+    address = args.username + "@" + args.provider
+    pkey = yield km.get_key(
+        address, OpenPGPKey, private=private, fetch_remote=False)
+    with open(fname, "w") as f:
+        f.write(pkey.key_data)
+
+
+@inlineCallbacks
+def _export_incoming_messages(soledad, directory):
+    yield soledad.create_index("by-incoming", "bool(incoming)")
+    docs = yield soledad.get_from_index("by-incoming", '1')
+    i = 1
+    for doc in docs:
+        with open(os.path.join(directory, "message_%d.gpg" % i), "w") as f:
+            f.write(doc.content["_enc_json"])
+        i += 1
+
+
+@inlineCallbacks
+def _get_all_docs(soledad):
+    _, docs = yield soledad.get_all_docs()
+    for doc in docs:
+        print json.dumps(doc.content, indent=4)
+
+
+# main program
+
+@inlineCallbacks
+def _main(soledad, km, args):
+    try:
+        if args.create_docs:
+            for i in xrange(args.create_docs):
+                t = time.time()
+                logger.debug(
+                    "Creating doc %d/%d..."
 % (i + 1, args.create_docs))
+                content = {
+                    'datetime': time.strftime(
+                        "%Y-%m-%d %H:%M:%S", time.gmtime(t)),
+                    'timestamp': t,
+                    'index': i,
+                    'total': args.create_docs,
+                }
+                yield soledad.create_doc(content)
+        if args.sync:
+            yield soledad.sync()
+        if args.repeat_sync:
+            old_gen = 0
+            new_gen = yield soledad.sync()
+            while old_gen != new_gen:
+                old_gen = new_gen
+                new_gen = yield soledad.sync()
+        if args.get_all_docs:
+            yield _get_all_docs(soledad)
+        if args.export_private_key:
+            yield _export_key(args, km, args.export_private_key, private=True)
+        if args.export_public_key:
+            yield _export_key(args, km, args.export_public_key, private=False)
+        if args.export_incoming_messages:
+            yield _export_incoming_messages(
+                soledad, args.export_incoming_messages)
+    except Exception as e:
+        logger.error(e)
+    finally:
+        soledad.close()
+        reactor.callWhenRunning(reactor.stop)
+
+
+if __name__ == '__main__':
+    args = _parse_args()
+    passphrase = _get_passphrase(args)
+    basedir = _get_basedir(args)
+
+    if not args.use_auth_data:
+        # get auth data from server
+        uuid, server_url, cert_file, token = \
+            _get_soledad_info(
+                args.username, args.provider, passphrase, basedir)
+    else:
+        # load auth data from file
+        with open(args.use_auth_data) as f:
+            auth_data = json.loads(f.read())
+        uuid = auth_data['uuid']
+        server_url = auth_data['server_url']
+        cert_file = auth_data['cert_file']
+        token = auth_data['token']
+
+    # export auth data to a file
+    if args.export_auth_data:
+        with open(args.export_auth_data, "w") as f:
+            f.write(json.dumps({
+                'uuid': uuid,
+                'server_url': server_url,
+                'cert_file': cert_file,
+                'token': token,
+            }))
+
+    soledad = _get_soledad_instance(
+        uuid, passphrase, basedir, server_url, cert_file, token)
+    km = _get_keymanager_instance(
+        args.username,
+        args.provider,
+        soledad,
+        token,
+        uid=uuid)
+    _main(soledad, km, args)
+    reactor.run()
diff --git a/scripts/docker/files/bin/conf/cert_default.conf b/scripts/docker/files/bin/conf/cert_default.conf
new file mode 100644
index 00000000..8043cea3
--- /dev/null
+++ b/scripts/docker/files/bin/conf/cert_default.conf
@@ -0,0 +1,15 @@
+[ req ]
+default_bits = 1024
+default_keyfile = keyfile.pem
+distinguished_name = req_distinguished_name
+prompt = no
+output_password = mypass
+
+[ req_distinguished_name ]
+C = GB
+ST = Test State or Province
+L = Test Locality
+O = Organization Name
+OU = Organizational Unit Name
+CN = localhost
+emailAddress = test@email.address
diff --git a/scripts/docker/files/bin/conf/couchdb_default.ini b/scripts/docker/files/bin/conf/couchdb_default.ini
new file mode 100644
index 00000000..5ab72d7b
--- /dev/null
+++ b/scripts/docker/files/bin/conf/couchdb_default.ini
@@ -0,0 +1,361 @@
+; etc/couchdb/default.ini.tpl. Generated from default.ini.tpl.in by configure.
+
+; Upgrading CouchDB will overwrite this file.
+[vendor]
+name = The Apache Software Foundation
+version = 1.6.0
+
+[couchdb]
+database_dir = BASEDIR
+view_index_dir = BASEDIR
+util_driver_dir = /usr/lib/x86_64-linux-gnu/couchdb/erlang/lib/couch-1.6.0/priv/lib
+max_document_size = 4294967296 ; 4 GB
+os_process_timeout = 5000 ; 5 seconds. for view and external servers.
+max_dbs_open = 100
+delayed_commits = true ; set this to false to ensure an fsync before 201 Created is returned
+uri_file = BASEDIR/couch.uri
+; Method used to compress everything that is appended to database and view index files, except
+; for attachments (see the attachments section).
Available methods are: +; +; none - no compression +; snappy - use google snappy, a very fast compressor/decompressor +uuid = bc2f8b84ecb0b13a31cf7f6881a52194 + +; deflate_[N] - use zlib's deflate, N is the compression level which ranges from 1 (fastest, +; lowest compression ratio) to 9 (slowest, highest compression ratio) +file_compression = snappy +; Higher values may give better read performance due to less read operations +; and/or more OS page cache hits, but they can also increase overall response +; time for writes when there are many attachment write requests in parallel. +attachment_stream_buffer_size = 4096 + +plugin_dir = /usr/lib/x86_64-linux-gnu/couchdb/plugins + +[database_compaction] +; larger buffer sizes can originate smaller files +doc_buffer_size = 524288 ; value in bytes +checkpoint_after = 5242880 ; checkpoint after every N bytes were written + +[view_compaction] +; larger buffer sizes can originate smaller files +keyvalue_buffer_size = 2097152 ; value in bytes + +[httpd] +port = 5984 +bind_address = 127.0.0.1 +authentication_handlers = {couch_httpd_oauth, oauth_authentication_handler}, {couch_httpd_auth, cookie_authentication_handler}, {couch_httpd_auth, default_authentication_handler} +default_handler = {couch_httpd_db, handle_request} +secure_rewrites = true +vhost_global_handlers = _utils, _uuids, _session, _oauth, _users +allow_jsonp = false +; Options for the MochiWeb HTTP server. +;server_options = [{backlog, 128}, {acceptor_pool_size, 16}] +; For more socket options, consult Erlang's module 'inet' man page. +;socket_options = [{recbuf, 262144}, {sndbuf, 262144}, {nodelay, true}] +socket_options = [{recbuf, 262144}, {sndbuf, 262144}] +log_max_chunk_size = 1000000 +enable_cors = false +; CouchDB can optionally enforce a maximum uri length; +; max_uri_length = 8000 + +[ssl] +port = 6984 + +[log] +file = BASEDIR/couch.log +level = info +include_sasl = true + +[couch_httpd_auth] +authentication_db = _users +authentication_redirect = /_utils/session.html +require_valid_user = false +timeout = 600 ; number of seconds before automatic logout +auth_cache_size = 50 ; size is number of cache entries +allow_persistent_cookies = false ; set to true to allow persistent cookies +iterations = 10 ; iterations for password hashing +; min_iterations = 1 +; max_iterations = 1000000000 +; comma-separated list of public fields, 404 if empty +; public_fields = + +[cors] +credentials = false +; List of origins separated by a comma, * means accept all +; Origins must include the scheme: http://example.com +; You can’t set origins: * and credentials = true at the same time. +;origins = * +; List of accepted headers separated by a comma +; headers = +; List of accepted methods +; methods = + + +; Configuration for a vhost +;[cors:http://example.com] +; credentials = false +; List of origins separated by a comma +; Origins must include the scheme: http://example.com +; You can’t set origins: * and credentials = true at the same time. +;origins = +; List of accepted headers separated by a comma +; headers = +; List of accepted methods +; methods = + +[couch_httpd_oauth] +; If set to 'true', oauth token and consumer secrets will be looked up +; in the authentication database (_users). These secrets are stored in +; a top level property named "oauth" in user documents. 
Example: +; { +; "_id": "org.couchdb.user:joe", +; "type": "user", +; "name": "joe", +; "password_sha": "fe95df1ca59a9b567bdca5cbaf8412abd6e06121", +; "salt": "4e170ffeb6f34daecfd814dfb4001a73" +; "roles": ["foo", "bar"], +; "oauth": { +; "consumer_keys": { +; "consumerKey1": "key1Secret", +; "consumerKey2": "key2Secret" +; }, +; "tokens": { +; "token1": "token1Secret", +; "token2": "token2Secret" +; } +; } +; } +use_users_db = false + +[query_servers] +javascript = /usr/bin/couchjs /usr/share/couchdb/server/main.js +coffeescript = /usr/bin/couchjs /usr/share/couchdb/server/main-coffee.js + + +; Changing reduce_limit to false will disable reduce_limit. +; If you think you're hitting reduce_limit with a "good" reduce function, +; please let us know on the mailing list so we can fine tune the heuristic. +[query_server_config] +reduce_limit = true +os_process_limit = 25 + +[daemons] +index_server={couch_index_server, start_link, []} +external_manager={couch_external_manager, start_link, []} +query_servers={couch_query_servers, start_link, []} +vhosts={couch_httpd_vhost, start_link, []} +httpd={couch_httpd, start_link, []} +stats_aggregator={couch_stats_aggregator, start, []} +stats_collector={couch_stats_collector, start, []} +uuids={couch_uuids, start, []} +auth_cache={couch_auth_cache, start_link, []} +replicator_manager={couch_replicator_manager, start_link, []} +os_daemons={couch_os_daemons, start_link, []} +compaction_daemon={couch_compaction_daemon, start_link, []} + +[httpd_global_handlers] +/ = {couch_httpd_misc_handlers, handle_welcome_req, <<"Welcome">>} +favicon.ico = {couch_httpd_misc_handlers, handle_favicon_req, "/usr/share/couchdb/www"} + +_utils = {couch_httpd_misc_handlers, handle_utils_dir_req, "/usr/share/couchdb/www"} +_all_dbs = {couch_httpd_misc_handlers, handle_all_dbs_req} +_active_tasks = {couch_httpd_misc_handlers, handle_task_status_req} +_config = {couch_httpd_misc_handlers, handle_config_req} +_replicate = {couch_replicator_httpd, handle_req} +_uuids = {couch_httpd_misc_handlers, handle_uuids_req} +_restart = {couch_httpd_misc_handlers, handle_restart_req} +_stats = {couch_httpd_stats_handlers, handle_stats_req} +_log = {couch_httpd_misc_handlers, handle_log_req} +_session = {couch_httpd_auth, handle_session_req} +_oauth = {couch_httpd_oauth, handle_oauth_req} +_db_updates = {couch_dbupdates_httpd, handle_req} +_plugins = {couch_plugins_httpd, handle_req} + +[httpd_db_handlers] +_all_docs = {couch_mrview_http, handle_all_docs_req} +_changes = {couch_httpd_db, handle_changes_req} +_compact = {couch_httpd_db, handle_compact_req} +_design = {couch_httpd_db, handle_design_req} +_temp_view = {couch_mrview_http, handle_temp_view_req} +_view_cleanup = {couch_mrview_http, handle_cleanup_req} + +; The external module takes an optional argument allowing you to narrow it to a +; single script. Otherwise the script name is inferred from the first path section +; after _external's own path. +; _mypath = {couch_httpd_external, handle_external_req, <<"mykey">>} +; _external = {couch_httpd_external, handle_external_req} + +[httpd_design_handlers] +_compact = {couch_mrview_http, handle_compact_req} +_info = {couch_mrview_http, handle_info_req} +_list = {couch_mrview_show, handle_view_list_req} +_rewrite = {couch_httpd_rewrite, handle_rewrite_req} +_show = {couch_mrview_show, handle_doc_show_req} +_update = {couch_mrview_show, handle_doc_update_req} +_view = {couch_mrview_http, handle_view_req} + +; enable external as an httpd handler, then link it with commands here. 
+; note, this api is still under consideration. +; [external] +; mykey = /path/to/mycommand + +; Here you can setup commands for CouchDB to manage +; while it is alive. It will attempt to keep each command +; alive if it exits. +; [os_daemons] +; some_daemon_name = /path/to/script -with args + + +[uuids] +; Known algorithms: +; random - 128 bits of random awesome +; All awesome, all the time. +; sequential - monotonically increasing ids with random increments +; First 26 hex characters are random. Last 6 increment in +; random amounts until an overflow occurs. On overflow, the +; random prefix is regenerated and the process starts over. +; utc_random - Time since Jan 1, 1970 UTC with microseconds +; First 14 characters are the time in hex. Last 18 are random. +; utc_id - Time since Jan 1, 1970 UTC with microseconds, plus utc_id_suffix string +; First 14 characters are the time in hex. uuids/utc_id_suffix string value is appended to these. +algorithm = sequential +; The utc_id_suffix value will be appended to uuids generated by the utc_id algorithm. +; Replicating instances should have unique utc_id_suffix values to ensure uniqueness of utc_id ids. +utc_id_suffix = +# Maximum number of UUIDs retrievable from /_uuids in a single request +max_count = 1000 + +[stats] +; rate is in milliseconds +rate = 1000 +; sample intervals are in seconds +samples = [0, 60, 300, 900] + +[attachments] +compression_level = 8 ; from 1 (lowest, fastest) to 9 (highest, slowest), 0 to disable compression +compressible_types = text/*, application/javascript, application/json, application/xml + +[replicator] +db = _replicator +; Maximum replicaton retry count can be a non-negative integer or "infinity". +max_replication_retry_count = 10 +; More worker processes can give higher network throughput but can also +; imply more disk and network IO. +worker_processes = 4 +; With lower batch sizes checkpoints are done more frequently. Lower batch sizes +; also reduce the total amount of used RAM memory. +worker_batch_size = 500 +; Maximum number of HTTP connections per replication. +http_connections = 20 +; HTTP connection timeout per replication. +; Even for very fast/reliable networks it might need to be increased if a remote +; database is too busy. +connection_timeout = 30000 +; If a request fails, the replicator will retry it up to N times. +retries_per_request = 10 +; Some socket options that might boost performance in some scenarios: +; {nodelay, boolean()} +; {sndbuf, integer()} +; {recbuf, integer()} +; {priority, integer()} +; See the `inet` Erlang module's man page for the full list of options. +socket_options = [{keepalive, true}, {nodelay, false}] +; Path to a file containing the user's certificate. +;cert_file = /full/path/to/server_cert.pem +; Path to file containing user's private PEM encoded key. +;key_file = /full/path/to/server_key.pem +; String containing the user's password. Only used if the private keyfile is password protected. +;password = somepassword +; Set to true to validate peer certificates. +verify_ssl_certificates = false +; File containing a list of peer trusted certificates (in the PEM format). +;ssl_trusted_certificates_file = /etc/ssl/certs/ca-certificates.crt +; Maximum peer certificate depth (must be set even if certificate validation is off). +ssl_certificate_max_depth = 3 + +[compaction_daemon] +; The delay, in seconds, between each check for which database and view indexes +; need to be compacted. 
+check_interval = 300 +; If a database or view index file is smaller then this value (in bytes), +; compaction will not happen. Very small files always have a very high +; fragmentation therefore it's not worth to compact them. +min_file_size = 131072 + +[compactions] +; List of compaction rules for the compaction daemon. +; The daemon compacts databases and their respective view groups when all the +; condition parameters are satisfied. Configuration can be per database or +; global, and it has the following format: +; +; database_name = [ {ParamName, ParamValue}, {ParamName, ParamValue}, ... ] +; _default = [ {ParamName, ParamValue}, {ParamName, ParamValue}, ... ] +; +; Possible parameters: +; +; * db_fragmentation - If the ratio (as an integer percentage), of the amount +; of old data (and its supporting metadata) over the database +; file size is equal to or greater then this value, this +; database compaction condition is satisfied. +; This value is computed as: +; +; (file_size - data_size) / file_size * 100 +; +; The data_size and file_size values can be obtained when +; querying a database's information URI (GET /dbname/). +; +; * view_fragmentation - If the ratio (as an integer percentage), of the amount +; of old data (and its supporting metadata) over the view +; index (view group) file size is equal to or greater then +; this value, then this view index compaction condition is +; satisfied. This value is computed as: +; +; (file_size - data_size) / file_size * 100 +; +; The data_size and file_size values can be obtained when +; querying a view group's information URI +; (GET /dbname/_design/groupname/_info). +; +; * from _and_ to - The period for which a database (and its view groups) compaction +; is allowed. The value for these parameters must obey the format: +; +; HH:MM - HH:MM (HH in [0..23], MM in [0..59]) +; +; * strict_window - If a compaction is still running after the end of the allowed +; period, it will be canceled if this parameter is set to 'true'. +; It defaults to 'false' and it's meaningful only if the *period* +; parameter is also specified. +; +; * parallel_view_compaction - If set to 'true', the database and its views are +; compacted in parallel. This is only useful on +; certain setups, like for example when the database +; and view index directories point to different +; disks. It defaults to 'false'. +; +; Before a compaction is triggered, an estimation of how much free disk space is +; needed is computed. This estimation corresponds to 2 times the data size of +; the database or view index. When there's not enough free disk space to compact +; a particular database or view index, a warning message is logged. +; +; Examples: +; +; 1) [{db_fragmentation, "70%"}, {view_fragmentation, "60%"}] +; The `foo` database is compacted if its fragmentation is 70% or more. +; Any view index of this database is compacted only if its fragmentation +; is 60% or more. +; +; 2) [{db_fragmentation, "70%"}, {view_fragmentation, "60%"}, {from, "00:00"}, {to, "04:00"}] +; Similar to the preceding example but a compaction (database or view index) +; is only triggered if the current time is between midnight and 4 AM. +; +; 3) [{db_fragmentation, "70%"}, {view_fragmentation, "60%"}, {from, "00:00"}, {to, "04:00"}, {strict_window, true}] +; Similar to the preceding example - a compaction (database or view index) +; is only triggered if the current time is between midnight and 4 AM. 
If at +; 4 AM the database or one of its views is still compacting, the compaction +; process will be canceled. +; +; 4) [{db_fragmentation, "70%"}, {view_fragmentation, "60%"}, {from, "00:00"}, {to, "04:00"}, {strict_window, true}, {parallel_view_compaction, true}] +; Similar to the preceding example, but a database and its views can be +; compacted in parallel. +; +;_default = [{db_fragmentation, "70%"}, {view_fragmentation, "60%"}, {from, "23:00"}, {to, "04:00"}] diff --git a/scripts/docker/files/bin/conf/soledad-server_default.conf b/scripts/docker/files/bin/conf/soledad-server_default.conf new file mode 100644 index 00000000..5e286374 --- /dev/null +++ b/scripts/docker/files/bin/conf/soledad-server_default.conf @@ -0,0 +1,5 @@ +[soledad-server] +couch_url = http://localhost:5984 +create_cmd = sudo -u soledad-admin /usr/bin/create-user-db +admin_netrc = /etc/couchdb/couchdb-soledad-admin.netrc +batching = 0 diff --git a/scripts/docker/files/bin/run-client-bootstrap.sh b/scripts/docker/files/bin/run-client-bootstrap.sh new file mode 100755 index 00000000..fbbb42e8 --- /dev/null +++ b/scripts/docker/files/bin/run-client-bootstrap.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Run a Soledad client connection test. +# +# This script is meant to be copied to the docker container and run upon +# container start. + +CMD="/usr/local/soledad/setup-test-env.py" +REPO="/var/local/soledad" + +if [ ! -z "${SOLEDAD_REMOTE}" ]; then + git -C ${REPO} remote set-url origin ${SOLEDAD_REMOTE} + git -C ${REPO} fetch origin +fi + +if [ ! -z "${SOLEDAD_BRANCH}" ]; then + git -C ${REPO} checkout ${SOLEDAD_BRANCH} +fi + +${CMD} soledad-client test --server-url ${SOLEDAD_SERVER_URL} diff --git a/scripts/docker/files/bin/run-client-perf.sh b/scripts/docker/files/bin/run-client-perf.sh new file mode 100755 index 00000000..01b27b98 --- /dev/null +++ b/scripts/docker/files/bin/run-client-perf.sh @@ -0,0 +1,128 @@ +#!/bin/sh + +# Start a soledad-perf test using a remote server. +# +# The script does the following: +# +# - configure a remote repository for soledad repo if SOLEDAD_REMOTE is set. +# +# - checkout a specific branch if SOLEDAD_BRANCH is set. +# +# - run the soledad-perf local twisted server that runs the client. Note +# that the actual soledad server should be running on another docker +# container. This local server is only used to measure responsiveness of +# soledad client. The script waits for the server to come up before +# continuing, or else times out after TIMEOUT seconds. +# +# - trigger the creation of documents for sync. +# +# - start the measurement of server responsiveness and sync stages. +# +# - stop the test. +# +# This script is meant to be copied to the docker container and run upon +# container start. + +CMD="/usr/local/soledad/setup-test-env.py" +REPO="/var/local/soledad" +TIMEOUT=20 + +#----------------------------------------------------------------------------- +# configure a remote and checkout a branch +#----------------------------------------------------------------------------- + +if [ ! -z "${SOLEDAD_REMOTE}" ]; then + git -C ${REPO} remote set-url origin ${SOLEDAD_REMOTE} + git -C ${REPO} fetch origin +fi + +if [ ! -z "${SOLEDAD_BRANCH}" ]; then + git -C ${REPO} checkout ${SOLEDAD_BRANCH} +fi + +if [ ! -z "${SOLEDAD_PERF_REMOTE}" ]; then + git -C /var/local/soledad-perf remote set-url origin ${SOLEDAD_PERF_REMOTE} + git -C /var/local/soledad-perf fetch origin +fi + +if [ ! 
-z "${SOLEDAD_PERF_BRANCH}" ]; then + git -C /var/local/soledad-perf checkout ${SOLEDAD_PERF_BRANCH} +fi + +#----------------------------------------------------------------------------- +# write a configuration file for the perf test +#----------------------------------------------------------------------------- + +cd /var/local/soledad-perf + +cat > defaults.conf <<EOF +[server] +host = ${SOLEDAD_SERVER_URL} + +[client] +uuid = 1234567890abcdef +basedir = /tmp/soledad_client_test +passphrase = 12345678 + +[sync] +num_docs = ${SOLEDAD_PRELOAD_NUM} +payload = /tmp/payload +payload_size = ${SOLEDAD_PRELOAD_SIZE} +auth_token = an-auth-token + +[test] +stats_file = ./out/stats.json +EOF + +if [ "${1}" = "--drop-to-shell" ]; then + /bin/bash + exit 0 +fi + +#----------------------------------------------------------------------------- +# start the local server and wait for it to come up +#----------------------------------------------------------------------------- + +# start local test server on background +make soledad-sync-server | grep -v stats | grep -v ping & + +# wait for server until timeout +start=`date +%s` +elapsed=0 + +echo "Waiting for perf server to come up..." + +while [ ${elapsed} -lt ${TIMEOUT} ]; do + result=`curl -s http://127.0.0.1:8080/ping` + if [ ${?} -eq 0 -a "${result}" = "easy!" ]; then + echo "Perf server (running soledad client) is up!" + break + else + sleep 1 + fi + now=`date +%s` + elapsed=`expr ${now} - ${start}` +done + +# exit with an error code if timed out waiting for server +if [ ${elapsed} -ge ${TIMEOUT} ]; then + echo "Error: server unreachable at http://127.0.0.1:8080 after ${TIMEOUT} seconds." + exit 1 +fi + +#----------------------------------------------------------------------------- +# create docs and run test +#----------------------------------------------------------------------------- + +set -e + +# create documents in client +make trigger-create-docs + +# launch background series measurement +make measure-series > /dev/null & +sleep 5 # wait a bit for some data points + +# run a sync and generate a graph +make trigger-sync +make trigger-stop diff --git a/scripts/docker/files/bin/run-server.sh b/scripts/docker/files/bin/run-server.sh new file mode 100755 index 00000000..feedee7e --- /dev/null +++ b/scripts/docker/files/bin/run-server.sh @@ -0,0 +1,89 @@ +#!/bin/sh + +# Start a soledad server inside a docker container. +# +# This script will: +# +# - eventually checkout a specific branch from a specific soledad remote. +# +# - create everything a soledad server needs to run (certificate, backend +# server database, tables, etc. +# +# - eventually preload the server database with a number of documents equal +# to SOLEDAD_PRELOAD_NUM, and with payload size equal to +# SOLEDAD_PRELOAD_SIZE. +# +# - run the soledad server. +# +# This script is meant to be copied to the docker container and run upon +# container start. + +CMD="/usr/local/soledad/setup-test-env.py" + +#--------------------------------------------------------------------------- +# eventually checkout a specific branch from a specific remote +#--------------------------------------------------------------------------- + +REPO="/var/local/soledad" + +if [ ! -z "${SOLEDAD_REMOTE}" ]; then + git -C ${REPO} remote set-url origin ${SOLEDAD_REMOTE} + git -C ${REPO} fetch origin +fi + +if [ ! 
-z "${SOLEDAD_BRANCH}" ]; then + git -C ${REPO} checkout ${SOLEDAD_BRANCH} +fi + +#--------------------------------------------------------------------------- +# setup environment for running soledad server +#--------------------------------------------------------------------------- + +${CMD} couch start +${CMD} user-db create +${CMD} token-db create +${CMD} token-db insert-token +${CMD} shared-db create +${CMD} cert create + +#--------------------------------------------------------------------------- +# write a configuration file for the perf test +#--------------------------------------------------------------------------- + +if [ "${SOLEDAD_PRELOAD_NUM}" -gt 0 ]; then + cd /var/local/soledad-perf + + cat > defaults.conf <<EOF +[server] +host = http://127.0.0.1:2424 + +[client] +uuid = 1234567890abcdef +basedir = /tmp/soledad_client_test +passphrase = 12345678 + +[sync] +num_docs = ${SOLEDAD_PRELOAD_NUM} +payload = /tmp/payload +payload_size = ${SOLEDAD_PRELOAD_SIZE} +auth_token = an-auth-token + +[test] +stats_file = ./out/stats.json +EOF + + echo "Preloading server database..." + ./scripts/preload_server_database.py +fi + +#--------------------------------------------------------------------------- +# actually run the server +#--------------------------------------------------------------------------- + +if [ "${1}" = "--drop-to-shell" ]; then + /bin/bash + exit 0 +fi + +echo "Starting soledad server..." +${CMD} soledad-server start --no-daemonize diff --git a/scripts/docker/files/bin/run-trial-from-gitlab-ci.sh b/scripts/docker/files/bin/run-trial-from-gitlab-ci.sh new file mode 100755 index 00000000..96436e26 --- /dev/null +++ b/scripts/docker/files/bin/run-trial-from-gitlab-ci.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Run Soledad trial tests in a docker container created by gitlab-ci. +# +# Gitlab-ci will copy the current test code into /builds/leap/soledad, so this +# script has to uninstall currently installed soledad packages and re-install +# from that location instead. +# +# This script is meant to be copied to the docker container and run upon +# container start. + +CMD="/usr/local/soledad/setup-test-env.py" +BASEDIR="/builds/leap/soledad" + + +install_deps() { + # ensure all dependencies are installed + for pkg in common client server; do + testing="--testing" + if [ "${pkg}" = "server" ]; then + # soledad server doesn't currently have a requirements-testing.pip file, + # so we don't pass the option when that is the case + testing="" + fi + pip uninstall leap.soledad.${pkg} + (cd ${BASEDIR}/${pkg} \ + && ./pkg/pip_install_requirements.sh ${testing} --use-leap-wheels \ + && python setup.py develop) + done +} + + +start_couch() { + # currently soledad trial tests need a running couch on environment + ${CMD} couch start +} + + +run_tests() { + trial leap.soledad.common +} + + +main() { + install_deps + start_couch + run_tests +} + +main diff --git a/scripts/docker/files/bin/run-trial.sh b/scripts/docker/files/bin/run-trial.sh new file mode 100755 index 00000000..f38f3124 --- /dev/null +++ b/scripts/docker/files/bin/run-trial.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Run Soledad trial tests. +# +# This script is meant to be copied to the docker container and run upon +# container start. + +CMD="/usr/local/soledad/setup-test-env.py" +REPO="/var/local/soledad" + +if [ ! -z "${SOLEDAD_REMOTE}" ]; then + git -C ${REPO} remote set-url origin ${SOLEDAD_REMOTE} + git -C ${REPO} fetch origin +fi + +if [ ! 
-z "${SOLEDAD_BRANCH}" ]; then + git -C ${REPO} checkout ${SOLEDAD_BRANCH} +fi + +# currently soledad trial tests need a running couch on environment +${CMD} couch start + +trial leap.soledad.common diff --git a/scripts/docker/files/bin/setup-test-env.py b/scripts/docker/files/bin/setup-test-env.py new file mode 100755 index 00000000..0f3ea6f4 --- /dev/null +++ b/scripts/docker/files/bin/setup-test-env.py @@ -0,0 +1,641 @@ +#!/usr/bin/env python + + +""" +This script knows how to build a minimum environment for Soledad Server, which +includes the following: + + - Couch server startup + - Token and shared database initialization + - Soledad Server startup + +Options can be passed for configuring the different environments, so this may +be used by other programs to setup different environments for arbitrary tests. +Use the --help option to get information on usage. + +For some commands you will need an environment with Soledad python packages +available, thus you might want to explicitly call python and not rely in the +shebang line. +""" + + +import time +import os +import signal +import tempfile +import psutil +from argparse import ArgumentParser +from subprocess import call +from couchdb import Server +from couchdb.http import PreconditionFailed +from couchdb.http import ResourceConflict +from couchdb.http import ResourceNotFound +from hashlib import sha512 + +from leap.soledad.common.l2db.errors import DatabaseDoesNotExist + + +# +# Utilities +# + +def get_pid(pidfile): + if not os.path.isfile(pidfile): + return 0 + try: + with open(pidfile) as f: + return int(f.read()) + except IOError: + return 0 + + +def pid_is_running(pid): + try: + psutil.Process(pid) + return True + except psutil.NoSuchProcess: + return False + + +def pidfile_is_running(pidfile): + try: + pid = get_pid(pidfile) + psutil.Process(pid) + return pid + except psutil.NoSuchProcess: + return False + + +def status_from_pidfile(args, default_basedir, name): + basedir = _get_basedir(args, default_basedir) + pidfile = os.path.join(basedir, args.pidfile) + try: + pid = get_pid(pidfile) + psutil.Process(pid) + print "[+] %s is running with pid %d" % (name, pid) + except (IOError, psutil.NoSuchProcess): + print "[-] %s stopped" % name + + +def kill_all_executables(args): + basename = os.path.basename(args.executable) + pids = [int(pid) for pid in os.listdir('/proc') if pid.isdigit()] + for pid in pids: + try: + p = psutil.Process(pid) + if p.name() == basename: + print '[!] 
killing - pid: %d' % pid + os.kill(pid, signal.SIGKILL) + except: + pass + + +# +# Couch Server control +# + +COUCH_EXECUTABLE = '/usr/bin/couchdb' +ERLANG_EXECUTABLE = 'beam.smp' +COUCH_TEMPLATE = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + './conf/couchdb_default.ini') +COUCH_PIDFILE = 'couchdb.pid' +COUCH_LOGFILE = 'couchdb.log' +COUCH_PORT = 5984 +COUCH_HOST = '127.0.0.1' +COUCH_BASEDIR = '/tmp/couch_test' + + +def _get_basedir(args, default): + basedir = args.basedir + if not basedir: + basedir = default + if not os.path.isdir(basedir): + os.mkdir(basedir) + return basedir + + +def couch_server_start(args): + basedir = _get_basedir(args, COUCH_BASEDIR) + pidfile = os.path.join(basedir, args.pidfile) + logfile = os.path.join(basedir, args.logfile) + + # check if already running + pid = get_pid(pidfile) + if pid_is_running(pid): + print '[*] error: already running - pid: %d' % pid + exit(1) + if os.path.isfile(pidfile): + os.unlink(pidfile) + + # generate a config file from template if needed + config_file = args.config_file + if not config_file: + config_file = tempfile.mktemp(prefix='couch_config_', dir=basedir) + lines = [] + with open(args.template) as f: + lines = f.readlines() + lines = map(lambda l: l.replace('BASEDIR', basedir), lines) + with open(config_file, 'w') as f: + f.writelines(lines) + + # start couch server + try: + call([ + args.executable, + '-n', # reset configuration file chain (including system default) + '-a %s' % config_file, # add configuration FILE to chain + '-b', # spawn as a background process + '-p %s' % pidfile, # set the background PID FILE + '-o %s' % logfile, # redirect background stdout to FILE + '-e %s' % logfile]) # redirect background stderr to FILE + except Exception as e: + print '[*] error: could not start couch server - %s' % str(e) + exit(1) + + # couch may take a bit to store the pid in the pidfile, so we just wait + # until it does + pid = None + while not pid: + try: + pid = get_pid(pidfile) + break + except: + time.sleep(0.1) + + print '[+] couch is running with pid: %d' % pid + + +def couch_server_stop(args): + basedir = _get_basedir(args, COUCH_BASEDIR) + pidfile = os.path.join(basedir, args.pidfile) + pid = get_pid(pidfile) + if not pid_is_running(pid): + print '[*] error: no running server found' + exit(1) + call([ + args.executable, + '-p %s' % pidfile, # set the background PID FILE + '-k']) # kill the background process, will respawn if needed + print '[-] stopped couch server with pid %d ' % pid + + +def couch_status_from_pidfile(args): + status_from_pidfile(args, COUCH_BASEDIR, 'couch') + + +# +# User DB maintenance +# + +def user_db_create(args): + from leap.soledad.common.couch import CouchDatabase + url = 'http://localhost:%d/user-%s' % (args.port, args.uuid) + try: + CouchDatabase.open_database( + url=url, create=False, replica_uid=None, ensure_ddocs=True) + print '[*] error: database "user-%s" already exists' % args.uuid + exit(1) + except DatabaseDoesNotExist: + CouchDatabase.open_database( + url=url, create=True, replica_uid=None, ensure_ddocs=True) + print '[+] database created: user-%s' % args.uuid + + +def user_db_delete(args): + s = _couch_get_server(args) + try: + dbname = 'user-%s' % args.uuid + s.delete(dbname) + print '[-] database deleted: %s' % dbname + except ResourceNotFound: + print '[*] error: database "%s" does not exist' % dbname + exit(1) + + +# +# Soledad Server control +# + +TWISTD_EXECUTABLE = 'twistd' # use whatever is available on path + +SOLEDAD_SERVER_BASEDIR =
'/tmp/soledad_server_test' +SOLEDAD_SERVER_CONFIG_FILE = './conf/soledad_default.ini' +SOLEDAD_SERVER_PIDFILE = 'soledad.pid' +SOLEDAD_SERVER_LOGFILE = 'soledad.log' +SOLEDAD_SERVER_PRIVKEY = 'soledad_privkey.pem' +SOLEDAD_SERVER_CERTKEY = 'soledad_certkey.pem' +SOLEDAD_SERVER_PORT = 2424 +SOLEDAD_SERVER_AUTH_TOKEN = 'an-auth-token' +SOLEDAD_SERVER_URL = 'https://localhost:2424' + +SOLEDAD_CLIENT_PASS = '12345678' +SOLEDAD_CLIENT_BASEDIR = '/tmp/soledad_client_test' +SOLEDAD_CLIENT_UUID = '1234567890abcdef' + + +def soledad_server_start(args): + basedir = _get_basedir(args, SOLEDAD_SERVER_BASEDIR) + pidfile = os.path.join(basedir, args.pidfile) + logfile = os.path.join(basedir, args.logfile) + private_key = os.path.join(basedir, args.private_key) + cert_key = os.path.join(basedir, args.cert_key) + + pid = get_pid(pidfile) + if pid_is_running(pid): + pid = get_pid(pidfile) + print "[*] error: already running - pid: %d" % pid + exit(1) + + port = args.port + if args.tls: + port = 'ssl:%d:privateKey=%s:certKey=%s:sslmethod=SSLv23_METHOD' \ + % (args.port, private_key, cert_key) + params = [ + '--logfile=%s' % logfile, + '--pidfile=%s' % pidfile, + 'web', + '--wsgi=leap.soledad.server.application', + '--port=%s' % port + ] + if args.no_daemonize: + params.insert(0, '--nodaemon') + + call([args.executable] + params) + + pid = get_pid(pidfile) + print '[+] soledad-server is running with pid %d' % pid + + +def soledad_server_stop(args): + basedir = _get_basedir(args, SOLEDAD_SERVER_BASEDIR) + pidfile = os.path.join(basedir, args.pidfile) + pid = get_pid(pidfile) + if not pid_is_running(pid): + print '[*] error: no running server found' + exit(1) + os.kill(pid, signal.SIGKILL) + print '[-] stopped - pid: %d' % pid + + +def soledad_server_status_from_pidfile(args): + status_from_pidfile(args, SOLEDAD_SERVER_BASEDIR, 'soledad-server') + + +# couch helpers + +def _couch_get_server(args): + url = 'http://%s:%d/' % (args.host, args.port) + return Server(url=url) + + +def _couch_create_db(args, dbname): + s = _couch_get_server(args) + # maybe create the database + try: + s.create(dbname) + print '[+] database created: %s' % dbname + except PreconditionFailed as e: + error_code, _ = e.message + if error_code == 'file_exists': + print '[*] error: "%s" database already exists' % dbname + exit(1) + return s + + +def _couch_delete_db(args, dbname): + s = _couch_get_server(args) + # maybe create the database + try: + s.delete(dbname) + print '[-] database deleted: %s' % dbname + except ResourceNotFound: + print '[*] error: "%s" database does not exist' % dbname + exit(1) + + +def _token_dbname(): + dbname = 'tokens_' + \ + str(int(time.time() / (30 * 24 * 3600))) + return dbname + + +def token_db_create(args): + dbname = _token_dbname() + _couch_create_db(args, dbname) + + +def token_db_insert_token(args): + s = _couch_get_server(args) + try: + dbname = _token_dbname() + db = s[dbname] + token = sha512(args.auth_token).hexdigest() + db[token] = { + 'type': 'Token', + 'user_id': args.uuid, + } + print '[+] token for uuid "%s" created in tokens database' % args.uuid + except ResourceConflict: + print '[*] error: token for uuid "%s" already exists in tokens database' \ + % args.uuid + exit(1) + + +def token_db_delete(args): + dbname = _token_dbname() + _couch_delete_db(args, dbname) + + +# +# Shared DB creation +# + +def shared_db_create(args): + _couch_create_db(args, 'shared') + + +def shared_db_delete(args): + _couch_delete_db(args, 'shared') + + +# +# Certificate creation +# + +CERT_CONFIG_FILE = 
os.path.join( + os.path.dirname(os.path.realpath(__file__)), + './conf/cert_default.conf') + + +def cert_create(args): + private_key = os.path.join(args.basedir, args.private_key) + cert_key = os.path.join(args.basedir, args.cert_key) + os.mkdir(args.basedir) + call([ + 'openssl', + 'req', + '-x509', + '-sha256', + '-nodes', + '-days', '365', + '-newkey', 'rsa:2048', + '-config', args.config_file, + '-keyout', private_key, + '-out', cert_key]) + + +def cert_delete(args): + private_key = os.path.join(args.basedir, args.private_key) + cert_key = os.path.join(args.basedir, args.cert_key) + os.unlink(private_key) + os.unlink(cert_key) + + +# +# Soledad Client Control +# + +def soledad_client_test(args): + + # maybe infer missing parameters + basedir = args.basedir + if not basedir: + basedir = tempfile.mkdtemp() + server_url = args.server_url + if not server_url: + server_url = 'http://127.0.0.1:%d' % args.port + + # get a soledad instance + from client_side_db import _get_soledad_instance + _get_soledad_instance( + args.uuid, + unicode(args.passphrase), + basedir, + server_url, + args.cert_key, + args.auth_token) + + +# +# Command Line Interface +# + +class Command(object): + + def __init__(self, parser=ArgumentParser()): + self.commands = [] + self.parser = parser + self.subparsers = None + + def add_command(self, *args, **kwargs): + # pop out the func parameter to use later + func = None + if 'func' in kwargs.keys(): + func = kwargs.pop('func') + # eventually create a subparser + if not self.subparsers: + self.subparsers = self.parser.add_subparsers() + # create command and associate a function with it + command = Command(self.subparsers.add_parser(*args, **kwargs)) + if func: + command.parser.set_defaults(func=func) + self.commands.append(command) + return command + + def set_func(self, func): + self.parser.set_defaults(func=func) + + def add_argument(self, *args, **kwargs): + self.parser.add_argument(*args, **kwargs) + + def add_arguments(self, arglist): + for args, kwargs in arglist: + self.add_argument(*args, **kwargs) + + def parse_args(self): + return self.parser.parse_args() + + +# +# Command Line Interface +# + +def run_cli(): + cli = Command() + + # couch command with subcommands + cmd_couch = cli.add_command('couch', help="manage couch server") + + cmd_couch_start = cmd_couch.add_command('start', func=couch_server_start) + cmd_couch_start.add_arguments([ + (['--executable', '-e'], {'default': COUCH_EXECUTABLE}), + (['--basedir', '-b'], {}), + (['--config-file', '-c'], {}), + (['--template', '-t'], {'default': COUCH_TEMPLATE}), + (['--pidfile', '-p'], {'default': COUCH_PIDFILE}), + (['--logfile', '-l'], {'default': COUCH_LOGFILE}) + ]) + + cmd_couch_stop = cmd_couch.add_command('stop', func=couch_server_stop) + cmd_couch_stop.add_arguments([ + (['--executable', '-e'], {'default': COUCH_EXECUTABLE}), + (['--basedir', '-b'], {}), + (['--pidfile', '-p'], {'default': COUCH_PIDFILE}), + ]) + + cmd_couch_status = cmd_couch.add_command( + 'status', func=couch_status_from_pidfile) + cmd_couch_status.add_arguments([ + (['--basedir', '-b'], {}), + (['--pidfile', '-p'], {'default': COUCH_PIDFILE})]) + + cmd_couch_kill = cmd_couch.add_command('kill', func=kill_all_executables) + cmd_couch_kill.add_argument( + '--executable', '-e', default=ERLANG_EXECUTABLE) + + # user database maintenance + cmd_user_db = cli.add_command('user-db') + + cmd_user_db_create = cmd_user_db.add_command('create', func=user_db_create) + cmd_user_db_create.add_arguments([ + (['--host', '-H'], {'default': COUCH_HOST}), + 
(['--port', '-P'], {'type': int, 'default': COUCH_PORT}), + (['--uuid', '-u'], {'default': SOLEDAD_CLIENT_UUID}), + ]) + + cmd_user_db_create = cmd_user_db.add_command( + 'delete', func=user_db_delete) + cmd_user_db_create.add_arguments([ + (['--host', '-H'], {'default': COUCH_HOST}), + (['--port', '-P'], {'type': int, 'default': COUCH_PORT}), + (['--uuid', '-u'], {'default': SOLEDAD_CLIENT_UUID}) + ]) + + # soledad server command with subcommands + cmd_sol_server = cli.add_command( + 'soledad-server', help="manage soledad server") + + cmd_sol_server_start = cmd_sol_server.add_command( + 'start', func=soledad_server_start) + cmd_sol_server_start.add_arguments([ + (['--executable', '-e'], {'default': TWISTD_EXECUTABLE}), + (['--config-file', '-c'], {'default': SOLEDAD_SERVER_CONFIG_FILE}), + (['--pidfile', '-p'], {'default': SOLEDAD_SERVER_PIDFILE}), + (['--logfile', '-l'], {'default': SOLEDAD_SERVER_LOGFILE}), + (['--port', '-P'], {'type': int, 'default': SOLEDAD_SERVER_PORT}), + (['--tls', '-t'], {'action': 'store_true'}), + (['--private-key', '-K'], {'default': SOLEDAD_SERVER_PRIVKEY}), + (['--cert-key', '-C'], {'default': SOLEDAD_SERVER_CERTKEY}), + (['--no-daemonize', '-n'], {'action': 'store_true'}), + (['--basedir', '-b'], {'default': SOLEDAD_SERVER_BASEDIR}), + ]) + + cmd_sol_server_stop = cmd_sol_server.add_command( + 'stop', func=soledad_server_stop) + cmd_sol_server_stop.add_arguments([ + (['--basedir', '-b'], {'default': SOLEDAD_SERVER_BASEDIR}), + (['--pidfile', '-p'], {'default': SOLEDAD_SERVER_PIDFILE}), + ]) + + cmd_sol_server_status = cmd_sol_server.add_command( + 'status', func=soledad_server_status_from_pidfile) + cmd_sol_server_status.add_arguments([ + (['--basedir', '-b'], {'default': SOLEDAD_SERVER_BASEDIR}), + (['--pidfile', '-p'], {'default': SOLEDAD_SERVER_PIDFILE}), + ]) + + cmd_sol_server_kill = cmd_sol_server.add_command( + 'kill', func=kill_all_executables) + cmd_sol_server_kill.add_argument( + '--executable', '-e', default=TWISTD_EXECUTABLE) + + # token db maintenance + cmd_token_db = cli.add_command('token-db') + cmd_token_db_create = cmd_token_db.add_command( + 'create', func=token_db_create) + cmd_token_db_create.add_arguments([ + (['--host', '-H'], {'default': COUCH_HOST}), + (['--uuid', '-u'], {'default': SOLEDAD_CLIENT_UUID}), + (['--port', '-P'], {'type': int, 'default': COUCH_PORT}), + ]) + + cmd_token_db_insert_token = cmd_token_db.add_command( + 'insert-token', func=token_db_insert_token) + cmd_token_db_insert_token.add_arguments([ + (['--host', '-H'], {'default': COUCH_HOST}), + (['--uuid', '-u'], {'default': SOLEDAD_CLIENT_UUID}), + (['--port', '-P'], {'type': int, 'default': COUCH_PORT}), + (['--auth-token', '-a'], {'default': SOLEDAD_SERVER_AUTH_TOKEN}), + ]) + + cmd_token_db_delete = cmd_token_db.add_command( + 'delete', func=token_db_delete) + cmd_token_db_delete.add_arguments([ + (['--host', '-H'], {'default': COUCH_HOST}), + (['--uuid', '-u'], {'default': SOLEDAD_CLIENT_UUID}), + (['--port', '-P'], {'type': int, 'default': COUCH_PORT}), + ]) + + # shared db creation + cmd_shared_db = cli.add_command('shared-db') + + cmd_shared_db_create = cmd_shared_db.add_command( + 'create', func=shared_db_create) + cmd_shared_db_create.add_arguments([ + (['--host', '-H'], {'default': COUCH_HOST}), + (['--port', '-P'], {'type': int, 'default': COUCH_PORT}), + ]) + + cmd_shared_db_delete = cmd_shared_db.add_command( + 'delete', func=shared_db_delete) + cmd_shared_db_delete.add_arguments([ + (['--host', '-H'], {'default': COUCH_HOST}), + (['--port', '-P'], 
{'type': int, 'default': COUCH_PORT}), + ]) + + # certificate generation + cmd_cert = cli.add_command('cert', help="create tls certificates") + + cmd_cert_create = cmd_cert.add_command('create', func=cert_create) + cmd_cert_create.add_arguments([ + (['--basedir', '-b'], {'default': SOLEDAD_SERVER_BASEDIR}), + (['--config-file', '-c'], {'default': CERT_CONFIG_FILE}), + (['--private-key', '-K'], {'default': SOLEDAD_SERVER_PRIVKEY}), + (['--cert-key', '-C'], {'default': SOLEDAD_SERVER_CERTKEY}), + ]) + + cmd_cert_create = cmd_cert.add_command('delete', func=cert_delete) + cmd_cert_create.add_arguments([ + (['--basedir', '-b'], {'default': SOLEDAD_SERVER_BASEDIR}), + (['--private-key', '-K'], {'default': SOLEDAD_SERVER_PRIVKEY}), + (['--cert-key', '-C'], {'default': SOLEDAD_SERVER_CERTKEY}), + ]) + + # soledad client command with subcommands + cmd_sol_client = cli.add_command( + 'soledad-client', help="manage soledad client") + + cmd_sol_client_test = cmd_sol_client.add_command( + 'test', func=soledad_client_test) + cmd_sol_client_test.add_arguments([ + (['--port', '-P'], {'type': int, 'default': SOLEDAD_SERVER_PORT}), + (['--tls', '-t'], {'action': 'store_true'}), + (['--uuid', '-u'], {'default': SOLEDAD_CLIENT_UUID}), + (['--passphrase', '-k'], {'default': SOLEDAD_CLIENT_PASS}), + (['--basedir', '-b'], {'default': SOLEDAD_CLIENT_BASEDIR}), + (['--server-url', '-s'], {'default': SOLEDAD_SERVER_URL}), + (['--cert-key', '-C'], {'default': os.path.join( + SOLEDAD_SERVER_BASEDIR, + SOLEDAD_SERVER_CERTKEY)}), + (['--auth-token', '-a'], {'default': SOLEDAD_SERVER_AUTH_TOKEN}), + ]) + + # parse and run cli + args = cli.parse_args() + args.func(args) + + +if __name__ == '__main__': + run_cli() diff --git a/scripts/docker/files/bin/util.py b/scripts/docker/files/bin/util.py new file mode 100644 index 00000000..e7e2ef9a --- /dev/null +++ b/scripts/docker/files/bin/util.py @@ -0,0 +1,75 @@ +import re +import psutil +import time +import threading +import argparse +import pytz +import datetime + + +class ValidateUserHandle(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') + res = m.match(values) + if res == None: + parser.error('User handle should have the form user@provider.') + setattr(namespace, 'username', res.groups()[0]) + setattr(namespace, 'provider', res.groups()[1]) + + +class StatsLogger(threading.Thread): + + def __init__(self, name, fname, procs=[], interval=0.01): + threading.Thread.__init__(self) + self._stopped = True + self._name = name + self._fname = fname + self._procs = self._find_procs(procs) + self._interval = interval + + def _find_procs(self, procs): + return filter(lambda p: p.name in procs, psutil.process_iter()) + + def run(self): + self._stopped = False + with open(self._fname, 'w') as f: + self._start = time.time() + f.write(self._make_header()) + while self._stopped is False: + f.write('%s %s\n' % + (self._make_general_stats(), self._make_proc_stats())) + time.sleep(self._interval) + f.write(self._make_footer()) + + def _make_general_stats(self): + now = time.time() + stats = [] + stats.append("%f" % (now - self._start)) # elapsed time + stats.append("%f" % psutil.cpu_percent()) # total cpu + stats.append("%f" % psutil.virtual_memory().percent) # total memory + return ' '.join(stats) + + def _make_proc_stats(self): + stats = [] + for p in self._procs: + stats.append('%f' % p.get_cpu_percent()) # proc cpu + stats.append('%f' % p.get_memory_percent()) # proc memory + return ' '.join(stats) + + 
def _make_header(self): + header = [] + header.append('# test_name: %s' % self._name) + header.append('# start_time: %s' % datetime.datetime.now(pytz.utc)) + header.append( + '# elapsed_time total_cpu total_memory proc_cpu proc_memory ') + return '\n'.join(header) + '\n' + + def _make_footer(self): + footer = [] + footer.append('# end_time: %s' % datetime.datetime.now(pytz.utc)) + return '\n'.join(footer) + + def stop(self): + self._stopped = True + + diff --git a/scripts/docker/files/bin/util.sh b/scripts/docker/files/bin/util.sh new file mode 100644 index 00000000..77287d0d --- /dev/null +++ b/scripts/docker/files/bin/util.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +configure_soledad_repos() { + if [ ! -z "${SOLEDAD_REMOTE}" ]; then + git -C ${REPO} remote set-url origin ${SOLEDAD_REMOTE} + git -C ${REPO} fetch origin + fi + + if [ ! -z "${SOLEDAD_BRANCH}" ]; then + git -C ${REPO} checkout ${SOLEDAD_BRANCH} + fi +} diff --git a/scripts/docker/files/build/install-deps-from-repos.sh b/scripts/docker/files/build/install-deps-from-repos.sh new file mode 100755 index 00000000..46530c86 --- /dev/null +++ b/scripts/docker/files/build/install-deps-from-repos.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# Install dependencies needed to run client and server in a test environment. +# +# In detail, this script does the following: +# +# - install dependencies for packages in /var/local from their requirements +# files in each of the repositories, using python wheels when possible. +# +# - install the python packages in development mode +# +# This script is meant to be copied to the docker container during container +# build and run after system dependencies have been installed. + +BASEDIR="/var/local" + +# install dependencies and packages +install_script="pkg/pip_install_requirements.sh" +opts="--use-leap-wheels" +pkgs="leap_pycommon soledad/common soledad/client soledad/server" + +for pkg in ${pkgs}; do + pkgdir=${BASEDIR}/${pkg} + testing="" + if [ -f ${pkgdir}/pkg/requirements-testing.pip ]; then + testing="--testing" + fi + (cd ${pkgdir} && ${install_script} ${testing} ${opts}) + (cd ${pkgdir} && python setup.py develop) +done diff --git a/scripts/docker/helper/get-container-ip.sh b/scripts/docker/helper/get-container-ip.sh new file mode 100755 index 00000000..2b392350 --- /dev/null +++ b/scripts/docker/helper/get-container-ip.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# Print the IP of a container to stdout, given its id. Check the documentation +# for the `docker inspect` command for more details: +# +# https://docs.docker.com/engine/reference/commandline/inspect/ + +if [ ${#} -ne 1 ]; then + echo "Usage: ${0} container_id" + exit 1 +fi + +container_id=${1} + +/usr/bin/docker \ + inspect \ + --format='{{.NetworkSettings.IPAddress}}' \ + ${container_id} diff --git a/scripts/docker/helper/run-test.sh b/scripts/docker/helper/run-test.sh new file mode 100755 index 00000000..9b3ec0c9 --- /dev/null +++ b/scripts/docker/helper/run-test.sh @@ -0,0 +1,75 @@ +#!/bin/sh + +# Run 2 docker containers, one with soledad server and another with a soledad +# client running a test. +# +# As there are many possible tests, you have to pass an argument to the +# script saying which test you want to run. Currently, possible values are +# "perf" and "bootstrap". +# +# After launching the server container, the script waits for TIMEOUT seconds +# for it to come up. If we fail to detect the server, the script exits with +# nonzero status.
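+# +# A sketch of typical invocations, inferred from the argument parsing below +# ("<branch>" is a placeholder for an optional branch name, which is passed +# through to make as SOLEDAD_BRANCH): +# +#   ./run-test.sh perf +#   ./run-test.sh bootstrap <branch>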
+ +# seconds to wait before giving up waiting for the server +TIMEOUT=20 + +# parse command +if [ ${#} -lt 1 -o ${#} -gt 2 ]; then + echo "Usage: ${0} perf|bootstrap" + exit 1 +fi + +test=${1} +if [ "${test}" != "perf" -a "${test}" != "bootstrap" ]; then + echo "Usage: ${0} perf|bootstrap" + exit 1 +fi + +branch="" +if [ ${#} -eq 2 ]; then + branch="SOLEDAD_BRANCH=${2}" +fi + +# make sure the image is up to date +make image + +# get script name and path +script=$(readlink -f "$0") +scriptpath=$(dirname "${script}") + +# run the server +tempfile=`mktemp -u` +make run-server CONTAINER_ID_FILE=${tempfile} ${branch} + +# wait for server until timeout +container_id=`cat ${tempfile}` +server_ip=`${scriptpath}/get-container-ip.sh ${container_id}` +start=`date +%s` +elapsed=0 + +echo "Waiting for soledad server container to come up..." + +while [ ${elapsed} -lt ${TIMEOUT} ]; do + curl -s http://${server_ip}:2424 > /dev/null + if [ ${?} -eq 0 ]; then + echo "Soledad server container is up!" + break + else + sleep 1 + fi + now=`date +%s` + elapsed=`expr ${now} - ${start}` +done + +# exit with an error code if timed out waiting for server +if [ ${elapsed} -ge ${TIMEOUT} ]; then + echo "Error: server unreachable at ${server_ip} after ${TIMEOUT} seconds." + exit 1 +fi + +set -e + +# run the test +make run-client-${test} CONTAINER_ID_FILE=${tempfile} ${branch} +rm -r ${tempfile} diff --git a/scripts/docker/helper/run-until-error.sh b/scripts/docker/helper/run-until-error.sh new file mode 100755 index 00000000..a4cab6ec --- /dev/null +++ b/scripts/docker/helper/run-until-error.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +status=0 +runs=10 + +while [ ${status} -eq 0 -a ${runs} -gt 0 ]; do + echo "=== RUN ${runs}" + make rm-all-containers + make run-perf-test + status=${?} + runs=`expr ${runs} - 1` +done diff --git a/scripts/gitlab/run_soledad_tests.sh b/scripts/gitlab/run_soledad_tests.sh new file mode 100644 index 00000000..328fcc4d --- /dev/null +++ b/scripts/gitlab/run_soledad_tests.sh @@ -0,0 +1,132 @@ +#!/bin/sh + + +# Name: +# run_soledad_tests.sh -- run soledad tests from a given source directory +# +# Usage: +# run_soledad_tests.sh +# +# Description: +# This script sets up a minimal test environment from a soledad source +# directory and runs all tests, returning the same exit status as the test +# process. As it is intended to be called by a GitLab Runner, it expects the +# following GitLab CI predefined variable to be set in the environment: +# +# CI_PROJECT_DIR The full path where the repository is cloned and where +# the build is run +# +# Example: +# CI_PROJECT_DIR=/tmp/soledad run_soledad_tests.sh + + +# Fail if expected variable is not a directory. +if [ ! -d "${CI_PROJECT_DIR}" ]; then + echo "Error! Not a directory: ${CI_PROJECT_DIR}" + exit 1 +fi + + +# Set up pip to use wheels because they are prebuilt and thus faster to deploy. + +PIP_INSTALL="pip install -U \ + --find-links=https://lizard.leap.se/wheels \ + --trusted-host lizard.leap.se" + + +# Use a fresh python virtual environment each time. + +setup_venv() { + venv=${1} + virtualenv ${venv} + . ${venv}/bin/activate +} + + +# Most of the dependencies are installed directly from soledad source pip +# requirement files. Some of them need alternative ways of installing for +# various reasons, see below.
+ +install_deps() { + install_leap_common + install_scrypt + install_soledad_deps +} + + +# Install scrypt manually to avoid import problems as the ones described in +# https://leap.se/code/issues/4948 + +install_scrypt() { + pip install scrypt +} + + +# Iterate through soledad components and use the special pip install script to +# install (mostly) all requirements for testing. +# +# TODO: Soledad tests should depend on almost nothing and have every component +# from other leap packages mocked. + +install_soledad_deps() { + for pkg in common client server; do + testing="--testing" + if [ "${pkg}" = "server" ]; then + # soledad server doesn't currently have a requirements-testing.pip file, + # so we don't pass the option when that is the case + testing="" + fi + (cd ${CI_PROJECT_DIR}/${pkg} \ + && ./pkg/pip_install_requirements.sh ${testing} --use-leap-wheels \ + && python setup.py develop) + done +} + + +# We have to manually install leap.common from source because: +# +# - the leap.common package is not currently set as a "testing dependency" +# for soledad; and +# +# - having another package from the leap namespace installed from egg or +# wheels may confuse the python interpreter when importing modules. + +install_leap_common() { + temp=`mktemp -d` + host="git://github.com/leapcode" + proj="leap_pycommon" + git clone ${host}/${proj} ${temp}/${proj} + (cd ${temp}/${proj} \ + && ./pkg/pip_install_requirements.sh \ + && python setup.py develop) +} + + +# Run soledad tests. The exit status of the following function is used as the +# script's exit status. + +run_tests() { + trial leap.soledad.common.tests +} + + +# Cleanup leftovers before finishing. + +cleanup_venv() { + venv=${1} + rm -rf ${venv} +} + + +main() { + venv="`mktemp -d`/venv" + setup_venv ${venv} + install_deps + run_tests + status=$? 
+ cleanup_venv ${venv} + exit ${status} +} + + +main diff --git a/scripts/build_debian_package.sh b/scripts/packaging/build_debian_package.sh index b9fb93a9..b9fb93a9 100755 --- a/scripts/build_debian_package.sh +++ b/scripts/packaging/build_debian_package.sh diff --git a/scripts/compile_design_docs.py b/scripts/packaging/compile_design_docs.py index 7ffebb10..b2b5729a 100644 --- a/scripts/compile_design_docs.py +++ b/scripts/packaging/compile_design_docs.py @@ -108,4 +108,5 @@ if __name__ == '__main__': ddoc_filename = "%s.json" % ddoc with open(join(args.target, ddoc_filename), 'w') as f: f.write("%s" % json.dumps(ddocs[ddoc], indent=3)) - print "Wrote _design/%s content in %s" % (ddoc, join(args.target, ddoc_filename,)) + print "Wrote _design/%s content in %s" \ + % (ddoc, join(args.target, ddoc_filename,)) diff --git a/scripts/profiling/backends_cpu_usage/test_u1db_sync.py b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py index 26ef8f9f..5ae68c81 100755 --- a/scripts/profiling/backends_cpu_usage/test_u1db_sync.py +++ b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py @@ -1,18 +1,16 @@ #!/usr/bin/python -import u1db import tempfile import logging import shutil import os -import argparse import time import binascii -import random - +from leap.soledad.common import l2db from leap.soledad.client.sqlcipher import open as sqlcipher_open + from log_cpu_usage import LogCpuUsage from u1dblite import open as u1dblite_open from u1dbcipher import open as u1dbcipher_open @@ -24,10 +22,10 @@ BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB def get_data(size): - return binascii.hexlify(os.urandom(size/2)) + return binascii.hexlify(os.urandom(size / 2)) -def run_test(testname, open_fun, tempdir, docs, *args): +def run_test(testname, open_fun, tempdir, docs, *args): logger.info('Starting test \"%s\".' % testname) # instantiate dbs @@ -36,8 +34,7 @@ def run_test(testname, open_fun, tempdir, docs, *args): # get sync target and synchsonizer target = db2.get_sync_target() - synchronizer = u1db.sync.Synchronizer(db1, target) - + synchronizer = l2db.sync.Synchronizer(db1, target) # generate lots of small documents logger.info('Creating %d documents in source db...' 
% DOCS_TO_SYNC) @@ -80,30 +77,28 @@ def run_test(testname, open_fun, tempdir, docs, *args): if __name__ == '__main__': - + # configure logger logger = logging.getLogger(__name__) LOG_FORMAT = '%(asctime)s %(message)s' logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) - # get a temporary dir tempdir = tempfile.mkdtemp() logger.info('Using temporary directory %s' % tempdir) - # create a lot of documents with random sizes docs = [] for i in xrange(DOCS_TO_SYNC): docs.append({ 'index': i, - #'data': get_data( + # 'data': get_data( # random.randrange( # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)) }) # run tests - run_test('sqlite', u1db.open, tempdir, docs, True) + run_test('sqlite', l2db.open, tempdir, docs, True) run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True) run_test('u1dblite', u1dblite_open, tempdir, docs) run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True) diff --git a/scripts/profiling/sync/profile-sync.py b/scripts/profiling/sync/profile-sync.py index 8c18bde8..34e66f03 100755 --- a/scripts/profiling/sync/profile-sync.py +++ b/scripts/profiling/sync/profile-sync.py @@ -1,4 +1,10 @@ #!/usr/bin/env python +""" +Example of usage: + time ./profile-sync.py --no-stats --send-num 5 --payload-file sample \ + --repeat-payload -p password -b /tmp/foobarsync \ + test_soledad_sync_001@cdev.bitmask.net +""" import argparse import commands @@ -13,7 +19,9 @@ from twisted.internet import reactor from util import StatsLogger, ValidateUserHandle from client_side_db import _get_soledad_instance, _get_soledad_info + from leap.common.events import flags +from leap.soledad.client.api import Soledad flags.set_events_enabled(False) @@ -70,6 +78,23 @@ def create_docs(soledad, args): payload = fmap.read(docsize * 1024) s.create_doc({payload: payload}) + +def _get_soledad_instance_from_uuid(uuid, passphrase, basedir, server_url, + cert_file, token): + secrets_path = os.path.join(basedir, '%s.secret' % uuid) + local_db_path = os.path.join(basedir, '%s.db' % uuid) + return Soledad( + uuid, + unicode(passphrase), + secrets_path=secrets_path, + local_db_path=local_db_path, + server_url=server_url, + cert_file=cert_file, + auth_token=token, + defer_encryption=True, + syncable=True) + + # main program if __name__ == '__main__': @@ -79,6 +104,9 @@ if __name__ == '__main__': parser.add_argument( 'user@provider', action=ValidateUserHandle, help='the user handle') parser.add_argument( + '-u', dest='uuid', required=False, default=None, + help='uuid for local tests') + parser.add_argument( '-b', dest='basedir', required=False, default=None, help='soledad base directory') parser.add_argument( @@ -102,6 +130,7 @@ if __name__ == '__main__': parser.add_argument( '--payload-file', dest="payload_f", default=None, help='path to a sample file to use for the payloads') + parser.add_argument( '--no-stats', dest='do_stats', action='store_false', help='skip system stats') @@ -132,12 +161,20 @@ if __name__ == '__main__': basedir = tempfile.mkdtemp() logger.info('Using %s as base directory.' % basedir) - uuid, server_url, cert_file, token = \ - _get_soledad_info( - args.username, args.provider, passphrase, basedir) - # get the soledad instance - s = _get_soledad_instance( - uuid, passphrase, basedir, server_url, cert_file, token) + if args.uuid: + # We got an uuid. This is a local test, and we bypass + # authentication and encryption. + s = _get_soledad_instance_from_uuid( + args.uuid, passphrase, basedir, 'http://localhost:2323', '', '') + + else: + # Remote server. First, get remote info... 
+ uuid, server_url, cert_file, token = \ + _get_soledad_info( + args.username, args.provider, passphrase, basedir) + # ...and then get the soledad instance + s = _get_soledad_instance( + uuid, passphrase, basedir, server_url, cert_file, token) if args.do_send: create_docs(s, args) diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh deleted file mode 100755 index e36466f8..00000000 --- a/scripts/run_tests.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -cd common -python setup.py test diff --git a/scripts/develop_mode.sh b/scripts/testing/develop_mode.sh index 8d2ebaa8..8d2ebaa8 100755 --- a/scripts/develop_mode.sh +++ b/scripts/testing/develop_mode.sh diff --git a/server/changes/next-changelog.rst b/server/changes/next-changelog.rst index bdc9f893..fc4cbc30 100644 --- a/server/changes/next-changelog.rst +++ b/server/changes/next-changelog.rst @@ -1,4 +1,4 @@ -0.8.1 - ... +0.8.2 - ... ++++++++++++++++++++ Please add lines to this file, they will be moved to the CHANGELOG.rst during diff --git a/server/pkg/create-user-db b/server/pkg/create-user-db index 54856643..5e48d4de 100755 --- a/server/pkg/create-user-db +++ b/server/pkg/create-user-db @@ -25,6 +25,9 @@ from leap.soledad.common.couch import list_users_dbs from leap.soledad.server import load_configuration +BYPASS_AUTH = os.environ.get('SOLEDAD_BYPASS_AUTH', False) + + description = """ Creates a user database. This is meant to be used by Soledad Server. @@ -40,16 +43,23 @@ NETRC_PATH = CONF['soledad-server']['admin_netrc'] def url_for_db(dbname): - if not os.path.exists(NETRC_PATH): - print ('netrc not found in %s' % NETRC_PATH) - sys.exit(1) - parsed_netrc = netrc.netrc(NETRC_PATH) - host, (login, _, password) = parsed_netrc.hosts.items()[0] - url = ('http://%(login)s:%(password)s@%(host)s:5984/%(dbname)s' % { - 'login': login, - 'password': password, - 'host': host, - 'dbname': dbname}) + if BYPASS_AUTH: + login = '' + password = '' + host = 'localhost' + url = 'http://localhost:5984/%(dbname)s' % { + 'dbname': dbname} + else: + if not os.path.exists(NETRC_PATH): + print ('netrc not found in %s' % NETRC_PATH) + sys.exit(1) + parsed_netrc = netrc.netrc(NETRC_PATH) + host, (login, _, password) = parsed_netrc.hosts.items()[0] + url = ('http://%(login)s:%(password)s@%(host)s:5984/%(dbname)s' % { + 'login': login, + 'password': password, + 'host': host, + 'dbname': dbname}) return url diff --git a/server/pkg/generate_wheels.sh b/server/pkg/generate_wheels.sh index e29c327e..a13e2c7a 100755 --- a/server/pkg/generate_wheels.sh +++ b/server/pkg/generate_wheels.sh @@ -7,7 +7,7 @@ if [ "$WHEELHOUSE" = "" ]; then fi pip wheel --wheel-dir $WHEELHOUSE pip -pip wheel --wheel-dir $WHEELHOUSE --allow-external u1db --allow-unverified u1db --allow-external dirspec --allow-unverified dirspec -r pkg/requirements.pip +pip wheel --wheel-dir $WHEELHOUSE -r pkg/requirements.pip if [ -f pkg/requirements-testing.pip ]; then pip wheel --wheel-dir $WHEELHOUSE -r pkg/requirements-testing.pip fi diff --git a/server/pkg/pip_install_requirements.sh b/server/pkg/pip_install_requirements.sh index d0479365..f4b5f67a 100755 --- a/server/pkg/pip_install_requirements.sh +++ b/server/pkg/pip_install_requirements.sh @@ -4,7 +4,7 @@ # Use at your own risk. 
# See $usage for help -insecure_packages="u1db dirspec" +insecure_packages="" leap_wheelhouse=https://lizard.leap.se/wheels show_help() { @@ -80,5 +80,5 @@ insecure_flags=`return_insecure_flags` packages=`return_packages` pip install -U wheel -pip install $install_options pip +pip install -U pip pip install $install_options $insecure_flags $packages diff --git a/server/pkg/requirements-latest.pip b/server/pkg/requirements-latest.pip index a629aa57..46a7ccba 100644 --- a/server/pkg/requirements-latest.pip +++ b/server/pkg/requirements-latest.pip @@ -1,8 +1,5 @@ --index-url https://pypi.python.org/simple/ ---allow-external u1db --allow-unverified u1db ---allow-external dirspec --allow-unverified dirspec - -e 'git+https://github.com/pixelated-project/leap_pycommon.git@develop#egg=leap.common' -e '../common' -e . diff --git a/server/pkg/requirements.pip b/server/pkg/requirements.pip index f9cce08e..2d845f24 100644 --- a/server/pkg/requirements.pip +++ b/server/pkg/requirements.pip @@ -1,13 +1,6 @@ configparser -u1db -routes PyOpenSSL twisted>=12.3.0 #pinned for wheezy compatibility Beaker==1.6.3 #wheezy couchdb==0.8 #wheezy - -# XXX -- fix me! -# oauth is not strictly needed by us, but we need it until u1db adds it to its -# release as a dep. -oauth diff --git a/server/setup.py b/server/setup.py index 8a7fbe45..b3b26010 100644 --- a/server/setup.py +++ b/server/setup.py @@ -77,14 +77,20 @@ class freeze_debianver(Command): # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. -version_version = '{version}' -full_revisionid = '{full_revisionid}' -""" - templatefun = r""" - -def get_versions(default={}, verbose=False): - return {'version': version_version, - 'full-revisionid': full_revisionid} +import json +import sys + +version_json = ''' +{ + "dirty": false, + "error": null, + "full-revisionid": "FULL_REVISIONID", + "version": "VERSION_STRING" +} +''' # END VERSION_JSON + +def get_versions(): + return json.loads(version_json) """ def initialize_options(self): @@ -99,9 +105,9 @@ def get_versions(default={}, verbose=False): if proceed != "y": print("He. You scared. Aborting.") return - subst_template = self.template.format( - version=VERSION_SHORT, - full_revisionid=VERSION_REVISION) + self.templatefun + subst_template = self.template.replace( + 'VERSION_STRING', VERSION_SHORT).replace( + 'FULL_REVISIONID', VERSION_REVISION) versioneer_cfg = versioneer.get_config_from_root('.') with open(versioneer_cfg.versionfile_source, 'w') as f: f.write(subst_template) diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index 22894dac..34570b52 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -49,9 +49,7 @@ synchronization is handled by the `leap.soledad.server.auth` module. Shared database --------------- -Each user may store password-encrypted recovery data in the shared database, -as well as obtain a lock on the shared database in order to prevent creation -of multiple secrets in parallel. +Each user may store password-encrypted recovery data in the shared database. Recovery documents are stored in the database without any information that may identify the user. In order to achieve this, the doc_id of recovery @@ -77,26 +75,19 @@ This has some implications: * Because of the above, it is recommended that recovery documents expire (not implemented yet) to prevent excess storage. -Lock documents, on the other hand, may be more thoroughly protected by the -server. 
Their doc_id's are calculated from the SHARED_DB_LOCK_DOC_ID_PREFIX -and the user's uid. - The authorization for creating, updating, deleting and retrieving recovery -and lock documents on the shared database is handled by -`leap.soledad.server.auth` module. +documents on the shared database is handled by `leap.soledad.server.auth` +module. """ import configparser import urlparse import sys -from u1db.remote import http_app, utils - -from ._version import get_versions +from leap.soledad.common.l2db.remote import http_app, utils from leap.soledad.server.auth import SoledadTokenAuthMiddleware from leap.soledad.server.gzip_middleware import GzipMiddleware -from leap.soledad.server.lock_resource import LockResource from leap.soledad.server.sync import ( SyncResource, MAX_REQUEST_SIZE, @@ -107,6 +98,8 @@ from leap.soledad.common import SHARED_DB_NAME from leap.soledad.common.backend import SoledadBackend from leap.soledad.common.couch.state import CouchServerState +from ._version import get_versions + # ---------------------------------------------------------------------------- # Soledad WSGI application # ---------------------------------------------------------------------------- @@ -155,7 +148,6 @@ http_app.url_to_resource.register(http_app.DocsResource) http_app.url_to_resource.register(http_app.DocResource) # register Soledad's new or modified resources -http_app.url_to_resource.register(LockResource) http_app.url_to_resource.register(SyncResource) @@ -312,16 +304,34 @@ def load_configuration(file_path): # Run as Twisted WSGI Resource # ---------------------------------------------------------------------------- -def application(environ, start_response): + +def _load_config(): conf = load_configuration('/etc/soledad/soledad-server.conf') - conf = conf['soledad-server'] + return conf['soledad-server'] + + +def _get_couch_state(): + conf = _load_config() state = CouchServerState(conf['couch_url'], create_cmd=conf['create_cmd']) - SoledadBackend.BATCH_SUPPORT = conf['batching'] - # WSGI application that may be used by `twistd -web` + SoledadBackend.BATCH_SUPPORT = conf.get('batching', False) + return state + + +def application(environ, start_response): + """return WSGI application that may be used by `twistd -web`""" + state = _get_couch_state() application = GzipMiddleware( SoledadTokenAuthMiddleware(SoledadApp(state))) + return application(environ, start_response) + +def debug_local_application_do_not_use(environ, start_response): + """in where we bypass token auth middleware for ease of mind while + debugging in your local environment""" + state = _get_couch_state() + application = SoledadApp(state) return application(environ, start_response) + __version__ = get_versions()['version'] del get_versions diff --git a/server/src/leap/soledad/server/auth.py b/server/src/leap/soledad/server/auth.py index ccbd6fbd..ecee2d5d 100644 --- a/server/src/leap/soledad/server/auth.py +++ b/server/src/leap/soledad/server/auth.py @@ -14,21 +14,17 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. - - """ Authentication facilities for Soledad Server. 
""" - - import httplib import json -from u1db import DBNAME_CONSTRAINTS, errors as u1db_errors from abc import ABCMeta, abstractmethod from routes.mapper import Mapper from twisted.python import log +from leap.soledad.common.l2db import DBNAME_CONSTRAINTS, errors as u1db_errors from leap.soledad.common import SHARED_DB_NAME from leap.soledad.common import USER_DB_PREFIX @@ -101,7 +97,6 @@ class URLToAuthorization(object): /shared-db/docs | - /shared-db/doc/{any_id} | GET, PUT, DELETE /shared-db/sync-from/{source} | - - /shared-db/lock/{uuid} | PUT, DELETE /user-db | GET, PUT, DELETE /user-db/docs | - /user-db/doc/{id} | - @@ -118,10 +113,6 @@ class URLToAuthorization(object): '/%s/doc/{id:.*}' % SHARED_DB_NAME, [self.HTTP_METHOD_GET, self.HTTP_METHOD_PUT, self.HTTP_METHOD_DELETE]) - # auth info for shared-db lock resource - self._register( - '/%s/lock/%s' % (SHARED_DB_NAME, self._uuid), - [self.HTTP_METHOD_PUT, self.HTTP_METHOD_DELETE]) # auth info for user-db database resource self._register( '/%s' % self._user_db_name, diff --git a/server/src/leap/soledad/server/lock_resource.py b/server/src/leap/soledad/server/lock_resource.py deleted file mode 100644 index 0a602e26..00000000 --- a/server/src/leap/soledad/server/lock_resource.py +++ /dev/null @@ -1,231 +0,0 @@ -# -*- coding: utf-8 -*- -# lock_resource.py -# Copyright (C) 2013 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - - -""" -LockResource: a lock based on a document in the shared database. -""" - - -import hashlib -import time -import os -import tempfile -import errno - - -from u1db.remote import http_app -from twisted.python.lockfile import FilesystemLock - - -from leap.soledad.common import ( - SHARED_DB_NAME, - SHARED_DB_LOCK_DOC_ID_PREFIX, -) -from leap.soledad.common.errors import ( - InvalidTokenError, - NotLockedError, - AlreadyLockedError, - LockTimedOutError, - CouldNotObtainLockError, -) - - -class LockResource(object): - """ - Handle requests for locking documents. - - This class uses Twisted's Filesystem lock to manage a lock in the shared - database. - """ - - url_pattern = '/%s/lock/{uuid}' % SHARED_DB_NAME - """ - """ - - TIMEOUT = 300 # XXX is 5 minutes reasonable? - """ - The timeout after which the lock expires. - """ - - # used for lock doc storage - TIMESTAMP_KEY = '_timestamp' - LOCK_TOKEN_KEY = '_token' - - FILESYSTEM_LOCK_TRIES = 5 - FILESYSTEM_LOCK_SLEEP_SECONDS = 1 - - def __init__(self, uuid, state, responder): - """ - Initialize the lock resource. Parameters to this constructor are - automatically passed by u1db. - - :param uuid: The user unique id. - :type uuid: str - :param state: The backend database state. - :type state: u1db.remote.ServerState - :param responder: The infrastructure to send responses to client. 
- :type responder: u1db.remote.HTTPResponder - """ - self._shared_db = state.open_database(SHARED_DB_NAME) - self._lock_doc_id = '%s%s' % (SHARED_DB_LOCK_DOC_ID_PREFIX, uuid) - self._lock = FilesystemLock( - os.path.join( - tempfile.gettempdir(), - hashlib.sha512(self._lock_doc_id).hexdigest())) - self._state = state - self._responder = responder - - @http_app.http_method(content=str) - def put(self, content=None): - """ - Handle a PUT request to the lock document. - - A lock is a document in the shared db with doc_id equal to - 'lock-<uuid>' and the timestamp of its creation as content. This - method obtains a threaded-lock and creates a lock document if it does - not exist or if it has expired. - - It returns '201 Created' and a pair containing a token to unlock and - the lock timeout, or '403 AlreadyLockedError' and the remaining amount - of seconds the lock will still be valid. - - :param content: The content of the PUT request. It is only here - because PUT requests with empty content are considered - invalid requests by u1db. - :type content: str - """ - # obtain filesystem lock - if not self._try_obtain_filesystem_lock(): - self._responder.send_response_json( - LockTimedOutError.status, # error: request timeout - error=LockTimedOutError.wire_description) - return - - created_lock = False - now = time.time() - token = hashlib.sha256(os.urandom(10)).hexdigest() # for releasing - lock_doc = self._shared_db.get_doc(self._lock_doc_id) - remaining = self._remaining(lock_doc, now) - - # if there's no lock, create one - if lock_doc is None: - lock_doc = self._shared_db.create_doc( - { - self.TIMESTAMP_KEY: now, - self.LOCK_TOKEN_KEY: token, - }, - doc_id=self._lock_doc_id) - created_lock = True - else: - if remaining == 0: - # lock expired, create new one - lock_doc.content = { - self.TIMESTAMP_KEY: now, - self.LOCK_TOKEN_KEY: token, - } - self._shared_db.put_doc(lock_doc) - created_lock = True - - self._try_release_filesystem_lock() - - # send response to client - if created_lock is True: - self._responder.send_response_json( - 201, timeout=self.TIMEOUT, token=token) # success: created - else: - self._responder.send_response_json( - AlreadyLockedError.status, # error: forbidden - error=AlreadyLockedError.wire_description, remaining=remaining) - - @http_app.http_method(token=str) - def delete(self, token=None): - """ - Delete the lock if the C{token} is valid. - - Delete the lock document in case C{token} is equal to the token stored - in the lock document. - - :param token: The token returned when locking. - :type token: str - - :raise NotLockedError: Raised in case the lock is not locked. - :raise InvalidTokenError: Raised in case the token is invalid for - unlocking. - """ - lock_doc = self._shared_db.get_doc(self._lock_doc_id) - if lock_doc is None or self._remaining(lock_doc, time.time()) == 0: - self._responder.send_response_json( - NotLockedError.status, # error: not found - error=NotLockedError.wire_description) - elif token != lock_doc.content[self.LOCK_TOKEN_KEY]: - self._responder.send_response_json( - InvalidTokenError.status, # error: unauthorized - error=InvalidTokenError.wire_description) - else: - self._shared_db.delete_doc(lock_doc) - # respond success: should use 204 but u1db does not support it. - self._responder.send_response_json(200) - - def _remaining(self, lock_doc, now): - """ - Return the number of seconds the lock contained in C{lock_doc} is - still valid, when compared to C{now}. - - :param lock_doc: The document containing the lock. 
- :type lock_doc: u1db.Document - :param now: The time to which to compare the lock timestamp. - :type now: float - - :return: The amount of seconds the lock is still valid. - :rtype: float - """ - if lock_doc is not None: - lock_timestamp = lock_doc.content[self.TIMESTAMP_KEY] - remaining = lock_timestamp + self.TIMEOUT - now - return remaining if remaining > 0 else 0.0 - return 0.0 - - def _try_obtain_filesystem_lock(self): - """ - Try to obtain the file system lock. - - @return: Whether the lock was succesfully obtained. - @rtype: bool - """ - tries = self.FILESYSTEM_LOCK_TRIES - while tries > 0: - try: - return self._lock.lock() - except OSError as e: - tries -= 1 - if tries == 0: - raise CouldNotObtainLockError(e.message) - time.sleep(self.FILESYSTEM_LOCK_SLEEP_SECONDS) - return False - - def _try_release_filesystem_lock(self): - """ - Release the filesystem lock. - """ - try: - self._lock.unlock() - return True - except OSError as e: - if e.errno == errno.ENOENT: - return True - return False diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 96f65912..3f5c4aba 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -17,10 +17,10 @@ """ Server side synchronization infrastructure. """ -from u1db import sync, Document -from u1db.remote import http_app -from leap.soledad.server.state import ServerSyncState +from leap.soledad.common.l2db import sync, Document +from leap.soledad.common.l2db.remote import http_app from leap.soledad.server.caching import get_cache_for +from leap.soledad.server.state import ServerSyncState MAX_REQUEST_SIZE = 200 # in Mb diff --git a/testing/setup.py b/testing/setup.py new file mode 100644 index 00000000..059b2489 --- /dev/null +++ b/testing/setup.py @@ -0,0 +1,9 @@ +from setuptools import setup +from setuptools import find_packages + + +setup( + name='test_soledad', + packages=find_packages('.'), + package_data={'': ['*.conf']} +) diff --git a/testing/test_soledad/__init__.py b/testing/test_soledad/__init__.py new file mode 100644 index 00000000..c07c8b0e --- /dev/null +++ b/testing/test_soledad/__init__.py @@ -0,0 +1,5 @@ +from test_soledad import util + +__all__ = [ + 'util', +] diff --git a/common/src/leap/soledad/common/tests/fixture_soledad.conf b/testing/test_soledad/fixture_soledad.conf index 8d8161c3..8d8161c3 100644 --- a/common/src/leap/soledad/common/tests/fixture_soledad.conf +++ b/testing/test_soledad/fixture_soledad.conf diff --git a/common/src/leap/soledad/common/tests/u1db_tests/README b/testing/test_soledad/u1db_tests/README index 0525cfdb..546dfdc9 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/README +++ b/testing/test_soledad/u1db_tests/README @@ -17,9 +17,7 @@ u1db tests depend on the following python packages: hgtools testtools discover - oauth testscenarios - dirspec paste routes cython diff --git a/common/src/leap/soledad/common/tests/u1db_tests/__init__.py b/testing/test_soledad/u1db_tests/__init__.py index 01da9381..ba776864 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/__init__.py +++ b/testing/test_soledad/u1db_tests/__init__.py @@ -26,7 +26,6 @@ import json from wsgiref import simple_server -from oauth import oauth from pysqlcipher import dbapi2 from StringIO import StringIO @@ -36,13 +35,13 @@ from twisted.web.server import Site from twisted.web.wsgi import WSGIResource from twisted.internet import reactor -from u1db import errors -from u1db import Document -from u1db.backends import inmemory -from 
u1db.backends import sqlite_backend -from u1db.remote import server_state -from u1db.remote import http_app -from u1db.remote import http_target +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db import Document +from leap.soledad.common.l2db.backends import inmemory +from leap.soledad.common.l2db.backends import sqlite_backend +from leap.soledad.common.l2db.remote import server_state +from leap.soledad.common.l2db.remote import http_app +from leap.soledad.common.l2db.remote import http_target class TestCase(unittest.TestCase): @@ -394,44 +393,6 @@ def socket_pair(): return server_sock, client_sock -# OAuth related testing - -consumer1 = oauth.OAuthConsumer('K1', 'S1') -token1 = oauth.OAuthToken('kkkk1', 'XYZ') -consumer2 = oauth.OAuthConsumer('K2', 'S2') -token2 = oauth.OAuthToken('kkkk2', 'ZYX') -token3 = oauth.OAuthToken('kkkk3', 'ZYX') - - -class TestingOAuthDataStore(oauth.OAuthDataStore): - - """In memory predefined OAuthDataStore for testing.""" - - consumers = { - consumer1.key: consumer1, - consumer2.key: consumer2, - } - - tokens = { - token1.key: token1, - token2.key: token2 - } - - def lookup_consumer(self, key): - return self.consumers.get(key) - - def lookup_token(self, token_type, token_token): - return self.tokens.get(token_token) - - def lookup_nonce(self, oauth_consumer, oauth_token, nonce): - return None - -testingOAuthStore = TestingOAuthDataStore() - -sign_meth_HMAC_SHA1 = oauth.OAuthSignatureMethod_HMAC_SHA1() -sign_meth_PLAINTEXT = oauth.OAuthSignatureMethod_PLAINTEXT() - - def load_with_scenarios(loader, standard_tests, pattern): """Load the tests in a given module. @@ -452,10 +413,3 @@ def make_http_app(state): def http_sync_target(test, path): return http_target.HTTPSyncTarget(test.getURL(path)) - - -def make_oauth_http_app(state): - app = http_app.HTTPApp(state) - application = oauth_middleware.OAuthMiddleware(app, None, prefix='/~/') - application.get_oauth_data_store = lambda: tests.testingOAuthStore - return application diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_backends.py b/testing/test_soledad/u1db_tests/test_backends.py index 410d838f..10dcdff9 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/test_backends.py +++ b/testing/test_soledad/u1db_tests/test_backends.py @@ -1,8 +1,9 @@ # Copyright 2011 Canonical Ltd. +# Copyright 2016 LEAP Encryption Access Project # -# This file is part of u1db. +# This file is part of leap.soledad.common # -# u1db is free software: you can redistribute it and/or modify +# leap.soledad.common is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License version 3 # as published by the Free Software Foundation. # @@ -14,20 +15,18 @@ # You should have received a copy of the GNU Lesser General Public License # along with u1db. If not, see <http://www.gnu.org/licenses/>. -"""The backend class for U1DB. This deals with hiding storage details.""" +""" +The backend class for L2DB. This deals with hiding storage details. 
+""" import json -from u1db import DocumentBase -from u1db import errors -from u1db import vectorclock +from leap.soledad.common.l2db import DocumentBase +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db import vectorclock +from leap.soledad.common.l2db.remote import http_database -from leap.soledad.common.tests import u1db_tests as tests - -from leap.soledad.common.tests.u1db_tests import make_http_app -from leap.soledad.common.tests.u1db_tests import make_oauth_http_app - -from u1db.remote import http_database +from test_soledad import u1db_tests as tests from unittest import skip @@ -50,25 +49,6 @@ def copy_http_database_for_test(test, db): return test.request_state._copy_database(db) -def make_oauth_http_database_for_test(test, replica_uid): - http_db = make_http_database_for_test(test, replica_uid, '~/test') - http_db.set_oauth_credentials(tests.consumer1.key, tests.consumer1.secret, - tests.token1.key, tests.token1.secret) - return http_db - - -def copy_oauth_http_database_for_test(test, db): - # DO NOT COPY OR REUSE THIS CODE OUTSIDE TESTS: COPYING U1DB DATABASES IS - # THE WRONG THING TO DO, THE ONLY REASON WE DO SO HERE IS TO TEST THAT WE - # CORRECTLY DETECT IT HAPPENING SO THAT WE CAN RAISE ERRORS RATHER THAN - # CORRUPT USER DATA. USE SYNC INSTEAD, OR WE WILL SEND NINJA TO YOUR - # HOUSE. - http_db = test.request_state._copy_database(db) - http_db.set_oauth_credentials(tests.consumer1.key, tests.consumer1.secret, - tests.token1.key, tests.token1.secret) - return http_db - - class TestAlternativeDocument(DocumentBase): """A (not very) alternative implementation of Document.""" @@ -81,13 +61,7 @@ class AllDatabaseTests(tests.DatabaseBaseTests, tests.TestCaseWithServer): ('http', {'make_database_for_test': make_http_database_for_test, 'copy_database_for_test': copy_http_database_for_test, 'make_document_for_test': tests.make_document_for_test, - 'make_app_with_state': make_http_app}), - ('oauth_http', {'make_database_for_test': - make_oauth_http_database_for_test, - 'copy_database_for_test': - copy_oauth_http_database_for_test, - 'make_document_for_test': tests.make_document_for_test, - 'make_app_with_state': make_oauth_http_app}) + 'make_app_with_state': tests.make_http_app}), ] def test_close(self): diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_document.py b/testing/test_soledad/u1db_tests/test_document.py index 23502b4b..a7ead2d1 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/test_document.py +++ b/testing/test_soledad/u1db_tests/test_document.py @@ -1,8 +1,9 @@ # Copyright 2011 Canonical Ltd. +# Copyright 2016 LEAP Encryption Access Project # -# This file is part of u1db. +# This file is part of leap.soledad.common # -# u1db is free software: you can redistribute it and/or modify +# leap.soledad.common is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License version 3 # as published by the Free Software Foundation. # @@ -13,12 +14,11 @@ # # You should have received a copy of the GNU Lesser General Public License # along with u1db. If not, see <http://www.gnu.org/licenses/>. 
- - from unittest import skip -from u1db import errors -from leap.soledad.common.tests import u1db_tests as tests +from leap.soledad.common.l2db import errors + +from test_soledad import u1db_tests as tests @skip("Skiping tests imported from U1DB.") diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_http_client.py b/testing/test_soledad/u1db_tests/test_http_client.py index 973c3b26..e9516236 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/test_http_client.py +++ b/testing/test_soledad/u1db_tests/test_http_client.py @@ -1,4 +1,5 @@ # Copyright 2011-2012 Canonical Ltd. +# Copyright 2016 LEAP Encryption Access Project # # This file is part of u1db. # @@ -14,22 +15,17 @@ # You should have received a copy of the GNU Lesser General Public License # along with u1db. If not, see <http://www.gnu.org/licenses/>. -"""Tests for HTTPDatabase""" - -from oauth import oauth +""" +Tests for HTTPDatabase +""" import json -from u1db import ( - errors, -) - from unittest import skip -from leap.soledad.common.tests import u1db_tests as tests +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db.remote import http_client -from u1db.remote import ( - http_client, -) +from test_soledad import u1db_tests as tests @skip("Skiping tests imported from U1DB.") @@ -110,26 +106,6 @@ class TestHTTPClientBase(tests.TestCaseWithServer): else: start_response(status, [('Content-Type', 'application/json')]) return [json.dumps(response)] - elif '/oauth' in environ['PATH_INFO']: - base_url = self.getURL('').rstrip('/') - oauth_req = oauth.OAuthRequest.from_request( - http_method=environ['REQUEST_METHOD'], - http_url=base_url + environ['PATH_INFO'], - headers={'Authorization': environ['HTTP_AUTHORIZATION']}, - query_string=environ['QUERY_STRING'] - ) - oauth_server = oauth.OAuthServer(tests.testingOAuthStore) - oauth_server.add_signature_method(tests.sign_meth_HMAC_SHA1) - try: - consumer, token, params = oauth_server.verify_request( - oauth_req) - except oauth.OAuthError, e: - start_response("401 Unauthorized", - [('Content-Type', 'application/json')]) - return [json.dumps({"error": "unauthorized", - "message": e.message})] - start_response("200 OK", [('Content-Type', 'application/json')]) - return [json.dumps([environ['PATH_INFO'], token.key, params])] def make_app(self): return self.app @@ -321,44 +297,8 @@ class TestHTTPClientBase(tests.TestCaseWithServer): self.assertEqual("<Bad Request>", e.message) self.assertTrue("content-type" in e.headers) - def test_oauth(self): - cli = self.getClient() - cli.set_oauth_credentials(tests.consumer1.key, tests.consumer1.secret, - tests.token1.key, tests.token1.secret) - params = {'x': u'\xf0', 'y': "foo"} - res, headers = cli._request('GET', ['doc', 'oauth'], params) - self.assertEqual( - ['/dbase/doc/oauth', tests.token1.key, params], json.loads(res)) - - # oauth does its own internal quoting - params = {'x': u'\xf0', 'y': "foo"} - res, headers = cli._request('GET', ['doc', 'oauth', 'foo bar'], params) - self.assertEqual( - ['/dbase/doc/oauth/foo bar', tests.token1.key, params], - json.loads(res)) - - def test_oauth_ctr_creds(self): - cli = self.getClient(creds={'oauth': { - 'consumer_key': tests.consumer1.key, - 'consumer_secret': tests.consumer1.secret, - 'token_key': tests.token1.key, - 'token_secret': tests.token1.secret, - }}) - params = {'x': u'\xf0', 'y': "foo"} - res, headers = cli._request('GET', ['doc', 'oauth'], params) - self.assertEqual( - ['/dbase/doc/oauth', tests.token1.key, params], json.loads(res)) - def 
test_unknown_creds(self): self.assertRaises(errors.UnknownAuthMethod, self.getClient, creds={'foo': {}}) self.assertRaises(errors.UnknownAuthMethod, self.getClient, creds={}) - - def test_oauth_Unauthorized(self): - cli = self.getClient() - cli.set_oauth_credentials(tests.consumer1.key, tests.consumer1.secret, - tests.token1.key, "WRONG") - params = {'y': 'foo'} - self.assertRaises(errors.Unauthorized, cli._request, 'GET', - ['doc', 'oauth'], params) diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_http_database.py b/testing/test_soledad/u1db_tests/test_http_database.py index 015e6e69..a3ed9361 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/test_http_database.py +++ b/testing/test_soledad/u1db_tests/test_http_database.py @@ -21,13 +21,12 @@ import json from unittest import skip -from u1db import errors -from u1db import Document -from u1db.remote import http_database -from u1db.remote import http_target - -from leap.soledad.common.tests import u1db_tests as tests -from leap.soledad.common.tests.u1db_tests import make_http_app +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db import Document +from leap.soledad.common.l2db.remote import http_database +from leap.soledad.common.l2db.remote import http_target +from test_soledad import u1db_tests as tests +from test_soledad.u1db_tests import make_http_app @skip("Skiping tests imported from U1DB.") @@ -175,26 +174,6 @@ class TestHTTPDatabaseSimpleOperations(tests.TestCase): self.assertIsInstance(st, http_target.HTTPSyncTarget) self.assertEqual(st._url, self.db._url) - def test_get_sync_target_inherits_oauth_credentials(self): - self.db.set_oauth_credentials(tests.consumer1.key, - tests.consumer1.secret, - tests.token1.key, tests.token1.secret) - st = self.db.get_sync_target() - self.assertEqual(self.db._creds, st._creds) - - -@skip("Skiping tests imported from U1DB.") -class TestHTTPDatabaseCtrWithCreds(tests.TestCase): - - def test_ctr_with_creds(self): - db1 = http_database.HTTPDatabase('http://dbs/db', creds={'oauth': { - 'consumer_key': tests.consumer1.key, - 'consumer_secret': tests.consumer1.secret, - 'token_key': tests.token1.key, - 'token_secret': tests.token1.secret - }}) - self.assertIn('oauth', db1._creds) - @skip("Skiping tests imported from U1DB.") class TestHTTPDatabaseIntegration(tests.TestCaseWithServer): diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_https.py b/testing/test_soledad/u1db_tests/test_https.py index e177a808..baffa723 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/test_https.py +++ b/testing/test_soledad/u1db_tests/test_https.py @@ -7,12 +7,10 @@ import sys from paste import httpserver from unittest import skip -from u1db.remote import http_client -from u1db.remote import http_target +from leap.soledad.common.l2db.remote import http_client from leap import soledad -from leap.soledad.common.tests import u1db_tests as tests -from leap.soledad.common.tests.u1db_tests import make_oauth_http_app +from test_soledad import u1db_tests as tests def https_server_def(): @@ -40,25 +38,10 @@ def https_server_def(): return make_server, "shutdown", "https" -def oauth_https_sync_target(test, host, path): - _, port = test.server.server_address - st = http_target.HTTPSyncTarget('https://%s:%d/~/%s' % (host, port, path)) - st.set_oauth_credentials(tests.consumer1.key, tests.consumer1.secret, - tests.token1.key, tests.token1.secret) - return st - - @skip("Skiping tests imported from U1DB.") class 
TestHttpSyncTargetHttpsSupport(tests.TestCaseWithServer):
 
-    scenarios = [
-        ('oauth_https', {'server_def': https_server_def,
-                         'make_app_with_state': make_oauth_http_app,
-                         'make_document_for_test':
-                             tests.make_document_for_test,
-                         'sync_target': oauth_https_sync_target
-                         }),
-    ]
+    scenarios = []
 
     def setUp(self):
         try:
diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_open.py b/testing/test_soledad/u1db_tests/test_open.py
index ee249e6e..30d4de00 100644
--- a/common/src/leap/soledad/common/tests/u1db_tests/test_open.py
+++ b/testing/test_soledad/u1db_tests/test_open.py
@@ -1,4 +1,5 @@
 # Copyright 2011 Canonical Ltd.
+# Copyright 2016 LEAP Encryption Access Project
 #
 # This file is part of u1db.
 #
@@ -17,15 +18,14 @@
 """Test u1db.open"""
 
 import os
+from unittest import skip
 
-from u1db import (
-    errors,
-    open as u1db_open,
+from leap.soledad.common.l2db import (
+    errors, open as u1db_open,
 )
-from unittest import skip
 
-from leap.soledad.common.tests import u1db_tests as tests
-from u1db.backends import sqlite_backend
-from leap.soledad.common.tests.u1db_tests.test_backends \
+from test_soledad import u1db_tests as tests
+from leap.soledad.common.l2db.backends import sqlite_backend
+from test_soledad.u1db_tests.test_backends \
     import TestAlternativeDocument
diff --git a/common/src/leap/soledad/common/tests/u1db_tests/testing-certs/Makefile b/testing/test_soledad/u1db_tests/testing-certs/Makefile
index 2385e75b..2385e75b 100644
--- a/common/src/leap/soledad/common/tests/u1db_tests/testing-certs/Makefile
+++ b/testing/test_soledad/u1db_tests/testing-certs/Makefile
diff --git a/common/src/leap/soledad/common/tests/u1db_tests/testing-certs/cacert.pem b/testing/test_soledad/u1db_tests/testing-certs/cacert.pem
index c019a730..c019a730 100644
--- a/common/src/leap/soledad/common/tests/u1db_tests/testing-certs/cacert.pem
+++ b/testing/test_soledad/u1db_tests/testing-certs/cacert.pem
diff --git a/common/src/leap/soledad/common/tests/u1db_tests/testing-certs/testing.cert b/testing/test_soledad/u1db_tests/testing-certs/testing.cert
index 985684fb..985684fb 100644
--- a/common/src/leap/soledad/common/tests/u1db_tests/testing-certs/testing.cert
+++ b/testing/test_soledad/u1db_tests/testing-certs/testing.cert
diff --git a/common/src/leap/soledad/common/tests/u1db_tests/testing-certs/testing.key b/testing/test_soledad/u1db_tests/testing-certs/testing.key
index d83d4920..d83d4920 100644
--- a/common/src/leap/soledad/common/tests/u1db_tests/testing-certs/testing.key
+++ b/testing/test_soledad/u1db_tests/testing-certs/testing.key
diff --git a/common/src/leap/soledad/common/tests/util.py b/testing/test_soledad/util.py
index d4510686..033a55df 100644
--- a/common/src/leap/soledad/common/tests/util.py
+++ b/testing/test_soledad/util.py
@@ -26,7 +26,6 @@ import tempfile
 import shutil
 import random
 import string
-import u1db
 import couchdb
 
 from uuid import uuid4
@@ -35,13 +34,14 @@ from urlparse import urljoin
 from StringIO import StringIO
 from pysqlcipher import dbapi2
 
-from u1db import sync
-from u1db.remote import http_database
-
 from twisted.trial import unittest
 from leap.common.testing.basetest import BaseLeapTest
 
+from leap.soledad.common import l2db
+from leap.soledad.common.l2db import sync
+from leap.soledad.common.l2db.remote import http_database
+
 from leap.soledad.common import soledad_assert
 from leap.soledad.common.document import SoledadDocument
 from leap.soledad.common.couch import CouchDatabase
@@ -70,6 +70,10 @@ def make_local_db_and_target(test):
     return db, st
 
 
+def make_document_for_test(test, doc_id, rev, content, has_conflicts=False):
+    return SoledadDocument(doc_id, rev, content, has_conflicts=has_conflicts)
+
+
 def make_sqlcipher_database_for_test(test, replica_uid):
     db = SQLCipherDatabase(
         SQLCipherOptions(':memory:', PASSWORD))
@@ -97,6 +101,13 @@ def copy_sqlcipher_database_for_test(test, db):
     return new_db
 
 
+SQLCIPHER_SCENARIOS = [
+    ('sqlcipher', {'make_database_for_test': make_sqlcipher_database_for_test,
+                   'copy_database_for_test': copy_sqlcipher_database_for_test,
+                   'make_document_for_test': make_document_for_test, }),
+]
+
+
 def make_soledad_app(state):
     return SoledadApp(state)
 
@@ -174,8 +185,6 @@ class MockedSharedDBTest(object):
     class defaultMockSharedDB(object):
         get_doc = Mock(return_value=get_doc_return_value)
         put_doc = Mock(side_effect=put_doc_side_effect)
-        lock = Mock(return_value=('atoken', 300))
-        unlock = Mock(return_value=True)
        open = Mock(return_value=None)
         close = Mock(return_value=None)
         syncable = True
@@ -236,9 +245,9 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest):
         self.db2_file = os.path.join(self.tempdir, "db2.u1db")
         self.email = ADDRESS
         # open test dbs
-        self._db1 = u1db.open(self.db1_file, create=True,
+        self._db1 = l2db.open(self.db1_file, create=True,
                               document_factory=SoledadDocument)
-        self._db2 = u1db.open(self.db2_file, create=True,
+        self._db2 = l2db.open(self.db2_file, create=True,
                               document_factory=SoledadDocument)
         # get a random prefix for each test, so we do not mess with
         # concurrency during initialization and shutting down of
@@ -286,8 +295,7 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest):
             server_url='https://127.0.0.1/',
             cert_file=None,
             shared_db_class=None,
-            auth_token='auth-token',
-            userid=ADDRESS):
+            auth_token='auth-token'):
 
         def _put_doc_side_effect(doc):
             self._doc_put = doc
@@ -305,12 +313,11 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest):
                 self.tempdir, prefix, secrets_path),
             local_db_path=os.path.join(
                 self.tempdir, prefix, local_db_path),
-            server_url=server_url,  # Soledad will fail if not given an url.
+            server_url=server_url,  # Soledad will fail if not given a URL
             cert_file=cert_file,
             defer_encryption=self.defer_sync_encryption,
             shared_db=MockSharedDB(),
-            auth_token=auth_token,
-            userid=userid)
+            auth_token=auth_token)
         self.addCleanup(soledad.close)
         return soledad
 
diff --git a/testing/tests/client/__init__.py b/testing/tests/client/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/testing/tests/client/__init__.py
diff --git a/common/src/leap/soledad/common/tests/test_soledad_app.py b/testing/tests/client/test_app.py
index 4598a7bb..fef2f371 100644
--- a/common/src/leap/soledad/common/tests/test_soledad_app.py
+++ b/testing/tests/client/test_app.py
@@ -14,41 +14,27 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-
 """
 Test ObjectStore and Couch backend bits.
""" - - from testscenarios import TestWithScenarios -from leap.soledad.common.tests.util import BaseSoledadTest -from leap.soledad.common.tests.util import make_soledad_document_for_test -from leap.soledad.common.tests.util import make_soledad_app -from leap.soledad.common.tests.util import make_token_soledad_app -from leap.soledad.common.tests.util import make_token_http_database_for_test -from leap.soledad.common.tests.util import copy_token_http_database_for_test -from leap.soledad.common.tests.u1db_tests import test_backends +from test_soledad.util import BaseSoledadTest +from test_soledad.util import make_soledad_document_for_test +from test_soledad.util import make_token_soledad_app +from test_soledad.util import make_token_http_database_for_test +from test_soledad.util import copy_token_http_database_for_test +from test_soledad.u1db_tests import test_backends # ----------------------------------------------------------------------------- # The following tests come from `u1db.tests.test_backends`. # ----------------------------------------------------------------------------- -LEAP_SCENARIOS = [ - ('http', { - 'make_database_for_test': test_backends.make_http_database_for_test, - 'copy_database_for_test': test_backends.copy_http_database_for_test, - 'make_document_for_test': make_soledad_document_for_test, - 'make_app_with_state': make_soledad_app}), -] - - class SoledadTests( TestWithScenarios, test_backends.AllDatabaseTests, BaseSoledadTest): - scenarios = LEAP_SCENARIOS + [ + scenarios = [ ('token_http', { 'make_database_for_test': make_token_http_database_for_test, 'copy_database_for_test': copy_token_http_database_for_test, diff --git a/testing/tests/client/test_aux_methods.py b/testing/tests/client/test_aux_methods.py new file mode 100644 index 00000000..c25ff8ca --- /dev/null +++ b/testing/tests/client/test_aux_methods.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# test_soledad.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Tests for general Soledad functionality. 
+""" +import os + +from twisted.internet import defer + +from leap.soledad.common.errors import DatabaseAccessError +from leap.soledad.client import Soledad +from leap.soledad.client.adbapi import U1DBConnectionPool +from leap.soledad.client.secrets import PassphraseTooShort + +from test_soledad.util import BaseSoledadTest + + +class AuxMethodsTestCase(BaseSoledadTest): + + def test__init_dirs(self): + sol = self._soledad_instance(prefix='_init_dirs') + local_db_dir = os.path.dirname(sol.local_db_path) + secrets_path = os.path.dirname(sol.secrets.secrets_path) + self.assertTrue(os.path.isdir(local_db_dir)) + self.assertTrue(os.path.isdir(secrets_path)) + + def _close_soledad(results): + sol.close() + + d = sol.create_doc({}) + d.addCallback(_close_soledad) + return d + + def test__init_u1db_sqlcipher_backend(self): + sol = self._soledad_instance(prefix='_init_db') + self.assertIsInstance(sol._dbpool, U1DBConnectionPool) + self.assertTrue(os.path.isfile(sol.local_db_path)) + sol.close() + + def test__init_config_with_defaults(self): + """ + Test if configuration defaults point to the correct place. + """ + + class SoledadMock(Soledad): + + def __init__(self): + pass + + # instantiate without initializing so we just test + # _init_config_with_defaults() + sol = SoledadMock() + sol._passphrase = u'' + sol._server_url = '' + sol._init_config_with_defaults() + # assert value of local_db_path + self.assertEquals( + os.path.join(sol.default_prefix, 'soledad.u1db'), + sol.local_db_path) + + def test__init_config_from_params(self): + """ + Test if configuration is correctly read from file. + """ + sol = self._soledad_instance( + 'leap@leap.se', + passphrase=u'123', + secrets_path='value_3', + local_db_path='value_2', + server_url='value_1', + cert_file=None) + self.assertEqual( + os.path.join(self.tempdir, 'value_3'), + sol.secrets.secrets_path) + self.assertEqual( + os.path.join(self.tempdir, 'value_2'), + sol.local_db_path) + self.assertEqual('value_1', sol._server_url) + sol.close() + + @defer.inlineCallbacks + def test_change_passphrase(self): + """ + Test if passphrase can be changed. + """ + prefix = '_change_passphrase' + sol = self._soledad_instance( + 'leap@leap.se', + passphrase=u'123', + prefix=prefix, + ) + + doc1 = yield sol.create_doc({'simple': 'doc'}) + sol.change_passphrase(u'654321') + sol.close() + + with self.assertRaises(DatabaseAccessError): + self._soledad_instance( + 'leap@leap.se', + passphrase=u'123', + prefix=prefix) + + sol2 = self._soledad_instance( + 'leap@leap.se', + passphrase=u'654321', + prefix=prefix) + doc2 = yield sol2.get_doc(doc1.doc_id) + + self.assertEqual(doc1, doc2) + + sol2.close() + + def test_change_passphrase_with_short_passphrase_raises(self): + """ + Test if attempt to change passphrase passing a short passphrase + raises. + """ + sol = self._soledad_instance( + 'leap@leap.se', + passphrase=u'123') + # check that soledad complains about new passphrase length + self.assertRaises( + PassphraseTooShort, + sol.change_passphrase, u'54321') + sol.close() + + def test_get_passphrase(self): + """ + Assert passphrase getter works fine. 
+        """
+        sol = self._soledad_instance()
+        self.assertEqual('123', sol._passphrase)
+        sol.close()
diff --git a/common/src/leap/soledad/common/tests/test_crypto.py b/testing/tests/client/test_crypto.py
index ca10a1e1..77252b46 100644
--- a/common/src/leap/soledad/common/tests/test_crypto.py
+++ b/testing/tests/client/test_crypto.py
@@ -23,10 +23,9 @@ import binascii
 
 from leap.soledad.client import crypto
 from leap.soledad.common.document import SoledadDocument
-from leap.soledad.common.tests.util import BaseSoledadTest
+from test_soledad.util import BaseSoledadTest
 from leap.soledad.common.crypto import WrongMacError
 from leap.soledad.common.crypto import UnknownMacMethodError
-from leap.soledad.common.crypto import EncryptionMethods
 from leap.soledad.common.crypto import ENC_JSON_KEY
 from leap.soledad.common.crypto import ENC_SCHEME_KEY
 from leap.soledad.common.crypto import MAC_KEY
@@ -67,7 +66,7 @@ class RecoveryDocumentTestCase(BaseSoledadTest):
         rd = self._soledad.secrets._export_recovery_document()
         secret_id = rd[self._soledad.secrets.STORAGE_SECRETS_KEY].items()[0][0]
         # assert exported secret is the same
-        secret = self._soledad.secrets._decrypt_storage_secret(
+        secret = self._soledad.secrets._decrypt_storage_secret_version_1(
             rd[self._soledad.secrets.STORAGE_SECRETS_KEY][secret_id])
         self.assertEqual(secret_id, self._soledad.secrets._secret_id)
         self.assertEqual(secret, self._soledad.secrets._secrets[secret_id])
@@ -93,56 +92,58 @@ class RecoveryDocumentTestCase(BaseSoledadTest):
 
 class SoledadSecretsTestCase(BaseSoledadTest):
 
-    def test__gen_secret(self):
-        # instantiate and save secret_id
-        sol = self._soledad_instance(user='user@leap.se')
-        self.assertTrue(len(sol.secrets._secrets) == 1)
-        secret_id_1 = sol.secrets.secret_id
-        # assert id is hash of secret
+    def test_new_soledad_instance_generates_one_secret(self):
         self.assertTrue(
-            secret_id_1 == hashlib.sha256(sol.storage_secret).hexdigest())
+            self._soledad.storage_secret is not None,
+            "Expected secret to be something different than None")
+        number_of_secrets = len(self._soledad.secrets._secrets)
+        self.assertTrue(
+            number_of_secrets == 1,
+            "Expected exactly 1 secret, got %d instead." % number_of_secrets)
+
+    def test_generated_secret_is_of_correct_type(self):
+        expected_type = str
+        self.assertIsInstance(
+            self._soledad.storage_secret, expected_type,
+            "Expected secret to be of type %s" % expected_type)
+
+    def test_generated_secret_has_correct_length(self):
+        expected_length = self._soledad.secrets.GEN_SECRET_LENGTH
+        actual_length = len(self._soledad.storage_secret)
+        self.assertTrue(
+            expected_length == actual_length,
+            "Expected secret with length %d, got %d instead."
+            % (expected_length, actual_length))
+
+    def test_generated_secret_id_is_sha256_hash_of_secret(self):
+        generated = self._soledad.secrets.secret_id
+        expected = hashlib.sha256(self._soledad.storage_secret).hexdigest()
+        self.assertTrue(
+            generated == expected,
+            "Expected generated secret id to be sha256 hash, got something "
+            "else instead.")
+
+    def test_generate_new_secret_generates_different_secret_id(self):
         # generate new secret
-        secret_id_2 = sol.secrets._gen_secret()
-        self.assertTrue(secret_id_1 != secret_id_2)
-        sol.close()
-        # re-instantiate
-        sol = self._soledad_instance(user='user@leap.se')
-        sol.secrets.set_secret_id(secret_id_1)
-        # assert ids are valid
-        self.assertTrue(len(sol.secrets._secrets) == 2)
-        self.assertTrue(secret_id_1 in sol.secrets._secrets)
-        self.assertTrue(secret_id_2 in sol.secrets._secrets)
-        # assert format of secret 1
-        self.assertTrue(sol.storage_secret is not None)
-        self.assertIsInstance(sol.storage_secret, str)
-        secret_length = sol.secrets.GEN_SECRET_LENGTH
-        self.assertTrue(len(sol.storage_secret) == secret_length)
-        # assert format of secret 2
-        sol.secrets.set_secret_id(secret_id_2)
-        self.assertTrue(sol.storage_secret is not None)
-        self.assertIsInstance(sol.storage_secret, str)
-        self.assertTrue(len(sol.storage_secret) == secret_length)
-        # assert id is hash of new secret
+        secret_id_1 = self._soledad.secrets.secret_id
+        secret_id_2 = self._soledad.secrets._gen_secret()
         self.assertTrue(
-            secret_id_2 == hashlib.sha256(sol.storage_secret).hexdigest())
-        sol.close()
+            len(self._soledad.secrets._secrets) == 2,
+            "Expected exactly 2 secrets.")
+        self.assertTrue(
+            secret_id_1 != secret_id_2,
+            "Expected IDs of secrets to be distinct.")
+        self.assertTrue(
+            secret_id_1 in self._soledad.secrets._secrets,
+            "Expected to find ID of first secret in Soledad Secrets.")
+        self.assertTrue(
+            secret_id_2 in self._soledad.secrets._secrets,
+            "Expected to find ID of second secret in Soledad Secrets.")
 
     def test__has_secret(self):
-        sol = self._soledad_instance(
-            user='user@leap.se', prefix=self.rand_prefix)
-        self.assertTrue(
-            sol.secrets._has_secret(),
-            "Should have a secret at this point")
-        # setting secret id to None should not interfere in the fact we have a
-        # secret.
-        sol.secrets.set_secret_id(None)
         self.assertTrue(
-            sol.secrets._has_secret(),
+            self._soledad._secrets._has_secret(),
             "Should have a secret at this point")
-        # but not being able to decrypt correctly should
-        sol.secrets._secrets[sol.secrets.secret_id] = None
-        self.assertFalse(sol.secrets._has_secret())
-        sol.close()
 
 
 class MacAuthTestCase(BaseSoledadTest):
diff --git a/testing/tests/client/test_doc.py b/testing/tests/client/test_doc.py
new file mode 100644
index 00000000..e158d768
--- /dev/null
+++ b/testing/tests/client/test_doc.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+# test_soledad_doc.py
+# Copyright (C) 2013, 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
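# Illustrative aside (editorial sketch, not part of the patch above): the
# secrets tests just shown pin down a convention -- a secret id is the hex
# SHA-256 digest of the raw storage secret. A self-contained sketch of that
# invariant; the 1024 below is only a stand-in for GEN_SECRET_LENGTH, whose
# real value lives in leap.soledad.client.secrets.

import hashlib
import os

secret = os.urandom(1024)  # stand-in storage secret (a plain byte string)
secret_id = hashlib.sha256(secret).hexdigest()
assert len(secret_id) == 64  # a SHA-256 hexdigest is always 64 hex chars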
+""" +Test Leap backend bits: soledad docs +""" +from testscenarios import TestWithScenarios + +from test_soledad.u1db_tests import test_document +from test_soledad.util import BaseSoledadTest +from test_soledad.util import make_soledad_document_for_test + + +# ----------------------------------------------------------------------------- +# The following tests come from `u1db.tests.test_document`. +# ----------------------------------------------------------------------------- + +class TestSoledadDocument( + TestWithScenarios, + test_document.TestDocument, BaseSoledadTest): + + scenarios = ([( + 'leap', { + 'make_document_for_test': make_soledad_document_for_test})]) + + +class TestSoledadPyDocument( + TestWithScenarios, + test_document.TestPyDocument, BaseSoledadTest): + + scenarios = ([( + 'leap', { + 'make_document_for_test': make_soledad_document_for_test})]) diff --git a/common/src/leap/soledad/common/tests/test_http.py b/testing/tests/client/test_http.py index bc486fe3..47df4b4a 100644 --- a/common/src/leap/soledad/common/tests/test_http.py +++ b/testing/tests/client/test_http.py @@ -17,10 +17,11 @@ """ Test Leap backend bits: test http database """ -from u1db.remote import http_database + +from twisted.trial import unittest from leap.soledad.client import auth -from leap.soledad.common.tests.u1db_tests import test_http_database +from leap.soledad.common.l2db.remote import http_database # ----------------------------------------------------------------------------- @@ -41,8 +42,7 @@ class _HTTPDatabase(http_database.HTTPDatabase, auth.TokenBasedAuth): self, method, url_query, params) -class TestHTTPDatabaseWithCreds( - test_http_database.TestHTTPDatabaseCtrWithCreds): +class TestHTTPDatabaseWithCreds(unittest.TestCase): def test_get_sync_target_inherits_token_credentials(self): # this test was from TestDatabaseSimpleOperations but we put it here diff --git a/common/src/leap/soledad/common/tests/test_http_client.py b/testing/tests/client/test_http_client.py index 700ae3b6..a107930a 100644 --- a/common/src/leap/soledad/common/tests/test_http_client.py +++ b/testing/tests/client/test_http_client.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # test_http_client.py -# Copyright (C) 2013, 2014 LEAP +# Copyright (C) 2013-2016 LEAP # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,12 +19,11 @@ Test Leap backend bits: sync target """ import json -from u1db.remote import http_client - from testscenarios import TestWithScenarios from leap.soledad.client import auth -from leap.soledad.common.tests.u1db_tests import test_http_client +from leap.soledad.common.l2db.remote import http_client +from test_soledad.u1db_tests import test_http_client from leap.soledad.server.auth import SoledadTokenAuthMiddleware @@ -40,6 +39,12 @@ class TestSoledadClientBase( This class should be used to test Token auth. """ + def getClient(self, **kwds): + cli = self.getClientWithToken(**kwds) + if 'creds' not in kwds: + cli.set_token_credentials('user-uuid', 'auth-token') + return cli + def getClientWithToken(self, **kwds): self.startServer() @@ -55,24 +60,6 @@ class TestSoledadClientBase( return _HTTPClientWithToken(self.getURL('dbase'), **kwds) - def test_oauth(self): - """ - Suppress oauth test (we test for token auth here). - """ - pass - - def test_oauth_ctr_creds(self): - """ - Suppress oauth test (we test for token auth here). 
-        """
-        pass
-
-    def test_oauth_Unauthorized(self):
-        """
-        Suppress oauth test (we test for token auth here).
-        """
-        pass
-
     def app(self, environ, start_response):
         res = test_http_client.TestHTTPClientBase.app(
             self, environ, start_response)
@@ -84,14 +71,17 @@ class TestSoledadClientBase(
             if not auth:
                 start_response("401 Unauthorized",
                                [('Content-Type', 'application/json')])
-                return [json.dumps({"error": "unauthorized",
-                                    "message": e.message})]
+                return [
+                    json.dumps(
+                        {"error": "unauthorized",
+                         "message": "no token found in environment"})
+                ]
             scheme, encoded = auth.split(None, 1)
             if scheme.lower() != 'token':
                 start_response("401 Unauthorized",
                                [('Content-Type', 'application/json')])
                 return [json.dumps({"error": "unauthorized",
-                                    "message": e.message})]
+                                    "message": "unknown scheme: %s" % scheme})]
             uuid, token = encoded.decode('base64').split(':', 1)
             if uuid != 'user-uuid' and token != 'auth-token':
                 return Exception("Incorrect address or token.")
diff --git a/common/src/leap/soledad/common/tests/test_https.py b/testing/tests/client/test_https.py
index eeeb4982..caac16da 100644
--- a/common/src/leap/soledad/common/tests/test_https.py
+++ b/testing/tests/client/test_https.py
@@ -14,22 +14,19 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-
 """
 Test Leap backend bits: https
 """
+from unittest import skip
 
-
-from u1db.remote import http_client
+from testscenarios import TestWithScenarios
 
 from leap.soledad import client
 
-from testscenarios import TestWithScenarios
-
-from leap.soledad.common.tests.u1db_tests import test_backends
-from leap.soledad.common.tests.u1db_tests import test_https
-from leap.soledad.common.tests.util import (
+from leap.soledad.common.l2db.remote import http_client
+from test_soledad.u1db_tests import test_backends
+from test_soledad.u1db_tests import test_https
+from test_soledad.util import (
     BaseSoledadTest,
     make_soledad_document_for_test,
     make_soledad_app,
@@ -65,17 +62,19 @@ def token_leap_https_sync_target(test, host, path, cert_file=None):
     return st
 
 
+@skip("Skipping tests imported from U1DB.")
 class TestSoledadHTTPSyncTargetHttpsSupport(
         TestWithScenarios,
-        test_https.TestHttpSyncTargetHttpsSupport,
+        # test_https.TestHttpSyncTargetHttpsSupport,
         BaseSoledadTest):
 
     scenarios = [
         ('token_soledad_https',
-         {'server_def': test_https.https_server_def,
-          'make_app_with_state': make_token_soledad_app,
-          'make_document_for_test': make_soledad_document_for_test,
-          'sync_target': token_leap_https_sync_target}),
+         {
+             # 'server_def': test_https.https_server_def,
+             'make_app_with_state': make_token_soledad_app,
+             'make_document_for_test': make_soledad_document_for_test,
+             'sync_target': token_leap_https_sync_target}),
     ]
 
     def setUp(self):
diff --git a/testing/tests/client/test_shared_db.py b/testing/tests/client/test_shared_db.py
new file mode 100644
index 00000000..aac766c2
--- /dev/null
+++ b/testing/tests/client/test_shared_db.py
@@ -0,0 +1,50 @@
+from leap.soledad.common.document import SoledadDocument
+from leap.soledad.client.shared_db import SoledadSharedDatabase
+
+from test_soledad.util import BaseSoledadTest
+from test_soledad.util import ADDRESS
+
+
+class SoledadSharedDBTestCase(BaseSoledadTest):
+
+    """
+    These tests ensure the functionality of the shared recovery database.
+    """
+
+    def setUp(self):
+        BaseSoledadTest.setUp(self)
+        self._shared_db = SoledadSharedDatabase(
+            'https://provider/', ADDRESS, document_factory=SoledadDocument,
+            creds=None)
+
+    def tearDown(self):
+        BaseSoledadTest.tearDown(self)
+
+    def test__get_secrets_from_shared_db(self):
+        """
+        Ensure the shared db is queried with the correct doc_id.
+        """
+        doc_id = self._soledad.secrets._shared_db_doc_id()
+        self._soledad.secrets._get_secrets_from_shared_db()
+        self.assertTrue(
+            self._soledad.shared_db.get_doc.assert_called_with(
+                doc_id) is None,
+            'Wrong doc_id when fetching recovery document.')
+
+    def test__put_secrets_in_shared_db(self):
+        """
+        Ensure recovery document is put into shared recover db.
+        """
+        doc_id = self._soledad.secrets._shared_db_doc_id()
+        self._soledad.secrets._put_secrets_in_shared_db()
+        self.assertTrue(
+            self._soledad.shared_db.get_doc.assert_called_with(
+                doc_id) is None,
+            'Wrong doc_id when fetching recovery document.')
+        self.assertTrue(
+            self._soledad.shared_db.put_doc.assert_called_with(
+                self._doc_put) is None,
+            'Wrong document when putting recovery document.')
+        self.assertTrue(
+            self._doc_put.doc_id == doc_id,
+            'Wrong doc_id when putting recovery document.')
diff --git a/testing/tests/client/test_signals.py b/testing/tests/client/test_signals.py
new file mode 100644
index 00000000..4e9ebfd0
--- /dev/null
+++ b/testing/tests/client/test_signals.py
@@ -0,0 +1,165 @@
+from mock import Mock
+from twisted.internet import defer
+
+from leap import soledad
+from leap.common.events import catalog
+from leap.soledad.common.document import SoledadDocument
+
+from test_soledad.util import ADDRESS
+from test_soledad.util import BaseSoledadTest
+
+
+class SoledadSignalingTestCase(BaseSoledadTest):
+
+    """
+    These tests ensure signals are correctly emitted by Soledad.
+    """
+
+    EVENTS_SERVER_PORT = 8090
+
+    def setUp(self):
+        # mock signaling
+        soledad.client.signal = Mock()
+        soledad.client.secrets.events.emit_async = Mock()
+        # run parent's setUp
+        BaseSoledadTest.setUp(self)
+
+    def tearDown(self):
+        BaseSoledadTest.tearDown(self)
+
+    def _pop_mock_call(self, mocked):
+        mocked.call_args_list.pop()
+        mocked.mock_calls.pop()
+        mocked.call_args = mocked.call_args_list[-1]
+
+    def test_stage3_bootstrap_signals(self):
+        """
+        Test that a fresh soledad emits all bootstrap signals.
+
+        Signals are:
+        - downloading keys / done downloading keys.
+        - creating keys / done creating keys.
+        - downloading keys / done downloading keys.
+        - uploading keys / done uploading keys.
+ """ + soledad.client.secrets.events.emit_async.reset_mock() + # get a fresh instance so it emits all bootstrap signals + sol = self._soledad_instance( + secrets_path='alternative_stage3.json', + local_db_path='alternative_stage3.u1db') + # reverse call order so we can verify in the order the signals were + # expected + soledad.client.secrets.events.emit_async.mock_calls.reverse() + soledad.client.secrets.events.emit_async.call_args = \ + soledad.client.secrets.events.emit_async.call_args_list[0] + soledad.client.secrets.events.emit_async.call_args_list.reverse() + + user_data = {'userid': ADDRESS, 'uuid': ADDRESS} + + # downloading keys signals + soledad.client.secrets.events.emit_async.assert_called_with( + catalog.SOLEDAD_DOWNLOADING_KEYS, user_data + ) + self._pop_mock_call(soledad.client.secrets.events.emit_async) + soledad.client.secrets.events.emit_async.assert_called_with( + catalog.SOLEDAD_DONE_DOWNLOADING_KEYS, user_data + ) + # creating keys signals + self._pop_mock_call(soledad.client.secrets.events.emit_async) + soledad.client.secrets.events.emit_async.assert_called_with( + catalog.SOLEDAD_CREATING_KEYS, user_data + ) + self._pop_mock_call(soledad.client.secrets.events.emit_async) + soledad.client.secrets.events.emit_async.assert_called_with( + catalog.SOLEDAD_DONE_CREATING_KEYS, user_data + ) + # downloading once more (inside _put_keys_in_shared_db) + self._pop_mock_call(soledad.client.secrets.events.emit_async) + soledad.client.secrets.events.emit_async.assert_called_with( + catalog.SOLEDAD_DOWNLOADING_KEYS, user_data + ) + self._pop_mock_call(soledad.client.secrets.events.emit_async) + soledad.client.secrets.events.emit_async.assert_called_with( + catalog.SOLEDAD_DONE_DOWNLOADING_KEYS, user_data + ) + # uploading keys signals + self._pop_mock_call(soledad.client.secrets.events.emit_async) + soledad.client.secrets.events.emit_async.assert_called_with( + catalog.SOLEDAD_UPLOADING_KEYS, user_data + ) + self._pop_mock_call(soledad.client.secrets.events.emit_async) + soledad.client.secrets.events.emit_async.assert_called_with( + catalog.SOLEDAD_DONE_UPLOADING_KEYS, user_data + ) + sol.close() + + def test_stage2_bootstrap_signals(self): + """ + Test that if there are keys in server, soledad will download them and + emit corresponding signals. 
+        """
+        # get existing instance so we have access to keys
+        sol = self._soledad_instance()
+        # create a document with secrets
+        doc = SoledadDocument(doc_id=sol.secrets._shared_db_doc_id())
+        doc.content = sol.secrets._export_recovery_document()
+        sol.close()
+        # reset mock
+        soledad.client.secrets.events.emit_async.reset_mock()
+        # get a fresh instance so it emits all bootstrap signals
+        shared_db = self.get_default_shared_mock(get_doc_return_value=doc)
+        sol = self._soledad_instance(
+            secrets_path='alternative_stage2.json',
+            local_db_path='alternative_stage2.u1db',
+            shared_db_class=shared_db)
+        # reverse call order so we can verify in the order the signals were
+        # expected
+        soledad.client.secrets.events.emit_async.mock_calls.reverse()
+        soledad.client.secrets.events.emit_async.call_args = \
+            soledad.client.secrets.events.emit_async.call_args_list[0]
+        soledad.client.secrets.events.emit_async.call_args_list.reverse()
+        # assert download keys signals
+        soledad.client.secrets.events.emit_async.assert_called_with(
+            catalog.SOLEDAD_DOWNLOADING_KEYS,
+            {'userid': ADDRESS, 'uuid': ADDRESS}
+        )
+        self._pop_mock_call(soledad.client.secrets.events.emit_async)
+        soledad.client.secrets.events.emit_async.assert_called_with(
+            catalog.SOLEDAD_DONE_DOWNLOADING_KEYS,
+            {'userid': ADDRESS, 'uuid': ADDRESS},
+        )
+        sol.close()
+
+    def test_stage1_bootstrap_signals(self):
+        """
+        Test that if soledad already has a local secret, it emits no signals.
+        """
+        soledad.client.signal.reset_mock()
+        # get an existent instance so it emits only some of bootstrap signals
+        sol = self._soledad_instance()
+        self.assertEqual([], soledad.client.signal.mock_calls)
+        sol.close()
+
+    @defer.inlineCallbacks
+    def test_sync_signals(self):
+        """
+        Test Soledad emits SOLEDAD_DONE_DATA_SYNC signal.
+ """ + # get a fresh instance so it emits all bootstrap signals + sol = self._soledad_instance() + soledad.client.signal.reset_mock() + + # mock the actual db sync so soledad does not try to connect to the + # server + d = defer.Deferred() + d.callback(None) + sol._dbsyncer.sync = Mock(return_value=d) + + yield sol.sync() + + # assert the signal has been emitted + soledad.client.events.emit_async.assert_called_with( + catalog.SOLEDAD_DONE_DATA_SYNC, + {'userid': ADDRESS, 'uuid': ADDRESS}, + ) + sol.close() diff --git a/common/src/leap/soledad/common/tests/test_soledad_doc.py b/testing/tests/client/test_soledad_doc.py index df9fd09e..e158d768 100644 --- a/common/src/leap/soledad/common/tests/test_soledad_doc.py +++ b/testing/tests/client/test_soledad_doc.py @@ -19,9 +19,9 @@ Test Leap backend bits: soledad docs """ from testscenarios import TestWithScenarios -from leap.soledad.common.tests.u1db_tests import test_document -from leap.soledad.common.tests.util import BaseSoledadTest -from leap.soledad.common.tests.util import make_soledad_document_for_test +from test_soledad.u1db_tests import test_document +from test_soledad.util import BaseSoledadTest +from test_soledad.util import make_soledad_document_for_test # ----------------------------------------------------------------------------- diff --git a/testing/tests/couch/__init__.py b/testing/tests/couch/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/testing/tests/couch/__init__.py diff --git a/testing/tests/couch/common.py b/testing/tests/couch/common.py new file mode 100644 index 00000000..b08e1fa3 --- /dev/null +++ b/testing/tests/couch/common.py @@ -0,0 +1,81 @@ +from uuid import uuid4 +from urlparse import urljoin +from couchdb.client import Server + +from leap.soledad.common import couch +from leap.soledad.common.document import ServerDocument + +from test_soledad import u1db_tests as tests + + +simple_doc = tests.simple_doc +nested_doc = tests.nested_doc + + +def make_couch_database_for_test(test, replica_uid): + port = str(test.couch_port) + dbname = ('test-%s' % uuid4().hex) + db = couch.CouchDatabase.open_database( + urljoin('http://localhost:' + port, dbname), + create=True, + replica_uid=replica_uid or 'test', + ensure_ddocs=True) + test.addCleanup(test.delete_db, dbname) + return db + + +def copy_couch_database_for_test(test, db): + port = str(test.couch_port) + couch_url = 'http://localhost:' + port + new_dbname = db._dbname + '_copy' + new_db = couch.CouchDatabase.open_database( + urljoin(couch_url, new_dbname), + create=True, + replica_uid=db._replica_uid or 'test') + # copy all docs + session = couch.Session() + old_couch_db = Server(couch_url, session=session)[db._dbname] + new_couch_db = Server(couch_url, session=session)[new_dbname] + for doc_id in old_couch_db: + doc = old_couch_db.get(doc_id) + # bypass u1db_config document + if doc_id == 'u1db_config': + pass + # copy design docs + elif doc_id.startswith('_design'): + del doc['_rev'] + new_couch_db.save(doc) + # copy u1db docs + elif 'u1db_rev' in doc: + new_doc = { + '_id': doc['_id'], + 'u1db_transactions': doc['u1db_transactions'], + 'u1db_rev': doc['u1db_rev'] + } + attachments = [] + if ('u1db_conflicts' in doc): + new_doc['u1db_conflicts'] = doc['u1db_conflicts'] + for c_rev in doc['u1db_conflicts']: + attachments.append('u1db_conflict_%s' % c_rev) + new_couch_db.save(new_doc) + # save conflict data + attachments.append('u1db_content') + for att_name in attachments: + att = old_couch_db.get_attachment(doc_id, att_name) + if (att 
is not None): + new_couch_db.put_attachment(new_doc, att, + filename=att_name) + # cleanup connections to prevent file descriptor leaking + return new_db + + +def make_document_for_test(test, doc_id, rev, content, has_conflicts=False): + return ServerDocument( + doc_id, rev, content, has_conflicts=has_conflicts) + + +COUCH_SCENARIOS = [ + ('couch', {'make_database_for_test': make_couch_database_for_test, + 'copy_database_for_test': copy_couch_database_for_test, + 'make_document_for_test': make_document_for_test, }), +] diff --git a/common/src/leap/soledad/common/tests/couchdb.ini.template b/testing/tests/couch/couchdb.ini.template index 174d9d86..174d9d86 100644 --- a/common/src/leap/soledad/common/tests/couchdb.ini.template +++ b/testing/tests/couch/couchdb.ini.template diff --git a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py b/testing/tests/couch/test_atomicity.py index 8cd3ae08..aec9c6cf 100644 --- a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py +++ b/testing/tests/couch/test_atomicity.py @@ -29,13 +29,13 @@ from leap.soledad.client import Soledad from leap.soledad.common.couch.state import CouchServerState from leap.soledad.common.couch import CouchDatabase -from leap.soledad.common.tests.util import ( +from test_soledad.util import ( make_token_soledad_app, make_soledad_document_for_test, soledad_sync_target, ) -from leap.soledad.common.tests.test_couch import CouchDBTestCase -from leap.soledad.common.tests.u1db_tests import TestCaseWithServer +from test_soledad.util import CouchDBTestCase +from test_soledad.u1db_tests import TestCaseWithServer REPEAT_TIMES = 20 diff --git a/testing/tests/couch/test_backend.py b/testing/tests/couch/test_backend.py new file mode 100644 index 00000000..f178e8a5 --- /dev/null +++ b/testing/tests/couch/test_backend.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +# test_couch.py +# Copyright (C) 2013-2016 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Test ObjectStore and Couch backend bits. +""" + +from uuid import uuid4 +from urlparse import urljoin +from testscenarios import TestWithScenarios +from twisted.trial import unittest + +from leap.soledad.common import couch + +from test_soledad.util import CouchDBTestCase +from test_soledad.u1db_tests import test_backends + +from common import COUCH_SCENARIOS + + +# ----------------------------------------------------------------------------- +# The following tests come from `u1db.tests.test_common_backend`. 
+# ----------------------------------------------------------------------------- + +class TestCouchBackendImpl(CouchDBTestCase): + + def test__allocate_doc_id(self): + db = couch.CouchDatabase.open_database( + urljoin( + 'http://localhost:' + str(self.couch_port), + ('test-%s' % uuid4().hex) + ), + create=True, + ensure_ddocs=True) + doc_id1 = db._allocate_doc_id() + self.assertTrue(doc_id1.startswith('D-')) + self.assertEqual(34, len(doc_id1)) + int(doc_id1[len('D-'):], 16) + self.assertNotEqual(doc_id1, db._allocate_doc_id()) + self.delete_db(db._dbname) + + +# ----------------------------------------------------------------------------- +# The following tests come from `u1db.tests.test_backends`. +# ----------------------------------------------------------------------------- + +class CouchTests( + TestWithScenarios, test_backends.AllDatabaseTests, CouchDBTestCase): + + scenarios = COUCH_SCENARIOS + + +class CouchBackendTests( + TestWithScenarios, + test_backends.LocalDatabaseTests, + CouchDBTestCase): + + scenarios = COUCH_SCENARIOS + + +class CouchValidateGenNTransIdTests( + TestWithScenarios, + test_backends.LocalDatabaseValidateGenNTransIdTests, + CouchDBTestCase): + + scenarios = COUCH_SCENARIOS + + +class CouchValidateSourceGenTests( + TestWithScenarios, + test_backends.LocalDatabaseValidateSourceGenTests, + CouchDBTestCase): + + scenarios = COUCH_SCENARIOS + + +class CouchWithConflictsTests( + TestWithScenarios, + test_backends.LocalDatabaseWithConflictsTests, + CouchDBTestCase): + + scenarios = COUCH_SCENARIOS + + +# Notice: the CouchDB backend does not have indexing capabilities, so we do +# not test indexing now. + +# class CouchIndexTests(test_backends.DatabaseIndexTests, CouchDBTestCase): +# +# scenarios = COUCH_SCENARIOS +# +# def tearDown(self): +# self.db.delete_database() +# test_backends.DatabaseIndexTests.tearDown(self) + + +class DatabaseNameValidationTest(unittest.TestCase): + + def test_database_name_validation(self): + inject = couch.state.is_db_name_valid("user-deadbeef | cat /secret") + self.assertFalse(inject) + self.assertTrue(couch.state.is_db_name_valid("user-cafe1337")) diff --git a/testing/tests/couch/test_command.py b/testing/tests/couch/test_command.py new file mode 100644 index 00000000..f61e118d --- /dev/null +++ b/testing/tests/couch/test_command.py @@ -0,0 +1,28 @@ +from twisted.trial import unittest + +from leap.soledad.common import couch +from leap.soledad.common.l2db import errors as u1db_errors + +from mock import Mock + + +class CommandBasedDBCreationTest(unittest.TestCase): + + def test_ensure_db_using_custom_command(self): + state = couch.state.CouchServerState("url", create_cmd="echo") + mock_db = Mock() + mock_db.replica_uid = 'replica_uid' + state.open_database = Mock(return_value=mock_db) + db, replica_uid = state.ensure_database("user-1337") # works + self.assertEquals(mock_db, db) + self.assertEquals(mock_db.replica_uid, replica_uid) + + def test_raises_unauthorized_on_failure(self): + state = couch.state.CouchServerState("url", create_cmd="inexistent") + self.assertRaises(u1db_errors.Unauthorized, + state.ensure_database, "user-1337") + + def test_raises_unauthorized_by_default(self): + state = couch.state.CouchServerState("url") + self.assertRaises(u1db_errors.Unauthorized, + state.ensure_database, "user-1337") diff --git a/testing/tests/couch/test_ddocs.py b/testing/tests/couch/test_ddocs.py new file mode 100644 index 00000000..9ff32633 --- /dev/null +++ b/testing/tests/couch/test_ddocs.py @@ -0,0 +1,209 @@ +from uuid import uuid4 + 
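# Illustrative aside (editorial sketch, not part of the patch above):
# test__allocate_doc_id in test_backend.py asserts the shape of couch doc
# ids -- a 'D-' prefix followed by 32 hex characters, 34 characters total.
# The generator below merely satisfies those assertions; it is not the
# backend's actual _allocate_doc_id implementation.

import binascii
import os


def allocate_doc_id():
    # 16 random bytes hexlify to exactly 32 hex characters
    return 'D-' + binascii.hexlify(os.urandom(16)).decode('ascii')

doc_id = allocate_doc_id()
assert doc_id.startswith('D-')
assert len(doc_id) == 34
int(doc_id[len('D-'):], 16)  # parses as hexadecimal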
+from leap.soledad.common.couch import errors +from leap.soledad.common import couch + +from test_soledad.util import CouchDBTestCase + + +class CouchDesignDocsTests(CouchDBTestCase): + + def setUp(self): + CouchDBTestCase.setUp(self) + + def create_db(self, ensure=True, dbname=None): + if not dbname: + dbname = ('test-%s' % uuid4().hex) + if dbname not in self.couch_server: + self.couch_server.create(dbname) + self.db = couch.CouchDatabase( + ('http://127.0.0.1:%d' % self.couch_port), + dbname, + ensure_ddocs=ensure) + + def tearDown(self): + self.db.delete_database() + self.db.close() + CouchDBTestCase.tearDown(self) + + def test_missing_design_doc_raises(self): + """ + Test that all methods that access design documents will raise if the + design docs are not present. + """ + self.create_db(ensure=False) + # get_generation_info() + self.assertRaises( + errors.MissingDesignDocError, + self.db.get_generation_info) + # get_trans_id_for_gen() + self.assertRaises( + errors.MissingDesignDocError, + self.db.get_trans_id_for_gen, 1) + # get_transaction_log() + self.assertRaises( + errors.MissingDesignDocError, + self.db.get_transaction_log) + # whats_changed() + self.assertRaises( + errors.MissingDesignDocError, + self.db.whats_changed) + + def test_missing_design_doc_functions_raises(self): + """ + Test that all methods that access design documents list functions + will raise if the functions are not present. + """ + self.create_db(ensure=True) + # erase views from _design/transactions + transactions = self.db._database['_design/transactions'] + transactions['lists'] = {} + self.db._database.save(transactions) + # get_generation_info() + self.assertRaises( + errors.MissingDesignDocListFunctionError, + self.db.get_generation_info) + # get_trans_id_for_gen() + self.assertRaises( + errors.MissingDesignDocListFunctionError, + self.db.get_trans_id_for_gen, 1) + # whats_changed() + self.assertRaises( + errors.MissingDesignDocListFunctionError, + self.db.whats_changed) + + def test_absent_design_doc_functions_raises(self): + """ + Test that all methods that access design documents list functions + will raise if the functions are not present. + """ + self.create_db(ensure=True) + # erase views from _design/transactions + transactions = self.db._database['_design/transactions'] + del transactions['lists'] + self.db._database.save(transactions) + # get_generation_info() + self.assertRaises( + errors.MissingDesignDocListFunctionError, + self.db.get_generation_info) + # _get_trans_id_for_gen() + self.assertRaises( + errors.MissingDesignDocListFunctionError, + self.db.get_trans_id_for_gen, 1) + # whats_changed() + self.assertRaises( + errors.MissingDesignDocListFunctionError, + self.db.whats_changed) + + def test_missing_design_doc_named_views_raises(self): + """ + Test that all methods that access design documents' named views will + raise if the views are not present. 
+        """
+        self.create_db(ensure=True)
+        # erase views from _design/docs
+        docs = self.db._database['_design/docs']
+        del docs['views']
+        self.db._database.save(docs)
+        # erase views from _design/syncs
+        syncs = self.db._database['_design/syncs']
+        del syncs['views']
+        self.db._database.save(syncs)
+        # erase views from _design/transactions
+        transactions = self.db._database['_design/transactions']
+        del transactions['views']
+        self.db._database.save(transactions)
+        # get_generation_info()
+        self.assertRaises(
+            errors.MissingDesignDocNamedViewError,
+            self.db.get_generation_info)
+        # _get_trans_id_for_gen()
+        self.assertRaises(
+            errors.MissingDesignDocNamedViewError,
+            self.db.get_trans_id_for_gen, 1)
+        # _get_transaction_log()
+        self.assertRaises(
+            errors.MissingDesignDocNamedViewError,
+            self.db.get_transaction_log)
+        # whats_changed()
+        self.assertRaises(
+            errors.MissingDesignDocNamedViewError,
+            self.db.whats_changed)
+
+    def test_deleted_design_doc_raises(self):
+        """
+        Test that all methods that access design documents will raise if the
+        design docs are not present.
+        """
+        self.create_db(ensure=True)
+        # delete _design/docs
+        del self.db._database['_design/docs']
+        # delete _design/syncs
+        del self.db._database['_design/syncs']
+        # delete _design/transactions
+        del self.db._database['_design/transactions']
+        # get_generation_info()
+        self.assertRaises(
+            errors.MissingDesignDocDeletedError,
+            self.db.get_generation_info)
+        # get_trans_id_for_gen()
+        self.assertRaises(
+            errors.MissingDesignDocDeletedError,
+            self.db.get_trans_id_for_gen, 1)
+        # get_transaction_log()
+        self.assertRaises(
+            errors.MissingDesignDocDeletedError,
+            self.db.get_transaction_log)
+        # whats_changed()
+        self.assertRaises(
+            errors.MissingDesignDocDeletedError,
+            self.db.whats_changed)
+
+    def test_ensure_ddoc_independently(self):
+        """
+        Test that missing ddocs other than _design/docs will be ensured
+        even if _design/docs is there.
+        """
+        self.create_db(ensure=True)
+        del self.db._database['_design/transactions']
+        self.assertRaises(
+            errors.MissingDesignDocDeletedError,
+            self.db.get_transaction_log)
+        self.create_db(ensure=True, dbname=self.db._dbname)
+        self.db.get_transaction_log()
+
+    def test_ensure_security_doc(self):
+        """
+        ensure_security_ddoc creates a _security ddoc to ensure that only
+        soledad will have the lowest privileged access to a user db.
+ """ + self.create_db(ensure=False) + self.assertFalse(self.db._database.resource.get_json('_security')[2]) + self.db.ensure_security_ddoc() + security_ddoc = self.db._database.resource.get_json('_security')[2] + self.assertIn('admins', security_ddoc) + self.assertFalse(security_ddoc['admins']['names']) + self.assertIn('members', security_ddoc) + self.assertIn('soledad', security_ddoc['members']['names']) + + def test_ensure_security_from_configuration(self): + """ + Given a configuration, follow it to create the security document + """ + self.create_db(ensure=False) + configuration = {'members': ['user1', 'user2'], + 'members_roles': ['role1', 'role2'], + 'admins': ['admin'], + 'admins_roles': ['administrators'] + } + self.db.ensure_security_ddoc(configuration) + + security_ddoc = self.db._database.resource.get_json('_security')[2] + self.assertEquals(configuration['admins'], + security_ddoc['admins']['names']) + self.assertEquals(configuration['admins_roles'], + security_ddoc['admins']['roles']) + self.assertEquals(configuration['members'], + security_ddoc['members']['names']) + self.assertEquals(configuration['members_roles'], + security_ddoc['members']['roles']) diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/testing/tests/couch/test_sync.py index 7ba50e11..bccbfe43 100644 --- a/common/src/leap/soledad/common/tests/test_couch.py +++ b/testing/tests/couch/test_sync.py @@ -1,533 +1,17 @@ -# -*- coding: utf-8 -*- -# test_couch.py -# Copyright (C) 2013 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - - -""" -Test ObjectStore and Couch backend bits. -""" - - -import json - -from urlparse import urljoin -from couchdb.client import Server -from uuid import uuid4 +from leap.soledad.common.l2db import vectorclock +from leap.soledad.common.l2db import errors as u1db_errors from testscenarios import TestWithScenarios -from twisted.trial import unittest -from mock import Mock - -from u1db import errors as u1db_errors -from u1db import SyncTarget -from u1db import vectorclock - -from leap.soledad.common import couch -from leap.soledad.common.document import ServerDocument -from leap.soledad.common.couch import errors - -from leap.soledad.common.tests import u1db_tests as tests -from leap.soledad.common.tests.util import CouchDBTestCase -from leap.soledad.common.tests.util import make_local_db_and_target -from leap.soledad.common.tests.util import sync_via_synchronizer - -from leap.soledad.common.tests.u1db_tests import test_backends -from leap.soledad.common.tests.u1db_tests import DatabaseBaseTests - - -# ----------------------------------------------------------------------------- -# The following tests come from `u1db.tests.test_common_backend`. 
-# ----------------------------------------------------------------------------- - -class TestCouchBackendImpl(CouchDBTestCase): - - def test__allocate_doc_id(self): - db = couch.CouchDatabase.open_database( - urljoin( - 'http://localhost:' + str(self.couch_port), - ('test-%s' % uuid4().hex) - ), - create=True, - ensure_ddocs=True) - doc_id1 = db._allocate_doc_id() - self.assertTrue(doc_id1.startswith('D-')) - self.assertEqual(34, len(doc_id1)) - int(doc_id1[len('D-'):], 16) - self.assertNotEqual(doc_id1, db._allocate_doc_id()) - self.delete_db(db._dbname) - - -# ----------------------------------------------------------------------------- -# The following tests come from `u1db.tests.test_backends`. -# ----------------------------------------------------------------------------- - -def make_couch_database_for_test(test, replica_uid): - port = str(test.couch_port) - dbname = ('test-%s' % uuid4().hex) - db = couch.CouchDatabase.open_database( - urljoin('http://localhost:' + port, dbname), - create=True, - replica_uid=replica_uid or 'test', - ensure_ddocs=True) - test.addCleanup(test.delete_db, dbname) - return db - -def copy_couch_database_for_test(test, db): - port = str(test.couch_port) - couch_url = 'http://localhost:' + port - new_dbname = db._dbname + '_copy' - new_db = couch.CouchDatabase.open_database( - urljoin(couch_url, new_dbname), - create=True, - replica_uid=db._replica_uid or 'test') - # copy all docs - session = couch.Session() - old_couch_db = Server(couch_url, session=session)[db._dbname] - new_couch_db = Server(couch_url, session=session)[new_dbname] - for doc_id in old_couch_db: - doc = old_couch_db.get(doc_id) - # bypass u1db_config document - if doc_id == 'u1db_config': - pass - # copy design docs - elif doc_id.startswith('_design'): - del doc['_rev'] - new_couch_db.save(doc) - # copy u1db docs - elif 'u1db_rev' in doc: - new_doc = { - '_id': doc['_id'], - 'u1db_transactions': doc['u1db_transactions'], - 'u1db_rev': doc['u1db_rev'] - } - attachments = [] - if ('u1db_conflicts' in doc): - new_doc['u1db_conflicts'] = doc['u1db_conflicts'] - for c_rev in doc['u1db_conflicts']: - attachments.append('u1db_conflict_%s' % c_rev) - new_couch_db.save(new_doc) - # save conflict data - attachments.append('u1db_content') - for att_name in attachments: - att = old_couch_db.get_attachment(doc_id, att_name) - if (att is not None): - new_couch_db.put_attachment(new_doc, att, - filename=att_name) - # cleanup connections to prevent file descriptor leaking - return new_db - - -def make_document_for_test(test, doc_id, rev, content, has_conflicts=False): - return ServerDocument( - doc_id, rev, content, has_conflicts=has_conflicts) - - -COUCH_SCENARIOS = [ - ('couch', {'make_database_for_test': make_couch_database_for_test, - 'copy_database_for_test': copy_couch_database_for_test, - 'make_document_for_test': make_document_for_test, }), -] - - -class CouchTests( - TestWithScenarios, test_backends.AllDatabaseTests, CouchDBTestCase): - - scenarios = COUCH_SCENARIOS - - -class SoledadBackendTests( - TestWithScenarios, - test_backends.LocalDatabaseTests, - CouchDBTestCase): - - scenarios = COUCH_SCENARIOS - - -class CouchValidateGenNTransIdTests( - TestWithScenarios, - test_backends.LocalDatabaseValidateGenNTransIdTests, - CouchDBTestCase): - - scenarios = COUCH_SCENARIOS - - -class CouchValidateSourceGenTests( - TestWithScenarios, - test_backends.LocalDatabaseValidateSourceGenTests, - CouchDBTestCase): +from test_soledad import u1db_tests as tests +from test_soledad.util import CouchDBTestCase 
+from test_soledad.util import sync_via_synchronizer +from test_soledad.u1db_tests import DatabaseBaseTests - scenarios = COUCH_SCENARIOS +from common import simple_doc +from common import COUCH_SCENARIOS -class CouchWithConflictsTests( - TestWithScenarios, - test_backends.LocalDatabaseWithConflictsTests, - CouchDBTestCase): - - scenarios = COUCH_SCENARIOS - - -# Notice: the CouchDB backend does not have indexing capabilities, so we do -# not test indexing now. - -# class CouchIndexTests(test_backends.DatabaseIndexTests, CouchDBTestCase): -# -# scenarios = COUCH_SCENARIOS -# -# def tearDown(self): -# self.db.delete_database() -# test_backends.DatabaseIndexTests.tearDown(self) - - -# ----------------------------------------------------------------------------- -# The following tests come from `u1db.tests.test_sync`. -# ----------------------------------------------------------------------------- - -target_scenarios = [ - ('local', {'create_db_and_target': make_local_db_and_target}), ] - - -simple_doc = tests.simple_doc -nested_doc = tests.nested_doc - - -class SoledadBackendSyncTargetTests( - TestWithScenarios, - DatabaseBaseTests, - CouchDBTestCase): - - # TODO: implement _set_trace_hook(_shallow) in CouchSyncTarget so - # skipped tests can be succesfully executed. - - # whitebox true means self.db is the actual local db object - # against which the sync is performed - whitebox = True - - scenarios = (tests.multiply_scenarios(COUCH_SCENARIOS, target_scenarios)) - - def set_trace_hook(self, callback, shallow=False): - setter = (self.st._set_trace_hook if not shallow else - self.st._set_trace_hook_shallow) - try: - setter(callback) - except NotImplementedError: - self.skipTest("%s does not implement _set_trace_hook" - % (self.st.__class__.__name__,)) - - def setUp(self): - CouchDBTestCase.setUp(self) - # other stuff - self.db, self.st = self.create_db_and_target(self) - self.other_changes = [] - - def tearDown(self): - self.db.close() - CouchDBTestCase.tearDown(self) - - def receive_doc(self, doc, gen, trans_id): - self.other_changes.append( - (doc.doc_id, doc.rev, doc.get_json(), gen, trans_id)) - - def test_sync_exchange_returns_many_new_docs(self): - # This test was replicated to allow dictionaries to be compared after - # JSON expansion (because one dictionary may have many different - # serialized representations). 
- doc = self.db.create_doc_from_json(simple_doc) - doc2 = self.db.create_doc_from_json(nested_doc) - self.assertTransactionLog([doc.doc_id, doc2.doc_id], self.db) - new_gen, _ = self.st.sync_exchange( - [], 'other-replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertTransactionLog([doc.doc_id, doc2.doc_id], self.db) - self.assertEqual(2, new_gen) - self.assertEqual( - [(doc.doc_id, doc.rev, json.loads(simple_doc), 1), - (doc2.doc_id, doc2.rev, json.loads(nested_doc), 2)], - [c[:-3] + (json.loads(c[-3]), c[-2]) for c in self.other_changes]) - if self.whitebox: - self.assertEqual( - self.db._last_exchange_log['return'], - {'last_gen': 2, 'docs': - [(doc.doc_id, doc.rev), (doc2.doc_id, doc2.rev)]}) - - def test_get_sync_target(self): - self.assertIsNot(None, self.st) - - def test_get_sync_info(self): - self.assertEqual( - ('test', 0, '', 0, ''), self.st.get_sync_info('other')) - - def test_create_doc_updates_sync_info(self): - self.assertEqual( - ('test', 0, '', 0, ''), self.st.get_sync_info('other')) - self.db.create_doc_from_json(simple_doc) - self.assertEqual(1, self.st.get_sync_info('other')[1]) - - def test_record_sync_info(self): - self.st.record_sync_info('replica', 10, 'T-transid') - self.assertEqual( - ('test', 0, '', 10, 'T-transid'), self.st.get_sync_info('replica')) - - def test_sync_exchange(self): - docs_by_gen = [ - (self.make_document('doc-id', 'replica:1', simple_doc), 10, - 'T-sid')] - new_gen, trans_id = self.st.sync_exchange( - docs_by_gen, 'replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertGetDoc(self.db, 'doc-id', 'replica:1', simple_doc, False) - self.assertTransactionLog(['doc-id'], self.db) - last_trans_id = self.getLastTransId(self.db) - self.assertEqual(([], 1, last_trans_id), - (self.other_changes, new_gen, last_trans_id)) - self.assertEqual(10, self.st.get_sync_info('replica')[3]) - - def test_sync_exchange_deleted(self): - doc = self.db.create_doc_from_json('{}') - edit_rev = 'replica:1|' + doc.rev - docs_by_gen = [ - (self.make_document(doc.doc_id, edit_rev, None), 10, 'T-sid')] - new_gen, trans_id = self.st.sync_exchange( - docs_by_gen, 'replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertGetDocIncludeDeleted( - self.db, doc.doc_id, edit_rev, None, False) - self.assertTransactionLog([doc.doc_id, doc.doc_id], self.db) - last_trans_id = self.getLastTransId(self.db) - self.assertEqual(([], 2, last_trans_id), - (self.other_changes, new_gen, trans_id)) - self.assertEqual(10, self.st.get_sync_info('replica')[3]) - - def test_sync_exchange_push_many(self): - docs_by_gen = [ - (self.make_document('doc-id', 'replica:1', simple_doc), 10, 'T-1'), - (self.make_document('doc-id2', 'replica:1', nested_doc), 11, - 'T-2')] - new_gen, trans_id = self.st.sync_exchange( - docs_by_gen, 'replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertGetDoc(self.db, 'doc-id', 'replica:1', simple_doc, False) - self.assertGetDoc(self.db, 'doc-id2', 'replica:1', nested_doc, False) - self.assertTransactionLog(['doc-id', 'doc-id2'], self.db) - last_trans_id = self.getLastTransId(self.db) - self.assertEqual(([], 2, last_trans_id), - (self.other_changes, new_gen, trans_id)) - self.assertEqual(11, self.st.get_sync_info('replica')[3]) - - def test_sync_exchange_refuses_conflicts(self): - doc = self.db.create_doc_from_json(simple_doc) - self.assertTransactionLog([doc.doc_id], 
self.db) - new_doc = '{"key": "altval"}' - docs_by_gen = [ - (self.make_document(doc.doc_id, 'replica:1', new_doc), 10, - 'T-sid')] - new_gen, _ = self.st.sync_exchange( - docs_by_gen, 'replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertTransactionLog([doc.doc_id], self.db) - self.assertEqual( - (doc.doc_id, doc.rev, simple_doc, 1), self.other_changes[0][:-1]) - self.assertEqual(1, new_gen) - if self.whitebox: - self.assertEqual(self.db._last_exchange_log['return'], - {'last_gen': 1, 'docs': [(doc.doc_id, doc.rev)]}) - - def test_sync_exchange_ignores_convergence(self): - doc = self.db.create_doc_from_json(simple_doc) - self.assertTransactionLog([doc.doc_id], self.db) - gen, txid = self.db._get_generation_info() - docs_by_gen = [ - (self.make_document(doc.doc_id, doc.rev, simple_doc), 10, 'T-sid')] - new_gen, _ = self.st.sync_exchange( - docs_by_gen, 'replica', last_known_generation=gen, - last_known_trans_id=txid, return_doc_cb=self.receive_doc) - self.assertTransactionLog([doc.doc_id], self.db) - self.assertEqual(([], 1), (self.other_changes, new_gen)) - - def test_sync_exchange_returns_new_docs(self): - doc = self.db.create_doc_from_json(simple_doc) - self.assertTransactionLog([doc.doc_id], self.db) - new_gen, _ = self.st.sync_exchange( - [], 'other-replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertTransactionLog([doc.doc_id], self.db) - self.assertEqual( - (doc.doc_id, doc.rev, simple_doc, 1), self.other_changes[0][:-1]) - self.assertEqual(1, new_gen) - if self.whitebox: - self.assertEqual(self.db._last_exchange_log['return'], - {'last_gen': 1, 'docs': [(doc.doc_id, doc.rev)]}) - - def test_sync_exchange_returns_deleted_docs(self): - doc = self.db.create_doc_from_json(simple_doc) - self.db.delete_doc(doc) - self.assertTransactionLog([doc.doc_id, doc.doc_id], self.db) - new_gen, _ = self.st.sync_exchange( - [], 'other-replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertTransactionLog([doc.doc_id, doc.doc_id], self.db) - self.assertEqual( - (doc.doc_id, doc.rev, None, 2), self.other_changes[0][:-1]) - self.assertEqual(2, new_gen) - if self.whitebox: - self.assertEqual(self.db._last_exchange_log['return'], - {'last_gen': 2, 'docs': [(doc.doc_id, doc.rev)]}) - - def test_sync_exchange_getting_newer_docs(self): - doc = self.db.create_doc_from_json(simple_doc) - self.assertTransactionLog([doc.doc_id], self.db) - new_doc = '{"key": "altval"}' - docs_by_gen = [ - (self.make_document(doc.doc_id, 'test:1|z:2', new_doc), 10, - 'T-sid')] - new_gen, _ = self.st.sync_exchange( - docs_by_gen, 'other-replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertTransactionLog([doc.doc_id, doc.doc_id], self.db) - self.assertEqual(([], 2), (self.other_changes, new_gen)) - - def test_sync_exchange_with_concurrent_updates_of_synced_doc(self): - expected = [] - - def before_whatschanged_cb(state): - if state != 'before whats_changed': - return - cont = '{"key": "cuncurrent"}' - conc_rev = self.db.put_doc( - self.make_document(doc.doc_id, 'test:1|z:2', cont)) - expected.append((doc.doc_id, conc_rev, cont, 3)) - - self.set_trace_hook(before_whatschanged_cb) - doc = self.db.create_doc_from_json(simple_doc) - self.assertTransactionLog([doc.doc_id], self.db) - new_doc = '{"key": "altval"}' - docs_by_gen = [ - (self.make_document(doc.doc_id, 'test:1|z:2', new_doc), 10, - 'T-sid')] - new_gen, _ = 
self.st.sync_exchange( - docs_by_gen, 'other-replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertEqual(expected, [c[:-1] for c in self.other_changes]) - self.assertEqual(3, new_gen) - - def test_sync_exchange_with_concurrent_updates(self): - - def after_whatschanged_cb(state): - if state != 'after whats_changed': - return - self.db.create_doc_from_json('{"new": "doc"}') - - self.set_trace_hook(after_whatschanged_cb) - doc = self.db.create_doc_from_json(simple_doc) - self.assertTransactionLog([doc.doc_id], self.db) - new_doc = '{"key": "altval"}' - docs_by_gen = [ - (self.make_document(doc.doc_id, 'test:1|z:2', new_doc), 10, - 'T-sid')] - new_gen, _ = self.st.sync_exchange( - docs_by_gen, 'other-replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertEqual(([], 2), (self.other_changes, new_gen)) - - def test_sync_exchange_converged_handling(self): - doc = self.db.create_doc_from_json(simple_doc) - docs_by_gen = [ - (self.make_document('new', 'other:1', '{}'), 4, 'T-foo'), - (self.make_document(doc.doc_id, doc.rev, doc.get_json()), 5, - 'T-bar')] - new_gen, _ = self.st.sync_exchange( - docs_by_gen, 'other-replica', last_known_generation=0, - last_known_trans_id=None, return_doc_cb=self.receive_doc) - self.assertEqual(([], 2), (self.other_changes, new_gen)) - - def test_sync_exchange_detect_incomplete_exchange(self): - def before_get_docs_explode(state): - if state != 'before get_docs': - return - raise u1db_errors.U1DBError("fail") - self.set_trace_hook(before_get_docs_explode) - # suppress traceback printing in the wsgiref server - # self.patch(simple_server.ServerHandler, - # 'log_exception', lambda h, exc_info: None) - doc = self.db.create_doc_from_json(simple_doc) - self.assertTransactionLog([doc.doc_id], self.db) - self.assertRaises( - (u1db_errors.U1DBError, u1db_errors.BrokenSyncStream), - self.st.sync_exchange, [], 'other-replica', - last_known_generation=0, last_known_trans_id=None, - return_doc_cb=self.receive_doc) - - def test_sync_exchange_doc_ids(self): - sync_exchange_doc_ids = getattr(self.st, 'sync_exchange_doc_ids', None) - if sync_exchange_doc_ids is None: - self.skipTest("sync_exchange_doc_ids not implemented") - db2 = self.create_database('test2') - doc = db2.create_doc_from_json(simple_doc) - new_gen, trans_id = sync_exchange_doc_ids( - db2, [(doc.doc_id, 10, 'T-sid')], 0, None, - return_doc_cb=self.receive_doc) - self.assertGetDoc(self.db, doc.doc_id, doc.rev, simple_doc, False) - self.assertTransactionLog([doc.doc_id], self.db) - last_trans_id = self.getLastTransId(self.db) - self.assertEqual(([], 1, last_trans_id), - (self.other_changes, new_gen, trans_id)) - self.assertEqual(10, self.st.get_sync_info(db2._replica_uid)[3]) - - def test__set_trace_hook(self): - called = [] - - def cb(state): - called.append(state) - - self.set_trace_hook(cb) - self.st.sync_exchange([], 'replica', 0, None, self.receive_doc) - self.st.record_sync_info('replica', 0, 'T-sid') - self.assertEqual(['before whats_changed', - 'after whats_changed', - 'before get_docs', - 'record_sync_info', - ], - called) - - def test__set_trace_hook_shallow(self): - st_trace_shallow = self.st._set_trace_hook_shallow - target_st_trace_shallow = SyncTarget._set_trace_hook_shallow - same_meth = st_trace_shallow == self.st._set_trace_hook - same_fun = st_trace_shallow.im_func == target_st_trace_shallow.im_func - if (same_meth or same_fun): - # shallow same as full - expected = ['before whats_changed', - 
'after whats_changed', - 'before get_docs', - 'record_sync_info', - ] - else: - expected = ['sync_exchange', 'record_sync_info'] - - called = [] - - def cb(state): - called.append(state) - - self.set_trace_hook(cb, shallow=True) - self.st.sync_exchange([], 'replica', 0, None, self.receive_doc) - self.st.record_sync_info('replica', 0, 'T-sid') - self.assertEqual(expected, called) - sync_scenarios = [] for name, scenario in COUCH_SCENARIOS: scenario = dict(scenario) @@ -536,7 +20,11 @@ for name, scenario in COUCH_SCENARIOS: scenario = dict(scenario) -class SoledadBackendSyncTests( +# ----------------------------------------------------------------------------- +# The following tests come from `u1db.tests.test_sync`. +# ----------------------------------------------------------------------------- + +class CouchBackendSyncTests( TestWithScenarios, DatabaseBaseTests, CouchDBTestCase): @@ -1210,236 +698,3 @@ class SoledadBackendSyncTests( self.sync(self.db1, self.db2) self.assertEqual(cont1, self.db2.get_doc("1").get_json()) self.assertEqual(cont2, self.db1.get_doc("2").get_json()) - - -class SoledadBackendExceptionsTests(CouchDBTestCase): - - def setUp(self): - CouchDBTestCase.setUp(self) - - def create_db(self, ensure=True, dbname=None): - if not dbname: - dbname = ('test-%s' % uuid4().hex) - if dbname not in self.couch_server: - self.couch_server.create(dbname) - self.db = couch.CouchDatabase( - ('http://127.0.0.1:%d' % self.couch_port), - dbname, - ensure_ddocs=ensure) - - def tearDown(self): - self.db.delete_database() - self.db.close() - CouchDBTestCase.tearDown(self) - - def test_missing_design_doc_raises(self): - """ - Test that all methods that access design documents will raise if the - design docs are not present. - """ - self.create_db(ensure=False) - # get_generation_info() - self.assertRaises( - errors.MissingDesignDocError, - self.db.get_generation_info) - # get_trans_id_for_gen() - self.assertRaises( - errors.MissingDesignDocError, - self.db.get_trans_id_for_gen, 1) - # get_transaction_log() - self.assertRaises( - errors.MissingDesignDocError, - self.db.get_transaction_log) - # whats_changed() - self.assertRaises( - errors.MissingDesignDocError, - self.db.whats_changed) - - def test_missing_design_doc_functions_raises(self): - """ - Test that all methods that access design documents list functions - will raise if the functions are not present. - """ - self.create_db(ensure=True) - # erase views from _design/transactions - transactions = self.db._database['_design/transactions'] - transactions['lists'] = {} - self.db._database.save(transactions) - # get_generation_info() - self.assertRaises( - errors.MissingDesignDocListFunctionError, - self.db.get_generation_info) - # get_trans_id_for_gen() - self.assertRaises( - errors.MissingDesignDocListFunctionError, - self.db.get_trans_id_for_gen, 1) - # whats_changed() - self.assertRaises( - errors.MissingDesignDocListFunctionError, - self.db.whats_changed) - - def test_absent_design_doc_functions_raises(self): - """ - Test that all methods that access design documents list functions - will raise if the functions are not present. 
- """ - self.create_db(ensure=True) - # erase views from _design/transactions - transactions = self.db._database['_design/transactions'] - del transactions['lists'] - self.db._database.save(transactions) - # get_generation_info() - self.assertRaises( - errors.MissingDesignDocListFunctionError, - self.db.get_generation_info) - # _get_trans_id_for_gen() - self.assertRaises( - errors.MissingDesignDocListFunctionError, - self.db.get_trans_id_for_gen, 1) - # whats_changed() - self.assertRaises( - errors.MissingDesignDocListFunctionError, - self.db.whats_changed) - - def test_missing_design_doc_named_views_raises(self): - """ - Test that all methods that access design documents' named views will - raise if the views are not present. - """ - self.create_db(ensure=True) - # erase views from _design/docs - docs = self.db._database['_design/docs'] - del docs['views'] - self.db._database.save(docs) - # erase views from _design/syncs - syncs = self.db._database['_design/syncs'] - del syncs['views'] - self.db._database.save(syncs) - # erase views from _design/transactions - transactions = self.db._database['_design/transactions'] - del transactions['views'] - self.db._database.save(transactions) - # get_generation_info() - self.assertRaises( - errors.MissingDesignDocNamedViewError, - self.db.get_generation_info) - # _get_trans_id_for_gen() - self.assertRaises( - errors.MissingDesignDocNamedViewError, - self.db.get_trans_id_for_gen, 1) - # _get_transaction_log() - self.assertRaises( - errors.MissingDesignDocNamedViewError, - self.db.get_transaction_log) - # whats_changed() - self.assertRaises( - errors.MissingDesignDocNamedViewError, - self.db.whats_changed) - - def test_deleted_design_doc_raises(self): - """ - Test that all methods that access design documents will raise if the - design docs are not present. - """ - self.create_db(ensure=True) - # delete _design/docs - del self.db._database['_design/docs'] - # delete _design/syncs - del self.db._database['_design/syncs'] - # delete _design/transactions - del self.db._database['_design/transactions'] - # get_generation_info() - self.assertRaises( - errors.MissingDesignDocDeletedError, - self.db.get_generation_info) - # get_trans_id_for_gen() - self.assertRaises( - errors.MissingDesignDocDeletedError, - self.db.get_trans_id_for_gen, 1) - # get_transaction_log() - self.assertRaises( - errors.MissingDesignDocDeletedError, - self.db.get_transaction_log) - # whats_changed() - self.assertRaises( - errors.MissingDesignDocDeletedError, - self.db.whats_changed) - - def test_ensure_ddoc_independently(self): - """ - Test that a missing ddocs other than _design/docs will be ensured - even if _design/docs is there. - """ - self.create_db(ensure=True) - del self.db._database['_design/transactions'] - self.assertRaises( - errors.MissingDesignDocDeletedError, - self.db.get_transaction_log) - self.create_db(ensure=True, dbname=self.db._dbname) - self.db.get_transaction_log() - - def test_ensure_security_doc(self): - """ - Ensure_security creates a _security ddoc to ensure that only soledad - will have the lowest privileged access to an user db. 
- """ - self.create_db(ensure=False) - self.assertFalse(self.db._database.resource.get_json('_security')[2]) - self.db.ensure_security_ddoc() - security_ddoc = self.db._database.resource.get_json('_security')[2] - self.assertIn('admins', security_ddoc) - self.assertFalse(security_ddoc['admins']['names']) - self.assertIn('members', security_ddoc) - self.assertIn('soledad', security_ddoc['members']['names']) - - def test_ensure_security_from_configuration(self): - """ - Given a configuration, follow it to create the security document - """ - self.create_db(ensure=False) - configuration = {'members': ['user1', 'user2'], - 'members_roles': ['role1', 'role2'], - 'admins': ['admin'], - 'admins_roles': ['administrators'] - } - self.db.ensure_security_ddoc(configuration) - - security_ddoc = self.db._database.resource.get_json('_security')[2] - self.assertEquals(configuration['admins'], - security_ddoc['admins']['names']) - self.assertEquals(configuration['admins_roles'], - security_ddoc['admins']['roles']) - self.assertEquals(configuration['members'], - security_ddoc['members']['names']) - self.assertEquals(configuration['members_roles'], - security_ddoc['members']['roles']) - - -class DatabaseNameValidationTest(unittest.TestCase): - - def test_database_name_validation(self): - inject = couch.state.is_db_name_valid("user-deadbeef | cat /secret") - self.assertFalse(inject) - self.assertTrue(couch.state.is_db_name_valid("user-cafe1337")) - - -class CommandBasedDBCreationTest(unittest.TestCase): - - def test_ensure_db_using_custom_command(self): - state = couch.state.CouchServerState("url", create_cmd="echo") - mock_db = Mock() - mock_db.replica_uid = 'replica_uid' - state.open_database = Mock(return_value=mock_db) - db, replica_uid = state.ensure_database("user-1337") # works - self.assertEquals(mock_db, db) - self.assertEquals(mock_db.replica_uid, replica_uid) - - def test_raises_unauthorized_on_failure(self): - state = couch.state.CouchServerState("url", create_cmd="inexistent") - self.assertRaises(u1db_errors.Unauthorized, - state.ensure_database, "user-1337") - - def test_raises_unauthorized_by_default(self): - state = couch.state.CouchServerState("url") - self.assertRaises(u1db_errors.Unauthorized, - state.ensure_database, "user-1337") diff --git a/testing/tests/couch/test_sync_target.py b/testing/tests/couch/test_sync_target.py new file mode 100644 index 00000000..e792fb76 --- /dev/null +++ b/testing/tests/couch/test_sync_target.py @@ -0,0 +1,343 @@ +import json + +from leap.soledad.common.l2db import SyncTarget +from leap.soledad.common.l2db import errors as u1db_errors + +from testscenarios import TestWithScenarios + +from test_soledad import u1db_tests as tests +from test_soledad.util import CouchDBTestCase +from test_soledad.util import make_local_db_and_target +from test_soledad.u1db_tests import DatabaseBaseTests + +from common import simple_doc +from common import nested_doc +from common import COUCH_SCENARIOS + + +target_scenarios = [ + ('local', {'create_db_and_target': make_local_db_and_target}), ] + + +class CouchBackendSyncTargetTests( + TestWithScenarios, + DatabaseBaseTests, + CouchDBTestCase): + + # TODO: implement _set_trace_hook(_shallow) in CouchSyncTarget so + # skipped tests can be succesfully executed. 
+ + # whitebox true means self.db is the actual local db object + # against which the sync is performed + whitebox = True + + scenarios = (tests.multiply_scenarios(COUCH_SCENARIOS, target_scenarios)) + + def set_trace_hook(self, callback, shallow=False): + setter = (self.st._set_trace_hook if not shallow else + self.st._set_trace_hook_shallow) + try: + setter(callback) + except NotImplementedError: + self.skipTest("%s does not implement _set_trace_hook" + % (self.st.__class__.__name__,)) + + def setUp(self): + CouchDBTestCase.setUp(self) + # other stuff + self.db, self.st = self.create_db_and_target(self) + self.other_changes = [] + + def tearDown(self): + self.db.close() + CouchDBTestCase.tearDown(self) + + def receive_doc(self, doc, gen, trans_id): + self.other_changes.append( + (doc.doc_id, doc.rev, doc.get_json(), gen, trans_id)) + + def test_sync_exchange_returns_many_new_docs(self): + # This test was replicated to allow dictionaries to be compared after + # JSON expansion (because one dictionary may have many different + # serialized representations). + doc = self.db.create_doc_from_json(simple_doc) + doc2 = self.db.create_doc_from_json(nested_doc) + self.assertTransactionLog([doc.doc_id, doc2.doc_id], self.db) + new_gen, _ = self.st.sync_exchange( + [], 'other-replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertTransactionLog([doc.doc_id, doc2.doc_id], self.db) + self.assertEqual(2, new_gen) + self.assertEqual( + [(doc.doc_id, doc.rev, json.loads(simple_doc), 1), + (doc2.doc_id, doc2.rev, json.loads(nested_doc), 2)], + [c[:-3] + (json.loads(c[-3]), c[-2]) for c in self.other_changes]) + if self.whitebox: + self.assertEqual( + self.db._last_exchange_log['return'], + {'last_gen': 2, 'docs': + [(doc.doc_id, doc.rev), (doc2.doc_id, doc2.rev)]}) + + def test_get_sync_target(self): + self.assertIsNot(None, self.st) + + def test_get_sync_info(self): + self.assertEqual( + ('test', 0, '', 0, ''), self.st.get_sync_info('other')) + + def test_create_doc_updates_sync_info(self): + self.assertEqual( + ('test', 0, '', 0, ''), self.st.get_sync_info('other')) + self.db.create_doc_from_json(simple_doc) + self.assertEqual(1, self.st.get_sync_info('other')[1]) + + def test_record_sync_info(self): + self.st.record_sync_info('replica', 10, 'T-transid') + self.assertEqual( + ('test', 0, '', 10, 'T-transid'), self.st.get_sync_info('replica')) + + def test_sync_exchange(self): + docs_by_gen = [ + (self.make_document('doc-id', 'replica:1', simple_doc), 10, + 'T-sid')] + new_gen, trans_id = self.st.sync_exchange( + docs_by_gen, 'replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertGetDoc(self.db, 'doc-id', 'replica:1', simple_doc, False) + self.assertTransactionLog(['doc-id'], self.db) + last_trans_id = self.getLastTransId(self.db) + self.assertEqual(([], 1, last_trans_id), + (self.other_changes, new_gen, last_trans_id)) + self.assertEqual(10, self.st.get_sync_info('replica')[3]) + + def test_sync_exchange_deleted(self): + doc = self.db.create_doc_from_json('{}') + edit_rev = 'replica:1|' + doc.rev + docs_by_gen = [ + (self.make_document(doc.doc_id, edit_rev, None), 10, 'T-sid')] + new_gen, trans_id = self.st.sync_exchange( + docs_by_gen, 'replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertGetDocIncludeDeleted( + self.db, doc.doc_id, edit_rev, None, False) + self.assertTransactionLog([doc.doc_id, doc.doc_id], self.db) + last_trans_id = 
self.getLastTransId(self.db) + self.assertEqual(([], 2, last_trans_id), + (self.other_changes, new_gen, trans_id)) + self.assertEqual(10, self.st.get_sync_info('replica')[3]) + + def test_sync_exchange_push_many(self): + docs_by_gen = [ + (self.make_document('doc-id', 'replica:1', simple_doc), 10, 'T-1'), + (self.make_document('doc-id2', 'replica:1', nested_doc), 11, + 'T-2')] + new_gen, trans_id = self.st.sync_exchange( + docs_by_gen, 'replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertGetDoc(self.db, 'doc-id', 'replica:1', simple_doc, False) + self.assertGetDoc(self.db, 'doc-id2', 'replica:1', nested_doc, False) + self.assertTransactionLog(['doc-id', 'doc-id2'], self.db) + last_trans_id = self.getLastTransId(self.db) + self.assertEqual(([], 2, last_trans_id), + (self.other_changes, new_gen, trans_id)) + self.assertEqual(11, self.st.get_sync_info('replica')[3]) + + def test_sync_exchange_refuses_conflicts(self): + doc = self.db.create_doc_from_json(simple_doc) + self.assertTransactionLog([doc.doc_id], self.db) + new_doc = '{"key": "altval"}' + docs_by_gen = [ + (self.make_document(doc.doc_id, 'replica:1', new_doc), 10, + 'T-sid')] + new_gen, _ = self.st.sync_exchange( + docs_by_gen, 'replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertTransactionLog([doc.doc_id], self.db) + self.assertEqual( + (doc.doc_id, doc.rev, simple_doc, 1), self.other_changes[0][:-1]) + self.assertEqual(1, new_gen) + if self.whitebox: + self.assertEqual(self.db._last_exchange_log['return'], + {'last_gen': 1, 'docs': [(doc.doc_id, doc.rev)]}) + + def test_sync_exchange_ignores_convergence(self): + doc = self.db.create_doc_from_json(simple_doc) + self.assertTransactionLog([doc.doc_id], self.db) + gen, txid = self.db._get_generation_info() + docs_by_gen = [ + (self.make_document(doc.doc_id, doc.rev, simple_doc), 10, 'T-sid')] + new_gen, _ = self.st.sync_exchange( + docs_by_gen, 'replica', last_known_generation=gen, + last_known_trans_id=txid, return_doc_cb=self.receive_doc) + self.assertTransactionLog([doc.doc_id], self.db) + self.assertEqual(([], 1), (self.other_changes, new_gen)) + + def test_sync_exchange_returns_new_docs(self): + doc = self.db.create_doc_from_json(simple_doc) + self.assertTransactionLog([doc.doc_id], self.db) + new_gen, _ = self.st.sync_exchange( + [], 'other-replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertTransactionLog([doc.doc_id], self.db) + self.assertEqual( + (doc.doc_id, doc.rev, simple_doc, 1), self.other_changes[0][:-1]) + self.assertEqual(1, new_gen) + if self.whitebox: + self.assertEqual(self.db._last_exchange_log['return'], + {'last_gen': 1, 'docs': [(doc.doc_id, doc.rev)]}) + + def test_sync_exchange_returns_deleted_docs(self): + doc = self.db.create_doc_from_json(simple_doc) + self.db.delete_doc(doc) + self.assertTransactionLog([doc.doc_id, doc.doc_id], self.db) + new_gen, _ = self.st.sync_exchange( + [], 'other-replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertTransactionLog([doc.doc_id, doc.doc_id], self.db) + self.assertEqual( + (doc.doc_id, doc.rev, None, 2), self.other_changes[0][:-1]) + self.assertEqual(2, new_gen) + if self.whitebox: + self.assertEqual(self.db._last_exchange_log['return'], + {'last_gen': 2, 'docs': [(doc.doc_id, doc.rev)]}) + + def test_sync_exchange_getting_newer_docs(self): + doc = self.db.create_doc_from_json(simple_doc) + 
self.assertTransactionLog([doc.doc_id], self.db) + new_doc = '{"key": "altval"}' + docs_by_gen = [ + (self.make_document(doc.doc_id, 'test:1|z:2', new_doc), 10, + 'T-sid')] + new_gen, _ = self.st.sync_exchange( + docs_by_gen, 'other-replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertTransactionLog([doc.doc_id, doc.doc_id], self.db) + self.assertEqual(([], 2), (self.other_changes, new_gen)) + + def test_sync_exchange_with_concurrent_updates_of_synced_doc(self): + expected = [] + + def before_whatschanged_cb(state): + if state != 'before whats_changed': + return + cont = '{"key": "cuncurrent"}' + conc_rev = self.db.put_doc( + self.make_document(doc.doc_id, 'test:1|z:2', cont)) + expected.append((doc.doc_id, conc_rev, cont, 3)) + + self.set_trace_hook(before_whatschanged_cb) + doc = self.db.create_doc_from_json(simple_doc) + self.assertTransactionLog([doc.doc_id], self.db) + new_doc = '{"key": "altval"}' + docs_by_gen = [ + (self.make_document(doc.doc_id, 'test:1|z:2', new_doc), 10, + 'T-sid')] + new_gen, _ = self.st.sync_exchange( + docs_by_gen, 'other-replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertEqual(expected, [c[:-1] for c in self.other_changes]) + self.assertEqual(3, new_gen) + + def test_sync_exchange_with_concurrent_updates(self): + + def after_whatschanged_cb(state): + if state != 'after whats_changed': + return + self.db.create_doc_from_json('{"new": "doc"}') + + self.set_trace_hook(after_whatschanged_cb) + doc = self.db.create_doc_from_json(simple_doc) + self.assertTransactionLog([doc.doc_id], self.db) + new_doc = '{"key": "altval"}' + docs_by_gen = [ + (self.make_document(doc.doc_id, 'test:1|z:2', new_doc), 10, + 'T-sid')] + new_gen, _ = self.st.sync_exchange( + docs_by_gen, 'other-replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertEqual(([], 2), (self.other_changes, new_gen)) + + def test_sync_exchange_converged_handling(self): + doc = self.db.create_doc_from_json(simple_doc) + docs_by_gen = [ + (self.make_document('new', 'other:1', '{}'), 4, 'T-foo'), + (self.make_document(doc.doc_id, doc.rev, doc.get_json()), 5, + 'T-bar')] + new_gen, _ = self.st.sync_exchange( + docs_by_gen, 'other-replica', last_known_generation=0, + last_known_trans_id=None, return_doc_cb=self.receive_doc) + self.assertEqual(([], 2), (self.other_changes, new_gen)) + + def test_sync_exchange_detect_incomplete_exchange(self): + def before_get_docs_explode(state): + if state != 'before get_docs': + return + raise u1db_errors.U1DBError("fail") + self.set_trace_hook(before_get_docs_explode) + # suppress traceback printing in the wsgiref server + # self.patch(simple_server.ServerHandler, + # 'log_exception', lambda h, exc_info: None) + doc = self.db.create_doc_from_json(simple_doc) + self.assertTransactionLog([doc.doc_id], self.db) + self.assertRaises( + (u1db_errors.U1DBError, u1db_errors.BrokenSyncStream), + self.st.sync_exchange, [], 'other-replica', + last_known_generation=0, last_known_trans_id=None, + return_doc_cb=self.receive_doc) + + def test_sync_exchange_doc_ids(self): + sync_exchange_doc_ids = getattr(self.st, 'sync_exchange_doc_ids', None) + if sync_exchange_doc_ids is None: + self.skipTest("sync_exchange_doc_ids not implemented") + db2 = self.create_database('test2') + doc = db2.create_doc_from_json(simple_doc) + new_gen, trans_id = sync_exchange_doc_ids( + db2, [(doc.doc_id, 10, 'T-sid')], 0, None, + 
return_doc_cb=self.receive_doc) + self.assertGetDoc(self.db, doc.doc_id, doc.rev, simple_doc, False) + self.assertTransactionLog([doc.doc_id], self.db) + last_trans_id = self.getLastTransId(self.db) + self.assertEqual(([], 1, last_trans_id), + (self.other_changes, new_gen, trans_id)) + self.assertEqual(10, self.st.get_sync_info(db2._replica_uid)[3]) + + def test__set_trace_hook(self): + called = [] + + def cb(state): + called.append(state) + + self.set_trace_hook(cb) + self.st.sync_exchange([], 'replica', 0, None, self.receive_doc) + self.st.record_sync_info('replica', 0, 'T-sid') + self.assertEqual(['before whats_changed', + 'after whats_changed', + 'before get_docs', + 'record_sync_info', + ], + called) + + def test__set_trace_hook_shallow(self): + st_trace_shallow = self.st._set_trace_hook_shallow + target_st_trace_shallow = SyncTarget._set_trace_hook_shallow + same_meth = st_trace_shallow == self.st._set_trace_hook + same_fun = st_trace_shallow.im_func == target_st_trace_shallow.im_func + if (same_meth or same_fun): + # shallow same as full + expected = ['before whats_changed', + 'after whats_changed', + 'before get_docs', + 'record_sync_info', + ] + else: + expected = ['sync_exchange', 'record_sync_info'] + + called = [] + + def cb(state): + called.append(state) + + self.set_trace_hook(cb, shallow=True) + self.st.sync_exchange([], 'replica', 0, None, self.receive_doc) + self.st.record_sync_info('replica', 0, 'T-sid') + self.assertEqual(expected, called) diff --git a/testing/tests/server/__init__.py b/testing/tests/server/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/testing/tests/server/__init__.py diff --git a/common/src/leap/soledad/common/tests/test_server.py b/testing/tests/server/test_server.py index 20fe8579..b99d1939 100644 --- a/common/src/leap/soledad/common/tests/test_server.py +++ b/testing/tests/server/test_server.py @@ -17,24 +17,24 @@ """ Tests for server-related functionality. 
""" +import binascii +import mock import os import tempfile -import mock -import time -import binascii + +from hashlib import sha512 from pkg_resources import resource_filename +from urlparse import urljoin from uuid import uuid4 -from hashlib import sha512 -from urlparse import urljoin from twisted.internet import defer from twisted.trial import unittest from leap.soledad.common.couch.state import CouchServerState from leap.soledad.common.couch import CouchDatabase -from leap.soledad.common.tests.u1db_tests import TestCaseWithServer -from leap.soledad.common.tests.test_couch import CouchDBTestCase -from leap.soledad.common.tests.util import ( +from test_soledad.u1db_tests import TestCaseWithServer +from test_soledad.util import CouchDBTestCase +from test_soledad.util import ( make_token_soledad_app, make_soledad_document_for_test, soledad_sync_target, @@ -43,7 +43,6 @@ from leap.soledad.common.tests.util import ( from leap.soledad.common import crypto from leap.soledad.client import Soledad -from leap.soledad.server import LockResource from leap.soledad.server import load_configuration from leap.soledad.server import CONFIG_DEFAULTS from leap.soledad.server.auth import URLToAuthorization @@ -370,6 +369,9 @@ class EncryptedSyncTestCase( self.startTwistedServer() user = 'user-' + uuid4().hex + # this will store all docs ids to avoid get_all_docs + created_ids = [] + # instantiate soledad and create a document sol1 = self._soledad_instance( user=user, @@ -400,34 +402,32 @@ class EncryptedSyncTestCase( deferreds = [] for i in xrange(number_of_docs): content = binascii.hexlify(os.urandom(doc_size / 2)) - deferreds.append(sol1.create_doc({'data': content})) + d = sol1.create_doc({'data': content}) + d.addCallback(created_ids.append) + deferreds.append(d) return defer.DeferredList(deferreds) def _db1AssertDocsSyncedToServer(results): - _, sol_doclist = results - self.assertEqual(number_of_docs, len(sol_doclist)) - # assert doc was sent to couch db - _, couch_doclist = db.get_all_docs() - self.assertEqual(number_of_docs, len(couch_doclist)) - for i in xrange(number_of_docs): - soldoc = sol_doclist.pop() - couchdoc = couch_doclist.pop() + self.assertEqual(number_of_docs, len(created_ids)) + for soldoc in created_ids: + couchdoc = db.get_doc(soldoc.doc_id) + self.assertTrue(couchdoc) # assert document structure in couch server self.assertEqual(soldoc.doc_id, couchdoc.doc_id) self.assertEqual(soldoc.rev, couchdoc.rev) - self.assertEqual(6, len(couchdoc.content)) - self.assertTrue(crypto.ENC_JSON_KEY in couchdoc.content) - self.assertTrue(crypto.ENC_SCHEME_KEY in couchdoc.content) - self.assertTrue(crypto.ENC_METHOD_KEY in couchdoc.content) - self.assertTrue(crypto.ENC_IV_KEY in couchdoc.content) - self.assertTrue(crypto.MAC_KEY in couchdoc.content) - self.assertTrue(crypto.MAC_METHOD_KEY in couchdoc.content) + couch_content = couchdoc.content.keys() + self.assertEqual(6, len(couch_content)) + self.assertTrue(crypto.ENC_JSON_KEY in couch_content) + self.assertTrue(crypto.ENC_SCHEME_KEY in couch_content) + self.assertTrue(crypto.ENC_METHOD_KEY in couch_content) + self.assertTrue(crypto.ENC_IV_KEY in couch_content) + self.assertTrue(crypto.MAC_KEY in couch_content) + self.assertTrue(crypto.MAC_METHOD_KEY in couch_content) d = sol1.get_all_docs() d.addCallback(_db1AssertEmptyDocList) d.addCallback(_db1CreateDocs) d.addCallback(lambda _: sol1.sync()) - d.addCallback(lambda _: sol1.get_all_docs()) d.addCallback(_db1AssertDocsSyncedToServer) def _db2AssertEmptyDocList(results): @@ -494,134 +494,6 @@ class 
EncryptedSyncTestCase( return self._test_encrypted_sym_sync(doc_size=2, number_of_docs=100) -class LockResourceTestCase( - CouchDBTestCase, TestCaseWithServer): - - """ - Tests for use of PUT and DELETE on lock resource. - """ - - @staticmethod - def make_app_with_state(state): - return make_token_soledad_app(state) - - make_document_for_test = make_soledad_document_for_test - - sync_target = soledad_sync_target - - def setUp(self): - # the order of the following initializations is crucial because of - # dependencies. - # XXX explain better - CouchDBTestCase.setUp(self) - self.tempdir = tempfile.mkdtemp(prefix="leap_tests-") - TestCaseWithServer.setUp(self) - # create the databases - db = CouchDatabase.open_database( - urljoin(self.couch_url, ('shared-%s' % (uuid4().hex))), - create=True, - ensure_ddocs=True) - self.addCleanup(db.delete_database) - self._state = CouchServerState(self.couch_url) - self._state.open_database = mock.Mock(return_value=db) - - def tearDown(self): - CouchDBTestCase.tearDown(self) - TestCaseWithServer.tearDown(self) - - def test__try_obtain_filesystem_lock(self): - responder = mock.Mock() - lock_uuid = uuid4().hex - lr = LockResource(lock_uuid, self._state, responder) - self.assertFalse(lr._lock.locked) - self.assertTrue(lr._try_obtain_filesystem_lock()) - self.assertTrue(lr._lock.locked) - lr._try_release_filesystem_lock() - - def test__try_release_filesystem_lock(self): - responder = mock.Mock() - lock_uuid = uuid4().hex - lr = LockResource(lock_uuid, self._state, responder) - lr._try_obtain_filesystem_lock() - self.assertTrue(lr._lock.locked) - lr._try_release_filesystem_lock() - self.assertFalse(lr._lock.locked) - - def test_put(self): - responder = mock.Mock() - lock_uuid = uuid4().hex - lr = LockResource(lock_uuid, self._state, responder) - # lock! - lr.put({}, None) - # assert lock document was correctly written - lock_doc = lr._shared_db.get_doc('lock-' + lock_uuid) - self.assertIsNotNone(lock_doc) - self.assertTrue(LockResource.TIMESTAMP_KEY in lock_doc.content) - self.assertTrue(LockResource.LOCK_TOKEN_KEY in lock_doc.content) - timestamp = lock_doc.content[LockResource.TIMESTAMP_KEY] - token = lock_doc.content[LockResource.LOCK_TOKEN_KEY] - self.assertTrue(timestamp < time.time()) - self.assertTrue(time.time() < timestamp + LockResource.TIMEOUT) - # assert response to user - responder.send_response_json.assert_called_with( - 201, token=token, - timeout=LockResource.TIMEOUT) - - def test_delete(self): - responder = mock.Mock() - lock_uuid = uuid4().hex - lr = LockResource(lock_uuid, self._state, responder) - # lock! - lr.put({}, None) - lock_doc = lr._shared_db.get_doc('lock-' + lock_uuid) - token = lock_doc.content[LockResource.LOCK_TOKEN_KEY] - # unlock! - lr.delete({'token': token}, None) - self.assertFalse(lr._lock.locked) - self.assertIsNone(lr._shared_db.get_doc('lock-' + lock_uuid)) - responder.send_response_json.assert_called_with(200) - - def test_put_while_locked_fails(self): - responder = mock.Mock() - lock_uuid = uuid4().hex - lr = LockResource(lock_uuid, self._state, responder) - # lock! - lr.put({}, None) - # try to lock again! 
- lr.put({}, None) - self.assertEqual( - len(responder.send_response_json.call_args), 2) - self.assertEqual( - responder.send_response_json.call_args[0], (403,)) - self.assertEqual( - len(responder.send_response_json.call_args[1]), 2) - self.assertTrue( - 'remaining' in responder.send_response_json.call_args[1]) - self.assertTrue( - responder.send_response_json.call_args[1]['remaining'] > 0) - - def test_unlock_unexisting_lock_fails(self): - responder = mock.Mock() - lock_uuid = uuid4().hex - lr = LockResource(lock_uuid, self._state, responder) - # unlock! - lr.delete({'token': 'anything'}, None) - responder.send_response_json.assert_called_with( - 404, error='lock not found') - - def test_unlock_with_wrong_token_fails(self): - responder = mock.Mock() - lock_uuid = uuid4().hex - lr = LockResource(lock_uuid, self._state, responder) - # lock! - lr.put({}, None) - # unlock! - lr.delete({'token': 'wrongtoken'}, None) - self.assertIsNotNone(lr._shared_db.get_doc('lock-' + lock_uuid)) - responder.send_response_json.assert_called_with( - 401, error='unlock unauthorized') - - class ConfigurationParsingTest(unittest.TestCase): def setUp(self): @@ -634,7 +506,7 @@ class ConfigurationParsingTest(unittest.TestCase): def test_security_values_configuration(self): # given - config_path = resource_filename('leap.soledad.common.tests', + config_path = resource_filename('test_soledad', 'fixture_soledad.conf') # when config = load_configuration(config_path) @@ -648,7 +520,7 @@ class ConfigurationParsingTest(unittest.TestCase): def test_server_values_configuration(self): # given - config_path = resource_filename('leap.soledad.common.tests', + config_path = resource_filename('test_soledad', 'fixture_soledad.conf') # when config = load_configuration(config_path) diff --git a/common/src/leap/soledad/common/tests/hacker_crackdown.txt b/testing/tests/sqlcipher/hacker_crackdown.txt index a01eb509..a01eb509 100644 --- a/common/src/leap/soledad/common/tests/hacker_crackdown.txt +++ b/testing/tests/sqlcipher/hacker_crackdown.txt diff --git a/common/src/leap/soledad/common/tests/test_async.py b/testing/tests/sqlcipher/test_async.py index 302ecc37..42c315fe 100644 --- a/common/src/leap/soledad/common/tests/test_async.py +++ b/testing/tests/sqlcipher/test_async.py @@ -14,14 +14,12 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. 
- - import os import hashlib from twisted.internet import defer -from leap.soledad.common.tests.util import BaseSoledadTest +from test_soledad.util import BaseSoledadTest from leap.soledad.client import adbapi from leap.soledad.client.sqlcipher import SQLCipherOptions @@ -34,6 +32,14 @@ class ASyncSQLCipherRetryTestCase(BaseSoledadTest): NUM_DOCS = 5000 + def setUp(self): + BaseSoledadTest.setUp(self) + self._dbpool = self._get_dbpool() + + def tearDown(self): + self._dbpool.close() + BaseSoledadTest.tearDown(self) + def _get_dbpool(self): tmpdb = os.path.join(self.tempdir, "test.soledad") opts = SQLCipherOptions(tmpdb, "secret", create=True) @@ -74,10 +80,8 @@ class ASyncSQLCipherRetryTestCase(BaseSoledadTest): adbapi.SQLCIPHER_CONNECTION_TIMEOUT = 1 adbapi.SQLCIPHER_MAX_RETRIES = 1 - dbpool = self._get_dbpool() - def _create_doc(doc): - return dbpool.runU1DBQuery("create_doc", doc) + return self._dbpool.runU1DBQuery("create_doc", doc) def _insert_docs(): deferreds = [] @@ -97,7 +101,7 @@ class ASyncSQLCipherRetryTestCase(BaseSoledadTest): raise Exception d = _insert_docs() - d.addCallback(lambda _: dbpool.runU1DBQuery("get_all_docs")) + d.addCallback(lambda _: self._dbpool.runU1DBQuery("get_all_docs")) d.addErrback(_errback) return d @@ -117,10 +121,8 @@ class ASyncSQLCipherRetryTestCase(BaseSoledadTest): above will fail and we should remove this comment from here. """ - dbpool = self._get_dbpool() - def _create_doc(doc): - return dbpool.runU1DBQuery("create_doc", doc) + return self._dbpool.runU1DBQuery("create_doc", doc) def _insert_docs(): deferreds = [] @@ -139,6 +141,6 @@ class ASyncSQLCipherRetryTestCase(BaseSoledadTest): raise Exception d = _insert_docs() - d.addCallback(lambda _: dbpool.runU1DBQuery("get_all_docs")) + d.addCallback(lambda _: self._dbpool.runU1DBQuery("get_all_docs")) d.addCallback(_count_docs) return d diff --git a/common/src/leap/soledad/common/tests/test_sqlcipher.py b/testing/tests/sqlcipher/test_backend.py index 8105c56e..11472d46 100644 --- a/common/src/leap/soledad/common/tests/test_sqlcipher.py +++ b/testing/tests/sqlcipher/test_backend.py @@ -26,10 +26,11 @@ import shutil from pysqlcipher import dbapi2 from testscenarios import TestWithScenarios -# u1db stuff. -from u1db import errors -from u1db import query_parser -from u1db.backends.sqlite_backend import SQLitePartialExpandDatabase +# l2db stuff. +from leap.soledad.common.l2db import errors +from leap.soledad.common.l2db import query_parser +from leap.soledad.common.l2db.backends.sqlite_backend \ + import SQLitePartialExpandDatabase # soledad stuff. from leap.soledad.common import soledad_assert @@ -39,13 +40,12 @@ from leap.soledad.client.sqlcipher import SQLCipherOptions from leap.soledad.client.sqlcipher import DatabaseIsNotEncrypted # u1db tests stuff. 
-from leap.soledad.common.tests import u1db_tests as tests -from leap.soledad.common.tests.u1db_tests import test_backends -from leap.soledad.common.tests.u1db_tests import test_open -from leap.soledad.common.tests.util import make_sqlcipher_database_for_test -from leap.soledad.common.tests.util import copy_sqlcipher_database_for_test -from leap.soledad.common.tests.util import PASSWORD -from leap.soledad.common.tests.util import BaseSoledadTest +from test_soledad import u1db_tests as tests +from test_soledad.u1db_tests import test_backends +from test_soledad.u1db_tests import test_open +from test_soledad.util import SQLCIPHER_SCENARIOS +from test_soledad.util import PASSWORD +from test_soledad.util import BaseSoledadTest def sqlcipher_open(path, passphrase, create=True, document_factory=None): @@ -73,17 +73,6 @@ class TestSQLCipherBackendImpl(tests.TestCase): # The following tests come from `u1db.tests.test_backends`. # ----------------------------------------------------------------------------- -def make_document_for_test(test, doc_id, rev, content, has_conflicts=False): - return SoledadDocument(doc_id, rev, content, has_conflicts=has_conflicts) - - -SQLCIPHER_SCENARIOS = [ - ('sqlcipher', {'make_database_for_test': make_sqlcipher_database_for_test, - 'copy_database_for_test': copy_sqlcipher_database_for_test, - 'make_document_for_test': make_document_for_test, }), -] - - class SQLCipherTests(TestWithScenarios, test_backends.AllDatabaseTests): scenarios = SQLCIPHER_SCENARIOS @@ -175,11 +164,6 @@ class TestSQLCipherDatabase(tests.TestCase): db1.close() -class TestAlternativeDocument(SoledadDocument): - - """A (not very) alternative implementation of Document.""" - - class TestSQLCipherPartialExpandDatabase(tests.TestCase): """ Tests from u1db.tests.test_sqlite_backend.TestSQLitePartialExpandDatabase. @@ -607,7 +591,7 @@ class SQLCipherOpen(test_open.TestU1DBOpen): def test_open_with_factory(self): db = sqlcipher_open(self.db_path, PASSWORD, create=True, - document_factory=TestAlternativeDocument) + document_factory=SoledadDocument) self.addCleanup(db.close) doc = db.create_doc({}) self.assertTrue(isinstance(doc, SoledadDocument)) diff --git a/testing/tests/sync/__init__.py b/testing/tests/sync/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/testing/tests/sync/__init__.py diff --git a/common/src/leap/soledad/common/tests/test_encdecpool.py b/testing/tests/sync/test_encdecpool.py index 694eb7ad..82e99a47 100644 --- a/common/src/leap/soledad/common/tests/test_encdecpool.py +++ b/testing/tests/sync/test_encdecpool.py @@ -20,14 +20,16 @@ Tests for encryption and decryption pool. import json from random import shuffle +from mock import MagicMock from twisted.internet.defer import inlineCallbacks from leap.soledad.client.encdecpool import SyncEncrypterPool from leap.soledad.client.encdecpool import SyncDecrypterPool from leap.soledad.common.document import SoledadDocument -from leap.soledad.common.tests.util import BaseSoledadTest - +from test_soledad.util import BaseSoledadTest +from twisted.internet import defer +from twisted.test.proto_helpers import MemoryReactorClock DOC_ID = "mydoc" DOC_REV = "rev" @@ -57,13 +59,13 @@ class TestSyncEncrypterPool(BaseSoledadTest): self.assertIsNone(doc) @inlineCallbacks - def test_enqueue_doc_for_encryption_and_get_encrypted_doc(self): + def test_encrypt_doc_and_get_it_back(self): """ Test that the pool actually encrypts a document added to the queue. 
""" doc = SoledadDocument( doc_id=DOC_ID, rev=DOC_REV, json=json.dumps(DOC_CONTENT)) - self._pool.enqueue_doc_for_encryption(doc) + self._pool.encrypt_doc(doc) # exhaustivelly attempt to get the encrypted document encrypted = None @@ -84,14 +86,18 @@ class TestSyncDecrypterPool(BaseSoledadTest): """ self._inserted_docs.append((doc, gen, trans_id)) - def setUp(self): - BaseSoledadTest.setUp(self) - # setup the pool - self._pool = SyncDecrypterPool( + def _setup_pool(self, sync_db=None): + sync_db = sync_db or self._soledad._sync_db + return SyncDecrypterPool( self._soledad._crypto, - self._soledad._sync_db, + sync_db, source_replica_uid=self._soledad._dbpool.replica_uid, insert_doc_cb=self._insert_doc_cb) + + def setUp(self): + BaseSoledadTest.setUp(self) + # setup the pool + self._pool = self._setup_pool() # reset the inserted docs mock self._inserted_docs = [] @@ -117,6 +123,34 @@ class TestSyncDecrypterPool(BaseSoledadTest): self._pool.deferred.addCallback(_assert_doc_was_inserted) return self._pool.deferred + def test_looping_control(self): + """ + Start and stop cleanly. + """ + self._pool.start(10) + self.assertTrue(self._pool.running) + self._pool.stop() + self.assertFalse(self._pool.running) + self.assertTrue(self._pool.deferred.called) + + def test_sync_id_col_is_created_if_non_existing_in_docs_recvd_table(self): + """ + Test that docs_received table is migrated, and has the sync_id column + """ + mock_run_query = MagicMock(return_value=defer.succeed(None)) + mock_sync_db = MagicMock() + mock_sync_db.runQuery = mock_run_query + pool = self._setup_pool(mock_sync_db) + d = pool.start(10) + pool.stop() + + def assert_trial_to_create_sync_id_column(_): + mock_run_query.assert_called_once_with( + "ALTER TABLE docs_received ADD COLUMN sync_id") + + d.addCallback(assert_trial_to_create_sync_id_column) + return d + def test_insert_received_doc_many(self): """ Test that many documents added to the pool are inserted using the @@ -179,6 +213,43 @@ class TestSyncDecrypterPool(BaseSoledadTest): _assert_doc_was_decrypted_and_inserted) return self._pool.deferred + @inlineCallbacks + def test_processing_order(self): + """ + This test ensures that processing of documents only occur if there is + a sequence in place. 
+ """ + reactor_clock = MemoryReactorClock() + self._pool._loop.clock = reactor_clock + + crypto = self._soledad._crypto + + docs = [] + for i in xrange(1, 10): + i = str(i) + doc = SoledadDocument( + doc_id=DOC_ID + i, rev=DOC_REV + i, + json=json.dumps(DOC_CONTENT)) + encrypted_content = json.loads(crypto.encrypt_doc(doc)) + docs.append((doc, encrypted_content)) + + # insert the encrypted document in the pool + self._pool.start(10) # pool is expecting to process 10 docs + # first three arrives, forming a sequence + for i, (doc, encrypted_content) in enumerate(docs[:3]): + gen = idx = i + 1 + yield self._pool.insert_encrypted_received_doc( + doc.doc_id, doc.rev, encrypted_content, gen, "trans_id", idx) + # last one arrives alone, so it can't be processed + doc, encrypted_content = docs[-1] + yield self._pool.insert_encrypted_received_doc( + doc.doc_id, doc.rev, encrypted_content, 10, "trans_id", 10) + + reactor_clock.advance(self._pool.DECRYPT_LOOP_PERIOD) + yield self._pool._decrypt_and_recurse() + + self.assertEqual(3, self._pool._processed_docs) + def test_insert_encrypted_received_doc_many(self, many=100): """ Test that many encrypted documents added to the pool are decrypted and @@ -241,3 +312,4 @@ class TestSyncDecrypterPool(BaseSoledadTest): decrypted_docs = yield self._pool._get_docs(encrypted=False) # check that decrypted docs staging is clean self.assertEquals([], decrypted_docs) + self._pool.stop() diff --git a/common/src/leap/soledad/common/tests/test_sqlcipher_sync.py b/testing/tests/sync/test_sqlcipher_sync.py index 439fc070..3cbefc8b 100644 --- a/common/src/leap/soledad/common/tests/test_sqlcipher_sync.py +++ b/testing/tests/sync/test_sqlcipher_sync.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # test_sqlcipher.py -# Copyright (C) 2013 LEAP +# Copyright (C) 2013-2016 LEAP # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,28 +17,25 @@ """ Test sqlcipher backend sync. 
""" - - import os -from u1db import sync -from u1db import vectorclock -from u1db import errors from uuid import uuid4 from testscenarios import TestWithScenarios +from leap.soledad.common.l2db import sync +from leap.soledad.common.l2db import vectorclock +from leap.soledad.common.l2db import errors + from leap.soledad.common.crypto import ENC_SCHEME_KEY -from leap.soledad.client.http_target import SoledadHTTPSyncTarget from leap.soledad.client.crypto import decrypt_doc_dict +from leap.soledad.client.http_target import SoledadHTTPSyncTarget -from leap.soledad.common.tests import u1db_tests as tests -from leap.soledad.common.tests.test_sqlcipher import SQLCIPHER_SCENARIOS -from leap.soledad.common.tests.util import make_soledad_app -from leap.soledad.common.tests.test_sync_target import \ - SoledadDatabaseSyncTargetTests -from leap.soledad.common.tests.util import soledad_sync_target -from leap.soledad.common.tests.util import BaseSoledadTest +from test_soledad import u1db_tests as tests +from test_soledad.util import SQLCIPHER_SCENARIOS +from test_soledad.util import make_soledad_app +from test_soledad.util import soledad_sync_target +from test_soledad.util import BaseSoledadTest # ----------------------------------------------------------------------------- @@ -731,14 +728,3 @@ target_scenarios = [ 'make_app_with_state': make_soledad_app, 'do_sync': sync_via_synchronizer_and_soledad}), ] - - -class SQLCipherSyncTargetTests(SoledadDatabaseSyncTargetTests): - - # TODO: implement _set_trace_hook(_shallow) in SoledadHTTPSyncTarget so - # skipped tests can be succesfully executed. - - scenarios = (tests.multiply_scenarios(SQLCIPHER_SCENARIOS, - target_scenarios)) - - whitebox = False diff --git a/common/src/leap/soledad/common/tests/test_sync.py b/testing/tests/sync/test_sync.py index 1041367b..095884ce 100644 --- a/common/src/leap/soledad/common/tests/test_sync.py +++ b/testing/tests/sync/test_sync.py @@ -14,8 +14,6 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. 
- - import json import tempfile import threading @@ -29,15 +27,15 @@ from testscenarios import TestWithScenarios from leap.soledad.common import couch from leap.soledad.client import sync -from leap.soledad.common.tests import u1db_tests as tests -from leap.soledad.common.tests.u1db_tests import TestCaseWithServer -from leap.soledad.common.tests.u1db_tests import simple_doc -from leap.soledad.common.tests.util import make_token_soledad_app -from leap.soledad.common.tests.util import make_soledad_document_for_test -from leap.soledad.common.tests.util import soledad_sync_target -from leap.soledad.common.tests.util import BaseSoledadTest -from leap.soledad.common.tests.util import SoledadWithCouchServerMixin -from leap.soledad.common.tests.test_couch import CouchDBTestCase +from test_soledad import u1db_tests as tests +from test_soledad.u1db_tests import TestCaseWithServer +from test_soledad.u1db_tests import simple_doc +from test_soledad.util import make_token_soledad_app +from test_soledad.util import make_soledad_document_for_test +from test_soledad.util import soledad_sync_target +from test_soledad.util import BaseSoledadTest +from test_soledad.util import SoledadWithCouchServerMixin +from test_soledad.util import CouchDBTestCase class InterruptableSyncTestCase( diff --git a/common/src/leap/soledad/common/tests/test_sync_deferred.py b/testing/tests/sync/test_sync_deferred.py index c62bd156..4948aaf8 100644 --- a/common/src/leap/soledad/common/tests/test_sync_deferred.py +++ b/testing/tests/sync/test_sync_deferred.py @@ -34,11 +34,11 @@ from leap.soledad.client.sqlcipher import SQLCipherDatabase from testscenarios import TestWithScenarios -from leap.soledad.common.tests import u1db_tests as tests -from leap.soledad.common.tests.util import ADDRESS -from leap.soledad.common.tests.util import SoledadWithCouchServerMixin -from leap.soledad.common.tests.util import make_soledad_app -from leap.soledad.common.tests.util import soledad_sync_target +from test_soledad import u1db_tests as tests +from test_soledad.util import ADDRESS +from test_soledad.util import SoledadWithCouchServerMixin +from test_soledad.util import make_soledad_app +from test_soledad.util import soledad_sync_target # Just to make clear how this test is different... 
:) diff --git a/common/src/leap/soledad/common/tests/test_sync_mutex.py b/testing/tests/sync/test_sync_mutex.py index 973a8587..787cfee8 100644 --- a/common/src/leap/soledad/common/tests/test_sync_mutex.py +++ b/testing/tests/sync/test_sync_mutex.py @@ -35,13 +35,13 @@ from leap.soledad.client.sync import SoledadSynchronizer from leap.soledad.common.couch.state import CouchServerState from leap.soledad.common.couch import CouchDatabase -from leap.soledad.common.tests.u1db_tests import TestCaseWithServer -from leap.soledad.common.tests.test_couch import CouchDBTestCase +from test_soledad.u1db_tests import TestCaseWithServer -from leap.soledad.common.tests.util import BaseSoledadTest -from leap.soledad.common.tests.util import make_token_soledad_app -from leap.soledad.common.tests.util import make_soledad_document_for_test -from leap.soledad.common.tests.util import soledad_sync_target +from test_soledad.util import CouchDBTestCase +from test_soledad.util import BaseSoledadTest +from test_soledad.util import make_token_soledad_app +from test_soledad.util import make_soledad_document_for_test +from test_soledad.util import soledad_sync_target # monkey-patch the soledad synchronizer so it stores start and finish times diff --git a/common/src/leap/soledad/common/tests/test_sync_target.py b/testing/tests/sync/test_sync_target.py index f25e84dd..964468ce 100644 --- a/common/src/leap/soledad/common/tests/test_sync_target.py +++ b/testing/tests/sync/test_sync_target.py @@ -21,7 +21,6 @@ import cStringIO import os import time import json -import u1db import random import string import shutil @@ -36,16 +35,18 @@ from leap.soledad.client.sqlcipher import SQLCipherU1DBSync from leap.soledad.client.sqlcipher import SQLCipherOptions from leap.soledad.client.sqlcipher import SQLCipherDatabase -from leap.soledad.common.document import SoledadDocument +from leap.soledad.common import l2db -from leap.soledad.common.tests import u1db_tests as tests -from leap.soledad.common.tests.util import make_sqlcipher_database_for_test -from leap.soledad.common.tests.util import make_soledad_app -from leap.soledad.common.tests.util import make_token_soledad_app -from leap.soledad.common.tests.util import make_soledad_document_for_test -from leap.soledad.common.tests.util import soledad_sync_target -from leap.soledad.common.tests.util import SoledadWithCouchServerMixin -from leap.soledad.common.tests.util import ADDRESS +from leap.soledad.common.document import SoledadDocument +from test_soledad import u1db_tests as tests +from test_soledad.util import make_sqlcipher_database_for_test +from test_soledad.util import make_soledad_app +from test_soledad.util import make_token_soledad_app +from test_soledad.util import make_soledad_document_for_test +from test_soledad.util import soledad_sync_target +from test_soledad.util import SoledadWithCouchServerMixin +from test_soledad.util import ADDRESS +from test_soledad.util import SQLCIPHER_SCENARIOS # ----------------------------------------------------------------------------- @@ -90,53 +91,53 @@ class TestSoledadParseReceivedDocResponse(SoledadWithCouchServerMixin): doc.get_json(), doc.doc_id, doc.rev, key, secret) - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response("[\r\n{},\r\n]") - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response( ('[\r\n{},\r\n{"id": "i", "rev": "r", ' + 
'"content": %s, "gen": 3, "trans_id": "T-sid"}' + ',\r\n]') % json.dumps(enc_json)) def test_wrong_start(self): - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response("{}\r\n]") - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response("\r\n{}\r\n]") - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response("") def test_wrong_end(self): - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response("[\r\n{}") - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response("[\r\n") def test_missing_comma(self): - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response( '[\r\n{}\r\n{"id": "i", "rev": "r", ' '"content": "c", "gen": 3}\r\n]') def test_no_entries(self): - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response("[\r\n]") def test_error_in_stream(self): - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response( '[\r\n{"new_generation": 0},' '\r\n{"error": "unavailable"}\r\n') - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response( '[\r\n{"error": "unavailable"}\r\n') - with self.assertRaises(u1db.errors.BrokenSyncStream): + with self.assertRaises(l2db.errors.BrokenSyncStream): self.target._parse_received_doc_response('[\r\n{"error": "?"}\r\n') # @@ -256,7 +257,7 @@ class TestSoledadSyncTarget( replica_trans_id=None, number_of_docs=None, doc_idx=None, sync_id=None): if doc.doc_id in trigger_ids: - raise u1db.errors.U1DBError + raise l2db.errors.U1DBError return _put_doc_if_newer(doc, save_conflict=save_conflict, replica_uid=replica_uid, replica_gen=replica_gen, @@ -278,7 +279,7 @@ class TestSoledadSyncTarget( doc2 = self.make_document('doc-here2', 'replica:1', '{"value": "here2"}') - with self.assertRaises(u1db.errors.U1DBError): + with self.assertRaises(l2db.errors.U1DBError): yield remote_target.sync_exchange( [(doc1, 10, 'T-sid'), (doc2, 11, 'T-sud')], 'replica', @@ -706,7 +707,7 @@ class SoledadDatabaseSyncTargetTests( def before_get_docs_explode(state): if state != 'before get_docs': return - raise u1db.errors.U1DBError("fail") + raise l2db.errors.U1DBError("fail") self.set_trace_hook(before_get_docs_explode) # suppress traceback printing in the wsgiref server # self.patch(simple_server.ServerHandler, @@ -714,7 +715,7 @@ class SoledadDatabaseSyncTargetTests( doc = self.db.create_doc_from_json(tests.simple_doc) self.assertTransactionLog([doc.doc_id], self.db) self.assertRaises( - (u1db.errors.U1DBError, u1db.errors.BrokenSyncStream), + (l2db.errors.U1DBError, l2db.errors.BrokenSyncStream), self.st.sync_exchange, [], 'other-replica', last_known_generation=0, last_known_trans_id=None, insert_doc_cb=self.receive_doc) @@ -954,3 +955,14 @@ class TestSoledadDbSync( d.addCallback(_assert_successful_sync) return d + + +class 
+
+    # TODO: implement _set_trace_hook(_shallow) in SoledadHTTPSyncTarget so
+    # skipped tests can be successfully executed.
+
+    scenarios = (tests.multiply_scenarios(SQLCIPHER_SCENARIOS,
+                                          target_scenarios))
+
+    whitebox = False
diff --git a/testing/tox.ini b/testing/tox.ini
new file mode 100644
index 00000000..3663eef3
--- /dev/null
+++ b/testing/tox.ini
@@ -0,0 +1,23 @@
+[tox]
+envlist = py27
+
+[testenv]
+commands = py.test --pep8 {posargs}
+changedir = tests
+deps =
+    pytest
+    pytest-flake8
+    pytest-pep8
+    mock
+    testscenarios
+    setuptools-trial
+    pep8
+    pdbpp
+    couchdb
+# install soledad local packages
+    -e../common
+    -e../client
+    -e../server
+setenv =
+    HOME=/tmp
+install_command = pip install {opts} {packages}
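
The new test_processing_order test pins down the ordering rule the decrypter
pool follows: received documents carry a sync index, and only a contiguous run
of indexes starting right after the last processed document may be handled, so
a document that arrives ahead of a gap must wait. A standalone sketch of that
rule (illustrative only; the helper name and data shapes here are made up, and
this is not the pool's actual code)::

    # Illustrative sketch, not SyncDecrypterPool's implementation: yield
    # only the documents whose sync indexes directly extend the sequence
    # of already-processed indexes (1, 2, 3, ...).

    def contiguous_prefix(received, last_processed=0):
        """Yield docs whose indexes extend the processed sequence."""
        by_idx = dict(received)
        next_idx = last_processed + 1
        while next_idx in by_idx:
            yield by_idx[next_idx]
            next_idx += 1

    # Indexes 1-3 form a sequence; index 10 does not follow them, so only
    # three documents are processed, which is what test_processing_order
    # asserts via self._pool._processed_docs.
    received = [(1, "d1"), (2, "d2"), (3, "d3"), (10, "d10")]
    assert list(contiguous_prefix(received)) == ["d1", "d2", "d3"]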
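The same test also shows the standard Twisted recipe for testing time-driven
code: swap the LoopingCall's clock for a fake one (MemoryReactorClock in the
test) and advance it by hand, so the decrypt loop fires deterministically
instead of depending on real reactor timing. The pattern in isolation, using
twisted.internet.task.Clock, which provides the same timekeeping interface::

    from twisted.internet import task

    calls = []
    loop = task.LoopingCall(lambda: calls.append("tick"))
    loop.clock = task.Clock()  # schedule against a fake clock, not the reactor
    loop.start(10, now=False)  # fire every 10 seconds, but not immediately
    loop.clock.advance(10)     # deterministically trigger one iteration
    assert calls == ["tick"]
    loop.stop()

The clock must be replaced before start() is called, exactly as the test sets
self._pool._loop.clock before self._pool.start(10).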
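Finally, test_sync_id_col_is_created_if_non_existing_in_docs_recvd_table pins
the exact migration statement the pool must issue against the sync db. SQLite
has no "ADD COLUMN IF NOT EXISTS", so a migration like this is commonly made
idempotent by tolerating the duplicate-column error; the sketch below shows
that strategy with plain sqlite3 and is an assumption about the approach, not
a copy of the pool's implementation::

    import sqlite3

    def ensure_sync_id_column(conn):
        # SQLite raises OperationalError ("duplicate column name") when
        # the column already exists; treat that as nothing left to migrate.
        try:
            conn.execute("ALTER TABLE docs_received ADD COLUMN sync_id")
        except sqlite3.OperationalError:
            pass

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE docs_received (doc_id TEXT)")
    ensure_sync_id_column(conn)
    ensure_sync_id_column(conn)  # second run is a no-op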