From fda2acf0c8aaf123359470ced37f56e8223a3286 Mon Sep 17 00:00:00 2001
From: drebs
Date: Thu, 21 Jul 2016 13:51:06 +0200
Subject: [feat] use _local couch docs for metadata storage

---
 common/src/leap/soledad/common/couch/__init__.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'common/src')

diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py
index 523a50a0..21ffd036 100644
--- a/common/src/leap/soledad/common/couch/__init__.py
+++ b/common/src/leap/soledad/common/couch/__init__.py
@@ -110,6 +110,9 @@ class CouchDatabase(object):
     CouchDB details from backend code.
     """

+    CONFIG_DOC_ID = '_local/config'
+    SYNC_DOC_ID_PREFIX = '_local/sync_'
+
     @classmethod
     def open_database(cls, url, create, ensure_ddocs=False, replica_uid=None,
                       database_security=None):
@@ -261,12 +264,12 @@ class CouchDatabase(object):
         """
         try:
             # set on existent config document
-            doc = self._database['u1db_config']
+            doc = self._database[self.CONFIG_DOC_ID]
             doc['replica_uid'] = replica_uid
         except ResourceNotFound:
             # or create the config document
             doc = {
-                '_id': 'u1db_config',
+                '_id': self.CONFIG_DOC_ID,
                 'replica_uid': replica_uid,
             }
         self._database.save(doc)
@@ -280,7 +283,7 @@ class CouchDatabase(object):
         """
         try:
             # grab replica_uid from server
-            doc = self._database['u1db_config']
+            doc = self._database[self.CONFIG_DOC_ID]
             replica_uid = doc['replica_uid']
             return replica_uid
         except ResourceNotFound:
@@ -499,7 +502,7 @@ class CouchDatabase(object):
             synchronized with the replica, this is (0, '').
         :rtype: (int, str)
         """
-        doc_id = 'u1db_sync_%s' % other_replica_uid
+        doc_id = '%s%s' % (self.SYNC_DOC_ID_PREFIX, other_replica_uid)
         try:
             doc = self._database[doc_id]
         except ResourceNotFound:
@@ -562,7 +565,7 @@ class CouchDatabase(object):
             generation.
         :type other_transaction_id: str
         """
-        doc_id = 'u1db_sync_%s' % other_replica_uid
+        doc_id = '%s%s' % (self.SYNC_DOC_ID_PREFIX, other_replica_uid)
         try:
             doc = self._database[doc_id]
         except ResourceNotFound:
-- cgit v1.2.3


From 239273e765ffc2cf3f74dc2bdd6b7ac8a8acdccd Mon Sep 17 00:00:00 2001
From: drebs
Date: Fri, 22 Jul 2016 19:53:21 +0200
Subject: [feat] do not use couch views for sync metadata

When compared to a plain couch document get, the use of the simplest view
function takes around double the time, while the use of the simplest list
function can take more than 8 times as long:

  get 100 docs:    total: 0.440337 secs  mean: 0.004403
  query 100 views: total: 0.911425 secs  mean: 0.009114
  query 100 lists: total: 3.711537 secs  mean: 0.037115

Besides that, the current implementation of sync metadata storage over couch
depends on the timestamps of document puts, which can lead to metadata
corruption if the system clock is changed for any reason.

For these reasons, we change the implementation of database metadata storage.

This commit implements the storage of transaction log data in couch documents
with special ids, in the form "gen-xxxxxxxxxx", where the x's are replaced by
the generation index. Each generation document holds a dictionary containing
the generation, doc_id and transaction_id for the changed document. For each
modified document, a generation document is inserted holding the transaction
metadata.
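A minimal sketch of the scheme described above (illustrative only: _get_gen_doc_id matches the helper introduced in the diff below, while make_gen_doc is a hypothetical name used here just to show the shape of a generation document):

    def _get_gen_doc_id(gen):
        # zero-pad the generation so that the lexicographic order of the ids
        # matches the numeric order of the generations; this lets couch range
        # queries over _all_docs traverse the transaction log in order
        return 'gen-%s' % str(gen).zfill(10)

    def make_gen_doc(gen, doc_id, trans_id):
        # one generation document is saved for each modified document
        return {
            '_id': _get_gen_doc_id(gen),  # e.g. 'gen-0000000042'
            'gen': gen,                   # generation index
            'doc_id': doc_id,             # id of the document changed at this generation
            'trans_id': trans_id,         # transaction id of the change
        }

For example, make_gen_doc(42, 'my-doc', 'T-abc') produces the document with id 'gen-0000000042' that records the 42nd change in the database.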
--- common/src/leap/soledad/common/couch/__init__.py | 121 ++++++++++++----------- 1 file changed, 65 insertions(+), 56 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 21ffd036..6cad2b19 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -103,6 +103,10 @@ def couch_server(url): THREAD_POOL = ThreadPool(20) +def _get_gen_doc_id(gen): + return 'gen-%s' % str(gen).zfill(10) + + class CouchDatabase(object): """ Holds CouchDB related code. @@ -213,7 +217,7 @@ class CouchDatabase(object): Ensure that the design documents used by the backend exist on the couch database. """ - for ddoc_name in ['docs', 'syncs', 'transactions']: + for ddoc_name in ['docs', 'syncs']: try: self.json_from_resource(['_design'] + ddoc_name.split('/') + ['_info'], @@ -437,8 +441,6 @@ class CouchDatabase(object): result['_attachments']['u1db_conflicts']['data'])))) # store couch revision doc.couch_rev = result['_rev'] - # store transactions - doc.transactions = result['u1db_transactions'] return doc def _build_conflicts(self, doc_id, attached_conflicts): @@ -474,14 +476,11 @@ class CouchDatabase(object): """ if generation == 0: return '' - # query a couch list function - ddoc_path = [ - '_design', 'transactions', '_list', 'trans_id_for_gen', 'log' - ] - response = self.json_from_resource(ddoc_path, gen=generation) - if response == {}: + log = self._get_transaction_log(start=generation, end=generation) + if not log: raise InvalidGeneration - return response['transaction_id'] + _, _, trans_id = log[0] + return trans_id def get_replica_gen_and_trans_id(self, other_replica_uid): """ @@ -512,8 +511,8 @@ class CouchDatabase(object): 'transaction_id': '', } self._database.save(doc) - result = doc['generation'], doc['transaction_id'] - return result + gen, trans_id = doc['generation'], doc['transaction_id'] + return gen, trans_id def get_doc_conflicts(self, doc_id, couch_rev=None): """ @@ -581,12 +580,32 @@ class CouchDatabase(object): :return: The complete transaction log. 
:rtype: [(str, str)] """ - # query a couch view - ddoc_path = ['_design', 'transactions', '_view', 'log'] - response = self.json_from_resource(ddoc_path) - return map( - lambda row: (row['id'], row['value']), - response['rows']) + log = self._get_transaction_log() + return map(lambda i: (i[1], i[2]), log) + + def _get_gen_docs( + self, start=0, end=9999999999, descending=None, limit=None): + params = {} + if descending: + params['descending'] = 'true' + # honor couch way of traversing the view tree in reverse order + start, end = end, start + params['startkey'] = _get_gen_doc_id(start) + params['endkey'] = _get_gen_doc_id(end) + params['include_docs'] = 'true' + if limit: + params['limit'] = limit + view = self._database.view("_all_docs", **params) + return view.rows + + def _get_transaction_log(self, start=0, end=9999999999): + # get current gen and trans_id + rows = self._get_gen_docs(start=start, end=end) + log = [] + for row in rows: + doc = row['doc'] + log.append((doc['gen'], doc['doc_id'], doc['trans_id'])) + return log def whats_changed(self, old_generation=0): """ @@ -605,32 +624,18 @@ class CouchDatabase(object): changes first) :rtype: (int, str, [(str, int, str)]) """ - # query a couch list function - ddoc_path = [ - '_design', 'transactions', '_list', 'whats_changed', 'log' - ] - response = self.json_from_resource(ddoc_path, old_gen=old_generation) - results = map( - lambda row: - (row['generation'], row['doc_id'], row['transaction_id']), - response['transactions']) - results.reverse() - cur_gen = old_generation - seen = set() changes = [] - newest_trans_id = '' - for generation, doc_id, trans_id in results: + cur_generation, last_trans_id = self.get_generation_info() + relevant_tail = self._get_transaction_log(start=old_generation + 1) + seen = set() + generation = cur_generation + for _, doc_id, trans_id in reversed(relevant_tail): if doc_id not in seen: changes.append((doc_id, generation, trans_id)) seen.add(doc_id) - if changes: - cur_gen = changes[0][1] # max generation - newest_trans_id = changes[0][2] - changes.reverse() - else: - cur_gen, newest_trans_id = self.get_generation_info() - - return cur_gen, newest_trans_id, changes + generation -= 1 + changes.reverse() + return (cur_generation, last_trans_id, changes) def get_generation_info(self): """ @@ -641,10 +646,11 @@ class CouchDatabase(object): """ if self.batching and self.batch_generation: return self.batch_generation - # query a couch list function - ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] - info = self.json_from_resource(ddoc_path) - return (info['generation'], info['transaction_id']) + rows = self._get_gen_docs(descending=True, limit=1) + if not rows: + return 0, '' + gen_doc = rows.pop()['doc'] + return gen_doc['gen'], gen_doc['trans_id'] def json_from_resource(self, ddoc_path, check_missing_ddoc=True, **kwargs): @@ -740,21 +746,25 @@ class CouchDatabase(object): 'length': len(conflicts), } parts.append(conflicts) - # store old transactions, if any - transactions = old_doc.transactions[:] if old_doc is not None else [] - # create a new transaction id and timestamp it so the transaction log - # is consistent when querying the database. - transactions.append( - # here we store milliseconds to keep consistent with javascript - # Date.prototype.getTime() which was used before inside a couchdb - # update handler. 
- (int(time.time() * 1000), - transaction_id)) + # add the gen document + while True: # TODO: add a lock, remove this while + try: + gen, _ = self.get_generation_info() + new_gen = gen + 1 + gen_doc = { + '_id': _get_gen_doc_id(new_gen), + 'gen': new_gen, + 'doc_id': doc.doc_id, + 'trans_id': transaction_id, + } + self._database.save(gen_doc) + break + except ResourceConflict: + pass # build the couch document couch_doc = { '_id': doc.doc_id, 'u1db_rev': doc.rev, - 'u1db_transactions': transactions, '_attachments': attachments, } # if we are updating a doc we have to add the couch doc revision @@ -786,7 +796,6 @@ class CouchDatabase(object): self.batch_docs[doc.doc_id] = couch_doc last_gen, last_trans_id = self.batch_generation self.batch_generation = (last_gen + 1, transaction_id) - return transactions[-1][1] def _new_resource(self, *path): """ -- cgit v1.2.3 From c3e0f52080041e2a01cfa483efe73f8503a10f31 Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 22 Jul 2016 20:33:06 +0200 Subject: [feat] remove usage of design documents in couch Design documents are slow and we already have alternatives to all uses we used to make of them, so this commit completelly removes all usage of design documents. --- common/src/leap/soledad/common/.gitignore | 1 - common/src/leap/soledad/common/README.txt | 9 -- common/src/leap/soledad/common/couch/__init__.py | 78 ++--------- common/src/leap/soledad/common/couch/errors.py | 144 --------------------- common/src/leap/soledad/common/ddocs/README.txt | 34 ----- .../soledad/common/ddocs/docs/views/get/map.js | 20 --- .../soledad/common/ddocs/syncs/updates/state.js | 105 --------------- .../ddocs/syncs/views/changes_to_return/map.js | 20 --- .../common/ddocs/syncs/views/seen_ids/map.js | 11 -- .../soledad/common/ddocs/syncs/views/state/map.js | 17 --- .../common/ddocs/transactions/lists/generation.js | 20 --- .../ddocs/transactions/lists/trans_id_for_gen.js | 19 --- .../ddocs/transactions/lists/whats_changed.js | 22 ---- .../common/ddocs/transactions/views/log/map.js | 7 - 14 files changed, 9 insertions(+), 498 deletions(-) delete mode 100644 common/src/leap/soledad/common/.gitignore delete mode 100644 common/src/leap/soledad/common/couch/errors.py delete mode 100644 common/src/leap/soledad/common/ddocs/README.txt delete mode 100644 common/src/leap/soledad/common/ddocs/docs/views/get/map.js delete mode 100644 common/src/leap/soledad/common/ddocs/syncs/updates/state.js delete mode 100644 common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js delete mode 100644 common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js delete mode 100644 common/src/leap/soledad/common/ddocs/syncs/views/state/map.js delete mode 100644 common/src/leap/soledad/common/ddocs/transactions/lists/generation.js delete mode 100644 common/src/leap/soledad/common/ddocs/transactions/lists/trans_id_for_gen.js delete mode 100644 common/src/leap/soledad/common/ddocs/transactions/lists/whats_changed.js delete mode 100644 common/src/leap/soledad/common/ddocs/transactions/views/log/map.js (limited to 'common/src') diff --git a/common/src/leap/soledad/common/.gitignore b/common/src/leap/soledad/common/.gitignore deleted file mode 100644 index 3378c78a..00000000 --- a/common/src/leap/soledad/common/.gitignore +++ /dev/null @@ -1 +0,0 @@ -ddocs.py diff --git a/common/src/leap/soledad/common/README.txt b/common/src/leap/soledad/common/README.txt index 38b9858e..0a252650 100644 --- a/common/src/leap/soledad/common/README.txt +++ b/common/src/leap/soledad/common/README.txt @@ 
-60,15 +60,6 @@ implemented in a way that all changes will be pushed with just one operation. * delete_index * create_index -Couch views and update functions are used in order to achieve atomicity on the -Couch backend. Transactions are stored in the `u1db_transactions` field of the -couch document. Document's content and conflicted versions are stored as couch -document attachments with names, respectivelly, `u1db_content` and -`u1db_conflicts`. - -A map of methods and couch query URI can be found on the `./ddocs/README.txt` -document. - Notes: * Currently, the couch backend does not implement indexing, so what is diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 6cad2b19..ca0c2855 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -37,7 +37,6 @@ from couchdb.client import Server, Database from couchdb.http import ( ResourceConflict, ResourceNotFound, - ServerError, Session, urljoin as couch_urljoin, Resource, @@ -50,9 +49,6 @@ from leap.soledad.common.l2db.errors import ( from leap.soledad.common.l2db.remote import http_app -from leap.soledad.common import ddocs -from .errors import raise_server_error -from .errors import raise_missing_design_doc_error from .support import MultipartWriter from leap.soledad.common.errors import InvalidURLError from leap.soledad.common.document import ServerDocument @@ -177,7 +173,6 @@ class CouchDatabase(object): self.batch_generation = None self.batch_docs = {} if ensure_ddocs: - self.ensure_ddocs_on_db() self.ensure_security_ddoc(database_security) def batch_start(self): @@ -212,22 +207,6 @@ class CouchDatabase(object): except ResourceNotFound: raise DatabaseDoesNotExist() - def ensure_ddocs_on_db(self): - """ - Ensure that the design documents used by the backend exist on the - couch database. - """ - for ddoc_name in ['docs', 'syncs']: - try: - self.json_from_resource(['_design'] + - ddoc_name.split('/') + ['_info'], - check_missing_ddoc=False) - except ResourceNotFound: - ddoc = json.loads( - binascii.a2b_base64( - getattr(ddocs, ddoc_name))) - self._database.save(ddoc) - def ensure_security_ddoc(self, security_config=None): """ Make sure that only soledad user is able to access this database as @@ -539,7 +518,6 @@ class CouchDatabase(object): try: response = self.json_from_resource([doc_id, 'u1db_conflicts'], - check_missing_ddoc=False, **params) return conflicts + self._build_conflicts( doc_id, json.loads(response.read())) @@ -652,48 +630,23 @@ class CouchDatabase(object): gen_doc = rows.pop()['doc'] return gen_doc['gen'], gen_doc['trans_id'] - def json_from_resource(self, ddoc_path, check_missing_ddoc=True, - **kwargs): + def json_from_resource(self, doc_path, **kwargs): """ Get a resource from it's path and gets a doc's JSON using provided - parameters, also checking for missing design docs by default. + parameters. - :param ddoc_path: The path to resource. - :type ddoc_path: [str] - :param check_missing_ddoc: Raises info on what design doc is missing. - :type check_missin_ddoc: bool + :param doc_path: The path to resource. + :type doc_path: [str] :return: The request's data parsed from JSON to a dict. :rtype: dict - - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. 
- :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. - """ - if ddoc_path is not None: - resource = self._database.resource(*ddoc_path) + """ + if doc_path is not None: + resource = self._database.resource(*doc_path) else: resource = self._database.resource() - try: - _, _, data = resource.get_json(**kwargs) - return data - except ResourceNotFound as e: - if check_missing_ddoc: - raise_missing_design_doc_error(e, ddoc_path) - else: - raise e - except ServerError as e: - raise_server_error(e, ddoc_path) + _, _, data = resource.get_json(**kwargs) + return data def save_document(self, old_doc, doc, transaction_id): """ @@ -710,19 +663,6 @@ class CouchDatabase(object): :raise RevisionConflict: Raised when trying to update a document but couch revisions mismatch. - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. """ attachments = {} # we save content and conflicts as attachments parts = [] # and we put it using couch's multipart PUT diff --git a/common/src/leap/soledad/common/couch/errors.py b/common/src/leap/soledad/common/couch/errors.py deleted file mode 100644 index 9b287c76..00000000 --- a/common/src/leap/soledad/common/couch/errors.py +++ /dev/null @@ -1,144 +0,0 @@ -# -*- coding: utf-8 -*- -# errors.py -# Copyright (C) 2015 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -from leap.soledad.common.errors import SoledadError, BackendNotReadyError -from leap.soledad.common.errors import register_exception - -""" -Specific errors that can be raised by CouchDatabase. -""" - - -@register_exception -class MissingDesignDocError(BackendNotReadyError): - - """ - Raised when trying to access a missing couch design document. - """ - - wire_description = "missing design document" - status = 500 - - -@register_exception -class MissingDesignDocNamedViewError(SoledadError): - - """ - Raised when trying to access a missing named view on a couch design - document. - """ - - wire_description = "missing design document named function" - status = 500 - - -@register_exception -class MissingDesignDocListFunctionError(SoledadError): - - """ - Raised when trying to access a missing list function on a couch design - document. 
- """ - - wire_description = "missing design document list function" - status = 500 - - -@register_exception -class MissingDesignDocDeletedError(SoledadError): - - """ - Raised when trying to access a deleted couch design document. - """ - - wire_description = "design document was deleted" - status = 500 - - -@register_exception -class DesignDocUnknownError(SoledadError): - - """ - Raised when trying to access a couch design document and getting an - unknown error. - """ - - wire_description = "missing design document unknown error" - status = 500 - - -def raise_missing_design_doc_error(exc, ddoc_path): - """ - Raise an appropriate exception when catching a ResourceNotFound when - accessing a design document. - - :param exc: The exception cought. - :type exc: ResourceNotFound - :param ddoc_path: A list representing the requested path. - :type ddoc_path: list - - :raise MissingDesignDocError: Raised when tried to access a missing design - document. - :raise MissingDesignDocListFunctionError: Raised when trying to access a - missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a design - document for an yet unknown reason. - """ - path = "".join(ddoc_path) - if exc.message[1] == 'missing': - raise MissingDesignDocError(path) - elif exc.message[1] == 'missing function' or \ - exc.message[1].startswith('missing lists function'): - raise MissingDesignDocListFunctionError(path) - elif exc.message[1] == 'missing_named_view': - raise MissingDesignDocNamedViewError(path) - elif exc.message[1] == 'deleted': - raise MissingDesignDocDeletedError(path) - # other errors are unknown for now - raise DesignDocUnknownError("%s: %s" % (path, str(exc.message))) - - -def raise_server_error(exc, ddoc_path): - """ - Raise an appropriate exception when catching a ServerError when - accessing a design document. - - :param exc: The exception cought. - :type exc: ResourceNotFound - :param ddoc_path: A list representing the requested path. - :type ddoc_path: list - - :raise MissingDesignDocListFunctionError: Raised when trying to access a - missing list function on a - design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a design - document for an yet unknown reason. - """ - path = "".join(ddoc_path) - msg = exc.message[1][0] - if msg == 'unnamed_error': - raise MissingDesignDocListFunctionError(path) - elif msg == 'TypeError': - if 'point is undefined' in exc.message[1][1]: - raise MissingDesignDocListFunctionError - # other errors are unknown for now - raise DesignDocUnknownError("%s: %s" % (path, str(exc.message))) diff --git a/common/src/leap/soledad/common/ddocs/README.txt b/common/src/leap/soledad/common/ddocs/README.txt deleted file mode 100644 index 5569d929..00000000 --- a/common/src/leap/soledad/common/ddocs/README.txt +++ /dev/null @@ -1,34 +0,0 @@ -This directory holds a folder structure containing javascript files that -represent the design documents needed by the CouchDB U1DB backend. These files -are compiled into the `../ddocs.py` file by setuptools when creating the -source distribution. - -The following table depicts the U1DB CouchDB backend method and the URI that -is queried to obtain/update data from/to the server. 
- - +----------------------------------+------------------------------------------------------------------+ - | u1db backend method | URI | - |----------------------------------+------------------------------------------------------------------| - | _get_generation | _design/transactions/_list/generation/log | - | _get_generation_info | _design/transactions/_list/generation/log | - | _get_trans_id_for_gen | _design/transactions/_list/trans_id_for_gen/log | - | _get_transaction_log | _design/transactions/_view/log | - | _get_doc (*) | _design/docs/_view/get?key= | - | _has_conflicts | _design/docs/_view/get?key= | - | get_all_docs | _design/docs/_view/get | - | _put_doc | _design/docs/_update/put/ | - | _whats_changed | _design/transactions/_list/whats_changed/log?old_gen= | - | _get_conflicts (*) | _design/docs/_view/conflicts?key= | - | _get_replica_gen_and_trans_id | _design/syncs/_view/log?other_replica_uid= | - | _do_set_replica_gen_and_trans_id | _design/syncs/_update/put/u1db_sync_log | - | _add_conflict | _design/docs/_update/add_conflict/ | - | _delete_conflicts | _design/docs/_update/delete_conflicts/?doc_rev= | - | list_indexes | not implemented | - | _get_index_definition | not implemented | - | delete_index | not implemented | - | _get_indexed_fields | not implemented | - | _put_and_update_indexes | not implemented | - +----------------------------------+------------------------------------------------------------------+ - -(*) These methods also request CouchDB document attachments that store U1DB - document contents. diff --git a/common/src/leap/soledad/common/ddocs/docs/views/get/map.js b/common/src/leap/soledad/common/ddocs/docs/views/get/map.js deleted file mode 100644 index ae08d9e9..00000000 --- a/common/src/leap/soledad/common/ddocs/docs/views/get/map.js +++ /dev/null @@ -1,20 +0,0 @@ -function(doc) { - if (doc.u1db_rev) { - var is_tombstone = true; - var has_conflicts = false; - if (doc._attachments) { - if (doc._attachments.u1db_content) - is_tombstone = false; - if (doc._attachments.u1db_conflicts) - has_conflicts = true; - } - emit(doc._id, - { - "couch_rev": doc._rev, - "u1db_rev": doc.u1db_rev, - "is_tombstone": is_tombstone, - "has_conflicts": has_conflicts, - } - ); - } -} diff --git a/common/src/leap/soledad/common/ddocs/syncs/updates/state.js b/common/src/leap/soledad/common/ddocs/syncs/updates/state.js deleted file mode 100644 index d62aeb40..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/updates/state.js +++ /dev/null @@ -1,105 +0,0 @@ -/** - * This update handler stores information about ongoing synchronization - * attempts from distinct source replicas. - * - * Normally, u1db synchronization occurs during one POST request. In order to - * split that into many serial POST requests, we store the state of each sync - * in the server, using a document with id 'u1db_sync_state'. To identify - * each sync attempt, we use a sync_id sent by the client. If we ever receive - * a new sync_id, we trash current data for that source replica and start - * over. - * - * We expect the following in the document body: - * - * { - * 'source_replica_uid': '', - * 'sync_id': '', - * 'seen_ids': [['', ], ...], // optional - * 'changes_to_return': [ // optional - * 'gen': , - * 'trans_id': '', - * 'changes_to_return': [[', , ''], ...] 
- * ], - * } - * - * The format of the final document stored on server is: - * - * { - * '_id': '', - * '_rev' '', - * 'ongoing_syncs': { - * '': { - * 'sync_id': '', - * 'seen_ids': [['', [, ...], - * 'changes_to_return': { - * 'gen': , - * 'trans_id': '', - * 'changes_to_return': [ - * ['', , ''], - * ..., - * ], - * }, - * }, - * ... // info about other source replicas here - * } - * } - */ -function(doc, req) { - - // prevent updates to alien documents - if (doc != null && doc['_id'] != 'u1db_sync_state') - return [null, 'invalid data']; - - // create the document if it doesn't exist - if (!doc) - doc = { - '_id': 'u1db_sync_state', - 'ongoing_syncs': {}, - }; - - // parse and validate incoming data - var body = JSON.parse(req.body); - if (body['source_replica_uid'] == null) - return [null, 'invalid data']; - var source_replica_uid = body['source_replica_uid']; - - if (body['sync_id'] == null) - return [null, 'invalid data']; - var sync_id = body['sync_id']; - - // trash outdated sync data for that replica if that exists - if (doc['ongoing_syncs'][source_replica_uid] != null && - doc['ongoing_syncs'][source_replica_uid]['sync_id'] != sync_id) - delete doc['ongoing_syncs'][source_replica_uid]; - - // create an entry for that source replica - if (doc['ongoing_syncs'][source_replica_uid] == null) - doc['ongoing_syncs'][source_replica_uid] = { - 'sync_id': sync_id, - 'seen_ids': {}, - 'changes_to_return': null, - }; - - // incoming meta-data values should be exclusive, so we count how many - // arrived and deny to accomplish the transaction if the count is high. - var incoming_values = 0; - var info = doc['ongoing_syncs'][source_replica_uid] - - // add incoming seen id - if ('seen_id' in body) { - info['seen_ids'][body['seen_id'][0]] = body['seen_id'][1]; - incoming_values += 1; - } - - // add incoming changes_to_return - if ('changes_to_return' in body) { - info['changes_to_return'] = body['changes_to_return']; - incoming_values += 1; - } - - if (incoming_values != 1) - return [null, 'invalid data']; - - return [doc, 'ok']; -} - diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js deleted file mode 100644 index 94b7e767..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js +++ /dev/null @@ -1,20 +0,0 @@ -function(doc) { - if (doc['_id'] == 'u1db_sync_state' && doc['ongoing_syncs'] != null) - for (var source_replica_uid in doc['ongoing_syncs']) { - var changes = doc['ongoing_syncs'][source_replica_uid]['changes_to_return']; - var sync_id = doc['ongoing_syncs'][source_replica_uid]['sync_id']; - if (changes == null) - emit([source_replica_uid, sync_id, 0], null); - else if (changes.length == 0) - emit([source_replica_uid, sync_id, 0], []); - else - for (var i = 0; i < changes['changes_to_return'].length; i++) - emit( - [source_replica_uid, sync_id, i], - { - 'gen': changes['gen'], - 'trans_id': changes['trans_id'], - 'next_change_to_return': changes['changes_to_return'][i], - }); - } -} diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js deleted file mode 100644 index 16118e88..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js +++ /dev/null @@ -1,11 +0,0 @@ -function(doc) { - if (doc['_id'] == 'u1db_sync_state' && doc['ongoing_syncs'] != null) - for (var source_replica_uid in doc['ongoing_syncs']) { - var sync_id = 
doc['ongoing_syncs'][source_replica_uid]['sync_id']; - emit( - [source_replica_uid, sync_id], - { - 'seen_ids': doc['ongoing_syncs'][source_replica_uid]['seen_ids'], - }); - } -} diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/state/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/state/map.js deleted file mode 100644 index e88c6ebb..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/views/state/map.js +++ /dev/null @@ -1,17 +0,0 @@ -function(doc) { - if (doc['_id'] == 'u1db_sync_state' && doc['ongoing_syncs'] != null) - for (var source_replica_uid in doc['ongoing_syncs']) { - var changes = doc['ongoing_syncs'][source_replica_uid]['changes_to_return']; - var sync_id = doc['ongoing_syncs'][source_replica_uid]['sync_id']; - if (changes == null) - emit([source_replica_uid, sync_id], null); - else - emit( - [source_replica_uid, sync_id], - { - 'gen': changes['gen'], - 'trans_id': changes['trans_id'], - 'number_of_changes': changes['changes_to_return'].length - }); - } -} diff --git a/common/src/leap/soledad/common/ddocs/transactions/lists/generation.js b/common/src/leap/soledad/common/ddocs/transactions/lists/generation.js deleted file mode 100644 index dbdfff0d..00000000 --- a/common/src/leap/soledad/common/ddocs/transactions/lists/generation.js +++ /dev/null @@ -1,20 +0,0 @@ -function(head, req) { - var row; - var rows=[]; - // fetch all rows - while(row = getRow()) { - rows.push(row); - } - if (rows.length > 0) - send(JSON.stringify({ - "generation": rows.length, - "doc_id": rows[rows.length-1]['id'], - "transaction_id": rows[rows.length-1]['value'] - })); - else - send(JSON.stringify({ - "generation": 0, - "doc_id": "", - "transaction_id": "", - })); -} diff --git a/common/src/leap/soledad/common/ddocs/transactions/lists/trans_id_for_gen.js b/common/src/leap/soledad/common/ddocs/transactions/lists/trans_id_for_gen.js deleted file mode 100644 index 2ec91794..00000000 --- a/common/src/leap/soledad/common/ddocs/transactions/lists/trans_id_for_gen.js +++ /dev/null @@ -1,19 +0,0 @@ -function(head, req) { - var row; - var rows=[]; - var i = 1; - var gen = 1; - if (req.query.gen) - gen = parseInt(req.query['gen']); - // fetch all rows - while(row = getRow()) - rows.push(row); - if (gen <= rows.length) - send(JSON.stringify({ - "generation": gen, - "doc_id": rows[gen-1]['id'], - "transaction_id": rows[gen-1]['value'], - })); - else - send('{}'); -} diff --git a/common/src/leap/soledad/common/ddocs/transactions/lists/whats_changed.js b/common/src/leap/soledad/common/ddocs/transactions/lists/whats_changed.js deleted file mode 100644 index b35cdf51..00000000 --- a/common/src/leap/soledad/common/ddocs/transactions/lists/whats_changed.js +++ /dev/null @@ -1,22 +0,0 @@ -function(head, req) { - var row; - var gen = 1; - var old_gen = 0; - if (req.query.old_gen) - old_gen = parseInt(req.query['old_gen']); - send('{"transactions":[\n'); - // fetch all rows - while(row = getRow()) { - if (gen > old_gen) { - if (gen > old_gen+1) - send(',\n'); - send(JSON.stringify({ - "generation": gen, - "doc_id": row["id"], - "transaction_id": row["value"] - })); - } - gen++; - } - send('\n]}'); -} diff --git a/common/src/leap/soledad/common/ddocs/transactions/views/log/map.js b/common/src/leap/soledad/common/ddocs/transactions/views/log/map.js deleted file mode 100644 index 94ef63ca..00000000 --- a/common/src/leap/soledad/common/ddocs/transactions/views/log/map.js +++ /dev/null @@ -1,7 +0,0 @@ -function(doc) { - if (doc.u1db_transactions) - doc.u1db_transactions.forEach(function(t) { - 
emit(t[0], // use timestamp as key so the results are ordered - t[1]); // value is the transaction_id - }); -} -- cgit v1.2.3 From 8596668c4bb38251da8891e8fd7a23bcf972c21a Mon Sep 17 00:00:00 2001 From: drebs Date: Sat, 23 Jul 2016 17:21:58 +0200 Subject: [bug] fix order of multipart serialization when writing to couch The couch backend makes use of attachments and multipart structure for writing the document to the couch database. For that to work, the order in which attachments are described must match the actual order in which attachments are written to the couch http stream. This was not being properly taken care of, and eventually the json serializer was arbitrarilly ordering the attachments description in a way that it didn't match the actual order of attachments writing. This commit fixes that by using json.dumps() sort_keys parameter and making sure conflicts are always written before content. --- common/src/leap/soledad/common/couch/__init__.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index ca0c2855..ba3192a4 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -714,7 +714,18 @@ class CouchDatabase(object): if not self.batching: buf = StringIO() envelope = MultipartWriter(buf) - envelope.add('application/json', json.dumps(couch_doc)) + # the order in which attachments are described inside the + # serialization of the couch document must match the order in which + # they are actually written in the multipart structure. Because of + # that, we use `sorted_keys=True` in the json serialization (so + # "u1db_conflicts" comes before "u1db_content" on the couch + # document attachments description), and also reverse the order of + # the parts before writing them, so the "conflict" part is written + # before the "content" part. 
+ envelope.add( + 'application/json', + json.dumps(couch_doc, sort_keys=True)) + parts.reverse() for part in parts: envelope.add('application/octet-stream', part) envelope.close() -- cgit v1.2.3 From 1893611393a3ec69d8099a8601fb21262e5f36f4 Mon Sep 17 00:00:00 2001 From: drebs Date: Sun, 24 Jul 2016 07:02:39 -0300 Subject: [feat] use a lock for updating couch gen data --- common/src/leap/soledad/common/couch/__init__.py | 38 +++++++++++++++--------- 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index ba3192a4..effb2be2 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -27,10 +27,12 @@ import time import functools +from collections import defaultdict from StringIO import StringIO from urlparse import urljoin from contextlib import contextmanager from multiprocessing.pool import ThreadPool +from threading import Lock from couchdb.client import Server, Database @@ -113,6 +115,8 @@ class CouchDatabase(object): CONFIG_DOC_ID = '_local/config' SYNC_DOC_ID_PREFIX = '_local/sync_' + _put_doc_lock = defaultdict(Lock) + @classmethod def open_database(cls, url, create, ensure_ddocs=False, replica_uid=None, database_security=None): @@ -675,6 +679,7 @@ class CouchDatabase(object): 'length': len(content), } parts.append(content) + # save conflicts as attachment if doc.has_conflicts is True: conflicts = json.dumps( @@ -686,21 +691,26 @@ class CouchDatabase(object): 'length': len(conflicts), } parts.append(conflicts) + # add the gen document - while True: # TODO: add a lock, remove this while - try: - gen, _ = self.get_generation_info() - new_gen = gen + 1 - gen_doc = { - '_id': _get_gen_doc_id(new_gen), - 'gen': new_gen, - 'doc_id': doc.doc_id, - 'trans_id': transaction_id, - } - self._database.save(gen_doc) - break - except ResourceConflict: - pass + + # TODO: in u1db protocol, the increment of database generation should + # be made in the same atomic transaction as the actual document save, + # otherwise the same document might be concurrently updated by + # concurrent syncs from other replicas. A simple lock based on the uuid + # and doc_id would be enough to prevent that, if all entry points to + # database update are made through the soledad api. + with self._put_doc_lock[self._database.name]: + gen, _ = self.get_generation_info() + new_gen = gen + 1 + gen_doc = { + '_id': _get_gen_doc_id(new_gen), + 'gen': new_gen, + 'doc_id': doc.doc_id, + 'trans_id': transaction_id, + } + self._database.save(gen_doc) + # build the couch document couch_doc = { '_id': doc.doc_id, -- cgit v1.2.3 From 72ee56e3863729b148cf8cc16e4004dc7b52acdd Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 28 Jul 2016 12:38:10 -0300 Subject: [feat] standardize metadata storage in couch backend. 
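A minimal sketch of the metadata layout that results from this change (key names and document ids follow the constants introduced in the diff below; the replica uid values are made up for illustration):

    CONFIG_DOC_ID = '_local/config'
    SYNC_DOC_ID_PREFIX = '_local/sync_'

    # per-database configuration document
    config_doc = {
        '_id': CONFIG_DOC_ID,
        'replica_uid': 'local-replica-uid',   # hypothetical value
        'schema_version': 1,
    }

    # one document per remote replica this database has synced with
    sync_doc = {
        '_id': SYNC_DOC_ID_PREFIX + 'other-replica-uid',
        'gen': 0,                             # last generation seen for that replica
        'replica_uid': 'other-replica-uid',
        'trans_id': '',                       # last transaction id seen for that replica
    }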
--- common/src/leap/soledad/common/couch/__init__.py | 55 +++++++++++++++--------- 1 file changed, 34 insertions(+), 21 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index effb2be2..032f230b 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -105,6 +105,17 @@ def _get_gen_doc_id(gen): return 'gen-%s' % str(gen).zfill(10) +GENERATION_KEY = 'gen' +TRANSACTION_ID_KEY = 'trans_id' +REPLICA_UID_KEY = 'replica_uid' +DOC_ID_KEY = 'doc_id' +SCHEMA_VERSION_KEY = 'schema_version' + +CONFIG_DOC_ID = '_local/config' +SYNC_DOC_ID_PREFIX = '_local/sync_' +SCHEMA_VERSION = 1 + + class CouchDatabase(object): """ Holds CouchDB related code. @@ -112,9 +123,6 @@ class CouchDatabase(object): CouchDB details from backend code. """ - CONFIG_DOC_ID = '_local/config' - SYNC_DOC_ID_PREFIX = '_local/sync_' - _put_doc_lock = defaultdict(Lock) @classmethod @@ -251,13 +259,14 @@ class CouchDatabase(object): """ try: # set on existent config document - doc = self._database[self.CONFIG_DOC_ID] - doc['replica_uid'] = replica_uid + doc = self._database[CONFIG_DOC_ID] + doc[REPLICA_UID_KEY] = replica_uid except ResourceNotFound: # or create the config document doc = { - '_id': self.CONFIG_DOC_ID, - 'replica_uid': replica_uid, + '_id': CONFIG_DOC_ID, + REPLICA_UID_KEY: replica_uid, + SCHEMA_VERSION_KEY: SCHEMA_VERSION, } self._database.save(doc) @@ -270,8 +279,8 @@ class CouchDatabase(object): """ try: # grab replica_uid from server - doc = self._database[self.CONFIG_DOC_ID] - replica_uid = doc['replica_uid'] + doc = self._database[CONFIG_DOC_ID] + replica_uid = doc[REPLICA_UID_KEY] return replica_uid except ResourceNotFound: # create a unique replica_uid @@ -484,17 +493,18 @@ class CouchDatabase(object): synchronized with the replica, this is (0, ''). :rtype: (int, str) """ - doc_id = '%s%s' % (self.SYNC_DOC_ID_PREFIX, other_replica_uid) + doc_id = '%s%s' % (SYNC_DOC_ID_PREFIX, other_replica_uid) try: doc = self._database[doc_id] except ResourceNotFound: doc = { '_id': doc_id, - 'generation': 0, - 'transaction_id': '', + GENERATION_KEY: 0, + REPLICA_UID_KEY: str(other_replica_uid), + TRANSACTION_ID_KEY: '', } self._database.save(doc) - gen, trans_id = doc['generation'], doc['transaction_id'] + gen, trans_id = doc[GENERATION_KEY], doc[TRANSACTION_ID_KEY] return gen, trans_id def get_doc_conflicts(self, doc_id, couch_rev=None): @@ -546,13 +556,13 @@ class CouchDatabase(object): generation. 
:type other_transaction_id: str """ - doc_id = '%s%s' % (self.SYNC_DOC_ID_PREFIX, other_replica_uid) + doc_id = '%s%s' % (SYNC_DOC_ID_PREFIX, other_replica_uid) try: doc = self._database[doc_id] except ResourceNotFound: doc = {'_id': doc_id} - doc['generation'] = other_generation - doc['transaction_id'] = other_transaction_id + doc[GENERATION_KEY] = other_generation + doc[TRANSACTION_ID_KEY] = other_transaction_id self._database.save(doc) def get_transaction_log(self): @@ -586,7 +596,10 @@ class CouchDatabase(object): log = [] for row in rows: doc = row['doc'] - log.append((doc['gen'], doc['doc_id'], doc['trans_id'])) + log.append(( + doc[GENERATION_KEY], + doc[DOC_ID_KEY], + doc[TRANSACTION_ID_KEY])) return log def whats_changed(self, old_generation=0): @@ -632,7 +645,7 @@ class CouchDatabase(object): if not rows: return 0, '' gen_doc = rows.pop()['doc'] - return gen_doc['gen'], gen_doc['trans_id'] + return gen_doc[GENERATION_KEY], gen_doc[TRANSACTION_ID_KEY] def json_from_resource(self, doc_path, **kwargs): """ @@ -705,9 +718,9 @@ class CouchDatabase(object): new_gen = gen + 1 gen_doc = { '_id': _get_gen_doc_id(new_gen), - 'gen': new_gen, - 'doc_id': doc.doc_id, - 'trans_id': transaction_id, + GENERATION_KEY: new_gen, + DOC_ID_KEY: doc.doc_id, + TRANSACTION_ID_KEY: transaction_id, } self._database.save(gen_doc) -- cgit v1.2.3 From 1106d871e9a7e09cedac436a0488fc87af177b67 Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 29 Jul 2016 20:34:19 -0300 Subject: [bug] use couch lock to atomize saving of document --- common/src/leap/soledad/common/couch/__init__.py | 98 ++++++++++++------------ 1 file changed, 50 insertions(+), 48 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 032f230b..2d57635a 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -705,8 +705,6 @@ class CouchDatabase(object): } parts.append(conflicts) - # add the gen document - # TODO: in u1db protocol, the increment of database generation should # be made in the same atomic transaction as the actual document save, # otherwise the same document might be concurrently updated by @@ -714,6 +712,8 @@ class CouchDatabase(object): # and doc_id would be enough to prevent that, if all entry points to # database update are made through the soledad api. with self._put_doc_lock[self._database.name]: + + # add the gen document gen, _ = self.get_generation_info() new_gen = gen + 1 gen_doc = { @@ -724,52 +724,54 @@ class CouchDatabase(object): } self._database.save(gen_doc) - # build the couch document - couch_doc = { - '_id': doc.doc_id, - 'u1db_rev': doc.rev, - '_attachments': attachments, - } - # if we are updating a doc we have to add the couch doc revision - if old_doc is not None and hasattr(old_doc, 'couch_rev'): - couch_doc['_rev'] = old_doc.couch_rev - # prepare the multipart PUT - if not self.batching: - buf = StringIO() - envelope = MultipartWriter(buf) - # the order in which attachments are described inside the - # serialization of the couch document must match the order in which - # they are actually written in the multipart structure. Because of - # that, we use `sorted_keys=True` in the json serialization (so - # "u1db_conflicts" comes before "u1db_content" on the couch - # document attachments description), and also reverse the order of - # the parts before writing them, so the "conflict" part is written - # before the "content" part. 
- envelope.add( - 'application/json', - json.dumps(couch_doc, sort_keys=True)) - parts.reverse() - for part in parts: - envelope.add('application/octet-stream', part) - envelope.close() - # try to save and fail if there's a revision conflict - try: - resource = self._new_resource() - resource.put_json( - doc.doc_id, body=str(buf.getvalue()), - headers=envelope.headers) - except ResourceConflict: - raise RevisionConflict() - else: - for name, attachment in attachments.items(): - del attachment['follows'] - del attachment['length'] - index = 0 if name is 'u1db_content' else 1 - attachment['data'] = binascii.b2a_base64(parts[index]).strip() - couch_doc['_attachments'] = attachments - self.batch_docs[doc.doc_id] = couch_doc - last_gen, last_trans_id = self.batch_generation - self.batch_generation = (last_gen + 1, transaction_id) + # build the couch document + couch_doc = { + '_id': doc.doc_id, + 'u1db_rev': doc.rev, + '_attachments': attachments, + } + # if we are updating a doc we have to add the couch doc revision + if old_doc is not None and hasattr(old_doc, 'couch_rev'): + couch_doc['_rev'] = old_doc.couch_rev + # prepare the multipart PUT + if not self.batching: + buf = StringIO() + envelope = MultipartWriter(buf) + # the order in which attachments are described inside the + # serialization of the couch document must match the order in + # which they are actually written in the multipart structure. + # Because of that, we use `sorted_keys=True` in the json + # serialization (so "u1db_conflicts" comes before + # "u1db_content" on the couch document attachments + # description), and also reverse the order of the parts before + # writing them, so the "conflict" part is written before the + # "content" part. + envelope.add( + 'application/json', + json.dumps(couch_doc, sort_keys=True)) + parts.reverse() + for part in parts: + envelope.add('application/octet-stream', part) + envelope.close() + # try to save and fail if there's a revision conflict + try: + resource = self._new_resource() + resource.put_json( + doc.doc_id, body=str(buf.getvalue()), + headers=envelope.headers) + except ResourceConflict: + raise RevisionConflict() + else: + for name, attachment in attachments.items(): + del attachment['follows'] + del attachment['length'] + index = 0 if name is 'u1db_content' else 1 + attachment['data'] = binascii.b2a_base64( + parts[index]).strip() + couch_doc['_attachments'] = attachments + self.batch_docs[doc.doc_id] = couch_doc + last_gen, last_trans_id = self.batch_generation + self.batch_generation = (last_gen + 1, transaction_id) def _new_resource(self, *path): """ -- cgit v1.2.3 From 027d0b5f40944973807e1a4fc497c496e78b3eeb Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 29 Jul 2016 20:36:59 -0300 Subject: [refactor] simplify couch whats_changed calculation --- common/src/leap/soledad/common/couch/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 2d57635a..9edbe380 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -623,12 +623,10 @@ class CouchDatabase(object): cur_generation, last_trans_id = self.get_generation_info() relevant_tail = self._get_transaction_log(start=old_generation + 1) seen = set() - generation = cur_generation - for _, doc_id, trans_id in reversed(relevant_tail): + for generation, doc_id, trans_id in reversed(relevant_tail): if doc_id not in 
seen: changes.append((doc_id, generation, trans_id)) seen.add(doc_id) - generation -= 1 changes.reverse() return (cur_generation, last_trans_id, changes) -- cgit v1.2.3 From 3b237bb46743a93feed4bb6f3c839d72fc28df48 Mon Sep 17 00:00:00 2001 From: drebs Date: Sun, 31 Jul 2016 10:37:35 -0300 Subject: [feat] use couch _all_docs for get_docs() and get_all_docs() The previous solution would make use of concurrent get's to couch backend in a pool of threads to implement the get_docs() and get_all_docs() CouchDatabase backend methods. This commit replaces those by a simpler implementation use the `_all_docs` couchdb view api. It passes all needed IDs to the view and r etrieves all documents with content in the same request. A comparison between both implementations shows an improvement of at least 15 times for large number of documents. The table below shows the time for different implementations of get_all_docs() for different number of documents and threads versus _all_docs implementation: +-------+-----------------+------------------+-------------+ | | threads | _all_docs | improvement | +-------+-----------------+------------------+-------------+ | 10 | 0.0728030204773 | 0.00782012939453 | 9.3 | | 100 | 0.609349966049 | 0.0377721786499 | 16.1 | | 1000 | 5.86522197723 | 0.370730876923 | 15.8 | | 10000 | 66.1713931561 | 3.61764383316 | 18.3 | +-------+-----------------+------------------+-------------+ --- common/src/leap/soledad/common/couch/__init__.py | 59 +++++++++++++----------- 1 file changed, 33 insertions(+), 26 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 9edbe380..d0c1a7ba 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -23,15 +23,12 @@ import json import re import uuid import binascii -import time -import functools from collections import defaultdict from StringIO import StringIO from urlparse import urljoin from contextlib import contextmanager -from multiprocessing.pool import ThreadPool from threading import Lock @@ -98,9 +95,6 @@ def couch_server(url): yield server -THREAD_POOL = ThreadPool(20) - - def _get_gen_doc_id(gen): return 'gen-%s' % str(gen).zfill(10) @@ -307,8 +301,8 @@ class CouchDatabase(object): """ generation, _ = self.get_generation_info() - results = list(self.get_docs(self._database, - include_deleted=include_deleted)) + results = list( + self._get_docs(None, True, include_deleted)) return (generation, results) def get_docs(self, doc_ids, check_for_conflicts=True, @@ -329,24 +323,37 @@ class CouchDatabase(object): in matching doc_ids order. :rtype: iterable """ - # Workaround for: - # - # http://bugs.python.org/issue7980 - # https://leap.se/code/issues/5449 - # - # python-couchdb uses time.strptime, which is not thread safe. In - # order to avoid the problem described on the issues above, we preload - # strptime here by evaluating the conversion of an arbitrary date. - # This will not be needed when/if we switch from python-couchdb to - # paisley. 
- time.strptime('Mar 8 1917', '%b %d %Y') - get_one = functools.partial( - self.get_doc, check_for_conflicts=check_for_conflicts) - docs = [THREAD_POOL.apply_async(get_one, [doc_id]) - for doc_id in doc_ids] - for doc in docs: - doc = doc.get() - if not doc or not include_deleted and doc.is_tombstone(): + return self._get_docs(doc_ids, check_for_conflicts, include_deleted) + + def _get_docs(self, doc_ids, check_for_conflicts, include_deleted): + """ + Use couch's `_all_docs` view to get the documents indicated in + `doc_ids`, + + :param doc_ids: A list of document identifiers or None for all. + :type doc_ids: list + :param check_for_conflicts: If set to False, then the conflict check + will be skipped, and 'None' will be + returned instead of True/False. + :type check_for_conflicts: bool + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted + documents will not be included in the results. + + :return: iterable giving the Document object for each document id + in matching doc_ids order. + :rtype: iterable + """ + params = {'include_docs': 'true', 'attachments': 'true'} + if doc_ids is not None: + params['keys'] = doc_ids + view = self._database.view("_all_docs", **params) + for row in view.rows: + result = row['doc'] + doc = self.__parse_doc_from_couch( + result, result['_id'], check_for_conflicts=check_for_conflicts) + # filter out non-u1db or deleted documents + if not doc or (not include_deleted and doc.is_tombstone()): continue yield doc -- cgit v1.2.3 From 49cd07b909f2185b116bda5b30cfcfe0095291e0 Mon Sep 17 00:00:00 2001 From: drebs Date: Sun, 31 Jul 2016 18:25:48 -0300 Subject: [bug] retry allocation of gen instead of using a lock The use of a lock to allocate the next generation of a change in couch backend suffers from at least 2 problems: 1. all modification to the couch database would have to be made through a soledad server entrypoint, otherwise the lock would have no effect. 2. introducing a lock makes code uglier, harder to debug, and prone to undesired blocks. The solution implemented by this commit is not so elegant, but works for what we need right now. Now, concurrent threads updating the couch database will race for the allocation of a new generation, and retry when they fail to do so. There's no high risk of getting blocked for too much time in the while loop because (1) there's always one thread that wins (what makes the expected number of retries to be N/2 if N is the number of concurrent threads), and (2) the number of concurrent attempts to update the user database is limited by the number of devices syncing at the same time. --- common/src/leap/soledad/common/couch/__init__.py | 166 +++++++++++++---------- 1 file changed, 95 insertions(+), 71 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index d0c1a7ba..06c94c27 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -25,11 +25,9 @@ import uuid import binascii -from collections import defaultdict from StringIO import StringIO from urlparse import urljoin from contextlib import contextmanager -from threading import Lock from couchdb.client import Server, Database @@ -117,8 +115,6 @@ class CouchDatabase(object): CouchDB details from backend code. 
""" - _put_doc_lock = defaultdict(Lock) - @classmethod def open_database(cls, url, create, ensure_ddocs=False, replica_uid=None, database_security=None): @@ -670,6 +666,51 @@ class CouchDatabase(object): _, _, data = resource.get_json(**kwargs) return data + def _allocate_new_generation(self, doc_id, transaction_id): + """ + Allocate a new generation number for a document modification. + + We need to allocate a new generation to this document modification by + creating a new gen doc. In order to avoid concurrent database updates + from allocating the same new generation, we will try to create the + document until we succeed, meaning that no other piece of code holds + the same generation number as ours. + + The loop below would only be executed more than once if: + + 1. there's more than one thread trying to modify the user's database, + and + + 2. the execution of getting the current generation and saving the gen + doc different threads get interleaved (one of them will succeed + and the others will fail and try again). + + Number 1 only happens when more than one user device is syncing at the + same time. Number 2 depends on not-so-frequent coincidence of + code execution. + + Also, in the race between threads for a generation number there's + always one thread that wins. so if there are N threads in the race, the + expected number of repetitions of the loop for each thread would be + N/2. If N is equal to the number of devices that the user has, the + number of possible repetitions of the loop should always be low. + """ + while True: + try: + # add the gen document + gen, _ = self.get_generation_info() + new_gen = gen + 1 + gen_doc = { + '_id': _get_gen_doc_id(new_gen), + GENERATION_KEY: new_gen, + DOC_ID_KEY: doc_id, + TRANSACTION_ID_KEY: transaction_id, + } + self._database.save(gen_doc) + break # succeeded allocating a new generation, proceed + except ResourceConflict: + pass # try again! + def save_document(self, old_doc, doc, transaction_id): """ Put the document in the Couch backend database. @@ -710,73 +751,56 @@ class CouchDatabase(object): } parts.append(conflicts) - # TODO: in u1db protocol, the increment of database generation should - # be made in the same atomic transaction as the actual document save, - # otherwise the same document might be concurrently updated by - # concurrent syncs from other replicas. A simple lock based on the uuid - # and doc_id would be enough to prevent that, if all entry points to - # database update are made through the soledad api. - with self._put_doc_lock[self._database.name]: - - # add the gen document - gen, _ = self.get_generation_info() - new_gen = gen + 1 - gen_doc = { - '_id': _get_gen_doc_id(new_gen), - GENERATION_KEY: new_gen, - DOC_ID_KEY: doc.doc_id, - TRANSACTION_ID_KEY: transaction_id, - } - self._database.save(gen_doc) - - # build the couch document - couch_doc = { - '_id': doc.doc_id, - 'u1db_rev': doc.rev, - '_attachments': attachments, - } - # if we are updating a doc we have to add the couch doc revision - if old_doc is not None and hasattr(old_doc, 'couch_rev'): - couch_doc['_rev'] = old_doc.couch_rev - # prepare the multipart PUT - if not self.batching: - buf = StringIO() - envelope = MultipartWriter(buf) - # the order in which attachments are described inside the - # serialization of the couch document must match the order in - # which they are actually written in the multipart structure. 
- # Because of that, we use `sorted_keys=True` in the json - # serialization (so "u1db_conflicts" comes before - # "u1db_content" on the couch document attachments - # description), and also reverse the order of the parts before - # writing them, so the "conflict" part is written before the - # "content" part. - envelope.add( - 'application/json', - json.dumps(couch_doc, sort_keys=True)) - parts.reverse() - for part in parts: - envelope.add('application/octet-stream', part) - envelope.close() - # try to save and fail if there's a revision conflict - try: - resource = self._new_resource() - resource.put_json( - doc.doc_id, body=str(buf.getvalue()), - headers=envelope.headers) - except ResourceConflict: - raise RevisionConflict() - else: - for name, attachment in attachments.items(): - del attachment['follows'] - del attachment['length'] - index = 0 if name is 'u1db_content' else 1 - attachment['data'] = binascii.b2a_base64( - parts[index]).strip() - couch_doc['_attachments'] = attachments - self.batch_docs[doc.doc_id] = couch_doc - last_gen, last_trans_id = self.batch_generation - self.batch_generation = (last_gen + 1, transaction_id) + self._allocate_new_generation(doc.doc_id, transaction_id) + + # build the couch document + couch_doc = { + '_id': doc.doc_id, + 'u1db_rev': doc.rev, + '_attachments': attachments, + } + # if we are updating a doc we have to add the couch doc revision + if old_doc is not None and hasattr(old_doc, 'couch_rev'): + couch_doc['_rev'] = old_doc.couch_rev + # prepare the multipart PUT + if not self.batching: + buf = StringIO() + envelope = MultipartWriter(buf) + # the order in which attachments are described inside the + # serialization of the couch document must match the order in + # which they are actually written in the multipart structure. + # Because of that, we use `sorted_keys=True` in the json + # serialization (so "u1db_conflicts" comes before + # "u1db_content" on the couch document attachments + # description), and also reverse the order of the parts before + # writing them, so the "conflict" part is written before the + # "content" part. + envelope.add( + 'application/json', + json.dumps(couch_doc, sort_keys=True)) + parts.reverse() + for part in parts: + envelope.add('application/octet-stream', part) + envelope.close() + # try to save and fail if there's a revision conflict + try: + resource = self._new_resource() + resource.put_json( + doc.doc_id, body=str(buf.getvalue()), + headers=envelope.headers) + except ResourceConflict: + raise RevisionConflict() + else: + for name, attachment in attachments.items(): + del attachment['follows'] + del attachment['length'] + index = 0 if name is 'u1db_content' else 1 + attachment['data'] = binascii.b2a_base64( + parts[index]).strip() + couch_doc['_attachments'] = attachments + self.batch_docs[doc.doc_id] = couch_doc + last_gen, last_trans_id = self.batch_generation + self.batch_generation = (last_gen + 1, transaction_id) def _new_resource(self, *path): """ -- cgit v1.2.3 From 18e4ffe5562be61efe9cd206494b9853e063a897 Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 5 Aug 2016 12:34:13 -0300 Subject: [bug] create gen document after saving the actual document in couch If we create the gen document before saving the actual document in couch, we may run into problems if more than one client is syncing and trying to save documents with the same id at the same time. 
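As a rough illustration of the reordering (a toy sketch only; the names
below are made up for this example and are not the actual backend code),
the losing writer now fails on the couch revision check before any
generation number is consumed:

    # Toy in-memory stand-in, just to show the save-then-allocate ordering.
    class RevisionConflict(Exception):
        pass

    class FakeBackend(object):

        def __init__(self):
            self.docs = {}      # doc_id -> (couch_rev, content)
            self.gen_log = []   # one entry per allocated generation

        def _put_doc(self, doc_id, old_rev, content):
            cur_rev = self.docs.get(doc_id, (0, None))[0]
            if old_rev != cur_rev:
                # a concurrent writer won the race: fail *before* touching
                # the transaction log
                raise RevisionConflict()
            self.docs[doc_id] = (cur_rev + 1, content)

        def _allocate_new_generation(self, doc_id, transaction_id):
            new_gen = len(self.gen_log) + 1
            self.gen_log.append((new_gen, doc_id, transaction_id))

        def save_document(self, doc_id, old_rev, content, transaction_id):
            # save the actual document first; only a successful save goes
            # on to allocate a new generation document
            self._put_doc(doc_id, old_rev, content)
            self._allocate_new_generation(doc_id, transaction_id)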
By moving the gen document creation to after the actual document save in couch, we rely on couch/u1db resolution of conflicts before actually allocating a new generation, and the problem above doesn't occur. --- common/src/leap/soledad/common/couch/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 06c94c27..be30210c 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -751,8 +751,6 @@ class CouchDatabase(object): } parts.append(conflicts) - self._allocate_new_generation(doc.doc_id, transaction_id) - # build the couch document couch_doc = { '_id': doc.doc_id, @@ -802,6 +800,8 @@ class CouchDatabase(object): last_gen, last_trans_id = self.batch_generation self.batch_generation = (last_gen + 1, transaction_id) + self._allocate_new_generation(doc.doc_id, transaction_id) + def _new_resource(self, *path): """ Return a new resource for accessing a couch database. -- cgit v1.2.3 From dc0bae8b6025a060297b55520674cd7238f0186b Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 17 Aug 2016 23:00:34 -0300 Subject: [bug] remove misleading ensure_ddoc ensure_ddoc doesnt make sense anymore as we dont have any ddoc other than _security, which has its own method for setting. 'ensure_security' is explicit and is set internally when user is creating a database, otherwise it will be False as it's only used during creation. This isn't exposed externally (of couch module) to avoid confusion. This confusion was making create-user-db fail to create a security ddoc as it wasn't passing ensure_ddocs=True. -- Resolves: #8388 --- common/src/leap/soledad/common/couch/__init__.py | 15 ++++++--------- common/src/leap/soledad/common/couch/state.py | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index be30210c..0f4102db 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -116,7 +116,7 @@ class CouchDatabase(object): """ @classmethod - def open_database(cls, url, create, ensure_ddocs=False, replica_uid=None, + def open_database(cls, url, create, replica_uid=None, database_security=None): """ Open a U1DB database using CouchDB as backend. @@ -127,8 +127,6 @@ class CouchDatabase(object): :type create: bool :param replica_uid: an optional unique replica identifier :type replica_uid: str - :param ensure_ddocs: Ensure that the design docs exist on server. - :type ensure_ddocs: bool :param database_security: security rules as CouchDB security doc :type database_security: dict @@ -149,21 +147,20 @@ class CouchDatabase(object): server.create(dbname) else: raise DatabaseDoesNotExist() - db = cls(url, - dbname, ensure_ddocs=ensure_ddocs, + db = cls(url, dbname, ensure_security=create, database_security=database_security) return SoledadBackend( db, replica_uid=replica_uid) - def __init__(self, url, dbname, ensure_ddocs=True, + def __init__(self, url, dbname, ensure_security=False, database_security=None): """ :param url: Couch server URL with necessary credentials :type url: string :param dbname: Couch database name :type dbname: string - :param ensure_ddocs: Ensure that the design docs exist on server. 
- :type ensure_ddocs: bool + :param ensure_security: will PUT a _security ddoc if set + :type ensure_security: bool :param database_security: security rules as CouchDB security doc :type database_security: dict """ @@ -174,7 +171,7 @@ class CouchDatabase(object): self.batching = False self.batch_generation = None self.batch_docs = {} - if ensure_ddocs: + if ensure_security: self.ensure_security_ddoc(database_security) def batch_start(self): diff --git a/common/src/leap/soledad/common/couch/state.py b/common/src/leap/soledad/common/couch/state.py index 9b40a264..9ff9fe55 100644 --- a/common/src/leap/soledad/common/couch/state.py +++ b/common/src/leap/soledad/common/couch/state.py @@ -80,7 +80,7 @@ class CouchServerState(ServerState): :rtype: SoledadBackend """ url = urljoin(self.couch_url, dbname) - db = CouchDatabase.open_database(url, create=False, ensure_ddocs=False) + db = CouchDatabase.open_database(url, create=False) return db def ensure_database(self, dbname): -- cgit v1.2.3 From 0690eb5338953dadc68c53b8d4010ca40adc3b0b Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 25 Aug 2016 20:25:30 -0300 Subject: [pkg] remove leftover simplejson imports from l2db --- common/src/leap/soledad/common/l2db/__init__.py | 5 +---- common/src/leap/soledad/common/l2db/backends/__init__.py | 5 +---- common/src/leap/soledad/common/l2db/backends/inmemory.py | 5 +---- common/src/leap/soledad/common/l2db/backends/sqlite_backend.py | 9 +++------ .../src/leap/soledad/common/l2db/remote/basic_auth_middleware.py | 6 ++---- common/src/leap/soledad/common/l2db/remote/http_app.py | 5 +---- common/src/leap/soledad/common/l2db/remote/http_client.py | 5 +---- common/src/leap/soledad/common/l2db/remote/http_database.py | 5 +---- common/src/leap/soledad/common/l2db/remote/http_target.py | 5 +---- 9 files changed, 12 insertions(+), 38 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/l2db/__init__.py b/common/src/leap/soledad/common/l2db/__init__.py index c0bd15fe..568897c4 100644 --- a/common/src/leap/soledad/common/l2db/__init__.py +++ b/common/src/leap/soledad/common/l2db/__init__.py @@ -16,10 +16,7 @@ """L2DB""" -try: - import simplejson as json -except ImportError: - import json # noqa +import json from leap.soledad.common.l2db.errors import InvalidJSON, InvalidContent diff --git a/common/src/leap/soledad/common/l2db/backends/__init__.py b/common/src/leap/soledad/common/l2db/backends/__init__.py index 922daafd..c731c3d3 100644 --- a/common/src/leap/soledad/common/l2db/backends/__init__.py +++ b/common/src/leap/soledad/common/l2db/backends/__init__.py @@ -17,10 +17,7 @@ """Abstract classes and common implementations for the backends.""" import re -try: - import simplejson as json -except ImportError: - import json # noqa +import json import uuid from leap.soledad.common import l2db diff --git a/common/src/leap/soledad/common/l2db/backends/inmemory.py b/common/src/leap/soledad/common/l2db/backends/inmemory.py index 06a934a6..6fd251af 100644 --- a/common/src/leap/soledad/common/l2db/backends/inmemory.py +++ b/common/src/leap/soledad/common/l2db/backends/inmemory.py @@ -16,10 +16,7 @@ """The in-memory Database class for U1DB.""" -try: - import simplejson as json -except ImportError: - import json # noqa +import json from leap.soledad.common.l2db import ( Document, errors, diff --git a/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py b/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py index ba273039..d73c0d16 100644 --- 
a/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py +++ b/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py @@ -21,17 +21,14 @@ A L2DB implementation that uses SQLite as its persistence layer. import errno import os -try: - import simplejson as json -except ImportError: - import json # noqa -from sqlite3 import dbapi2 +import json import sys import time import uuid - import pkg_resources +from sqlite3 import dbapi2 + from leap.soledad.common.l2db.backends import CommonBackend, CommonSyncTarget from leap.soledad.common.l2db import ( Document, errors, diff --git a/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py b/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py index a2cbff62..96d0d872 100644 --- a/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py +++ b/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py @@ -15,10 +15,8 @@ # along with u1db. If not, see . """U1DB Basic Auth authorisation WSGI middleware.""" import httplib -try: - import simplejson as json -except ImportError: - import json # noqa +import json + from wsgiref.util import shift_path_info diff --git a/common/src/leap/soledad/common/l2db/remote/http_app.py b/common/src/leap/soledad/common/l2db/remote/http_app.py index 65277bd1..5cf6645e 100644 --- a/common/src/leap/soledad/common/l2db/remote/http_app.py +++ b/common/src/leap/soledad/common/l2db/remote/http_app.py @@ -23,10 +23,7 @@ HTTP Application exposing U1DB. import functools import httplib import inspect -try: - import simplejson as json -except ImportError: - import json # noqa +import json import sys import urlparse diff --git a/common/src/leap/soledad/common/l2db/remote/http_client.py b/common/src/leap/soledad/common/l2db/remote/http_client.py index a65264b6..53363c0a 100644 --- a/common/src/leap/soledad/common/l2db/remote/http_client.py +++ b/common/src/leap/soledad/common/l2db/remote/http_client.py @@ -17,10 +17,7 @@ """Base class to make requests to a remote HTTP server.""" import httplib -try: - import simplejson as json -except ImportError: - import json # noqa +import json import socket import ssl import sys diff --git a/common/src/leap/soledad/common/l2db/remote/http_database.py b/common/src/leap/soledad/common/l2db/remote/http_database.py index b2b48dee..7512379f 100644 --- a/common/src/leap/soledad/common/l2db/remote/http_database.py +++ b/common/src/leap/soledad/common/l2db/remote/http_database.py @@ -16,10 +16,7 @@ """HTTPDatabase to access a remote db over the HTTP API.""" -try: - import simplejson as json -except ImportError: - import json # noqa +import json import uuid from leap.soledad.common.l2db import ( diff --git a/common/src/leap/soledad/common/l2db/remote/http_target.py b/common/src/leap/soledad/common/l2db/remote/http_target.py index 7e7f366f..38804f01 100644 --- a/common/src/leap/soledad/common/l2db/remote/http_target.py +++ b/common/src/leap/soledad/common/l2db/remote/http_target.py @@ -16,10 +16,7 @@ """SyncTarget API implementation to a remote HTTP server.""" -try: - import simplejson as json -except ImportError: - import json # noqa +import json from leap.soledad.common.l2db import Document, SyncTarget from leap.soledad.common.l2db.errors import BrokenSyncStream -- cgit v1.2.3 From b7340a962bfeae9af28c4b514d0eb077f41dd832 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 22 Sep 2016 14:44:46 -0300 Subject: [feat] centralize logging and use twisted.logger by default --- common/src/leap/soledad/common/couch/state.py | 4 +-- 
common/src/leap/soledad/common/log.py | 45 +++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 common/src/leap/soledad/common/log.py (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/state.py b/common/src/leap/soledad/common/couch/state.py index 9ff9fe55..e3cd1a24 100644 --- a/common/src/leap/soledad/common/couch/state.py +++ b/common/src/leap/soledad/common/couch/state.py @@ -17,12 +17,12 @@ """ Server state using CouchDatabase as backend. """ -import logging import re import time from urlparse import urljoin from hashlib import sha512 +from leap.soledad.common.log import getLogger from leap.soledad.common.couch import CouchDatabase from leap.soledad.common.couch import couch_server from leap.soledad.common.command import exec_validated_cmd @@ -30,7 +30,7 @@ from leap.soledad.common.l2db.remote.server_state import ServerState from leap.soledad.common.l2db.errors import Unauthorized -logger = logging.getLogger(__name__) +logger = getLogger(__name__) def is_db_name_valid(name): diff --git a/common/src/leap/soledad/common/log.py b/common/src/leap/soledad/common/log.py new file mode 100644 index 00000000..3f026045 --- /dev/null +++ b/common/src/leap/soledad/common/log.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# log.py +# Copyright (C) 2016 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +This module centralizes logging facilities and allows for different behaviours, +as using the python logging module instead of twisted logger, and to print logs +to stdout, mainly for development purposes. +""" + + +import os +import sys + +from twisted.logger import Logger +from twisted.logger import textFileLogObserver + + +def getLogger(*args, **kwargs): + + if os.environ.get('SOLEDAD_USE_PYTHON_LOGGING'): + import logging + return logging.getLogger(__name__) + + if os.environ.get('SOLEDAD_LOG_TO_STDOUT'): + kwargs({'observer': textFileLogObserver(sys.stdout)}) + + return Logger(*args, **kwargs) + + +__all__ = ['getLogger'] -- cgit v1.2.3 From 887fd917a19654aa6a7c6c54be3f22c3b6c79b92 Mon Sep 17 00:00:00 2001 From: drebs Date: Sat, 24 Sep 2016 15:57:54 -0300 Subject: [test] add flake8 code check and generalize name of tox env --- common/src/leap/soledad/common/l2db/remote/server_state.py | 2 -- common/src/leap/soledad/common/l2db/sync.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/l2db/remote/server_state.py b/common/src/leap/soledad/common/l2db/remote/server_state.py index f131e09e..e20b4679 100644 --- a/common/src/leap/soledad/common/l2db/remote/server_state.py +++ b/common/src/leap/soledad/common/l2db/remote/server_state.py @@ -15,8 +15,6 @@ # along with u1db. If not, see . 
"""State for servers exposing a set of U1DB databases.""" -import os -import errno class ServerState(object): diff --git a/common/src/leap/soledad/common/l2db/sync.py b/common/src/leap/soledad/common/l2db/sync.py index c612629f..5e9b22f4 100644 --- a/common/src/leap/soledad/common/l2db/sync.py +++ b/common/src/leap/soledad/common/l2db/sync.py @@ -126,8 +126,8 @@ class Synchronizer(object): target_last_known_gen, target_last_known_trans_id = 0, '' else: target_last_known_gen, target_last_known_trans_id = ( - self.source._get_replica_gen_and_trans_id( # nopep8 - self.target_replica_uid)) + self.source._get_replica_gen_and_trans_id( # nopep8 + self.target_replica_uid)) if not changes and target_last_known_gen == target_gen: if target_trans_id != target_last_known_trans_id: raise errors.InvalidTransactionId -- cgit v1.2.3 From 4e06eb370b99f2d343e96f774a3ad9b8b77c9548 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 3 Oct 2016 19:27:42 -0300 Subject: [feature] check for user dbs couch schema versions --- common/src/leap/soledad/common/couch/state.py | 34 ++++++++++++++++++++++++++- common/src/leap/soledad/common/errors.py | 13 +++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/state.py b/common/src/leap/soledad/common/couch/state.py index e3cd1a24..1d045a9d 100644 --- a/common/src/leap/soledad/common/couch/state.py +++ b/common/src/leap/soledad/common/couch/state.py @@ -17,6 +17,7 @@ """ Server state using CouchDatabase as backend. """ +import couchdb import re import time from urlparse import urljoin @@ -25,9 +26,14 @@ from hashlib import sha512 from leap.soledad.common.log import getLogger from leap.soledad.common.couch import CouchDatabase from leap.soledad.common.couch import couch_server +from leap.soledad.common.couch import CONFIG_DOC_ID +from leap.soledad.common.couch import SCHEMA_VERSION +from leap.soledad.common.couch import SCHEMA_VERSION_KEY from leap.soledad.common.command import exec_validated_cmd from leap.soledad.common.l2db.remote.server_state import ServerState from leap.soledad.common.l2db.errors import Unauthorized +from leap.soledad.common.errors import WrongCouchSchemaVersionError +from leap.soledad.common.errors import MissingCouchConfigDocumentError logger = getLogger(__name__) @@ -59,15 +65,41 @@ class CouchServerState(ServerState): TOKENS_TYPE_DEF = "Token" TOKENS_USER_ID_KEY = "user_id" - def __init__(self, couch_url, create_cmd=None): + def __init__(self, couch_url, create_cmd=None, + check_schema_versions=True): """ Initialize the couch server state. :param couch_url: The URL for the couch database. :type couch_url: str + :param check_schema_versions: Whether to check couch schema version of + user dbs. + :type check_schema_versions: bool """ self.couch_url = couch_url self.create_cmd = create_cmd + if check_schema_versions: + self._check_schema_versions() + + def _check_schema_versions(self): + """ + Check that all user databases use the correct couch schema. 
+ """ + server = couchdb.client.Server(self.couch_url) + for dbname in server: + if not dbname.startswith('user-'): + continue + db = server[dbname] + + # if there are documents, ensure that a config doc exists + config_doc = db.get(CONFIG_DOC_ID) + if config_doc: + if config_doc[SCHEMA_VERSION_KEY] != SCHEMA_VERSION: + raise WrongCouchSchemaVersionError(dbname) + else: + result = db.view('_all_docs', limit=1) + if result.total_rows != 0: + raise MissingCouchConfigDocumentError(dbname) def open_database(self, dbname): """ diff --git a/common/src/leap/soledad/common/errors.py b/common/src/leap/soledad/common/errors.py index dec871c9..d543a3de 100644 --- a/common/src/leap/soledad/common/errors.py +++ b/common/src/leap/soledad/common/errors.py @@ -77,7 +77,6 @@ http_errors.ERROR_STATUSES = set( class InvalidURLError(Exception): - """ Exception raised when Soledad encounters a malformed URL. """ @@ -90,3 +89,15 @@ class BackendNotReadyError(SoledadError): """ wire_description = "backend not ready" status = 500 + + +class WrongCouchSchemaVersionError(SoledadError): + """ + Raised in case there is a user database with wrong couch schema version. + """ + + +class MissingCouchConfigDocumentError(SoledadError): + """ + Raised if a database has documents but lacks the couch config document. + """ -- cgit v1.2.3 From 09a62dd1d6b076fcc7ac001d0b998ebb119feaad Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 5 Oct 2016 19:52:58 -0300 Subject: [tests] make check_schema_versions default to False CouchServerState is spread across test codebase and this option is intended to be used only on server startup. This commit makes it default to False and explicitly set it to True on where it's necessary. --- common/src/leap/soledad/common/couch/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/state.py b/common/src/leap/soledad/common/couch/state.py index 1d045a9d..70c5fa36 100644 --- a/common/src/leap/soledad/common/couch/state.py +++ b/common/src/leap/soledad/common/couch/state.py @@ -66,7 +66,7 @@ class CouchServerState(ServerState): TOKENS_USER_ID_KEY = "user_id" def __init__(self, couch_url, create_cmd=None, - check_schema_versions=True): + check_schema_versions=False): """ Initialize the couch server state. -- cgit v1.2.3 From ee4dc679c8ae1a87a9b5ef3b2757a3113218e4c6 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 6 Oct 2016 18:55:20 -0300 Subject: [docs] explain CouchServerState parameters create_cmd lacked an explanation and check_schema_versions lacked reasoning on why it defaults to False. --- common/src/leap/soledad/common/couch/state.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'common/src') diff --git a/common/src/leap/soledad/common/couch/state.py b/common/src/leap/soledad/common/couch/state.py index 70c5fa36..523ac0b0 100644 --- a/common/src/leap/soledad/common/couch/state.py +++ b/common/src/leap/soledad/common/couch/state.py @@ -72,8 +72,14 @@ class CouchServerState(ServerState): :param couch_url: The URL for the couch database. :type couch_url: str + :param create_cmd: Command to be executed for user db creation. It will + receive a properly sanitized parameter with user db + name and should access CouchDB with necessary + privileges, which server lacks for security reasons. + :type create_cmd: str :param check_schema_versions: Whether to check couch schema version of - user dbs. + user dbs. Set to False as this is only + intended to run once during start-up. 
:type check_schema_versions: bool """ self.couch_url = couch_url -- cgit v1.2.3
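For reference, a possible way to wire the schema check above into a server
entrypoint (a sketch under the assumption that the imports resolve as in the
patches above; the URL and the exit handling are illustrative and not part of
these commits):

    import sys

    from leap.soledad.common.couch.state import CouchServerState
    from leap.soledad.common.errors import (
        MissingCouchConfigDocumentError,
        WrongCouchSchemaVersionError,
    )

    COUCH_URL = 'http://localhost:5984'  # illustrative, replace as needed

    def make_server_state():
        # the schema check iterates over all `user-*` databases, so it is
        # opted into only once, at server start-up; other callers (e.g.
        # tests) keep the cheaper default of check_schema_versions=False
        try:
            return CouchServerState(COUCH_URL, check_schema_versions=True)
        except (WrongCouchSchemaVersionError,
                MissingCouchConfigDocumentError) as e:
            sys.exit('soledad server: bad couch schema: %s' % e)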