From 0192f5923932ce738656c5b9ec25167a1b74386a Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 30 Aug 2015 05:37:21 -0300 Subject: [pkg] add Beaker as a server dependency Soledad server will use Beaker as cache provider, starting with sync_state being in memory. --- server/pkg/requirements.pip | 1 + 1 file changed, 1 insertion(+) diff --git a/server/pkg/requirements.pip b/server/pkg/requirements.pip index d75678b2..3e1aa992 100644 --- a/server/pkg/requirements.pip +++ b/server/pkg/requirements.pip @@ -4,6 +4,7 @@ u1db routes PyOpenSSL twisted +Beaker # XXX -- fix me! # oauth is not strictly needed by us, but we need it until u1db adds it to its -- cgit v1.2.3 From a10ae06bb33c6caaaf3a8474cc6d7b01c33abc9f Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 30 Aug 2015 05:17:20 -0300 Subject: [feat] first draft of sync_state in memory This commit changes sync_state to be in memory, with all tests passing. The memory variable for now is a dict with each key composed by source_replica_uid and sync_id, replicating CouchDB implementation. Next steps includes migrating this to Beaker and refactor/clean up code. Changed the module's INFO dict to use Beaker's caching and adapted methods to get and save from it. Still needs refactoring, all tests passes. Beaker is now using memory as default; It is configurable, but we aren't opening the possibility of config now for security. We need to check what can be misconfigured first. We are not sure if beaker will be the definitive solution for server side caching. This change isolates it with more granularity. In order to replace it, just change get_cache_for to return the proper caching object using another implementation. This caching object is supposed to behave as a dict. --- server/src/leap/soledad/server/caching.py | 32 ++++++ server/src/leap/soledad/server/state.py | 143 ++++++++++++++++++++++++ server/src/leap/soledad/server/sync.py | 173 +----------------------------- 3 files changed, 177 insertions(+), 171 deletions(-) create mode 100644 server/src/leap/soledad/server/caching.py create mode 100644 server/src/leap/soledad/server/state.py diff --git a/server/src/leap/soledad/server/caching.py b/server/src/leap/soledad/server/caching.py new file mode 100644 index 00000000..cd5d8dd4 --- /dev/null +++ b/server/src/leap/soledad/server/caching.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# caching.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Server side caching. Using beaker for now. +""" +from beaker.cache import CacheManager + + +def setup_caching(): + _cache_manager = CacheManager(type='memory') + return _cache_manager + + +_cache_manager = setup_caching() + + +def get_cache_for(key): + return _cache_manager.get_cache(key) diff --git a/server/src/leap/soledad/server/state.py b/server/src/leap/soledad/server/state.py new file mode 100644 index 00000000..d1225170 --- /dev/null +++ b/server/src/leap/soledad/server/state.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +# state.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Server side synchronization infrastructure. +""" +from leap.soledad.server import caching + + +class ServerSyncState(object): + """ + The state of one sync session, as stored on backend server. + + On server side, the ongoing syncs metadata is maintained in a caching layer. + """ + + def __init__(self, source_replica_uid, sync_id): + """ + Initialize the sync state object. + + :param sync_id: The id of current sync + :type sync_id: str + :param source_replica_uid: The source replica uid + :type source_replica_uid: str + """ + self._source_replica_uid = source_replica_uid + self._sync_id = sync_id + self._storage = caching.get_cache_for(source_replica_uid + sync_id) + + def _put_dict_info(self, key, value): + """ + Put some information about the sync state. + + :param key: The key for the info to be put. + :type key: str + :param value: The value for the info to be put. + :type value: str + """ + if key not in self._storage: + self._storage[key] = [] + info_list = self._storage.get(key) + info_list.append(value) + self._storage[key] = info_list + + def put_seen_id(self, seen_id, gen): + """ + Put one seen id on the sync state. + + :param seen_id: The doc_id of a document seen during sync. + :type seen_id: str + :param gen: The corresponding db generation for that document. + :type gen: int + """ + self._put_dict_info( + 'seen_id', + (seen_id, gen)) + + def seen_ids(self): + """ + Return all document ids seen during the sync. + + :return: A dict with doc ids seen during the sync. + :rtype: dict + """ + if 'seen_id' in self._storage: + seen_ids = self._storage.get('seen_id') + else: + seen_ids = [] + return dict(seen_ids) + + def put_changes_to_return(self, gen, trans_id, changes_to_return): + """ + Put the calculated changes to return in the backend sync state. + + :param gen: The target database generation that will be synced. + :type gen: int + :param trans_id: The target database transaction id that will be + synced. + :type trans_id: str + :param changes_to_return: A list of tuples with the changes to be + returned during the sync process. + :type changes_to_return: list + """ + self._put_dict_info( + 'changes_to_return', + { + 'gen': gen, + 'trans_id': trans_id, + 'changes_to_return': changes_to_return, + } + ) + + def sync_info(self): + """ + Return information about the current sync state. + + :return: The generation and transaction id of the target database + which will be synced, and the number of documents to return, + or a tuple of Nones if those have not already been sent to + server. + :rtype: tuple + """ + info = self._storage.get('changes_to_return') if 'changes_to_return' in self._storage else None + gen = None + trans_id = None + number_of_changes = None + if info: + info = info[0] + gen = info['gen'] + trans_id = info['trans_id'] + number_of_changes = len(info['changes_to_return']) + return gen, trans_id, number_of_changes + + def next_change_to_return(self, received): + """ + Return the next change to be returned to the source syncing replica. + + :param received: How many documents the source replica has already + received during the current sync process. + :type received: int + """ + info = self._storage.get('changes_to_return') if 'changes_to_return' in self._storage else None + gen = trans_id = next_change_to_return = None + if info: + info = info[0] + gen = info['gen'] + trans_id = info['trans_id'] + if received < len(info['changes_to_return']): + next_change_to_return = tuple(info['changes_to_return'][received]) + return gen, trans_id, next_change_to_return diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 18c4ee40..64f7e4e7 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -17,183 +17,15 @@ """ Server side synchronization infrastructure. """ -import json - -from leap.soledad.common.couch import CouchDatabase from u1db import sync, Document from u1db.remote import http_app +from leap.soledad.server.state import ServerSyncState MAX_REQUEST_SIZE = 200 # in Mb MAX_ENTRY_SIZE = 200 # in Mb -class ServerSyncState(object): - """ - The state of one sync session, as stored on backend server. - - This object performes queries to distinct design documents: - - _design/syncs/_update/state - _design/syncs/_view/state - _design/syncs/_view/seen_ids - _design/syncs/_view/changes_to_return - - On server side, the ongoing syncs metadata is maintained in a document - called 'u1db_sync_state'. - """ - - def __init__(self, db, source_replica_uid, sync_id): - """ - Initialize the sync state object. - - :param db: The target syncing database. - :type db: CouchDatabase. - :param source_replica_uid: CouchDatabase - :type source_replica_uid: str - """ - self._db = db - self._source_replica_uid = source_replica_uid - self._sync_id = sync_id - - def _key(self, key): - """ - Format a key to be used on couch views. - - :param key: The lookup key. - :type key: json serializable object - - :return: The properly formatted key. - :rtype: str - """ - return json.dumps(key, separators=(',', ':')) - - def _put_info(self, key, value): - """ - Put some information on the sync state document. - - This method works in conjunction with the - _design/syncs/_update/state update handler couch backend. - - :param key: The key for the info to be put. - :type key: str - :param value: The value for the info to be put. - :type value: str - """ - ddoc_path = [ - '_design', 'syncs', '_update', 'state', - 'u1db_sync_state'] - res = self._db._database.resource(*ddoc_path) - with CouchDatabase.sync_info_lock[self._db.replica_uid]: - res.put_json( - body={ - 'sync_id': self._sync_id, - 'source_replica_uid': self._source_replica_uid, - key: value, - }, - headers={'content-type': 'application/json'}) - - def put_seen_id(self, seen_id, gen): - """ - Put one seen id on the sync state document. - - :param seen_id: The doc_id of a document seen during sync. - :type seen_id: str - :param gen: The corresponding db generation for that document. - :type gen: int - """ - self._put_info( - 'seen_id', - [seen_id, gen]) - - def seen_ids(self): - """ - Return all document ids seen during the sync. - - :return: A list with doc ids seen during the sync. - :rtype: list - """ - ddoc_path = ['_design', 'syncs', '_view', 'seen_ids'] - resource = self._db._database.resource(*ddoc_path) - response = resource.get_json( - key=self._key([self._source_replica_uid, self._sync_id])) - data = response[2] - if data['rows']: - entry = data['rows'].pop() - return entry['value']['seen_ids'] - return [] - - def put_changes_to_return(self, gen, trans_id, changes_to_return): - """ - Put the calculated changes to return in the backend sync state - document. - - :param gen: The target database generation that will be synced. - :type gen: int - :param trans_id: The target database transaction id that will be - synced. - :type trans_id: str - :param changes_to_return: A list of tuples with the changes to be - returned during the sync process. - :type changes_to_return: list - """ - self._put_info( - 'changes_to_return', - { - 'gen': gen, - 'trans_id': trans_id, - 'changes_to_return': changes_to_return, - } - ) - - def sync_info(self): - """ - Return information about the current sync state. - - :return: The generation and transaction id of the target database - which will be synced, and the number of documents to return, - or a tuple of Nones if those have not already been sent to - server. - :rtype: tuple - """ - ddoc_path = ['_design', 'syncs', '_view', 'state'] - resource = self._db._database.resource(*ddoc_path) - response = resource.get_json( - key=self._key([self._source_replica_uid, self._sync_id])) - data = response[2] - gen = None - trans_id = None - number_of_changes = None - if data['rows'] and data['rows'][0]['value'] is not None: - value = data['rows'][0]['value'] - gen = value['gen'] - trans_id = value['trans_id'] - number_of_changes = value['number_of_changes'] - return gen, trans_id, number_of_changes - - def next_change_to_return(self, received): - """ - Return the next change to be returned to the source syncing replica. - - :param received: How many documents the source replica has already - received during the current sync process. - :type received: int - """ - ddoc_path = ['_design', 'syncs', '_view', 'changes_to_return'] - resource = self._db._database.resource(*ddoc_path) - response = resource.get_json( - key=self._key( - [self._source_replica_uid, self._sync_id, received])) - data = response[2] - if not data['rows']: - return None, None, None - value = data['rows'][0]['value'] - gen = value['gen'] - trans_id = value['trans_id'] - next_change_to_return = value['next_change_to_return'] - return gen, trans_id, tuple(next_change_to_return) - - class SyncExchange(sync.SyncExchange): def __init__(self, db, source_replica_uid, last_known_generation, sync_id): @@ -216,8 +48,7 @@ class SyncExchange(sync.SyncExchange): self.new_trans_id = None self._trace_hook = None # recover sync state - self._sync_state = ServerSyncState( - self._db, self.source_replica_uid, sync_id) + self._sync_state = ServerSyncState(self.source_replica_uid, sync_id) def find_changes_to_return(self, received): """ -- cgit v1.2.3 From 92b86079d34da5252b826d32afabafde84dab1af Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 4 Sep 2015 17:30:13 -0300 Subject: [refactor] couch.py should use superclass methods _put_doc_if_newer is implemented on CommonBackend already. This was copied over to CouchBackend just to add ensure conflicts. We can do this before calling the super method instead. --- common/src/leap/soledad/common/couch.py | 91 +++++---------------------------- 1 file changed, 13 insertions(+), 78 deletions(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 1c762036..771db69e 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -103,6 +103,7 @@ class CouchDocument(SoledadDocument): SoledadDocument.__init__(self, doc_id, rev, json, has_conflicts) self.couch_rev = None self.transactions = None + self._conflicts = None def get_conflicts(self): """ @@ -111,7 +112,7 @@ class CouchDocument(SoledadDocument): :return: The conflicted versions of the document. :rtype: [CouchDocument] """ - return self._conflicts + return self._conflicts or [] def set_conflicts(self, conflicts): """ @@ -1203,10 +1204,10 @@ class CouchDatabase(CommonBackend): :param doc: The document to be put. :type doc: CouchDocument """ - my_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) - doc.prune_conflicts( - vectorclock.VectorClockRev(doc.rev), self._replica_uid) - doc.add_conflict(my_doc) + my_doc = self._get_doc(doc.doc_id) + self._prune_conflicts(doc, vectorclock.VectorClockRev(doc.rev)) + doc.add_conflict(self._factory(doc.doc_id, my_doc.rev, my_doc.get_json())) + doc.has_conflicts = True self._put_doc(my_doc, doc) def resolve_doc(self, doc, conflicted_doc_revs): @@ -1320,30 +1321,14 @@ class CouchDatabase(CommonBackend): """ if not isinstance(doc, CouchDocument): doc = self._factory(doc.doc_id, doc.rev, doc.get_json()) - self._save_source_info(replica_uid, replica_gen, - replica_trans_id, number_of_docs, - doc_idx, sync_id) my_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) - if my_doc is not None: - my_doc.set_conflicts( - self.get_doc_conflicts(my_doc.doc_id, my_doc.couch_rev)) - state, save_doc = _process_incoming_doc( - my_doc, doc, save_conflict, self.replica_uid) - if save_doc: - self._put_doc(my_doc, save_doc) - doc.update(save_doc) - return state, self._get_generation() - - def _save_source_info(self, replica_uid, replica_gen, replica_trans_id, - number_of_docs, doc_idx, sync_id): - """ - Validate and save source information. - """ - self._validate_source(replica_uid, replica_gen, replica_trans_id) - self._set_replica_gen_and_trans_id( - replica_uid, replica_gen, replica_trans_id, - number_of_docs=number_of_docs, doc_idx=doc_idx, - sync_id=sync_id) + if my_doc: + doc.set_conflicts(my_doc.get_conflicts()) + return CommonBackend._put_doc_if_newer(self, doc, save_conflict, replica_uid, + replica_gen, replica_trans_id) + + def _put_and_update_indexes(self, cur_doc, doc): + self._put_doc(cur_doc, doc) def get_docs(self, doc_ids, check_for_conflicts=True, include_deleted=False): @@ -1494,53 +1479,3 @@ class CouchServerState(ServerState): delete databases. """ raise Unauthorized() - - -def _process_incoming_doc(my_doc, other_doc, save_conflict, replica_uid): - """ - Check document, save and return state. - """ - # at this point, `doc` has arrived from the other syncing party, and - # we will decide what to do with it. - # First, we prepare the arriving doc to update couch database. - new_doc = CouchDocument( - other_doc.doc_id, other_doc.rev, other_doc.get_json()) - if my_doc is None: - return 'inserted', new_doc - new_doc.couch_rev = my_doc.couch_rev - new_doc.set_conflicts(my_doc.get_conflicts()) - # fetch conflicts because we will eventually manipulate them - # from now on, it works just like u1db sqlite backend - doc_vcr = vectorclock.VectorClockRev(new_doc.rev) - cur_vcr = vectorclock.VectorClockRev(my_doc.rev) - if doc_vcr.is_newer(cur_vcr): - rev = new_doc.rev - new_doc.prune_conflicts(doc_vcr, replica_uid) - if new_doc.rev != rev: - # conflicts have been autoresolved - return 'superseded', new_doc - else: - return'inserted', new_doc - elif new_doc.rev == my_doc.rev: - # magical convergence - return 'converged', None - elif cur_vcr.is_newer(doc_vcr): - # Don't add this to seen_ids, because we have something newer, - # so we should send it back, and we should not generate a - # conflict - other_doc.update(new_doc) - return 'superseded', None - elif my_doc.same_content_as(new_doc): - # the documents have been edited to the same thing at both ends - doc_vcr.maximize(cur_vcr) - doc_vcr.increment(replica_uid) - new_doc.rev = doc_vcr.as_str() - return 'superseded', new_doc - else: - if save_conflict: - new_doc.prune_conflicts( - vectorclock.VectorClockRev(new_doc.rev), replica_uid) - new_doc.add_conflict(my_doc) - return 'conflicted', new_doc - other_doc.update(new_doc) - return 'conflicted', None -- cgit v1.2.3 From d39b408e92bb2fa6a2dac3835e2f5209d2eee94c Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 4 Sep 2015 19:37:32 -0300 Subject: [refactor] uses ThreadPool instead of own implementation Python has a native ThreadPool implementation that fits our needs. Changing it to use this instead and making some calls simpler. --- common/src/leap/soledad/common/couch.py | 99 ++++++--------------------------- 1 file changed, 18 insertions(+), 81 deletions(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 771db69e..421fbac1 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -26,13 +26,12 @@ import logging import binascii import time import sys -import threading from StringIO import StringIO -from collections import defaultdict from urlparse import urljoin from contextlib import contextmanager +from multiprocessing.pool import ThreadPool from couchdb.client import Server, Database @@ -362,57 +361,15 @@ def couch_server(url): yield server +THREAD_POOL = ThreadPool(20) + + class CouchDatabase(CommonBackend): """ A U1DB implementation that uses CouchDB as its persistence layer. """ - # We spawn threads to parallelize the CouchDatabase.get_docs() method - MAX_GET_DOCS_THREADS = 20 - - update_handler_lock = defaultdict(threading.Lock) - sync_info_lock = defaultdict(threading.Lock) - - class _GetDocThread(threading.Thread): - - """ - A thread that gets a document from a database. - - TODO: switch this for a twisted deferred to thread. This depends on - replacing python-couchdb for paisley in this module. - """ - - def __init__(self, db, doc_id, check_for_conflicts, - release_fun): - """ - :param db: The database from where to get the document. - :type db: CouchDatabase - :param doc_id: The doc_id of the document to be retrieved. - :type doc_id: str - :param check_for_conflicts: Whether the get_doc() method should - check for existing conflicts. - :type check_for_conflicts: bool - :param release_fun: A function that releases a semaphore, to be - called after the document is fetched. - :type release_fun: function - """ - threading.Thread.__init__(self) - self._db = db - self._doc_id = doc_id - self._check_for_conflicts = check_for_conflicts - self._release_fun = release_fun - self._doc = None - - def run(self): - """ - Fetch the document, store it as a property, and call the release - function. - """ - self._doc = self._db._get_doc( - self._doc_id, self._check_for_conflicts) - self._release_fun() - @classmethod def open_database(cls, url, create, replica_uid=None, ensure_ddocs=False): """ @@ -477,9 +434,6 @@ class CouchDatabase(CommonBackend): self._set_replica_uid(replica_uid) if ensure_ddocs: self.ensure_ddocs_on_db() - # initialize a thread pool for parallelizing get_docs() - self._sem_pool = threading.BoundedSemaphore( - value=self.MAX_GET_DOCS_THREADS) def ensure_ddocs_on_db(self): """ @@ -735,6 +689,9 @@ class CouchDatabase(CommonBackend): attachments=True)[2] except ResourceNotFound: return None + return self.__parse_doc_from_couch(result, doc_id, check_for_conflicts) + + def __parse_doc_from_couch(self, result, doc_id, check_for_conflicts=False): # restrict to u1db documents if 'u1db_rev' not in result: return None @@ -799,11 +756,8 @@ class CouchDatabase(CommonBackend): """ generation = self._get_generation() - results = [] - for row in self._database.view('_all_docs'): - doc = self.get_doc(row.id, include_deleted=include_deleted) - if doc is not None: - results.append(doc) + results = list(self.get_docs(self._database, + include_deleted=include_deleted)) return (generation, results) def _put_doc(self, old_doc, doc): @@ -1335,12 +1289,12 @@ class CouchDatabase(CommonBackend): """ Get the JSON content for many documents. - :param doc_ids: A list of document identifiers. + :param doc_ids: A list of document identifiers or None for all. :type doc_ids: list :param check_for_conflicts: If set to False, then the conflict check will be skipped, and 'None' will be returned instead of True/False. - :type check_for_conflictsa: bool + :type check_for_conflicts: bool :param include_deleted: If set to True, deleted documents will be returned with empty content. Otherwise deleted documents will not be included in the results. @@ -1348,31 +1302,14 @@ class CouchDatabase(CommonBackend): in matching doc_ids order. :rtype: iterable """ - # Workaround for: - # - # http://bugs.python.org/issue7980 - # https://leap.se/code/issues/5449 - # - # python-couchdb uses time.strptime, which is not thread safe. In - # order to avoid the problem described on the issues above, we preload - # strptime here by evaluating the conversion of an arbitrary date. - # This will not be needed when/if we switch from python-couchdb to - # paisley. - time.strptime('Mar 8 1917', '%b %d %Y') - # spawn threads to retrieve docs - threads = [] - for doc_id in doc_ids: - self._sem_pool.acquire() - t = self._GetDocThread(self, doc_id, check_for_conflicts, - self._sem_pool.release) - t.start() - threads.append(t) - # join threads and yield docs - for t in threads: - t.join() - if t._doc.is_tombstone() and not include_deleted: + get_one = lambda doc_id: self._get_doc(doc_id, check_for_conflicts) + docs = [THREAD_POOL.apply_async(get_one, [doc_id]) + for doc_id in doc_ids] + for doc in docs: + doc = doc.get() + if not doc or not include_deleted and doc.is_tombstone(): continue - yield t._doc + yield doc def _prune_conflicts(self, doc, doc_vcr): """ -- cgit v1.2.3 From f9faa007bc0d5d681f85aa246ea05ec5fe35ccc5 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 6 Sep 2015 05:39:05 -0300 Subject: [feat] adds caching for other gen and trans id There are two functions in couch.py used to save and retrieve the last know gen and trans id for the syncing replica. The get function is called very often, but is only set on one point. Added a simple caching to avoid queying couch for a value that we already have. If cache is empty, it just query as usual and fills it. --- common/src/leap/soledad/common/couch.py | 11 +++++++++-- server/src/leap/soledad/server/__init__.py | 4 +++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 421fbac1..3b120cd1 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -434,6 +434,7 @@ class CouchDatabase(CommonBackend): self._set_replica_uid(replica_uid) if ensure_ddocs: self.ensure_ddocs_on_db() + self.cache = {} def ensure_ddocs_on_db(self): """ @@ -1047,6 +1048,8 @@ class CouchDatabase(CommonBackend): synchronized with the replica, this is (0, ''). :rtype: (int, str) """ + if other_replica_uid in self.cache: + return self.cache[other_replica_uid] # query a couch view result = self._database.view('syncs/log') if len(result[other_replica_uid].rows) == 0: @@ -1129,6 +1132,7 @@ class CouchDatabase(CommonBackend): design document for an yet unknown reason. """ + self.cache[other_replica_uid] = (other_generation, other_transaction_id) # query a couch update function ddoc_path = ['_design', 'syncs', '_update', 'put', 'u1db_sync_log'] res = self._database.resource(*ddoc_path) @@ -1362,7 +1366,7 @@ class CouchServerState(ServerState): Inteface of the WSGI server with the CouchDB backend. """ - def __init__(self, couch_url): + def __init__(self, couch_url, cache=None): """ Initialize the couch server state. @@ -1370,6 +1374,7 @@ class CouchServerState(ServerState): :type couch_url: str """ self.couch_url = couch_url + self.cache = cache or {} def open_database(self, dbname): """ @@ -1381,10 +1386,12 @@ class CouchServerState(ServerState): :return: The CouchDatabase object. :rtype: CouchDatabase """ - return CouchDatabase( + db = CouchDatabase( self.couch_url, dbname, ensure_ddocs=False) + db.cache = self.cache + return db def ensure_database(self, dbname): """ diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index 1b795016..58f2b0b1 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -111,6 +111,7 @@ from leap.soledad.server.sync import ( from leap.soledad.common import SHARED_DB_NAME from leap.soledad.common.couch import CouchServerState +from leap.soledad.server.caching import get_cache_for old_tsafe = tsafe @@ -303,7 +304,8 @@ def load_configuration(file_path): def application(environ, start_response): conf = load_configuration('/etc/leap/soledad-server.conf') - state = CouchServerState(conf['couch_url']) + cache = get_cache_for('replica_cache') + state = CouchServerState(conf['couch_url'], cache=cache) # WSGI application that may be used by `twistd -web` application = GzipMiddleware( SoledadTokenAuthMiddleware(SoledadApp(state))) -- cgit v1.2.3 From 14300959a12e4908df7b00281d2590e627a47ba9 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 7 Sep 2015 03:56:02 -0300 Subject: [feat] full caching for each sync_id session The CouchDB backend implementation was accessing CouchDB too many times for the same values. Those values are known inside the same sync_id, which is the id of current sync session. This commit adds caching for all redundant calls to Couch inside the same sync_id for each replica. Refactoring is still needed, but for now couch.py works normally as if caching is not present, while sync.py injects the cache as a attribute to enable it. This needs a simpler implementation. --- common/src/leap/soledad/common/couch.py | 27 +++++++++++++++++++++++---- server/src/leap/soledad/server/__init__.py | 4 +--- server/src/leap/soledad/server/sync.py | 3 +++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 3b120cd1..8c152353 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -434,7 +434,14 @@ class CouchDatabase(CommonBackend): self._set_replica_uid(replica_uid) if ensure_ddocs: self.ensure_ddocs_on_db() - self.cache = {} + self._cache = None + + @property + def cache(self): + if self._cache is not None: + return self._cache + else: + return {} def ensure_ddocs_on_db(self): """ @@ -513,7 +520,10 @@ class CouchDatabase(CommonBackend): :rtype: str """ if self._real_replica_uid is not None: + self.cache[self._url] = {'replica_uid': self._real_replica_uid} return self._real_replica_uid + if self._url in self.cache: + return self.cache[self._url]['replica_uid'] try: # grab replica_uid from server doc = self._database['u1db_config'] @@ -551,10 +561,13 @@ class CouchDatabase(CommonBackend): unknown reason. """ # query a couch list function + if self.replica_uid + '_gen' in self.cache: + return self.cache[self.replica_uid + '_gen']['generation'] ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] res = self._database.resource(*ddoc_path) try: response = res.get_json() + self.cache[self.replica_uid + '_gen'] = response[2] return response[2]['generation'] except ResourceNotFound as e: raise_missing_design_doc_error(e, ddoc_path) @@ -582,11 +595,15 @@ class CouchDatabase(CommonBackend): design document for an yet unknown reason. """ + if self.replica_uid + '_gen' in self.cache: + response = self.cache[self.replica_uid + '_gen'] + return (response['generation'], response['transaction_id']) # query a couch list function ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] res = self._database.resource(*ddoc_path) try: response = res.get_json() + self.cache[self.replica_uid + '_gen'] = response[2] return (response[2]['generation'], response[2]['transaction_id']) except ResourceNotFound as e: raise_missing_design_doc_error(e, ddoc_path) @@ -846,6 +863,10 @@ class CouchDatabase(CommonBackend): doc.doc_id, body=buf.getvalue(), headers=envelope.headers) except ResourceConflict: raise RevisionConflict() + if self.replica_uid + '_gen' in self.cache: + gen_info = self.cache[self.replica_uid + '_gen'] + gen_info['generation'] += 1 + gen_info['transaction_id'] = transactions[-1][1] def put_doc(self, doc): """ @@ -1366,7 +1387,7 @@ class CouchServerState(ServerState): Inteface of the WSGI server with the CouchDB backend. """ - def __init__(self, couch_url, cache=None): + def __init__(self, couch_url): """ Initialize the couch server state. @@ -1374,7 +1395,6 @@ class CouchServerState(ServerState): :type couch_url: str """ self.couch_url = couch_url - self.cache = cache or {} def open_database(self, dbname): """ @@ -1390,7 +1410,6 @@ class CouchServerState(ServerState): self.couch_url, dbname, ensure_ddocs=False) - db.cache = self.cache return db def ensure_database(self, dbname): diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index 58f2b0b1..1b795016 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -111,7 +111,6 @@ from leap.soledad.server.sync import ( from leap.soledad.common import SHARED_DB_NAME from leap.soledad.common.couch import CouchServerState -from leap.soledad.server.caching import get_cache_for old_tsafe = tsafe @@ -304,8 +303,7 @@ def load_configuration(file_path): def application(environ, start_response): conf = load_configuration('/etc/leap/soledad-server.conf') - cache = get_cache_for('replica_cache') - state = CouchServerState(conf['couch_url'], cache=cache) + state = CouchServerState(conf['couch_url']) # WSGI application that may be used by `twistd -web` application = GzipMiddleware( SoledadTokenAuthMiddleware(SoledadApp(state))) diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 64f7e4e7..262b6769 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -20,6 +20,7 @@ Server side synchronization infrastructure. from u1db import sync, Document from u1db.remote import http_app from leap.soledad.server.state import ServerSyncState +from leap.soledad.server.caching import get_cache_for MAX_REQUEST_SIZE = 200 # in Mb @@ -188,6 +189,8 @@ class SyncResource(http_app.SyncResource): db, self.replica_uid = self.state.ensure_database(self.dbname) else: db = self.state.open_database(self.dbname) + cache = get_cache_for('db-' + sync_id + db.replica_uid) + db._cache = cache # validate the information the client has about server replica db.validate_gen_and_trans_id( last_known_generation, last_known_trans_id) -- cgit v1.2.3 From fc5f4ae965ca48946af9f6982b2719562168131c Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 28 Aug 2015 17:25:41 -0300 Subject: [feat] use one doc per remote replica in sync log Before this change, we used a complicated update handler for storing the sync state on the couchdb backend. That update handler was implemented as an attempt to make couchdb take care of some validation for the update of the sync log during the sync exchange, mainly to allow concurrent received documents insertion during a sync. Right now we rely on the remote sending one document at a time and do not support concurrent insertions in the remote database backed by couch. Because of that, the code removed by this commit was unneeded. And more: it was a bottleneck of the sync process because we were writing to an unique file and using unnecessary couch design docs processing for that. So this commit both simplifies the storage of remote sync and removes a bottleneck of the sync process. Conflicts: common/src/leap/soledad/common/couch.py common/src/leap/soledad/common/tests/test_couch.py --- common/src/leap/soledad/common/couch.py | 63 +++------ .../leap/soledad/common/ddocs/syncs/updates/put.js | 151 --------------------- .../soledad/common/ddocs/syncs/views/log/map.js | 12 -- common/src/leap/soledad/common/tests/test_couch.py | 8 -- .../tests/test_couch_operations_atomicity.py | 53 +++----- 5 files changed, 42 insertions(+), 245 deletions(-) delete mode 100644 common/src/leap/soledad/common/ddocs/syncs/updates/put.js delete mode 100644 common/src/leap/soledad/common/ddocs/syncs/views/log/map.js diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 8c152353..83f542ab 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -1071,14 +1071,20 @@ class CouchDatabase(CommonBackend): """ if other_replica_uid in self.cache: return self.cache[other_replica_uid] - # query a couch view - result = self._database.view('syncs/log') - if len(result[other_replica_uid].rows) == 0: - return (0, '') - return ( - result[other_replica_uid].rows[0]['value']['known_generation'], - result[other_replica_uid].rows[0]['value']['known_transaction_id'] - ) + + doc_id = 'u1db_sync_%s' % other_replica_uid + try: + doc = self._database[doc_id] + except ResourceNotFound: + doc = { + '_id': doc_id, + 'generation': 0, + 'transaction_id': '', + } + self._database.save(doc) + result = doc['generation'], doc['transaction_id'] + self.cache[other_replica_uid] = result + return result def _set_replica_gen_and_trans_id(self, other_replica_uid, other_generation, other_transaction_id, @@ -1138,43 +1144,16 @@ class CouchDatabase(CommonBackend): :type doc_idx: int :param sync_id: The id of the current sync session. :type sync_id: str - - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. """ self.cache[other_replica_uid] = (other_generation, other_transaction_id) - # query a couch update function - ddoc_path = ['_design', 'syncs', '_update', 'put', 'u1db_sync_log'] - res = self._database.resource(*ddoc_path) + doc_id = 'u1db_sync_%s' % other_replica_uid try: - with CouchDatabase.update_handler_lock[self._get_replica_uid()]: - body = { - 'other_replica_uid': other_replica_uid, - 'other_generation': other_generation, - 'other_transaction_id': other_transaction_id, - } - if number_of_docs is not None: - body['number_of_docs'] = number_of_docs - if doc_idx is not None: - body['doc_idx'] = doc_idx - if sync_id is not None: - body['sync_id'] = sync_id - res.put_json( - body=body, - headers={'content-type': 'application/json'}) - except ResourceNotFound as e: - raise_missing_design_doc_error(e, ddoc_path) + doc = self._database[doc_id] + except ResourceNotFound: + doc = {'_id': doc_id} + doc['generation'] = other_generation + doc['transaction_id'] = other_transaction_id + self._database.save(doc) def _force_doc_sync_conflict(self, doc): """ diff --git a/common/src/leap/soledad/common/ddocs/syncs/updates/put.js b/common/src/leap/soledad/common/ddocs/syncs/updates/put.js deleted file mode 100644 index b0ae2de6..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/updates/put.js +++ /dev/null @@ -1,151 +0,0 @@ -/** - * The u1db_sync_log document stores both the actual sync log and a list of - * pending updates to the log, in case we receive incoming documents out of - * the correct order (i.e. if there are parallel PUTs during the sync - * process). - * - * The structure of the document is the following: - * - * { - * 'syncs': [ - * ['', , ''], - * ... - * ], - * 'pending': { - * 'other_replica_uid': { - * 'sync_id': '', - * 'log': [[, ''], ...] - * }, - * ... - * } - * } - * - * The update function below does the following: - * - * 0. If we do not receive a sync_id, we just update the 'syncs' list with - * the incoming info about the source replica state. - * - * 1. Otherwise, if the incoming sync_id differs from current stored - * sync_id, then we assume that the previous sync session for that source - * replica was interrupted and discard all pending data. - * - * 2. Then we append incoming info as pending data for that source replica - * and current sync_id, and sort the pending data by generation. - * - * 3. Then we go through pending data and find the most recent generation - * that we can use to update the actual sync log. - * - * 4. Finally, we insert the most up to date information into the sync log. - */ -function(doc, req){ - - // create the document if it doesn't exist - if (!doc) { - doc = {} - doc['_id'] = 'u1db_sync_log'; - doc['syncs'] = []; - } - - // get and validate incoming info - var body = JSON.parse(req.body); - var other_replica_uid = body['other_replica_uid']; - var other_generation = parseInt(body['other_generation']); - var other_transaction_id = body['other_transaction_id'] - var sync_id = body['sync_id']; - var number_of_docs = body['number_of_docs']; - var doc_idx = body['doc_idx']; - - // parse integers - if (number_of_docs != null) - number_of_docs = parseInt(number_of_docs); - if (doc_idx != null) - doc_idx = parseInt(doc_idx); - - if (other_replica_uid == null - || other_generation == null - || other_transaction_id == null) - return [null, 'invalid data']; - - // create slot for pending logs - if (doc['pending'] == null) - doc['pending'] = {}; - - // these are the values that will be actually inserted - var current_gen = other_generation; - var current_trans_id = other_transaction_id; - - /*------------- Wait for sequential values before storing -------------*/ - - // we just try to obtain pending log if we received a sync_id - if (sync_id != null) { - - // create slot for current source and sync_id pending log - if (doc['pending'][other_replica_uid] == null - || doc['pending'][other_replica_uid]['sync_id'] != sync_id) { - doc['pending'][other_replica_uid] = { - 'sync_id': sync_id, - 'log': [], - 'last_doc_idx': 0, - } - } - - // append incoming data to pending log - doc['pending'][other_replica_uid]['log'].push([ - other_generation, - other_transaction_id, - doc_idx, - ]) - - // sort pending log according to generation - doc['pending'][other_replica_uid]['log'].sort(function(a, b) { - return a[0] - b[0]; - }); - - // get most up-to-date information from pending log - var last_doc_idx = doc['pending'][other_replica_uid]['last_doc_idx']; - var pending_idx = doc['pending'][other_replica_uid]['log'][0][2]; - - current_gen = null; - current_trans_id = null; - - while (last_doc_idx + 1 == pending_idx) { - pending = doc['pending'][other_replica_uid]['log'].shift() - current_gen = pending[0]; - current_trans_id = pending[1]; - last_doc_idx = pending[2] - if (doc['pending'][other_replica_uid]['log'].length == 0) - break; - pending_idx = doc['pending'][other_replica_uid]['log'][0][2]; - } - - // leave the sync log untouched if we still did not receive enough docs - if (current_gen == null) - return [doc, 'ok']; - - // update last index of received doc - doc['pending'][other_replica_uid]['last_doc_idx'] = last_doc_idx; - - // eventually remove all pending data from that replica - if (last_doc_idx == number_of_docs) - delete doc['pending'][other_replica_uid] - } - - /*--------------- Store source replica info on sync log ---------------*/ - - // remove outdated info - doc['syncs'] = doc['syncs'].filter( - function (entry) { - return entry[0] != other_replica_uid; - } - ); - - // store in log - doc['syncs'].push([ - other_replica_uid, - current_gen, - current_trans_id - ]); - - return [doc, 'ok']; -} - diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/log/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/log/map.js deleted file mode 100644 index a63c7cf4..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/views/log/map.js +++ /dev/null @@ -1,12 +0,0 @@ -function(doc) { - if (doc._id == 'u1db_sync_log') { - if (doc.syncs) - doc.syncs.forEach(function (entry) { - emit(entry[0], - { - 'known_generation': entry[1], - 'known_transaction_id': entry[2] - }); - }); - } -} diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py index a08ffd16..c8d13667 100644 --- a/common/src/leap/soledad/common/tests/test_couch.py +++ b/common/src/leap/soledad/common/tests/test_couch.py @@ -1359,10 +1359,6 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): self.assertRaises( errors.MissingDesignDocError, self.db.whats_changed) - # _do_set_replica_gen_and_trans_id() - self.assertRaises( - errors.MissingDesignDocError, - self.db._do_set_replica_gen_and_trans_id, 1, 2, 3) def test_missing_design_doc_functions_raises(self): """ @@ -1489,10 +1485,6 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): self.assertRaises( errors.MissingDesignDocDeletedError, self.db.whats_changed) - # _do_set_replica_gen_and_trans_id() - self.assertRaises( - errors.MissingDesignDocDeletedError, - self.db._do_set_replica_gen_and_trans_id, 1, 2, 3) def test_ensure_ddoc_independently(self): """ diff --git a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py index 25f709ca..3e8e8cce 100644 --- a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py +++ b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py @@ -163,6 +163,7 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer): 2, len(filter(lambda t: t[0] == doc_id, transaction_log))) + @defer.inlineCallbacks def test_correct_sync_log_after_sequential_syncs(self): """ Assert that the sync_log increases accordingly with sequential syncs. @@ -170,21 +171,21 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer): sol = self._soledad_instance( auth_token='auth-token', server_url=self.getURL()) + source_replica_uid = sol._dbpool.replica_uid - def _create_docs(results): + def _create_docs(): deferreds = [] for i in xrange(0, REPEAT_TIMES): deferreds.append(sol.create_doc({})) - return defer.DeferredList(deferreds) + return defer.gatherResults(deferreds) def _assert_transaction_and_sync_logs(results, sync_idx): # assert sizes of transaction and sync logs self.assertEqual( sync_idx * REPEAT_TIMES, len(self.db._get_transaction_log())) - self.assertEqual( - 1 if sync_idx > 0 else 0, - len(self.db._database.view('syncs/log').rows)) + gen, _ = self.db._get_replica_gen_and_trans_id(source_replica_uid) + self.assertEqual(sync_idx * REPEAT_TIMES, gen) def _assert_sync(results, sync_idx): gen, docs = results @@ -193,40 +194,28 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer): # assert sizes of transaction and sync logs self.assertEqual((sync_idx + 1) * REPEAT_TIMES, len(self.db._get_transaction_log())) - sync_log_rows = self.db._database.view('syncs/log').rows - sync_log = sync_log_rows[0].value - replica_uid = sync_log_rows[0].key - known_gen = sync_log['known_generation'] - known_trans_id = sync_log['known_transaction_id'] - # assert sync_log has exactly 1 row - self.assertEqual(1, len(sync_log_rows)) - # assert it has the correct replica_uid, gen and trans_id - self.assertEqual(sol._dbpool.replica_uid, replica_uid) + target_known_gen, target_known_trans_id = \ + self.db._get_replica_gen_and_trans_id(source_replica_uid) + # assert it has the correct gen and trans_id conn_key = sol._dbpool._u1dbconnections.keys().pop() conn = sol._dbpool._u1dbconnections[conn_key] sol_gen, sol_trans_id = conn._get_generation_info() - self.assertEqual(sol_gen, known_gen) - self.assertEqual(sol_trans_id, known_trans_id) - - # create some documents - d = _create_docs(None) + self.assertEqual(sol_gen, target_known_gen) + self.assertEqual(sol_trans_id, target_known_trans_id) # sync first time and assert success - d.addCallback(_assert_transaction_and_sync_logs, 0) - d.addCallback(lambda _: sol.sync()) - d.addCallback(lambda _: sol.get_all_docs()) - d.addCallback(_assert_sync, 0) + results = yield _create_docs() + _assert_transaction_and_sync_logs(results, 0) + yield sol.sync() + results = yield sol.get_all_docs() + _assert_sync(results, 0) # create more docs, sync second time and assert success - d.addCallback(_create_docs) - d.addCallback(_assert_transaction_and_sync_logs, 1) - d.addCallback(lambda _: sol.sync()) - d.addCallback(lambda _: sol.get_all_docs()) - d.addCallback(_assert_sync, 1) - - d.addCallback(lambda _: sol.close()) - - return d + results = yield _create_docs() + _assert_transaction_and_sync_logs(results, 1) + yield sol.sync() + results = yield sol.get_all_docs() + _assert_sync(results, 1) # # Concurrency tests -- cgit v1.2.3 From 0e954f3328b7b8c31c88e0bee796230e87bca829 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 20 Sep 2015 17:47:07 -0300 Subject: [feat] adds cache expiration Now each backend object will be retrieved from cache for sync.py and values will live for 3600 by default. That is changed via parameter if needed. --- common/src/leap/soledad/common/couch.py | 2 ++ server/src/leap/soledad/server/caching.py | 4 ++-- server/src/leap/soledad/server/sync.py | 5 ++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 83f542ab..809af05d 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -527,6 +527,7 @@ class CouchDatabase(CommonBackend): try: # grab replica_uid from server doc = self._database['u1db_config'] + self.cache[self._url] = doc self._real_replica_uid = doc['replica_uid'] return self._real_replica_uid except ResourceNotFound: @@ -1154,6 +1155,7 @@ class CouchDatabase(CommonBackend): doc['generation'] = other_generation doc['transaction_id'] = other_transaction_id self._database.save(doc) + self.cache[other_replica_uid] = (other_generation, other_transaction_id) def _force_doc_sync_conflict(self, doc): """ diff --git a/server/src/leap/soledad/server/caching.py b/server/src/leap/soledad/server/caching.py index cd5d8dd4..9a049a39 100644 --- a/server/src/leap/soledad/server/caching.py +++ b/server/src/leap/soledad/server/caching.py @@ -28,5 +28,5 @@ def setup_caching(): _cache_manager = setup_caching() -def get_cache_for(key): - return _cache_manager.get_cache(key) +def get_cache_for(key, expire=3600): + return _cache_manager.get_cache(key, expire=expire) diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 262b6769..e4fd1260 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -185,12 +185,15 @@ class SyncResource(http_app.SyncResource): :type ensure: bool """ # create or open the database + cache = get_cache_for('db-' + sync_id + self.dbname) if ensure: db, self.replica_uid = self.state.ensure_database(self.dbname) + elif cache and 'instance' in cache: + db = cache['instance'] else: db = self.state.open_database(self.dbname) - cache = get_cache_for('db-' + sync_id + db.replica_uid) db._cache = cache + cache['instance'] = db # validate the information the client has about server replica db.validate_gen_and_trans_id( last_known_generation, last_known_trans_id) -- cgit v1.2.3 From cb7aa314ad4d47e9f32e9e111ec13976978ed02d Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 20 Sep 2015 19:28:22 -0300 Subject: [feat] enable delayed commits We use CouchDB with single doc read/write. Following this documentation about performance, we should get more performance by enabling couch to delay and commit later. See: http://guide.couchdb.org/draft/performance.html#single --- common/src/leap/soledad/common/couch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 809af05d..014b3f38 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -357,7 +357,7 @@ def couch_server(url): :type url: str """ session = Session(timeout=COUCH_TIMEOUT) - server = Server(url=url, session=session) + server = Server(url=url, full_commit=False, session=session) yield server -- cgit v1.2.3 From df8c794395ee695ea1dfb30603a073b32b24ee58 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 21 Sep 2015 17:19:18 -0300 Subject: [refactor] init_caching instead of setting attr As meskio found commented, setting this attribute directly is ugly, CouchDatabase now has a init_caching method for setting up cache instance. --- common/src/leap/soledad/common/couch.py | 9 +++++++++ server/src/leap/soledad/server/sync.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 014b3f38..00a17715 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -443,6 +443,15 @@ class CouchDatabase(CommonBackend): else: return {} + def init_caching(self, cache): + """ + Start using cache by setting internal _cache attribute. + + :param cache: the cache instance, anything that behaves like a dict + :type cache: dict + """ + self._cache = cache + def ensure_ddocs_on_db(self): """ Ensure that the design documents used by the backend exist on the diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index e4fd1260..619be565 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -192,7 +192,7 @@ class SyncResource(http_app.SyncResource): db = cache['instance'] else: db = self.state.open_database(self.dbname) - db._cache = cache + db.init_caching(cache) cache['instance'] = db # validate the information the client has about server replica db.validate_gen_and_trans_id( -- cgit v1.2.3 From 5b20b469f22ab99533135beefd49129db49064e5 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 23 Sep 2015 15:43:06 -0300 Subject: [style] pep8 --- common/src/leap/soledad/common/couch.py | 15 +++++++++------ server/src/leap/soledad/server/state.py | 24 +++++++++++------------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 00a17715..38041c09 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -719,7 +719,8 @@ class CouchDatabase(CommonBackend): return None return self.__parse_doc_from_couch(result, doc_id, check_for_conflicts) - def __parse_doc_from_couch(self, result, doc_id, check_for_conflicts=False): + def __parse_doc_from_couch(self, result, doc_id, + check_for_conflicts=False): # restrict to u1db documents if 'u1db_rev' not in result: return None @@ -1155,7 +1156,8 @@ class CouchDatabase(CommonBackend): :param sync_id: The id of the current sync session. :type sync_id: str """ - self.cache[other_replica_uid] = (other_generation, other_transaction_id) + self.cache[other_replica_uid] = (other_generation, + other_transaction_id) doc_id = 'u1db_sync_%s' % other_replica_uid try: doc = self._database[doc_id] @@ -1164,7 +1166,6 @@ class CouchDatabase(CommonBackend): doc['generation'] = other_generation doc['transaction_id'] = other_transaction_id self._database.save(doc) - self.cache[other_replica_uid] = (other_generation, other_transaction_id) def _force_doc_sync_conflict(self, doc): """ @@ -1175,7 +1176,8 @@ class CouchDatabase(CommonBackend): """ my_doc = self._get_doc(doc.doc_id) self._prune_conflicts(doc, vectorclock.VectorClockRev(doc.rev)) - doc.add_conflict(self._factory(doc.doc_id, my_doc.rev, my_doc.get_json())) + doc.add_conflict(self._factory(doc.doc_id, my_doc.rev, + my_doc.get_json())) doc.has_conflicts = True self._put_doc(my_doc, doc) @@ -1293,8 +1295,9 @@ class CouchDatabase(CommonBackend): my_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) if my_doc: doc.set_conflicts(my_doc.get_conflicts()) - return CommonBackend._put_doc_if_newer(self, doc, save_conflict, replica_uid, - replica_gen, replica_trans_id) + return CommonBackend._put_doc_if_newer(self, doc, save_conflict, + replica_uid, replica_gen, + replica_trans_id) def _put_and_update_indexes(self, cur_doc, doc): self._put_doc(cur_doc, doc) diff --git a/server/src/leap/soledad/server/state.py b/server/src/leap/soledad/server/state.py index d1225170..f269b77e 100644 --- a/server/src/leap/soledad/server/state.py +++ b/server/src/leap/soledad/server/state.py @@ -24,7 +24,8 @@ class ServerSyncState(object): """ The state of one sync session, as stored on backend server. - On server side, the ongoing syncs metadata is maintained in a caching layer. + On server side, the ongoing syncs metadata is maintained in + a caching layer. """ def __init__(self, source_replica_uid, sync_id): @@ -38,7 +39,8 @@ class ServerSyncState(object): """ self._source_replica_uid = source_replica_uid self._sync_id = sync_id - self._storage = caching.get_cache_for(source_replica_uid + sync_id) + caching_key = source_replica_uid + sync_id + self._storage = caching.get_cache_for(caching_key) def _put_dict_info(self, key, value): """ @@ -61,7 +63,7 @@ class ServerSyncState(object): :param seen_id: The doc_id of a document seen during sync. :type seen_id: str - :param gen: The corresponding db generation for that document. + :param gen: The corresponding db generation. :type gen: int """ self._put_dict_info( @@ -113,12 +115,9 @@ class ServerSyncState(object): server. :rtype: tuple """ - info = self._storage.get('changes_to_return') if 'changes_to_return' in self._storage else None - gen = None - trans_id = None - number_of_changes = None - if info: - info = info[0] + gen = trans_id = number_of_changes = None + if 'changes_to_return' in self._storage: + info = self._storage.get('changes_to_return')[0] gen = info['gen'] trans_id = info['trans_id'] number_of_changes = len(info['changes_to_return']) @@ -132,12 +131,11 @@ class ServerSyncState(object): received during the current sync process. :type received: int """ - info = self._storage.get('changes_to_return') if 'changes_to_return' in self._storage else None gen = trans_id = next_change_to_return = None - if info: - info = info[0] + if 'changes_to_return' in self._storage: + info = self._storage.get('changes_to_return')[0] gen = info['gen'] trans_id = info['trans_id'] if received < len(info['changes_to_return']): - next_change_to_return = tuple(info['changes_to_return'][received]) + next_change_to_return = (info['changes_to_return'][received]) return gen, trans_id, next_change_to_return -- cgit v1.2.3 From 4113fffc993b715629b484fe8f5a06d7efb9f644 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 15 Sep 2015 19:51:49 -0300 Subject: [feat] Validate and execute commands by subprocess This commit adds a way to validate and execute commands using an argument validator. Commands are executed via subprocess. --- common/src/leap/soledad/common/command.py | 54 +++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 common/src/leap/soledad/common/command.py diff --git a/common/src/leap/soledad/common/command.py b/common/src/leap/soledad/common/command.py new file mode 100644 index 00000000..978cec91 --- /dev/null +++ b/common/src/leap/soledad/common/command.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# command.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +Utility to sanitize and run shell commands. +""" + + +import subprocess + + +def exec_validated_cmd(cmd, args, validator=None): + """ + Executes cmd, validating args with validator. + + :param cmd: command. + :type dbname: str + :param args: arguments. + :type args: str + :param validator: optional function to validate args + :type validator: function + + :return: exit code and stdout or stderr (if code != 0) + :rtype: (int, str) + """ + if validator and not validator(args): + return 1, "invalid argument" + command = cmd.split(' ') + command.append(args) + try: + process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except OSError, e: + return 1, e + (out, err) = process.communicate() + code = process.wait() + if code is not 0: + return code, err + else: + return code, out -- cgit v1.2.3 From 66baa09a1760e911a6e58c87bce4f0062bd95805 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 15 Sep 2015 19:53:25 -0300 Subject: [tests] Tests for command validation and execution Checks if arguments validation occurs properly and command execution brings back status code and stdout or stderr on some scenarios. --- .../src/leap/soledad/common/tests/test_command.py | 53 ++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 common/src/leap/soledad/common/tests/test_command.py diff --git a/common/src/leap/soledad/common/tests/test_command.py b/common/src/leap/soledad/common/tests/test_command.py new file mode 100644 index 00000000..af4903eb --- /dev/null +++ b/common/src/leap/soledad/common/tests/test_command.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# test_command.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Tests for command execution using a validator function for arguments. +""" +from twisted.trial import unittest +from leap.soledad.common.command import exec_validated_cmd + + +class ExecuteValidatedCommandTest(unittest.TestCase): + + def test_argument_validation(self): + validator = lambda arg: True if arg is 'valid' else False + status, out = exec_validated_cmd("command", "invalid arg", validator) + self.assertEquals(status, 1) + self.assertEquals(out, "invalid argument") + status, out = exec_validated_cmd("echo", "valid", validator) + self.assertEquals(status, 0) + self.assertEquals(out, "valid\n") + + def test_return_status_code_success(self): + status, out = exec_validated_cmd("echo", "arg") + self.assertEquals(status, 0) + self.assertEquals(out, "arg\n") + + def test_handle_command_with_spaces(self): + status, out = exec_validated_cmd("echo I am", "an argument") + self.assertEquals(status, 0, out) + self.assertEquals(out, "I am an argument\n") + + def test_handle_oserror_on_invalid_command(self): + status, out = exec_validated_cmd("inexistent command with", "args") + self.assertEquals(status, 1) + self.assertIn("No such file or directory", out) + + def test_return_status_code_number_on_failure(self): + status, out = exec_validated_cmd("ls", "user-bebacafe") + self.assertEquals(status, 2) + self.assertIn('ls: cannot access user-bebacafe: No such file or directory\n', out) -- cgit v1.2.3 From eb6b66da6aa81ade4e61ef153ebbe8fba78cd335 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 15 Sep 2015 19:54:34 -0300 Subject: [feat] ensure_database is now able to call a cmd If CouchServerState is created with a create_cmd parameter, it can now use this parameter to invoke a command to create databases. A validator for database name is also used to ensure that command injection is not possible if user manages to manipulate database name argument. --- common/src/leap/soledad/common/couch.py | 46 +++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 38041c09..cd5185eb 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -60,6 +60,7 @@ from u1db.remote.server_state import ServerState from leap.soledad.common import ddocs, errors +from leap.soledad.common.command import exec_validated_cmd from leap.soledad.common.document import SoledadDocument @@ -1374,13 +1375,29 @@ class CouchSyncTarget(CommonSyncTarget): source_replica_transaction_id) +DB_NAME_MASK = "^user-[a-f0-9]+$" + + +def is_db_name_valid(name): + """ + Validate a user database using DB_NAME_MASK. + + :param name: database name. + :type name: str + + :return: boolean for name vailidity + :rtype: bool + """ + return re.match(DB_NAME_MASK, name) is not None + + class CouchServerState(ServerState): """ Inteface of the WSGI server with the CouchDB backend. """ - def __init__(self, couch_url): + def __init__(self, couch_url, create_cmd=None): """ Initialize the couch server state. @@ -1388,6 +1405,7 @@ class CouchServerState(ServerState): :type couch_url: str """ self.couch_url = couch_url + self.create_cmd = create_cmd def open_database(self, dbname): """ @@ -1409,20 +1427,26 @@ class CouchServerState(ServerState): """ Ensure couch database exists. - Usually, this method is used by the server to ensure the existence of - a database. In our setup, the Soledad user that accesses the underlying - couch server should never have permission to create (or delete) - databases. But, in case it ever does, by raising an exception here we - have one more guarantee that no modified client will be able to - enforce creation of a database when syncing. - :param dbname: The name of the database to ensure. :type dbname: str - :raise Unauthorized: Always, because Soledad server is not allowed to - create databases. + :raise Unauthorized: If disabled or other error was raised. + + :return: The CouchDatabase object and its replica_uid. + :rtype: (CouchDatabase, str) """ - raise Unauthorized() + if not self.create_cmd: + raise Unauthorized() + else: + code, out = exec_validated_cmd(self.create_cmd, dbname, + validator=is_db_name_valid) + if code is not 0: + logger.error(""" + Error while creating database (%s) with (%s) command. + Output: %s + Exit code: %d + """ % (dbname, self.create_cmd, out, code)) + raise Unauthorized() def delete_database(self, dbname): """ -- cgit v1.2.3 From 7591c95951e4618f7775c52340f4d170a1bdd961 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 15 Sep 2015 19:56:43 -0300 Subject: [tests] CouchServerState tests for ensure_database Tests that Unauthorized is raised in any failure scenario, leaving user blind for tips on what happened during execution. This should lower chances of information disclosure on execution failure. --- common/src/leap/soledad/common/tests/test_couch.py | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py index c8d13667..d0a9dc3c 100644 --- a/common/src/leap/soledad/common/tests/test_couch.py +++ b/common/src/leap/soledad/common/tests/test_couch.py @@ -28,6 +28,7 @@ from couchdb.client import Server from uuid import uuid4 from testscenarios import TestWithScenarios +from twisted.trial import unittest from u1db import errors as u1db_errors from u1db import SyncTarget @@ -1498,3 +1499,27 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): self.db._get_transaction_log) self.create_db(ensure=True, dbname=self.db._dbname) self.db._get_transaction_log() + + +class DatabaseNameValidationTest(unittest.TestCase): + + def test_database_name_validation(self): + self.assertFalse(couch.is_db_name_valid("user-deadbeef | cat /secret")) + self.assertTrue(couch.is_db_name_valid("user-cafe1337")) + + +class CommandBasedDBCreationTest(unittest.TestCase): + + def test_ensure_db_using_custom_command(self): + state = couch.CouchServerState("url", create_cmd="echo") + state.ensure_database("user-1337") # works + + def test_raises_unauthorized_on_failure(self): + state = couch.CouchServerState("url", create_cmd="inexistent") + self.assertRaises(u1db_errors.Unauthorized, + state.ensure_database, "user-1337") + + def test_raises_unauthorized_by_default(self): + state = couch.CouchServerState("url") + self.assertRaises(u1db_errors.Unauthorized, + state.ensure_database, "user-1337") -- cgit v1.2.3 From e76ce03ad29b5582c4763d61a5acaed1aeba87e5 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 15 Sep 2015 19:59:14 -0300 Subject: [feat] conf for enabling db creation via custom sh We can now use a custom script to create databases by setting a parameter 'create_cmd' on soledad configuration. This will set CouchServerState to use it on ensure_database. --- server/src/leap/soledad/server/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index 1b795016..bb1c6db0 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -285,6 +285,7 @@ def load_configuration(file_path): """ conf = { 'couch_url': 'http://localhost:5984', + 'create_cmd': None } config = configparser.ConfigParser() config.read(file_path) @@ -303,7 +304,7 @@ def load_configuration(file_path): def application(environ, start_response): conf = load_configuration('/etc/leap/soledad-server.conf') - state = CouchServerState(conf['couch_url']) + state = CouchServerState(conf['couch_url'], create_cmd=conf['create_cmd']) # WSGI application that may be used by `twistd -web` application = GzipMiddleware( SoledadTokenAuthMiddleware(SoledadApp(state))) -- cgit v1.2.3 From f7ff2e014e25b5c201f4e6209549518b53fc36b2 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 15 Sep 2015 20:12:01 -0300 Subject: [bug] ensure_database returns db and replica_uid ensure database needs to return a db and its replica_uid. Updated tests, doc and code to reflect that. --- common/src/leap/soledad/common/couch.py | 2 ++ common/src/leap/soledad/common/tests/test_couch.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index cd5185eb..36f6239e 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -1447,6 +1447,8 @@ class CouchServerState(ServerState): Exit code: %d """ % (dbname, self.create_cmd, out, code)) raise Unauthorized() + db = self.open_database(dbname) + return db, db.replica_uid def delete_database(self, dbname): """ diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py index d0a9dc3c..a56cea21 100644 --- a/common/src/leap/soledad/common/tests/test_couch.py +++ b/common/src/leap/soledad/common/tests/test_couch.py @@ -29,6 +29,7 @@ from uuid import uuid4 from testscenarios import TestWithScenarios from twisted.trial import unittest +from mock import Mock from u1db import errors as u1db_errors from u1db import SyncTarget @@ -1512,7 +1513,12 @@ class CommandBasedDBCreationTest(unittest.TestCase): def test_ensure_db_using_custom_command(self): state = couch.CouchServerState("url", create_cmd="echo") - state.ensure_database("user-1337") # works + mock_db = Mock() + mock_db.replica_uid = 'replica_uid' + state.open_database = Mock(return_value=mock_db) + db, replica_uid = state.ensure_database("user-1337") # works + self.assertEquals(mock_db, db) + self.assertEquals(mock_db.replica_uid, replica_uid) def test_raises_unauthorized_on_failure(self): state = couch.CouchServerState("url", create_cmd="inexistent") -- cgit v1.2.3 From b065492f35006c3d108965b2b50144e080fbe678 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 17 Sep 2015 18:30:07 -0300 Subject: [feat] script for user db creation Added a simple script for user db creation and design docs creation. It uses a netrc from /etc/couchdb/couchdb-admin.netrc and same validator used on couch.py for database names. --- server/pkg/create-user-db | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100755 server/pkg/create-user-db diff --git a/server/pkg/create-user-db b/server/pkg/create-user-db new file mode 100755 index 00000000..edcb8a82 --- /dev/null +++ b/server/pkg/create-user-db @@ -0,0 +1,42 @@ +#!/usr/bin/env python +import os +import sys +import netrc +import argparse +from leap.soledad.common.couch import CouchDatabase +from leap.soledad.common.couch import is_db_name_valid + + +description = """ +Creates a user database. +This is meant to be used by Soledad Server. +""" +parser = argparse.ArgumentParser(description=description) +parser.add_argument('dbname', metavar='user-d34db33f', type=str, + help='database name on the format user-{uuid4}') +NETRC_PATH = '/etc/couchdb/couchdb-admin.netrc' + + +def url_for_db(dbname): + if not os.path.exists(NETRC_PATH): + print ('netrc not found in %s' % NETRC_PATH) + sys.exit(1) + parsed_netrc = netrc.netrc(NETRC_PATH) + host, (login, _, password) = parsed_netrc.hosts.items()[0] + url = ('http://%(login)s:%(password)s@%(host)s:5984/%(dbname)s' % { + 'login':login, + 'password':password, + 'host':host, + 'dbname':dbname}) + return url + + +if __name__ == '__main__': + args = parser.parse_args() + if not is_db_name_valid(args.dbname): + print ("Invalid name! %s" % args.dbname) + sys.exit(1) + url = url_for_db(args.dbname) + db = CouchDatabase.open_database(url=url, create=True, + replica_uid=None, ensure_ddocs=True) + print ('success! Created %s, replica_uid: %s' % (db._dbname, db.replica_uid)) -- cgit v1.2.3 From a660f60b9644836b0dbdf54cd04b15f4d4654d0f Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 18 Sep 2015 17:30:19 -0300 Subject: [feat] ensure security document Beyond ensuring ddocs, it is also necessary to ensure _security doc presence while creating a database. This document will tell couchdb to grant access to 'soledad' user as a member role and no one as admin. --- common/src/leap/soledad/common/couch.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 36f6239e..d9ed5026 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -435,6 +435,7 @@ class CouchDatabase(CommonBackend): self._set_replica_uid(replica_uid) if ensure_ddocs: self.ensure_ddocs_on_db() + self.ensure_security() self._cache = None @property @@ -467,6 +468,16 @@ class CouchDatabase(CommonBackend): getattr(ddocs, ddoc_name))) self._database.save(ddoc) + def ensure_security(self): + """ + Make sure that only soledad user is able to access this database as + a member. + """ + security = self._database.security + security['members'] = {'names': ['soledad'], 'roles': []} + security['admins'] = {'names': [], 'roles': []} + self._database.security = security + def get_sync_target(self): """ Return a SyncTarget object, for another u1db to synchronize with. -- cgit v1.2.3 From 7b1591e3d561aa6525318235e702eebeb6708560 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Fri, 18 Sep 2015 17:31:37 -0300 Subject: [tests] tests for ensure_security As the other tests does. Make sure that a fresh database gets proper security doc after calling ensure_security method. --- common/src/leap/soledad/common/tests/test_couch.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py index a56cea21..3622bb56 100644 --- a/common/src/leap/soledad/common/tests/test_couch.py +++ b/common/src/leap/soledad/common/tests/test_couch.py @@ -1501,6 +1501,20 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): self.create_db(ensure=True, dbname=self.db._dbname) self.db._get_transaction_log() + def test_ensure_security_doc(self): + """ + Ensure_security creates a _security ddoc to ensure that only soledad + will have member access to a db. + """ + self.create_db(ensure=False) + self.assertFalse(self.db._database.security) + self.db.ensure_security() + security_ddoc = self.db._database.security + self.assertIn('admins', security_ddoc) + self.assertFalse(security_ddoc['admins']['names']) + self.assertIn('members', security_ddoc) + self.assertIn('soledad', security_ddoc['members']['names']) + class DatabaseNameValidationTest(unittest.TestCase): -- cgit v1.2.3 From c4c280e12bf70dd41183d2d2ffbba200a45d0247 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 19 Sep 2015 14:25:30 -0300 Subject: [feat] handle DatabaseDoesNotExist during sync This error raises while getting info on target (or server) replica. On previous implementation there was nothing to do here, but now that we have db creation in place this error should be handled just like u1db original implementation. The reason is that db creation occurs during sync exchange, but before this we try to ask for info and the code that checks for info raises an error that will be used to signal the client that a database will be created and that it must save the replica uid returned by server, after sync exchange (where we send/fetch documents). --- client/src/leap/soledad/client/sync.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 110baa0a..225d3e2d 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -69,9 +69,15 @@ class SoledadSynchronizer(Synchronizer): # get target identifier, its current generation, # and its last-seen database generation for this source ensure_callback = None - (self.target_replica_uid, target_gen, target_trans_id, - target_my_gen, target_my_trans_id) = yield \ - sync_target.get_sync_info(self.source._replica_uid) + try: + (self.target_replica_uid, target_gen, target_trans_id, + target_my_gen, target_my_trans_id) = yield \ + sync_target.get_sync_info(self.source._replica_uid) + except errors.DatabaseDoesNotExist: + logger.debug("Database isn't ready on server. Will be created.") + self.target_replica_uid = None + target_gen, target_trans_id = 0, '' + target_my_gen, target_my_trans_id = 0, '' logger.debug( "Soledad target sync info:\n" -- cgit v1.2.3 From de0cf00b4412e253a481ff19803bab66ffc4443e Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 24 Sep 2015 21:57:26 -0300 Subject: [refactor] kaliy's review and pep8 fixes README with information about latest change, missing docs and licenses, variable naming and pep8. --- README.rst | 33 ++++++++++++++++++++++ common/changes/create_db_cmd | 2 ++ common/src/leap/soledad/common/command.py | 17 +++++------ common/src/leap/soledad/common/couch.py | 19 +++++++------ .../src/leap/soledad/common/tests/test_command.py | 4 ++- common/src/leap/soledad/common/tests/test_couch.py | 4 +-- server/changes/create_db_cmd | 3 ++ server/pkg/create-user-db | 16 +++++++++++ 8 files changed, 79 insertions(+), 19 deletions(-) create mode 100644 common/changes/create_db_cmd create mode 100644 server/changes/create_db_cmd diff --git a/README.rst b/README.rst index b98eec06..a2755f92 100644 --- a/README.rst +++ b/README.rst @@ -55,3 +55,36 @@ to run tests in development mode you must do the following:: Note that to run CouchDB tests, be sure you have ``CouchDB`` installed on your system. + + +Privileges +----- +In order to prevent privilege escalation, Soledad should not be run as a +database administrator. This implies the following side effects: + +----------------- +Database creation: +----------------- +Can be done via a script located in ``server/pkg/create-user-db`` +It reads a netrc file that should be placed on +``/etc/couchdb/couchdb-admin.netrc``. +That file holds the admin credentials in netrc format and should be accessible +only by 'soledad-admin' user. + +The debian package will do the following in order to automate this: + +* create a user ``soledad-admin`` +* make this script available as ``create-user-db`` in ``/usr/bin`` +* grant restricted sudo access, that only enables user ``soledad`` to call this + exact command via ``soledad-admin`` user. + +The server side process, configured via ``/etc/leap/soledad-server.conf``, will +then use a parameter called 'create_cmd' to know which command is used to +allocate new databases. All steps of creation process is then handled +automatically by the server, following the same logic as u1db server. + +------------------ +Database deletion: +------------------ +No code at all handles this and privilege to do so needs to be removed as +explained before. This can be automated via a simple cron job. diff --git a/common/changes/create_db_cmd b/common/changes/create_db_cmd new file mode 100644 index 00000000..00bbdf71 --- /dev/null +++ b/common/changes/create_db_cmd @@ -0,0 +1,2 @@ + o Add a sanitized command executor for database creation and re-enable + user database creation on CouchServerState via command line. diff --git a/common/src/leap/soledad/common/command.py b/common/src/leap/soledad/common/command.py index 978cec91..811bf135 100644 --- a/common/src/leap/soledad/common/command.py +++ b/common/src/leap/soledad/common/command.py @@ -24,26 +24,27 @@ Utility to sanitize and run shell commands. import subprocess -def exec_validated_cmd(cmd, args, validator=None): +def exec_validated_cmd(cmd, argument, validator=None): """ - Executes cmd, validating args with validator. + Executes cmd, validating argument with a validator function. :param cmd: command. :type dbname: str - :param args: arguments. - :type args: str - :param validator: optional function to validate args + :param argument: argument. + :type argument: str + :param validator: optional function to validate argument :type validator: function :return: exit code and stdout or stderr (if code != 0) :rtype: (int, str) """ - if validator and not validator(args): + if validator and not validator(argument): return 1, "invalid argument" command = cmd.split(' ') - command.append(args) + command.append(argument) try: - process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + process = subprocess.Popen(command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) except OSError, e: return 1, e (out, err) = process.communicate() diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index d9ed5026..4c5f6400 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -435,7 +435,7 @@ class CouchDatabase(CommonBackend): self._set_replica_uid(replica_uid) if ensure_ddocs: self.ensure_ddocs_on_db() - self.ensure_security() + self.ensure_security_ddoc() self._cache = None @property @@ -468,10 +468,15 @@ class CouchDatabase(CommonBackend): getattr(ddocs, ddoc_name))) self._database.save(ddoc) - def ensure_security(self): + def ensure_security_ddoc(self): """ Make sure that only soledad user is able to access this database as - a member. + an unprivileged member, meaning that administration access will + be forbidden even inside an user database. + The goal is to make sure that only the lowest access level is given + to the unprivileged CouchDB user set on the server process. + This is achieved by creating a _security design document, see: + http://docs.couchdb.org/en/latest/api/database/security.html """ security = self._database.security security['members'] = {'names': ['soledad'], 'roles': []} @@ -1386,12 +1391,9 @@ class CouchSyncTarget(CommonSyncTarget): source_replica_transaction_id) -DB_NAME_MASK = "^user-[a-f0-9]+$" - - def is_db_name_valid(name): """ - Validate a user database using DB_NAME_MASK. + Validate a user database using a regular expression. :param name: database name. :type name: str @@ -1399,7 +1401,8 @@ def is_db_name_valid(name): :return: boolean for name vailidity :rtype: bool """ - return re.match(DB_NAME_MASK, name) is not None + db_name_regex = "^user-[a-f0-9]+$" + return re.match(db_name_regex, name) is not None class CouchServerState(ServerState): diff --git a/common/src/leap/soledad/common/tests/test_command.py b/common/src/leap/soledad/common/tests/test_command.py index af4903eb..420f91ae 100644 --- a/common/src/leap/soledad/common/tests/test_command.py +++ b/common/src/leap/soledad/common/tests/test_command.py @@ -50,4 +50,6 @@ class ExecuteValidatedCommandTest(unittest.TestCase): def test_return_status_code_number_on_failure(self): status, out = exec_validated_cmd("ls", "user-bebacafe") self.assertEquals(status, 2) - self.assertIn('ls: cannot access user-bebacafe: No such file or directory\n', out) + self.assertIn( + 'ls: cannot access user-bebacafe: No such file or directory\n', + out) diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py index 3622bb56..d1a07a3a 100644 --- a/common/src/leap/soledad/common/tests/test_couch.py +++ b/common/src/leap/soledad/common/tests/test_couch.py @@ -1504,11 +1504,11 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): def test_ensure_security_doc(self): """ Ensure_security creates a _security ddoc to ensure that only soledad - will have member access to a db. + will have the lowest privileged access to an user db. """ self.create_db(ensure=False) self.assertFalse(self.db._database.security) - self.db.ensure_security() + self.db.ensure_security_ddoc() security_ddoc = self.db._database.security self.assertIn('admins', security_ddoc) self.assertFalse(security_ddoc['admins']['names']) diff --git a/server/changes/create_db_cmd b/server/changes/create_db_cmd new file mode 100644 index 00000000..cee0a935 --- /dev/null +++ b/server/changes/create_db_cmd @@ -0,0 +1,3 @@ + o Adds a new config parameter 'create_cmd', which allows sysadmin to specify + which command will create a database. That command was added in + pkg/create-user-db and debian package automates steps needed for sudo access. diff --git a/server/pkg/create-user-db b/server/pkg/create-user-db index edcb8a82..dd68f792 100755 --- a/server/pkg/create-user-db +++ b/server/pkg/create-user-db @@ -1,4 +1,20 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- +# create-user-db +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . import os import sys import netrc -- cgit v1.2.3 From 01314341c8ef1e947b8f921ba023ac67d89a9ce7 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 28 Sep 2015 15:43:49 -0300 Subject: [test] remove old mocks Those hardcoded mocks are leaking into other tests and are unnecessary. --- .../soledad/common/tests/test_couch_operations_atomicity.py | 6 ------ common/src/leap/soledad/common/tests/test_server.py | 12 ------------ 2 files changed, 18 deletions(-) diff --git a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py index 3e8e8cce..507f2984 100644 --- a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py +++ b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py @@ -35,17 +35,11 @@ from leap.soledad.common.tests.util import ( ) from leap.soledad.common.tests.test_couch import CouchDBTestCase from leap.soledad.common.tests.u1db_tests import TestCaseWithServer -from leap.soledad.common.tests.test_server import _couch_ensure_database REPEAT_TIMES = 20 -# monkey path CouchServerState so it can ensure databases. - -CouchServerState.ensure_database = _couch_ensure_database - - class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer): @staticmethod diff --git a/common/src/leap/soledad/common/tests/test_server.py b/common/src/leap/soledad/common/tests/test_server.py index f512d6c1..19d2907d 100644 --- a/common/src/leap/soledad/common/tests/test_server.py +++ b/common/src/leap/soledad/common/tests/test_server.py @@ -46,18 +46,6 @@ from leap.soledad.server import LockResource from leap.soledad.server.auth import URLToAuthorization -# monkey path CouchServerState so it can ensure databases. - -def _couch_ensure_database(self, dbname): - db = CouchDatabase.open_database( - self.couch_url + '/' + dbname, - create=True, - ensure_ddocs=True) - return db, db._replica_uid - -CouchServerState.ensure_database = _couch_ensure_database - - class ServerAuthorizationTestCase(BaseSoledadTest): """ -- cgit v1.2.3 From 6427b73247e327c27d5f8e5c2036281fb77205b2 Mon Sep 17 00:00:00 2001 From: Gislene Pereira Date: Mon, 28 Sep 2015 16:04:46 -0300 Subject: [test] Making test_command pass on Mac OS X. --- common/src/leap/soledad/common/tests/test_command.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/common/src/leap/soledad/common/tests/test_command.py b/common/src/leap/soledad/common/tests/test_command.py index 420f91ae..c386bdd2 100644 --- a/common/src/leap/soledad/common/tests/test_command.py +++ b/common/src/leap/soledad/common/tests/test_command.py @@ -49,7 +49,5 @@ class ExecuteValidatedCommandTest(unittest.TestCase): def test_return_status_code_number_on_failure(self): status, out = exec_validated_cmd("ls", "user-bebacafe") - self.assertEquals(status, 2) - self.assertIn( - 'ls: cannot access user-bebacafe: No such file or directory\n', - out) + self.assertNotEquals(status, 0) + self.assertIn('No such file or directory\n', out) -- cgit v1.2.3 From 3c7a41574ed1a97ae168bbbc50b127d17694734a Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 28 Sep 2015 16:35:19 -0300 Subject: [style] pep8 --- server/pkg/create-user-db | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/server/pkg/create-user-db b/server/pkg/create-user-db index dd68f792..1a7e77a7 100755 --- a/server/pkg/create-user-db +++ b/server/pkg/create-user-db @@ -29,7 +29,7 @@ This is meant to be used by Soledad Server. """ parser = argparse.ArgumentParser(description=description) parser.add_argument('dbname', metavar='user-d34db33f', type=str, - help='database name on the format user-{uuid4}') + help='database name on the format user-{uuid4}') NETRC_PATH = '/etc/couchdb/couchdb-admin.netrc' @@ -40,10 +40,10 @@ def url_for_db(dbname): parsed_netrc = netrc.netrc(NETRC_PATH) host, (login, _, password) = parsed_netrc.hosts.items()[0] url = ('http://%(login)s:%(password)s@%(host)s:5984/%(dbname)s' % { - 'login':login, - 'password':password, - 'host':host, - 'dbname':dbname}) + 'login': login, + 'password': password, + 'host': host, + 'dbname': dbname}) return url @@ -55,4 +55,5 @@ if __name__ == '__main__': url = url_for_db(args.dbname) db = CouchDatabase.open_database(url=url, create=True, replica_uid=None, ensure_ddocs=True) - print ('success! Created %s, replica_uid: %s' % (db._dbname, db.replica_uid)) + print ('success! Created %s, replica_uid: %s' % + (db._dbname, db.replica_uid)) -- cgit v1.2.3