diff options
Diffstat (limited to 'common/src/leap/soledad')
28 files changed, 298 insertions, 647 deletions
diff --git a/common/src/leap/soledad/common/.gitignore b/common/src/leap/soledad/common/.gitignore deleted file mode 100644 index 3378c78a..00000000 --- a/common/src/leap/soledad/common/.gitignore +++ /dev/null @@ -1 +0,0 @@ -ddocs.py diff --git a/common/src/leap/soledad/common/README.txt b/common/src/leap/soledad/common/README.txt index 38b9858e..0a252650 100644 --- a/common/src/leap/soledad/common/README.txt +++ b/common/src/leap/soledad/common/README.txt @@ -60,15 +60,6 @@ implemented in a way that all changes will be pushed with just one operation. * delete_index * create_index -Couch views and update functions are used in order to achieve atomicity on the -Couch backend. Transactions are stored in the `u1db_transactions` field of the -couch document. Document's content and conflicted versions are stored as couch -document attachments with names, respectivelly, `u1db_content` and -`u1db_conflicts`. - -A map of methods and couch query URI can be found on the `./ddocs/README.txt` -document. - Notes: * Currently, the couch backend does not implement indexing, so what is diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 523a50a0..0f4102db 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -23,21 +23,17 @@ import json import re import uuid import binascii -import time -import functools from StringIO import StringIO from urlparse import urljoin from contextlib import contextmanager -from multiprocessing.pool import ThreadPool from couchdb.client import Server, Database from couchdb.http import ( ResourceConflict, ResourceNotFound, - ServerError, Session, urljoin as couch_urljoin, Resource, @@ -50,9 +46,6 @@ from leap.soledad.common.l2db.errors import ( from leap.soledad.common.l2db.remote import http_app -from leap.soledad.common import ddocs -from .errors import raise_server_error -from .errors import raise_missing_design_doc_error from .support import MultipartWriter from leap.soledad.common.errors import InvalidURLError from leap.soledad.common.document import ServerDocument @@ -100,7 +93,19 @@ def couch_server(url): yield server -THREAD_POOL = ThreadPool(20) +def _get_gen_doc_id(gen): + return 'gen-%s' % str(gen).zfill(10) + + +GENERATION_KEY = 'gen' +TRANSACTION_ID_KEY = 'trans_id' +REPLICA_UID_KEY = 'replica_uid' +DOC_ID_KEY = 'doc_id' +SCHEMA_VERSION_KEY = 'schema_version' + +CONFIG_DOC_ID = '_local/config' +SYNC_DOC_ID_PREFIX = '_local/sync_' +SCHEMA_VERSION = 1 class CouchDatabase(object): @@ -111,7 +116,7 @@ class CouchDatabase(object): """ @classmethod - def open_database(cls, url, create, ensure_ddocs=False, replica_uid=None, + def open_database(cls, url, create, replica_uid=None, database_security=None): """ Open a U1DB database using CouchDB as backend. @@ -122,8 +127,6 @@ class CouchDatabase(object): :type create: bool :param replica_uid: an optional unique replica identifier :type replica_uid: str - :param ensure_ddocs: Ensure that the design docs exist on server. - :type ensure_ddocs: bool :param database_security: security rules as CouchDB security doc :type database_security: dict @@ -144,21 +147,20 @@ class CouchDatabase(object): server.create(dbname) else: raise DatabaseDoesNotExist() - db = cls(url, - dbname, ensure_ddocs=ensure_ddocs, + db = cls(url, dbname, ensure_security=create, database_security=database_security) return SoledadBackend( db, replica_uid=replica_uid) - def __init__(self, url, dbname, ensure_ddocs=True, + def __init__(self, url, dbname, ensure_security=False, database_security=None): """ :param url: Couch server URL with necessary credentials :type url: string :param dbname: Couch database name :type dbname: string - :param ensure_ddocs: Ensure that the design docs exist on server. - :type ensure_ddocs: bool + :param ensure_security: will PUT a _security ddoc if set + :type ensure_security: bool :param database_security: security rules as CouchDB security doc :type database_security: dict """ @@ -169,8 +171,7 @@ class CouchDatabase(object): self.batching = False self.batch_generation = None self.batch_docs = {} - if ensure_ddocs: - self.ensure_ddocs_on_db() + if ensure_security: self.ensure_security_ddoc(database_security) def batch_start(self): @@ -205,22 +206,6 @@ class CouchDatabase(object): except ResourceNotFound: raise DatabaseDoesNotExist() - def ensure_ddocs_on_db(self): - """ - Ensure that the design documents used by the backend exist on the - couch database. - """ - for ddoc_name in ['docs', 'syncs', 'transactions']: - try: - self.json_from_resource(['_design'] + - ddoc_name.split('/') + ['_info'], - check_missing_ddoc=False) - except ResourceNotFound: - ddoc = json.loads( - binascii.a2b_base64( - getattr(ddocs, ddoc_name))) - self._database.save(ddoc) - def ensure_security_ddoc(self, security_config=None): """ Make sure that only soledad user is able to access this database as @@ -261,13 +246,14 @@ class CouchDatabase(object): """ try: # set on existent config document - doc = self._database['u1db_config'] - doc['replica_uid'] = replica_uid + doc = self._database[CONFIG_DOC_ID] + doc[REPLICA_UID_KEY] = replica_uid except ResourceNotFound: # or create the config document doc = { - '_id': 'u1db_config', - 'replica_uid': replica_uid, + '_id': CONFIG_DOC_ID, + REPLICA_UID_KEY: replica_uid, + SCHEMA_VERSION_KEY: SCHEMA_VERSION, } self._database.save(doc) @@ -280,8 +266,8 @@ class CouchDatabase(object): """ try: # grab replica_uid from server - doc = self._database['u1db_config'] - replica_uid = doc['replica_uid'] + doc = self._database[CONFIG_DOC_ID] + replica_uid = doc[REPLICA_UID_KEY] return replica_uid except ResourceNotFound: # create a unique replica_uid @@ -308,8 +294,8 @@ class CouchDatabase(object): """ generation, _ = self.get_generation_info() - results = list(self.get_docs(self._database, - include_deleted=include_deleted)) + results = list( + self._get_docs(None, True, include_deleted)) return (generation, results) def get_docs(self, doc_ids, check_for_conflicts=True, @@ -330,24 +316,37 @@ class CouchDatabase(object): in matching doc_ids order. :rtype: iterable """ - # Workaround for: - # - # http://bugs.python.org/issue7980 - # https://leap.se/code/issues/5449 - # - # python-couchdb uses time.strptime, which is not thread safe. In - # order to avoid the problem described on the issues above, we preload - # strptime here by evaluating the conversion of an arbitrary date. - # This will not be needed when/if we switch from python-couchdb to - # paisley. - time.strptime('Mar 8 1917', '%b %d %Y') - get_one = functools.partial( - self.get_doc, check_for_conflicts=check_for_conflicts) - docs = [THREAD_POOL.apply_async(get_one, [doc_id]) - for doc_id in doc_ids] - for doc in docs: - doc = doc.get() - if not doc or not include_deleted and doc.is_tombstone(): + return self._get_docs(doc_ids, check_for_conflicts, include_deleted) + + def _get_docs(self, doc_ids, check_for_conflicts, include_deleted): + """ + Use couch's `_all_docs` view to get the documents indicated in + `doc_ids`, + + :param doc_ids: A list of document identifiers or None for all. + :type doc_ids: list + :param check_for_conflicts: If set to False, then the conflict check + will be skipped, and 'None' will be + returned instead of True/False. + :type check_for_conflicts: bool + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted + documents will not be included in the results. + + :return: iterable giving the Document object for each document id + in matching doc_ids order. + :rtype: iterable + """ + params = {'include_docs': 'true', 'attachments': 'true'} + if doc_ids is not None: + params['keys'] = doc_ids + view = self._database.view("_all_docs", **params) + for row in view.rows: + result = row['doc'] + doc = self.__parse_doc_from_couch( + result, result['_id'], check_for_conflicts=check_for_conflicts) + # filter out non-u1db or deleted documents + if not doc or (not include_deleted and doc.is_tombstone()): continue yield doc @@ -434,8 +433,6 @@ class CouchDatabase(object): result['_attachments']['u1db_conflicts']['data'])))) # store couch revision doc.couch_rev = result['_rev'] - # store transactions - doc.transactions = result['u1db_transactions'] return doc def _build_conflicts(self, doc_id, attached_conflicts): @@ -471,14 +468,11 @@ class CouchDatabase(object): """ if generation == 0: return '' - # query a couch list function - ddoc_path = [ - '_design', 'transactions', '_list', 'trans_id_for_gen', 'log' - ] - response = self.json_from_resource(ddoc_path, gen=generation) - if response == {}: + log = self._get_transaction_log(start=generation, end=generation) + if not log: raise InvalidGeneration - return response['transaction_id'] + _, _, trans_id = log[0] + return trans_id def get_replica_gen_and_trans_id(self, other_replica_uid): """ @@ -499,18 +493,19 @@ class CouchDatabase(object): synchronized with the replica, this is (0, ''). :rtype: (int, str) """ - doc_id = 'u1db_sync_%s' % other_replica_uid + doc_id = '%s%s' % (SYNC_DOC_ID_PREFIX, other_replica_uid) try: doc = self._database[doc_id] except ResourceNotFound: doc = { '_id': doc_id, - 'generation': 0, - 'transaction_id': '', + GENERATION_KEY: 0, + REPLICA_UID_KEY: str(other_replica_uid), + TRANSACTION_ID_KEY: '', } self._database.save(doc) - result = doc['generation'], doc['transaction_id'] - return result + gen, trans_id = doc[GENERATION_KEY], doc[TRANSACTION_ID_KEY] + return gen, trans_id def get_doc_conflicts(self, doc_id, couch_rev=None): """ @@ -537,7 +532,6 @@ class CouchDatabase(object): try: response = self.json_from_resource([doc_id, 'u1db_conflicts'], - check_missing_ddoc=False, **params) return conflicts + self._build_conflicts( doc_id, json.loads(response.read())) @@ -562,13 +556,13 @@ class CouchDatabase(object): generation. :type other_transaction_id: str """ - doc_id = 'u1db_sync_%s' % other_replica_uid + doc_id = '%s%s' % (SYNC_DOC_ID_PREFIX, other_replica_uid) try: doc = self._database[doc_id] except ResourceNotFound: doc = {'_id': doc_id} - doc['generation'] = other_generation - doc['transaction_id'] = other_transaction_id + doc[GENERATION_KEY] = other_generation + doc[TRANSACTION_ID_KEY] = other_transaction_id self._database.save(doc) def get_transaction_log(self): @@ -578,12 +572,35 @@ class CouchDatabase(object): :return: The complete transaction log. :rtype: [(str, str)] """ - # query a couch view - ddoc_path = ['_design', 'transactions', '_view', 'log'] - response = self.json_from_resource(ddoc_path) - return map( - lambda row: (row['id'], row['value']), - response['rows']) + log = self._get_transaction_log() + return map(lambda i: (i[1], i[2]), log) + + def _get_gen_docs( + self, start=0, end=9999999999, descending=None, limit=None): + params = {} + if descending: + params['descending'] = 'true' + # honor couch way of traversing the view tree in reverse order + start, end = end, start + params['startkey'] = _get_gen_doc_id(start) + params['endkey'] = _get_gen_doc_id(end) + params['include_docs'] = 'true' + if limit: + params['limit'] = limit + view = self._database.view("_all_docs", **params) + return view.rows + + def _get_transaction_log(self, start=0, end=9999999999): + # get current gen and trans_id + rows = self._get_gen_docs(start=start, end=end) + log = [] + for row in rows: + doc = row['doc'] + log.append(( + doc[GENERATION_KEY], + doc[DOC_ID_KEY], + doc[TRANSACTION_ID_KEY])) + return log def whats_changed(self, old_generation=0): """ @@ -602,32 +619,16 @@ class CouchDatabase(object): changes first) :rtype: (int, str, [(str, int, str)]) """ - # query a couch list function - ddoc_path = [ - '_design', 'transactions', '_list', 'whats_changed', 'log' - ] - response = self.json_from_resource(ddoc_path, old_gen=old_generation) - results = map( - lambda row: - (row['generation'], row['doc_id'], row['transaction_id']), - response['transactions']) - results.reverse() - cur_gen = old_generation - seen = set() changes = [] - newest_trans_id = '' - for generation, doc_id, trans_id in results: + cur_generation, last_trans_id = self.get_generation_info() + relevant_tail = self._get_transaction_log(start=old_generation + 1) + seen = set() + for generation, doc_id, trans_id in reversed(relevant_tail): if doc_id not in seen: changes.append((doc_id, generation, trans_id)) seen.add(doc_id) - if changes: - cur_gen = changes[0][1] # max generation - newest_trans_id = changes[0][2] - changes.reverse() - else: - cur_gen, newest_trans_id = self.get_generation_info() - - return cur_gen, newest_trans_id, changes + changes.reverse() + return (cur_generation, last_trans_id, changes) def get_generation_info(self): """ @@ -638,53 +639,74 @@ class CouchDatabase(object): """ if self.batching and self.batch_generation: return self.batch_generation - # query a couch list function - ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] - info = self.json_from_resource(ddoc_path) - return (info['generation'], info['transaction_id']) + rows = self._get_gen_docs(descending=True, limit=1) + if not rows: + return 0, '' + gen_doc = rows.pop()['doc'] + return gen_doc[GENERATION_KEY], gen_doc[TRANSACTION_ID_KEY] - def json_from_resource(self, ddoc_path, check_missing_ddoc=True, - **kwargs): + def json_from_resource(self, doc_path, **kwargs): """ Get a resource from it's path and gets a doc's JSON using provided - parameters, also checking for missing design docs by default. + parameters. - :param ddoc_path: The path to resource. - :type ddoc_path: [str] - :param check_missing_ddoc: Raises info on what design doc is missing. - :type check_missin_ddoc: bool + :param doc_path: The path to resource. + :type doc_path: [str] :return: The request's data parsed from JSON to a dict. :rtype: dict - - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. - """ - if ddoc_path is not None: - resource = self._database.resource(*ddoc_path) + """ + if doc_path is not None: + resource = self._database.resource(*doc_path) else: resource = self._database.resource() - try: - _, _, data = resource.get_json(**kwargs) - return data - except ResourceNotFound as e: - if check_missing_ddoc: - raise_missing_design_doc_error(e, ddoc_path) - else: - raise e - except ServerError as e: - raise_server_error(e, ddoc_path) + _, _, data = resource.get_json(**kwargs) + return data + + def _allocate_new_generation(self, doc_id, transaction_id): + """ + Allocate a new generation number for a document modification. + + We need to allocate a new generation to this document modification by + creating a new gen doc. In order to avoid concurrent database updates + from allocating the same new generation, we will try to create the + document until we succeed, meaning that no other piece of code holds + the same generation number as ours. + + The loop below would only be executed more than once if: + + 1. there's more than one thread trying to modify the user's database, + and + + 2. the execution of getting the current generation and saving the gen + doc different threads get interleaved (one of them will succeed + and the others will fail and try again). + + Number 1 only happens when more than one user device is syncing at the + same time. Number 2 depends on not-so-frequent coincidence of + code execution. + + Also, in the race between threads for a generation number there's + always one thread that wins. so if there are N threads in the race, the + expected number of repetitions of the loop for each thread would be + N/2. If N is equal to the number of devices that the user has, the + number of possible repetitions of the loop should always be low. + """ + while True: + try: + # add the gen document + gen, _ = self.get_generation_info() + new_gen = gen + 1 + gen_doc = { + '_id': _get_gen_doc_id(new_gen), + GENERATION_KEY: new_gen, + DOC_ID_KEY: doc_id, + TRANSACTION_ID_KEY: transaction_id, + } + self._database.save(gen_doc) + break # succeeded allocating a new generation, proceed + except ResourceConflict: + pass # try again! def save_document(self, old_doc, doc, transaction_id): """ @@ -701,19 +723,6 @@ class CouchDatabase(object): :raise RevisionConflict: Raised when trying to update a document but couch revisions mismatch. - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. """ attachments = {} # we save content and conflicts as attachments parts = [] # and we put it using couch's multipart PUT @@ -726,6 +735,7 @@ class CouchDatabase(object): 'length': len(content), } parts.append(content) + # save conflicts as attachment if doc.has_conflicts is True: conflicts = json.dumps( @@ -737,21 +747,11 @@ class CouchDatabase(object): 'length': len(conflicts), } parts.append(conflicts) - # store old transactions, if any - transactions = old_doc.transactions[:] if old_doc is not None else [] - # create a new transaction id and timestamp it so the transaction log - # is consistent when querying the database. - transactions.append( - # here we store milliseconds to keep consistent with javascript - # Date.prototype.getTime() which was used before inside a couchdb - # update handler. - (int(time.time() * 1000), - transaction_id)) + # build the couch document couch_doc = { '_id': doc.doc_id, 'u1db_rev': doc.rev, - 'u1db_transactions': transactions, '_attachments': attachments, } # if we are updating a doc we have to add the couch doc revision @@ -761,7 +761,19 @@ class CouchDatabase(object): if not self.batching: buf = StringIO() envelope = MultipartWriter(buf) - envelope.add('application/json', json.dumps(couch_doc)) + # the order in which attachments are described inside the + # serialization of the couch document must match the order in + # which they are actually written in the multipart structure. + # Because of that, we use `sorted_keys=True` in the json + # serialization (so "u1db_conflicts" comes before + # "u1db_content" on the couch document attachments + # description), and also reverse the order of the parts before + # writing them, so the "conflict" part is written before the + # "content" part. + envelope.add( + 'application/json', + json.dumps(couch_doc, sort_keys=True)) + parts.reverse() for part in parts: envelope.add('application/octet-stream', part) envelope.close() @@ -778,12 +790,14 @@ class CouchDatabase(object): del attachment['follows'] del attachment['length'] index = 0 if name is 'u1db_content' else 1 - attachment['data'] = binascii.b2a_base64(parts[index]).strip() + attachment['data'] = binascii.b2a_base64( + parts[index]).strip() couch_doc['_attachments'] = attachments self.batch_docs[doc.doc_id] = couch_doc last_gen, last_trans_id = self.batch_generation self.batch_generation = (last_gen + 1, transaction_id) - return transactions[-1][1] + + self._allocate_new_generation(doc.doc_id, transaction_id) def _new_resource(self, *path): """ diff --git a/common/src/leap/soledad/common/couch/errors.py b/common/src/leap/soledad/common/couch/errors.py deleted file mode 100644 index 9b287c76..00000000 --- a/common/src/leap/soledad/common/couch/errors.py +++ /dev/null @@ -1,144 +0,0 @@ -# -*- coding: utf-8 -*- -# errors.py -# Copyright (C) 2015 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -from leap.soledad.common.errors import SoledadError, BackendNotReadyError -from leap.soledad.common.errors import register_exception - -""" -Specific errors that can be raised by CouchDatabase. -""" - - -@register_exception -class MissingDesignDocError(BackendNotReadyError): - - """ - Raised when trying to access a missing couch design document. - """ - - wire_description = "missing design document" - status = 500 - - -@register_exception -class MissingDesignDocNamedViewError(SoledadError): - - """ - Raised when trying to access a missing named view on a couch design - document. - """ - - wire_description = "missing design document named function" - status = 500 - - -@register_exception -class MissingDesignDocListFunctionError(SoledadError): - - """ - Raised when trying to access a missing list function on a couch design - document. - """ - - wire_description = "missing design document list function" - status = 500 - - -@register_exception -class MissingDesignDocDeletedError(SoledadError): - - """ - Raised when trying to access a deleted couch design document. - """ - - wire_description = "design document was deleted" - status = 500 - - -@register_exception -class DesignDocUnknownError(SoledadError): - - """ - Raised when trying to access a couch design document and getting an - unknown error. - """ - - wire_description = "missing design document unknown error" - status = 500 - - -def raise_missing_design_doc_error(exc, ddoc_path): - """ - Raise an appropriate exception when catching a ResourceNotFound when - accessing a design document. - - :param exc: The exception cought. - :type exc: ResourceNotFound - :param ddoc_path: A list representing the requested path. - :type ddoc_path: list - - :raise MissingDesignDocError: Raised when tried to access a missing design - document. - :raise MissingDesignDocListFunctionError: Raised when trying to access a - missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a design - document for an yet unknown reason. - """ - path = "".join(ddoc_path) - if exc.message[1] == 'missing': - raise MissingDesignDocError(path) - elif exc.message[1] == 'missing function' or \ - exc.message[1].startswith('missing lists function'): - raise MissingDesignDocListFunctionError(path) - elif exc.message[1] == 'missing_named_view': - raise MissingDesignDocNamedViewError(path) - elif exc.message[1] == 'deleted': - raise MissingDesignDocDeletedError(path) - # other errors are unknown for now - raise DesignDocUnknownError("%s: %s" % (path, str(exc.message))) - - -def raise_server_error(exc, ddoc_path): - """ - Raise an appropriate exception when catching a ServerError when - accessing a design document. - - :param exc: The exception cought. - :type exc: ResourceNotFound - :param ddoc_path: A list representing the requested path. - :type ddoc_path: list - - :raise MissingDesignDocListFunctionError: Raised when trying to access a - missing list function on a - design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a design - document for an yet unknown reason. - """ - path = "".join(ddoc_path) - msg = exc.message[1][0] - if msg == 'unnamed_error': - raise MissingDesignDocListFunctionError(path) - elif msg == 'TypeError': - if 'point is undefined' in exc.message[1][1]: - raise MissingDesignDocListFunctionError - # other errors are unknown for now - raise DesignDocUnknownError("%s: %s" % (path, str(exc.message))) diff --git a/common/src/leap/soledad/common/couch/state.py b/common/src/leap/soledad/common/couch/state.py index 9b40a264..523ac0b0 100644 --- a/common/src/leap/soledad/common/couch/state.py +++ b/common/src/leap/soledad/common/couch/state.py @@ -17,20 +17,26 @@ """ Server state using CouchDatabase as backend. """ -import logging +import couchdb import re import time from urlparse import urljoin from hashlib import sha512 +from leap.soledad.common.log import getLogger from leap.soledad.common.couch import CouchDatabase from leap.soledad.common.couch import couch_server +from leap.soledad.common.couch import CONFIG_DOC_ID +from leap.soledad.common.couch import SCHEMA_VERSION +from leap.soledad.common.couch import SCHEMA_VERSION_KEY from leap.soledad.common.command import exec_validated_cmd from leap.soledad.common.l2db.remote.server_state import ServerState from leap.soledad.common.l2db.errors import Unauthorized +from leap.soledad.common.errors import WrongCouchSchemaVersionError +from leap.soledad.common.errors import MissingCouchConfigDocumentError -logger = logging.getLogger(__name__) +logger = getLogger(__name__) def is_db_name_valid(name): @@ -59,15 +65,47 @@ class CouchServerState(ServerState): TOKENS_TYPE_DEF = "Token" TOKENS_USER_ID_KEY = "user_id" - def __init__(self, couch_url, create_cmd=None): + def __init__(self, couch_url, create_cmd=None, + check_schema_versions=False): """ Initialize the couch server state. :param couch_url: The URL for the couch database. :type couch_url: str + :param create_cmd: Command to be executed for user db creation. It will + receive a properly sanitized parameter with user db + name and should access CouchDB with necessary + privileges, which server lacks for security reasons. + :type create_cmd: str + :param check_schema_versions: Whether to check couch schema version of + user dbs. Set to False as this is only + intended to run once during start-up. + :type check_schema_versions: bool """ self.couch_url = couch_url self.create_cmd = create_cmd + if check_schema_versions: + self._check_schema_versions() + + def _check_schema_versions(self): + """ + Check that all user databases use the correct couch schema. + """ + server = couchdb.client.Server(self.couch_url) + for dbname in server: + if not dbname.startswith('user-'): + continue + db = server[dbname] + + # if there are documents, ensure that a config doc exists + config_doc = db.get(CONFIG_DOC_ID) + if config_doc: + if config_doc[SCHEMA_VERSION_KEY] != SCHEMA_VERSION: + raise WrongCouchSchemaVersionError(dbname) + else: + result = db.view('_all_docs', limit=1) + if result.total_rows != 0: + raise MissingCouchConfigDocumentError(dbname) def open_database(self, dbname): """ @@ -80,7 +118,7 @@ class CouchServerState(ServerState): :rtype: SoledadBackend """ url = urljoin(self.couch_url, dbname) - db = CouchDatabase.open_database(url, create=False, ensure_ddocs=False) + db = CouchDatabase.open_database(url, create=False) return db def ensure_database(self, dbname): diff --git a/common/src/leap/soledad/common/ddocs/README.txt b/common/src/leap/soledad/common/ddocs/README.txt deleted file mode 100644 index 5569d929..00000000 --- a/common/src/leap/soledad/common/ddocs/README.txt +++ /dev/null @@ -1,34 +0,0 @@ -This directory holds a folder structure containing javascript files that -represent the design documents needed by the CouchDB U1DB backend. These files -are compiled into the `../ddocs.py` file by setuptools when creating the -source distribution. - -The following table depicts the U1DB CouchDB backend method and the URI that -is queried to obtain/update data from/to the server. - - +----------------------------------+------------------------------------------------------------------+ - | u1db backend method | URI | - |----------------------------------+------------------------------------------------------------------| - | _get_generation | _design/transactions/_list/generation/log | - | _get_generation_info | _design/transactions/_list/generation/log | - | _get_trans_id_for_gen | _design/transactions/_list/trans_id_for_gen/log | - | _get_transaction_log | _design/transactions/_view/log | - | _get_doc (*) | _design/docs/_view/get?key=<doc_id> | - | _has_conflicts | _design/docs/_view/get?key=<doc_id> | - | get_all_docs | _design/docs/_view/get | - | _put_doc | _design/docs/_update/put/<doc_id> | - | _whats_changed | _design/transactions/_list/whats_changed/log?old_gen=<gen> | - | _get_conflicts (*) | _design/docs/_view/conflicts?key=<doc_id> | - | _get_replica_gen_and_trans_id | _design/syncs/_view/log?other_replica_uid=<uid> | - | _do_set_replica_gen_and_trans_id | _design/syncs/_update/put/u1db_sync_log | - | _add_conflict | _design/docs/_update/add_conflict/<doc_id> | - | _delete_conflicts | _design/docs/_update/delete_conflicts/<doc_id>?doc_rev=<doc_rev> | - | list_indexes | not implemented | - | _get_index_definition | not implemented | - | delete_index | not implemented | - | _get_indexed_fields | not implemented | - | _put_and_update_indexes | not implemented | - +----------------------------------+------------------------------------------------------------------+ - -(*) These methods also request CouchDB document attachments that store U1DB - document contents. diff --git a/common/src/leap/soledad/common/ddocs/docs/views/get/map.js b/common/src/leap/soledad/common/ddocs/docs/views/get/map.js deleted file mode 100644 index ae08d9e9..00000000 --- a/common/src/leap/soledad/common/ddocs/docs/views/get/map.js +++ /dev/null @@ -1,20 +0,0 @@ -function(doc) { - if (doc.u1db_rev) { - var is_tombstone = true; - var has_conflicts = false; - if (doc._attachments) { - if (doc._attachments.u1db_content) - is_tombstone = false; - if (doc._attachments.u1db_conflicts) - has_conflicts = true; - } - emit(doc._id, - { - "couch_rev": doc._rev, - "u1db_rev": doc.u1db_rev, - "is_tombstone": is_tombstone, - "has_conflicts": has_conflicts, - } - ); - } -} diff --git a/common/src/leap/soledad/common/ddocs/syncs/updates/state.js b/common/src/leap/soledad/common/ddocs/syncs/updates/state.js deleted file mode 100644 index d62aeb40..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/updates/state.js +++ /dev/null @@ -1,105 +0,0 @@ -/** - * This update handler stores information about ongoing synchronization - * attempts from distinct source replicas. - * - * Normally, u1db synchronization occurs during one POST request. In order to - * split that into many serial POST requests, we store the state of each sync - * in the server, using a document with id 'u1db_sync_state'. To identify - * each sync attempt, we use a sync_id sent by the client. If we ever receive - * a new sync_id, we trash current data for that source replica and start - * over. - * - * We expect the following in the document body: - * - * { - * 'source_replica_uid': '<source_replica_uid>', - * 'sync_id': '<sync_id>', - * 'seen_ids': [['<doc_id>', <at_gen>], ...], // optional - * 'changes_to_return': [ // optional - * 'gen': <gen>, - * 'trans_id': '<trans_id>', - * 'changes_to_return': [[<doc_id>', <gen>, '<trans_id>'], ...] - * ], - * } - * - * The format of the final document stored on server is: - * - * { - * '_id': '<str>', - * '_rev' '<str>', - * 'ongoing_syncs': { - * '<source_replica_uid>': { - * 'sync_id': '<sync_id>', - * 'seen_ids': [['<doc_id>', <at_gen>[, ...], - * 'changes_to_return': { - * 'gen': <gen>, - * 'trans_id': '<trans_id>', - * 'changes_to_return': [ - * ['<doc_id>', <gen>, '<trans_id>'], - * ..., - * ], - * }, - * }, - * ... // info about other source replicas here - * } - * } - */ -function(doc, req) { - - // prevent updates to alien documents - if (doc != null && doc['_id'] != 'u1db_sync_state') - return [null, 'invalid data']; - - // create the document if it doesn't exist - if (!doc) - doc = { - '_id': 'u1db_sync_state', - 'ongoing_syncs': {}, - }; - - // parse and validate incoming data - var body = JSON.parse(req.body); - if (body['source_replica_uid'] == null) - return [null, 'invalid data']; - var source_replica_uid = body['source_replica_uid']; - - if (body['sync_id'] == null) - return [null, 'invalid data']; - var sync_id = body['sync_id']; - - // trash outdated sync data for that replica if that exists - if (doc['ongoing_syncs'][source_replica_uid] != null && - doc['ongoing_syncs'][source_replica_uid]['sync_id'] != sync_id) - delete doc['ongoing_syncs'][source_replica_uid]; - - // create an entry for that source replica - if (doc['ongoing_syncs'][source_replica_uid] == null) - doc['ongoing_syncs'][source_replica_uid] = { - 'sync_id': sync_id, - 'seen_ids': {}, - 'changes_to_return': null, - }; - - // incoming meta-data values should be exclusive, so we count how many - // arrived and deny to accomplish the transaction if the count is high. - var incoming_values = 0; - var info = doc['ongoing_syncs'][source_replica_uid] - - // add incoming seen id - if ('seen_id' in body) { - info['seen_ids'][body['seen_id'][0]] = body['seen_id'][1]; - incoming_values += 1; - } - - // add incoming changes_to_return - if ('changes_to_return' in body) { - info['changes_to_return'] = body['changes_to_return']; - incoming_values += 1; - } - - if (incoming_values != 1) - return [null, 'invalid data']; - - return [doc, 'ok']; -} - diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js deleted file mode 100644 index 94b7e767..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js +++ /dev/null @@ -1,20 +0,0 @@ -function(doc) { - if (doc['_id'] == 'u1db_sync_state' && doc['ongoing_syncs'] != null) - for (var source_replica_uid in doc['ongoing_syncs']) { - var changes = doc['ongoing_syncs'][source_replica_uid]['changes_to_return']; - var sync_id = doc['ongoing_syncs'][source_replica_uid]['sync_id']; - if (changes == null) - emit([source_replica_uid, sync_id, 0], null); - else if (changes.length == 0) - emit([source_replica_uid, sync_id, 0], []); - else - for (var i = 0; i < changes['changes_to_return'].length; i++) - emit( - [source_replica_uid, sync_id, i], - { - 'gen': changes['gen'], - 'trans_id': changes['trans_id'], - 'next_change_to_return': changes['changes_to_return'][i], - }); - } -} diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js deleted file mode 100644 index 16118e88..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js +++ /dev/null @@ -1,11 +0,0 @@ -function(doc) { - if (doc['_id'] == 'u1db_sync_state' && doc['ongoing_syncs'] != null) - for (var source_replica_uid in doc['ongoing_syncs']) { - var sync_id = doc['ongoing_syncs'][source_replica_uid]['sync_id']; - emit( - [source_replica_uid, sync_id], - { - 'seen_ids': doc['ongoing_syncs'][source_replica_uid]['seen_ids'], - }); - } -} diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/state/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/state/map.js deleted file mode 100644 index e88c6ebb..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/views/state/map.js +++ /dev/null @@ -1,17 +0,0 @@ -function(doc) { - if (doc['_id'] == 'u1db_sync_state' && doc['ongoing_syncs'] != null) - for (var source_replica_uid in doc['ongoing_syncs']) { - var changes = doc['ongoing_syncs'][source_replica_uid]['changes_to_return']; - var sync_id = doc['ongoing_syncs'][source_replica_uid]['sync_id']; - if (changes == null) - emit([source_replica_uid, sync_id], null); - else - emit( - [source_replica_uid, sync_id], - { - 'gen': changes['gen'], - 'trans_id': changes['trans_id'], - 'number_of_changes': changes['changes_to_return'].length - }); - } -} diff --git a/common/src/leap/soledad/common/ddocs/transactions/lists/generation.js b/common/src/leap/soledad/common/ddocs/transactions/lists/generation.js deleted file mode 100644 index dbdfff0d..00000000 --- a/common/src/leap/soledad/common/ddocs/transactions/lists/generation.js +++ /dev/null @@ -1,20 +0,0 @@ -function(head, req) { - var row; - var rows=[]; - // fetch all rows - while(row = getRow()) { - rows.push(row); - } - if (rows.length > 0) - send(JSON.stringify({ - "generation": rows.length, - "doc_id": rows[rows.length-1]['id'], - "transaction_id": rows[rows.length-1]['value'] - })); - else - send(JSON.stringify({ - "generation": 0, - "doc_id": "", - "transaction_id": "", - })); -} diff --git a/common/src/leap/soledad/common/ddocs/transactions/lists/trans_id_for_gen.js b/common/src/leap/soledad/common/ddocs/transactions/lists/trans_id_for_gen.js deleted file mode 100644 index 2ec91794..00000000 --- a/common/src/leap/soledad/common/ddocs/transactions/lists/trans_id_for_gen.js +++ /dev/null @@ -1,19 +0,0 @@ -function(head, req) { - var row; - var rows=[]; - var i = 1; - var gen = 1; - if (req.query.gen) - gen = parseInt(req.query['gen']); - // fetch all rows - while(row = getRow()) - rows.push(row); - if (gen <= rows.length) - send(JSON.stringify({ - "generation": gen, - "doc_id": rows[gen-1]['id'], - "transaction_id": rows[gen-1]['value'], - })); - else - send('{}'); -} diff --git a/common/src/leap/soledad/common/ddocs/transactions/lists/whats_changed.js b/common/src/leap/soledad/common/ddocs/transactions/lists/whats_changed.js deleted file mode 100644 index b35cdf51..00000000 --- a/common/src/leap/soledad/common/ddocs/transactions/lists/whats_changed.js +++ /dev/null @@ -1,22 +0,0 @@ -function(head, req) { - var row; - var gen = 1; - var old_gen = 0; - if (req.query.old_gen) - old_gen = parseInt(req.query['old_gen']); - send('{"transactions":[\n'); - // fetch all rows - while(row = getRow()) { - if (gen > old_gen) { - if (gen > old_gen+1) - send(',\n'); - send(JSON.stringify({ - "generation": gen, - "doc_id": row["id"], - "transaction_id": row["value"] - })); - } - gen++; - } - send('\n]}'); -} diff --git a/common/src/leap/soledad/common/ddocs/transactions/views/log/map.js b/common/src/leap/soledad/common/ddocs/transactions/views/log/map.js deleted file mode 100644 index 94ef63ca..00000000 --- a/common/src/leap/soledad/common/ddocs/transactions/views/log/map.js +++ /dev/null @@ -1,7 +0,0 @@ -function(doc) { - if (doc.u1db_transactions) - doc.u1db_transactions.forEach(function(t) { - emit(t[0], // use timestamp as key so the results are ordered - t[1]); // value is the transaction_id - }); -} diff --git a/common/src/leap/soledad/common/errors.py b/common/src/leap/soledad/common/errors.py index dec871c9..d543a3de 100644 --- a/common/src/leap/soledad/common/errors.py +++ b/common/src/leap/soledad/common/errors.py @@ -77,7 +77,6 @@ http_errors.ERROR_STATUSES = set( class InvalidURLError(Exception): - """ Exception raised when Soledad encounters a malformed URL. """ @@ -90,3 +89,15 @@ class BackendNotReadyError(SoledadError): """ wire_description = "backend not ready" status = 500 + + +class WrongCouchSchemaVersionError(SoledadError): + """ + Raised in case there is a user database with wrong couch schema version. + """ + + +class MissingCouchConfigDocumentError(SoledadError): + """ + Raised if a database has documents but lacks the couch config document. + """ diff --git a/common/src/leap/soledad/common/l2db/__init__.py b/common/src/leap/soledad/common/l2db/__init__.py index c0bd15fe..568897c4 100644 --- a/common/src/leap/soledad/common/l2db/__init__.py +++ b/common/src/leap/soledad/common/l2db/__init__.py @@ -16,10 +16,7 @@ """L2DB""" -try: - import simplejson as json -except ImportError: - import json # noqa +import json from leap.soledad.common.l2db.errors import InvalidJSON, InvalidContent diff --git a/common/src/leap/soledad/common/l2db/backends/__init__.py b/common/src/leap/soledad/common/l2db/backends/__init__.py index 922daafd..c731c3d3 100644 --- a/common/src/leap/soledad/common/l2db/backends/__init__.py +++ b/common/src/leap/soledad/common/l2db/backends/__init__.py @@ -17,10 +17,7 @@ """Abstract classes and common implementations for the backends.""" import re -try: - import simplejson as json -except ImportError: - import json # noqa +import json import uuid from leap.soledad.common import l2db diff --git a/common/src/leap/soledad/common/l2db/backends/inmemory.py b/common/src/leap/soledad/common/l2db/backends/inmemory.py index 06a934a6..6fd251af 100644 --- a/common/src/leap/soledad/common/l2db/backends/inmemory.py +++ b/common/src/leap/soledad/common/l2db/backends/inmemory.py @@ -16,10 +16,7 @@ """The in-memory Database class for U1DB.""" -try: - import simplejson as json -except ImportError: - import json # noqa +import json from leap.soledad.common.l2db import ( Document, errors, diff --git a/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py b/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py index ba273039..d73c0d16 100644 --- a/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py +++ b/common/src/leap/soledad/common/l2db/backends/sqlite_backend.py @@ -21,17 +21,14 @@ A L2DB implementation that uses SQLite as its persistence layer. import errno import os -try: - import simplejson as json -except ImportError: - import json # noqa -from sqlite3 import dbapi2 +import json import sys import time import uuid - import pkg_resources +from sqlite3 import dbapi2 + from leap.soledad.common.l2db.backends import CommonBackend, CommonSyncTarget from leap.soledad.common.l2db import ( Document, errors, diff --git a/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py b/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py index a2cbff62..96d0d872 100644 --- a/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py +++ b/common/src/leap/soledad/common/l2db/remote/basic_auth_middleware.py @@ -15,10 +15,8 @@ # along with u1db. If not, see <http://www.gnu.org/licenses/>. """U1DB Basic Auth authorisation WSGI middleware.""" import httplib -try: - import simplejson as json -except ImportError: - import json # noqa +import json + from wsgiref.util import shift_path_info diff --git a/common/src/leap/soledad/common/l2db/remote/http_app.py b/common/src/leap/soledad/common/l2db/remote/http_app.py index 65277bd1..5cf6645e 100644 --- a/common/src/leap/soledad/common/l2db/remote/http_app.py +++ b/common/src/leap/soledad/common/l2db/remote/http_app.py @@ -23,10 +23,7 @@ HTTP Application exposing U1DB. import functools import httplib import inspect -try: - import simplejson as json -except ImportError: - import json # noqa +import json import sys import urlparse diff --git a/common/src/leap/soledad/common/l2db/remote/http_client.py b/common/src/leap/soledad/common/l2db/remote/http_client.py index a65264b6..53363c0a 100644 --- a/common/src/leap/soledad/common/l2db/remote/http_client.py +++ b/common/src/leap/soledad/common/l2db/remote/http_client.py @@ -17,10 +17,7 @@ """Base class to make requests to a remote HTTP server.""" import httplib -try: - import simplejson as json -except ImportError: - import json # noqa +import json import socket import ssl import sys diff --git a/common/src/leap/soledad/common/l2db/remote/http_database.py b/common/src/leap/soledad/common/l2db/remote/http_database.py index b2b48dee..7512379f 100644 --- a/common/src/leap/soledad/common/l2db/remote/http_database.py +++ b/common/src/leap/soledad/common/l2db/remote/http_database.py @@ -16,10 +16,7 @@ """HTTPDatabase to access a remote db over the HTTP API.""" -try: - import simplejson as json -except ImportError: - import json # noqa +import json import uuid from leap.soledad.common.l2db import ( diff --git a/common/src/leap/soledad/common/l2db/remote/http_target.py b/common/src/leap/soledad/common/l2db/remote/http_target.py index 7e7f366f..38804f01 100644 --- a/common/src/leap/soledad/common/l2db/remote/http_target.py +++ b/common/src/leap/soledad/common/l2db/remote/http_target.py @@ -16,10 +16,7 @@ """SyncTarget API implementation to a remote HTTP server.""" -try: - import simplejson as json -except ImportError: - import json # noqa +import json from leap.soledad.common.l2db import Document, SyncTarget from leap.soledad.common.l2db.errors import BrokenSyncStream diff --git a/common/src/leap/soledad/common/l2db/remote/server_state.py b/common/src/leap/soledad/common/l2db/remote/server_state.py index f131e09e..e20b4679 100644 --- a/common/src/leap/soledad/common/l2db/remote/server_state.py +++ b/common/src/leap/soledad/common/l2db/remote/server_state.py @@ -15,8 +15,6 @@ # along with u1db. If not, see <http://www.gnu.org/licenses/>. """State for servers exposing a set of U1DB databases.""" -import os -import errno class ServerState(object): diff --git a/common/src/leap/soledad/common/l2db/sync.py b/common/src/leap/soledad/common/l2db/sync.py index c612629f..5e9b22f4 100644 --- a/common/src/leap/soledad/common/l2db/sync.py +++ b/common/src/leap/soledad/common/l2db/sync.py @@ -126,8 +126,8 @@ class Synchronizer(object): target_last_known_gen, target_last_known_trans_id = 0, '' else: target_last_known_gen, target_last_known_trans_id = ( - self.source._get_replica_gen_and_trans_id( # nopep8 - self.target_replica_uid)) + self.source._get_replica_gen_and_trans_id( # nopep8 + self.target_replica_uid)) if not changes and target_last_known_gen == target_gen: if target_trans_id != target_last_known_trans_id: raise errors.InvalidTransactionId diff --git a/common/src/leap/soledad/common/log.py b/common/src/leap/soledad/common/log.py new file mode 100644 index 00000000..3f026045 --- /dev/null +++ b/common/src/leap/soledad/common/log.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# log.py +# Copyright (C) 2016 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +""" +This module centralizes logging facilities and allows for different behaviours, +as using the python logging module instead of twisted logger, and to print logs +to stdout, mainly for development purposes. +""" + + +import os +import sys + +from twisted.logger import Logger +from twisted.logger import textFileLogObserver + + +def getLogger(*args, **kwargs): + + if os.environ.get('SOLEDAD_USE_PYTHON_LOGGING'): + import logging + return logging.getLogger(__name__) + + if os.environ.get('SOLEDAD_LOG_TO_STDOUT'): + kwargs({'observer': textFileLogObserver(sys.stdout)}) + + return Logger(*args, **kwargs) + + +__all__ = ['getLogger'] |