diff options
| -rw-r--r-- | common/src/leap/soledad/common/backend.py | 681 | ||||
| -rw-r--r-- | common/src/leap/soledad/common/couch.py | 1514 | ||||
| -rw-r--r-- | common/src/leap/soledad/common/couch/__init__.py | 746 | ||||
| -rw-r--r-- | common/src/leap/soledad/common/couch/state.py | 114 | ||||
| -rw-r--r-- | common/src/leap/soledad/common/errors.py | 2 | ||||
| -rw-r--r-- | common/src/leap/soledad/common/tests/test_couch.py | 149 | ||||
| -rw-r--r-- | common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py | 5 | ||||
| -rw-r--r-- | common/src/leap/soledad/common/tests/test_server.py | 10 | ||||
| -rw-r--r-- | common/src/leap/soledad/common/tests/test_sync_mutex.py | 7 | ||||
| -rw-r--r-- | common/src/leap/soledad/common/tests/test_sync_target.py | 6 | ||||
| -rw-r--r-- | common/src/leap/soledad/common/tests/util.py | 9 | ||||
| -rwxr-xr-x | server/pkg/create-user-db | 6 | ||||
| -rw-r--r-- | server/src/leap/soledad/server/__init__.py | 2 | 
13 files changed, 1597 insertions, 1654 deletions
| diff --git a/common/src/leap/soledad/common/backend.py b/common/src/leap/soledad/common/backend.py new file mode 100644 index 00000000..cb37b4ac --- /dev/null +++ b/common/src/leap/soledad/common/backend.py @@ -0,0 +1,681 @@ +# -*- coding: utf-8 -*- +# backend.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +"""A U1DB generic backend.""" + + +from u1db import vectorclock +from u1db.errors import ( +    RevisionConflict, +    InvalidDocId, +    ConflictedDoc, +    DocumentDoesNotExist, +    DocumentAlreadyDeleted, +) +from u1db.backends import CommonBackend +from u1db.backends import CommonSyncTarget +from leap.soledad.common.document import CouchDocument + + +class SoledadBackend(CommonBackend): + +    """ +    A U1DB backend implementation. +    """ + +    def __init__(self, database, replica_uid=None): +        """ +        Create a new backend. 
+ +        :param database: the database implementation +        :type database: Database +        :param replica_uid: an optional unique replica identifier +        :type replica_uid: str +        """ +        # save params +        self._factory = CouchDocument +        self._real_replica_uid = None +        self._cache = None +        self._dbname = database._dbname +        self._database = database +        if replica_uid is not None: +            self._set_replica_uid(replica_uid) + +    @property +    def cache(self): +        if self._cache is not None: +            return self._cache +        else: +            return {} + +    def init_caching(self, cache): +        """ +        Start using cache by setting internal _cache attribute. + +        :param cache: the cache instance, anything that behaves like a dict +        :type cache: dict +        """ +        self._cache = cache + +    def get_sync_target(self): +        """ +        Return a SyncTarget object, for another u1db to synchronize with. + +        :return: The sync target. +        :rtype: SoledadSyncTarget +        """ +        return SoledadSyncTarget(self) + +    def delete_database(self): +        """ +        Delete a U1DB CouchDB database. +        """ +        self._database.delete_database() + +    def close(self): +        """ +        Release any resources associated with this database. + +        :return: True if db was succesfully closed. +        :rtype: bool +        """ +        self._database.close() +        return True + +    def __del__(self): +        """ +        Close the database upon garbage collection. +        """ +        self.close() + +    def _set_replica_uid(self, replica_uid): +        """ +        Force the replica uid to be set. + +        :param replica_uid: The new replica uid. 
+        :type replica_uid: str +        """ +        self._database.set_replica_uid(replica_uid) +        self._real_replica_uid = replica_uid +        self.cache['replica_uid'] = self._real_replica_uid + +    def _get_replica_uid(self): +        """ +        Get the replica uid. + +        :return: The replica uid. +        :rtype: str +        """ +        if self._real_replica_uid is not None: +            self.cache['replica_uid'] = self._real_replica_uid +            return self._real_replica_uid +        if 'replica_uid' in self.cache: +            return self.cache['replica_uid'] +        self._real_replica_uid = self._database.get_replica_uid() +        self._set_replica_uid(self._real_replica_uid) +        return self._real_replica_uid + +    _replica_uid = property(_get_replica_uid, _set_replica_uid) + +    replica_uid = property(_get_replica_uid) + +    def _get_generation(self): +        """ +        Return the current generation. + +        :return: The current generation. +        :rtype: int + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. 
+        """ +        return self._get_generation_info()[0] + +    def _get_generation_info(self): +        """ +        Return the current generation. + +        :return: A tuple containing the current generation and transaction id. +        :rtype: (int, str) + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. +        """ +        if self.replica_uid + '_gen' in self.cache: +            response = self.cache[self.replica_uid + '_gen'] +            return response +        cur_gen, newest_trans_id = self._database._get_generation_info() +        self.cache[self.replica_uid + '_gen'] = (cur_gen, newest_trans_id) +        return (cur_gen, newest_trans_id) + +    def _get_trans_id_for_gen(self, generation): +        """ +        Get the transaction id corresponding to a particular generation. + +        :param generation: The generation for which to get the transaction id. +        :type generation: int + +        :return: The transaction id for C{generation}. 
+        :rtype: str + +        """ +        return self._database._get_trans_id_for_gen(generation) + +    def _get_transaction_log(self): +        """ +        This is only for the test suite, it is not part of the api. + +        :return: The complete transaction log. +        :rtype: [(str, str)] + +        """ +        return self._database._get_transaction_log() + +    def _get_doc(self, doc_id, check_for_conflicts=False): +        """ +        Extract the document from storage. + +        This can return None if the document doesn't exist. + +        :param doc_id: The unique document identifier +        :type doc_id: str +        :param check_for_conflicts: If set to False, then the conflict check +                                    will be skipped. +        :type check_for_conflicts: bool + +        :return: The document. +        :rtype: CouchDocument +        """ +        return self._database._get_doc(doc_id, check_for_conflicts) + +    def get_doc(self, doc_id, include_deleted=False): +        """ +        Get the JSON string for the given document. + +        :param doc_id: The unique document identifier +        :type doc_id: str +        :param include_deleted: If set to True, deleted documents will be +            returned with empty content. Otherwise asking for a deleted +            document will return None. +        :type include_deleted: bool + +        :return: A document object. +        :rtype: CouchDocument. +        """ +        doc = self._get_doc(doc_id, check_for_conflicts=True) +        if doc is None: +            return None +        if doc.is_tombstone() and not include_deleted: +            return None +        return doc + +    def get_all_docs(self, include_deleted=False): +        """ +        Get the JSON content for all documents in the database. + +        :param include_deleted: If set to True, deleted documents will be +                                returned with empty content. 
Otherwise deleted +                                documents will not be included in the results. +        :type include_deleted: bool + +        :return: (generation, [CouchDocument]) +            The current generation of the database, followed by a list of all +            the documents in the database. +        :rtype: (int, [CouchDocument]) +        """ +        return self._database.get_all_docs(include_deleted) + +    def _put_doc(self, old_doc, doc): +        """ +        Put the document in the Couch backend database. + +        Note that C{old_doc} must have been fetched with the parameter +        C{check_for_conflicts} equal to True, so we can properly update the +        new document using the conflict information from the old one. + +        :param old_doc: The old document version. +        :type old_doc: CouchDocument +        :param doc: The document to be put. +        :type doc: CouchDocument +        """ +        last_transaction =\ +            self._database.save_document(old_doc, doc, +                                         self._allocate_transaction_id()) +        if self.replica_uid + '_gen' in self.cache: +            gen, trans = self.cache[self.replica_uid + '_gen'] +            gen += 1 +            trans = last_transaction +            self.cache[self.replica_uid + '_gen'] = (gen, trans) + +    def put_doc(self, doc): +        """ +        Update a document. + +        If the document currently has conflicts, put will fail. +        If the database specifies a maximum document size and the document +        exceeds it, put will fail and raise a DocumentTooBig exception. + +        :param doc: A Document with new content. +        :return: new_doc_rev - The new revision identifier for the document. +            The Document object will also be updated. + +        :raise InvalidDocId: Raised if the document's id is invalid. +        :raise DocumentTooBig: Raised if the document size is too big. 
+        :raise ConflictedDoc: Raised if the document has conflicts. +        """ +        if doc.doc_id is None: +            raise InvalidDocId() +        self._check_doc_id(doc.doc_id) +        self._check_doc_size(doc) +        old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) +        if old_doc and old_doc.has_conflicts: +            raise ConflictedDoc() +        if old_doc and doc.rev is None and old_doc.is_tombstone(): +            new_rev = self._allocate_doc_rev(old_doc.rev) +        else: +            if old_doc is not None: +                    if old_doc.rev != doc.rev: +                        raise RevisionConflict() +            else: +                if doc.rev is not None: +                    raise RevisionConflict() +            new_rev = self._allocate_doc_rev(doc.rev) +        doc.rev = new_rev +        self._put_doc(old_doc, doc) +        return new_rev + +    def whats_changed(self, old_generation=0): +        """ +        Return a list of documents that have changed since old_generation. + +        :param old_generation: The generation of the database in the old +                               state. +        :type old_generation: int + +        :return: (generation, trans_id, [(doc_id, generation, trans_id),...]) +                 The current generation of the database, its associated +                 transaction id, and a list of of changed documents since +                 old_generation, represented by tuples with for each document +                 its doc_id and the generation and transaction id corresponding +                 to the last intervening change and sorted by generation (old +                 changes first) +        :rtype: (int, str, [(str, int, str)]) +        """ +        return self._database.whats_changed(old_generation) + +    def delete_doc(self, doc): +        """ +        Mark a document as deleted. + +        Will abort if the current revision doesn't match doc.rev. 
+        This will also set doc.content to None. + +        :param doc: The document to mark as deleted. +        :type doc: CouchDocument. + +        :raise DocumentDoesNotExist: Raised if the document does not +                                            exist. +        :raise RevisionConflict: Raised if the revisions do not match. +        :raise DocumentAlreadyDeleted: Raised if the document is +                                              already deleted. +        :raise ConflictedDoc: Raised if the doc has conflicts. +        """ +        old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) +        if old_doc is None: +            raise DocumentDoesNotExist +        if old_doc.rev != doc.rev: +            raise RevisionConflict() +        if old_doc.is_tombstone(): +            raise DocumentAlreadyDeleted +        if old_doc.has_conflicts: +            raise ConflictedDoc() +        new_rev = self._allocate_doc_rev(doc.rev) +        doc.rev = new_rev +        doc.make_tombstone() +        self._put_doc(old_doc, doc) +        return new_rev + +    def get_doc_conflicts(self, doc_id, couch_rev=None): +        """ +        Get the conflicted versions of a document. + +        If the C{couch_rev} parameter is not None, conflicts for a specific +        document's couch revision are returned. + +        :param couch_rev: The couch document revision. +        :type couch_rev: str + +        :return: A list of conflicted versions of the document. +        :rtype: list +        """ +        return self._database.get_doc_conflicts(doc_id, couch_rev) + +    def _get_replica_gen_and_trans_id(self, other_replica_uid): +        """ +        Return the last known generation and transaction id for the other db +        replica. + +        When you do a synchronization with another replica, the Database keeps +        track of what generation the other database replica was at, and what +        the associated transaction id was.  
This is used to determine what data +        needs to be sent, and if two databases are claiming to be the same +        replica. + +        :param other_replica_uid: The identifier for the other replica. +        :type other_replica_uid: str + +        :return: A tuple containing the generation and transaction id we +                 encountered during synchronization. If we've never +                 synchronized with the replica, this is (0, ''). +        :rtype: (int, str) +        """ +        return self._database._get_replica_gen_and_trans_id(other_replica_uid) + +    def _set_replica_gen_and_trans_id(self, other_replica_uid, +                                      other_generation, other_transaction_id, +                                      number_of_docs=None, doc_idx=None, +                                      sync_id=None): +        """ +        Set the last-known generation and transaction id for the other +        database replica. + +        We have just performed some synchronization, and we want to track what +        generation the other replica was at. See also +        _get_replica_gen_and_trans_id. + +        :param other_replica_uid: The U1DB identifier for the other replica. +        :type other_replica_uid: str +        :param other_generation: The generation number for the other replica. +        :type other_generation: int +        :param other_transaction_id: The transaction id associated with the +            generation. +        :type other_transaction_id: str +        :param number_of_docs: The total amount of documents sent on this sync +                               session. +        :type number_of_docs: int +        :param doc_idx: The index of the current document being sent. +        :type doc_idx: int +        :param sync_id: The id of the current sync session. 
+        :type sync_id: str +        """ +        if other_replica_uid is not None and other_generation is not None: +            self._do_set_replica_gen_and_trans_id( +                other_replica_uid, other_generation, other_transaction_id, +                number_of_docs=number_of_docs, doc_idx=doc_idx, +                sync_id=sync_id) + +    def _do_set_replica_gen_and_trans_id( +            self, other_replica_uid, other_generation, other_transaction_id, +            number_of_docs=None, doc_idx=None, sync_id=None): +        """ +        Set the last-known generation and transaction id for the other +        database replica. + +        We have just performed some synchronization, and we want to track what +        generation the other replica was at. See also +        _get_replica_gen_and_trans_id. + +        :param other_replica_uid: The U1DB identifier for the other replica. +        :type other_replica_uid: str +        :param other_generation: The generation number for the other replica. +        :type other_generation: int +        :param other_transaction_id: The transaction id associated with the +                                     generation. +        :type other_transaction_id: str +        :param number_of_docs: The total amount of documents sent on this sync +                               session. +        :type number_of_docs: int +        :param doc_idx: The index of the current document being sent. +        :type doc_idx: int +        :param sync_id: The id of the current sync session. 
+        :type sync_id: str +        """ +        self.cache[other_replica_uid] = (other_generation, +                                         other_transaction_id) +        self._database._do_set_replica_gen_and_trans_id(other_replica_uid, +                                                        other_generation, +                                                        other_transaction_id) + +    def _force_doc_sync_conflict(self, doc): +        """ +        Add a conflict and force a document put. + +        :param doc: The document to be put. +        :type doc: CouchDocument +        """ +        my_doc = self._get_doc(doc.doc_id) +        self._prune_conflicts(doc, vectorclock.VectorClockRev(doc.rev)) +        doc.add_conflict(self._factory(doc.doc_id, my_doc.rev, +                                       my_doc.get_json())) +        doc.has_conflicts = True +        self._put_doc(my_doc, doc) + +    def resolve_doc(self, doc, conflicted_doc_revs): +        """ +        Mark a document as no longer conflicted. + +        We take the list of revisions that the client knows about that it is +        superseding. This may be a different list from the actual current +        conflicts, in which case only those are removed as conflicted.  This +        may fail if the conflict list is significantly different from the +        supplied information. (sync could have happened in the background from +        the time you GET_DOC_CONFLICTS until the point where you RESOLVE) + +        :param doc: A Document with the new content to be inserted. +        :type doc: CouchDocument +        :param conflicted_doc_revs: A list of revisions that the new content +                                    supersedes. +        :type conflicted_doc_revs: [str] + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. 
+        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. +        """ +        cur_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) +        new_rev = self._ensure_maximal_rev(cur_doc.rev, +                                           conflicted_doc_revs) +        superseded_revs = set(conflicted_doc_revs) +        doc.rev = new_rev +        # this backend stores conflicts as properties of the documents, so we +        # have to copy these conflicts over to the document being updated. +        if cur_doc.rev in superseded_revs: +            # the newer doc version will supersede the one in the database, so +            # we copy conflicts before updating the backend. +            doc.set_conflicts(cur_doc.get_conflicts())  # copy conflicts over. +            doc.delete_conflicts(superseded_revs) +            self._put_doc(cur_doc, doc) +        else: +            # the newer doc version does not supersede the one in the +            # database, so we will add a conflict to the database and copy +            # those over to the document the user has in her hands. 
+            cur_doc.add_conflict(doc) +            cur_doc.delete_conflicts(superseded_revs) +            self._put_doc(cur_doc, cur_doc)  # just update conflicts +            # backend has been updated with current conflicts, now copy them +            # to the current document. +            doc.set_conflicts(cur_doc.get_conflicts()) + +    def _put_doc_if_newer(self, doc, save_conflict, replica_uid, replica_gen, +                          replica_trans_id='', number_of_docs=None, +                          doc_idx=None, sync_id=None): +        """ +        Insert/update document into the database with a given revision. + +        This api is used during synchronization operations. + +        If a document would conflict and save_conflict is set to True, the +        content will be selected as the 'current' content for doc.doc_id, +        even though doc.rev doesn't supersede the currently stored revision. +        The currently stored document will be added to the list of conflict +        alternatives for the given doc_id. + +        This forces the new content to be 'current' so that we get convergence +        after synchronizing, even if people don't resolve conflicts. Users can +        then notice that their content is out of date, update it, and +        synchronize again. (The alternative is that users could synchronize and +        think the data has propagated, but their local copy looks fine, and the +        remote copy is never updated again.) + +        :param doc: A document object +        :type doc: CouchDocument +        :param save_conflict: If this document is a conflict, do you want to +                              save it as a conflict, or just ignore it. +        :type save_conflict: bool +        :param replica_uid: A unique replica identifier. +        :type replica_uid: str +        :param replica_gen: The generation of the replica corresponding to the +                            this document. 
The replica arguments are optional, +                            but are used during synchronization. +        :type replica_gen: int +        :param replica_trans_id: The transaction_id associated with the +                                 generation. +        :type replica_trans_id: str +        :param number_of_docs: The total amount of documents sent on this sync +                               session. +        :type number_of_docs: int +        :param doc_idx: The index of the current document being sent. +        :type doc_idx: int +        :param sync_id: The id of the current sync session. +        :type sync_id: str + +        :return: (state, at_gen) -  If we don't have doc_id already, or if +                 doc_rev supersedes the existing document revision, then the +                 content will be inserted, and state is 'inserted'.  If +                 doc_rev is less than or equal to the existing revision, then +                 the put is ignored and state is respecitvely 'superseded' or +                 'converged'.  If doc_rev is not strictly superseded or +                 supersedes, then state is 'conflicted'. The document will not +                 be inserted if save_conflict is False.  For 'inserted' or +                 'converged', at_gen is the insertion/current generation. 
+        :rtype: (str, int) +        """ +        if not isinstance(doc, CouchDocument): +            doc = self._factory(doc.doc_id, doc.rev, doc.get_json()) +        my_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) +        if my_doc: +            doc.set_conflicts(my_doc.get_conflicts()) +        return CommonBackend._put_doc_if_newer(self, doc, save_conflict, +                                               replica_uid, replica_gen, +                                               replica_trans_id) + +    def _put_and_update_indexes(self, cur_doc, doc): +        self._put_doc(cur_doc, doc) + +    def get_docs(self, doc_ids, check_for_conflicts=True, +                 include_deleted=False): +        """ +        Get the JSON content for many documents. + +        :param doc_ids: A list of document identifiers or None for all. +        :type doc_ids: list +        :param check_for_conflicts: If set to False, then the conflict check +                                    will be skipped, and 'None' will be +                                    returned instead of True/False. +        :type check_for_conflicts: bool +        :param include_deleted: If set to True, deleted documents will be +                                returned with empty content. Otherwise deleted +                                documents will not be included in the results. +        :return: iterable giving the Document object for each document id +                 in matching doc_ids order. +        :rtype: iterable +        """ +        return self._database.get_docs(doc_ids, check_for_conflicts, +                                       include_deleted) + +    def _prune_conflicts(self, doc, doc_vcr): +        """ +        Prune conflicts that are older then the current document's revision, or +        whose content match to the current document's content. +        Originally in u1db.CommonBackend + +        :param doc: The document to have conflicts pruned. 
+        :type doc: CouchDocument +        :param doc_vcr: A vector clock representing the current document's +                        revision. +        :type doc_vcr: u1db.vectorclock.VectorClock +        """ +        if doc.has_conflicts: +            autoresolved = False +            c_revs_to_prune = [] +            for c_doc in doc._conflicts: +                c_vcr = vectorclock.VectorClockRev(c_doc.rev) +                if doc_vcr.is_newer(c_vcr): +                    c_revs_to_prune.append(c_doc.rev) +                elif doc.same_content_as(c_doc): +                    c_revs_to_prune.append(c_doc.rev) +                    doc_vcr.maximize(c_vcr) +                    autoresolved = True +            if autoresolved: +                doc_vcr.increment(self._replica_uid) +                doc.rev = doc_vcr.as_str() +            doc.delete_conflicts(c_revs_to_prune) + + +class SoledadSyncTarget(CommonSyncTarget): + +    """ +    Functionality for using a SoledadBackend as a synchronization target. 
+    """ + +    def get_sync_info(self, source_replica_uid): +        source_gen, source_trans_id = self._db._get_replica_gen_and_trans_id( +            source_replica_uid) +        my_gen, my_trans_id = self._db._get_generation_info() +        return ( +            self._db._replica_uid, my_gen, my_trans_id, source_gen, +            source_trans_id) + +    def record_sync_info(self, source_replica_uid, source_replica_generation, +                         source_replica_transaction_id): +        if self._trace_hook: +            self._trace_hook('record_sync_info') +        self._db._set_replica_gen_and_trans_id( +            source_replica_uid, source_replica_generation, +            source_replica_transaction_id) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py deleted file mode 100644 index d4f67696..00000000 --- a/common/src/leap/soledad/common/couch.py +++ /dev/null @@ -1,1514 +0,0 @@ -# -*- coding: utf-8 -*- -# couch.py -# Copyright (C) 2013 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. 
- - -"""A U1DB backend that uses CouchDB as its persistence layer.""" - - -import json -import re -import uuid -import logging -import binascii -import time - - -from StringIO import StringIO -from urlparse import urljoin -from contextlib import contextmanager -from multiprocessing.pool import ThreadPool - - -from couchdb.client import Server, Database -from couchdb.multipart import MultipartWriter -from couchdb.http import ( -    ResourceConflict, -    ResourceNotFound, -    ServerError, -    Session, -    urljoin as couch_urljoin, -    Resource, -) -from u1db import vectorclock -from u1db.errors import ( -    DatabaseDoesNotExist, -    InvalidGeneration, -    RevisionConflict, -    InvalidDocId, -    ConflictedDoc, -    DocumentDoesNotExist, -    DocumentAlreadyDeleted, -    Unauthorized, -) -from u1db.backends import CommonBackend, CommonSyncTarget -from u1db.remote import http_app -from u1db.remote.server_state import ServerState - - -from leap.soledad.common import ddocs -from leap.soledad.common.errors import raise_server_error -from leap.soledad.common.errors import raise_missing_design_doc_error -from leap.soledad.common.errors import InvalidURLError -from leap.soledad.common.command import exec_validated_cmd -from leap.soledad.common.document import CouchDocument - - -logger = logging.getLogger(__name__) - - -COUCH_TIMEOUT = 120  # timeout for transfers between Soledad server and Couch - - -def list_users_dbs(couch_url): -    """ -    Retrieves a list with all databases that starts with 'user-' on CouchDB. -    Those databases belongs to users. So, the list will contain all the -    database names in the form of 'user-{uuid4}'. - -    :param couch_url: The couch url with needed credentials -    :type couch_url: str - -    :return: The list of all database names from users. 
-    :rtype: [str] -    """ -    with couch_server(couch_url) as server: -        users = [dbname for dbname in server if dbname.startswith('user-')] -    return users - -# monkey-patch the u1db http app to use CouchDocument -http_app.Document = CouchDocument - - -@contextmanager -def couch_server(url): -    """ -    Provide a connection to a couch server and cleanup after use. - -    For database creation and deletion we use an ephemeral connection to the -    couch server. That connection has to be properly closed, so we provide it -    as a context manager. - -    :param url: The URL of the Couch server. -    :type url: str -    """ -    session = Session(timeout=COUCH_TIMEOUT) -    server = Server(url=url, full_commit=False, session=session) -    yield server - - -THREAD_POOL = ThreadPool(20) - - -class SoledadBackend(CommonBackend): - -    """ -    A U1DB implementation that uses CouchDB as its persistence layer. -    """ - -    @classmethod -    def open_database(cls, url, create, replica_uid=None, ensure_ddocs=False, -                      database_security=None): -        """ -        Open a U1DB database using CouchDB as backend. - -        :param url: the url of the database replica -        :type url: str -        :param create: should the replica be created if it does not exist? -        :type create: bool -        :param replica_uid: an optional unique replica identifier -        :type replica_uid: str -        :param ensure_ddocs: Ensure that the design docs exist on server. -        :type ensure_ddocs: bool - -        :return: the database instance -        :rtype: SoledadBackend -        """ -        db = CouchDatabase.open_database(url, create, ensure_ddocs) -        return cls( -            db, replica_uid=replica_uid) - -    def __init__(self, database, replica_uid=None): -        """ -        Create a new Couch data container. 
- -        :param url: the url of the couch database -        :type url: str -        :param dbname: the database name -        :type dbname: str -        :param replica_uid: an optional unique replica identifier -        :type replica_uid: str -        :param ensure_ddocs: Ensure that the design docs exist on server. -        :type ensure_ddocs: bool -        """ -        # save params -        self._factory = CouchDocument -        self._real_replica_uid = None -        # configure couch -        self._cache = None -        self._dbname = database._dbname -        self._database = database -        if replica_uid is not None: -            self._set_replica_uid(replica_uid) - -    @property -    def cache(self): -        if self._cache is not None: -            return self._cache -        else: -            return {} - -    def init_caching(self, cache): -        """ -        Start using cache by setting internal _cache attribute. - -        :param cache: the cache instance, anything that behaves like a dict -        :type cache: dict -        """ -        self._cache = cache - -    def get_sync_target(self): -        """ -        Return a SyncTarget object, for another u1db to synchronize with. - -        :return: The sync target. -        :rtype: CouchSyncTarget -        """ -        return CouchSyncTarget(self) - -    def delete_database(self): -        """ -        Delete a U1DB CouchDB database. -        """ -        self._database.delete_database() - -    def close(self): -        """ -        Release any resources associated with this database. - -        :return: True if db was succesfully closed. -        :rtype: bool -        """ -        self._database.close() -        return True - -    def __del__(self): -        """ -        Close the database upon garbage collection. -        """ -        self.close() - -    def _set_replica_uid(self, replica_uid): -        """ -        Force the replica uid to be set. 
- -        :param replica_uid: The new replica uid. -        :type replica_uid: str -        """ -        self._database.set_replica_uid(replica_uid) -        self._real_replica_uid = replica_uid -        self.cache['replica_uid'] = self._real_replica_uid - -    def _get_replica_uid(self): -        """ -        Get the replica uid. - -        :return: The replica uid. -        :rtype: str -        """ -        if self._real_replica_uid is not None: -            self.cache['replica_uid'] = self._real_replica_uid -            return self._real_replica_uid -        if 'replica_uid' in self.cache: -            return self.cache['replica_uid'] -        self._real_replica_uid = self._database.get_replica_uid() -        self._set_replica_uid(self._real_replica_uid) -        return self._real_replica_uid - -    _replica_uid = property(_get_replica_uid, _set_replica_uid) - -    replica_uid = property(_get_replica_uid) - -    def _get_generation(self): -        """ -        Return the current generation. - -        :return: The current generation. -        :rtype: int - -        :raise MissingDesignDocError: Raised when tried to access a missing -                                      design document. -        :raise MissingDesignDocListFunctionError: Raised when trying to access -                                                  a missing list function on a -                                                  design document. -        :raise MissingDesignDocNamedViewError: Raised when trying to access a -                                               missing named view on a design -                                               document. -        :raise MissingDesignDocDeletedError: Raised when trying to access a -                                             deleted design document. 
-        :raise MissingDesignDocUnknownError: Raised when failed to access a -                                             design document for an yet -                                             unknown reason. -        """ -        return self._get_generation_info()[0] - -    def _get_generation_info(self): -        """ -        Return the current generation. - -        :return: A tuple containing the current generation and transaction id. -        :rtype: (int, str) - -        :raise MissingDesignDocError: Raised when tried to access a missing -                                      design document. -        :raise MissingDesignDocListFunctionError: Raised when trying to access -                                                  a missing list function on a -                                                  design document. -        :raise MissingDesignDocNamedViewError: Raised when trying to access a -                                               missing named view on a design -                                               document. -        :raise MissingDesignDocDeletedError: Raised when trying to access a -                                             deleted design document. -        :raise MissingDesignDocUnknownError: Raised when failed to access a -                                             design document for an yet -                                             unknown reason. -        """ -        if self.replica_uid + '_gen' in self.cache: -            response = self.cache[self.replica_uid + '_gen'] -            return response -        cur_gen, newest_trans_id = self._database._get_generation_info() -        self.cache[self.replica_uid + '_gen'] = (cur_gen, newest_trans_id) -        return (cur_gen, newest_trans_id) - -    def _get_trans_id_for_gen(self, generation): -        """ -        Get the transaction id corresponding to a particular generation. - -        :param generation: The generation for which to get the transaction id. 
-        :type generation: int - -        :return: The transaction id for C{generation}. -        :rtype: str - -        """ -        return self._database._get_trans_id_for_gen(generation) - -    def _get_transaction_log(self): -        """ -        This is only for the test suite, it is not part of the api. - -        :return: The complete transaction log. -        :rtype: [(str, str)] - -        """ -        return self._database._get_transaction_log() - -    def _get_doc(self, doc_id, check_for_conflicts=False): -        """ -        Extract the document from storage. - -        This can return None if the document doesn't exist. - -        :param doc_id: The unique document identifier -        :type doc_id: str -        :param check_for_conflicts: If set to False, then the conflict check -                                    will be skipped. -        :type check_for_conflicts: bool - -        :return: The document. -        :rtype: CouchDocument -        """ -        return self._database._get_doc(doc_id, check_for_conflicts) - -    def get_doc(self, doc_id, include_deleted=False): -        """ -        Get the JSON string for the given document. - -        :param doc_id: The unique document identifier -        :type doc_id: str -        :param include_deleted: If set to True, deleted documents will be -            returned with empty content. Otherwise asking for a deleted -            document will return None. -        :type include_deleted: bool - -        :return: A document object. -        :rtype: CouchDocument. -        """ -        doc = self._get_doc(doc_id, check_for_conflicts=True) -        if doc is None: -            return None -        if doc.is_tombstone() and not include_deleted: -            return None -        return doc - -    def get_all_docs(self, include_deleted=False): -        """ -        Get the JSON content for all documents in the database. 
- -        :param include_deleted: If set to True, deleted documents will be -                                returned with empty content. Otherwise deleted -                                documents will not be included in the results. -        :type include_deleted: bool - -        :return: (generation, [CouchDocument]) -            The current generation of the database, followed by a list of all -            the documents in the database. -        :rtype: (int, [CouchDocument]) -        """ -        return self._database.get_all_docs(include_deleted) - -    def _put_doc(self, old_doc, doc): -        """ -        Put the document in the Couch backend database. - -        Note that C{old_doc} must have been fetched with the parameter -        C{check_for_conflicts} equal to True, so we can properly update the -        new document using the conflict information from the old one. - -        :param old_doc: The old document version. -        :type old_doc: CouchDocument -        :param doc: The document to be put. -        :type doc: CouchDocument -        """ -        last_transaction =\ -            self._database.save_document(old_doc, doc, -                                         self._allocate_transaction_id()) -        if self.replica_uid + '_gen' in self.cache: -            gen, trans = self.cache[self.replica_uid + '_gen'] -            gen += 1 -            trans = last_transaction -            self.cache[self.replica_uid + '_gen'] = (gen, trans) - -    def put_doc(self, doc): -        """ -        Update a document. - -        If the document currently has conflicts, put will fail. -        If the database specifies a maximum document size and the document -        exceeds it, put will fail and raise a DocumentTooBig exception. - -        :param doc: A Document with new content. -        :return: new_doc_rev - The new revision identifier for the document. -            The Document object will also be updated. 
- -        :raise InvalidDocId: Raised if the document's id is invalid. -        :raise DocumentTooBig: Raised if the document size is too big. -        :raise ConflictedDoc: Raised if the document has conflicts. -        """ -        if doc.doc_id is None: -            raise InvalidDocId() -        self._check_doc_id(doc.doc_id) -        self._check_doc_size(doc) -        old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) -        if old_doc and old_doc.has_conflicts: -            raise ConflictedDoc() -        if old_doc and doc.rev is None and old_doc.is_tombstone(): -            new_rev = self._allocate_doc_rev(old_doc.rev) -        else: -            if old_doc is not None: -                    if old_doc.rev != doc.rev: -                        raise RevisionConflict() -            else: -                if doc.rev is not None: -                    raise RevisionConflict() -            new_rev = self._allocate_doc_rev(doc.rev) -        doc.rev = new_rev -        self._put_doc(old_doc, doc) -        return new_rev - -    def whats_changed(self, old_generation=0): -        """ -        Return a list of documents that have changed since old_generation. - -        :param old_generation: The generation of the database in the old -                               state. 
-        :type old_generation: int - -        :return: (generation, trans_id, [(doc_id, generation, trans_id),...]) -                 The current generation of the database, its associated -                 transaction id, and a list of of changed documents since -                 old_generation, represented by tuples with for each document -                 its doc_id and the generation and transaction id corresponding -                 to the last intervening change and sorted by generation (old -                 changes first) -        :rtype: (int, str, [(str, int, str)]) -        """ -        return self._database.whats_changed(old_generation) - -    def delete_doc(self, doc): -        """ -        Mark a document as deleted. - -        Will abort if the current revision doesn't match doc.rev. -        This will also set doc.content to None. - -        :param doc: The document to mark as deleted. -        :type doc: CouchDocument. - -        :raise DocumentDoesNotExist: Raised if the document does not -                                            exist. -        :raise RevisionConflict: Raised if the revisions do not match. -        :raise DocumentAlreadyDeleted: Raised if the document is -                                              already deleted. -        :raise ConflictedDoc: Raised if the doc has conflicts. 
-        """ -        old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) -        if old_doc is None: -            raise DocumentDoesNotExist -        if old_doc.rev != doc.rev: -            raise RevisionConflict() -        if old_doc.is_tombstone(): -            raise DocumentAlreadyDeleted -        if old_doc.has_conflicts: -            raise ConflictedDoc() -        new_rev = self._allocate_doc_rev(doc.rev) -        doc.rev = new_rev -        doc.make_tombstone() -        self._put_doc(old_doc, doc) -        return new_rev - -    def get_doc_conflicts(self, doc_id, couch_rev=None): -        """ -        Get the conflicted versions of a document. - -        If the C{couch_rev} parameter is not None, conflicts for a specific -        document's couch revision are returned. - -        :param couch_rev: The couch document revision. -        :type couch_rev: str - -        :return: A list of conflicted versions of the document. -        :rtype: list -        """ -        return self._database.get_doc_conflicts(doc_id, couch_rev) - -    def _get_replica_gen_and_trans_id(self, other_replica_uid): -        """ -        Return the last known generation and transaction id for the other db -        replica. - -        When you do a synchronization with another replica, the Database keeps -        track of what generation the other database replica was at, and what -        the associated transaction id was.  This is used to determine what data -        needs to be sent, and if two databases are claiming to be the same -        replica. - -        :param other_replica_uid: The identifier for the other replica. -        :type other_replica_uid: str - -        :return: A tuple containing the generation and transaction id we -                 encountered during synchronization. If we've never -                 synchronized with the replica, this is (0, ''). 
-        :rtype: (int, str) -        """ -        return self._database._get_replica_gen_and_trans_id(other_replica_uid) - -    def _set_replica_gen_and_trans_id(self, other_replica_uid, -                                      other_generation, other_transaction_id, -                                      number_of_docs=None, doc_idx=None, -                                      sync_id=None): -        """ -        Set the last-known generation and transaction id for the other -        database replica. - -        We have just performed some synchronization, and we want to track what -        generation the other replica was at. See also -        _get_replica_gen_and_trans_id. - -        :param other_replica_uid: The U1DB identifier for the other replica. -        :type other_replica_uid: str -        :param other_generation: The generation number for the other replica. -        :type other_generation: int -        :param other_transaction_id: The transaction id associated with the -            generation. -        :type other_transaction_id: str -        :param number_of_docs: The total amount of documents sent on this sync -                               session. -        :type number_of_docs: int -        :param doc_idx: The index of the current document being sent. -        :type doc_idx: int -        :param sync_id: The id of the current sync session. 
-        :type sync_id: str -        """ -        if other_replica_uid is not None and other_generation is not None: -            self._do_set_replica_gen_and_trans_id( -                other_replica_uid, other_generation, other_transaction_id, -                number_of_docs=number_of_docs, doc_idx=doc_idx, -                sync_id=sync_id) - -    def _do_set_replica_gen_and_trans_id( -            self, other_replica_uid, other_generation, other_transaction_id, -            number_of_docs=None, doc_idx=None, sync_id=None): -        """ -        Set the last-known generation and transaction id for the other -        database replica. - -        We have just performed some synchronization, and we want to track what -        generation the other replica was at. See also -        _get_replica_gen_and_trans_id. - -        :param other_replica_uid: The U1DB identifier for the other replica. -        :type other_replica_uid: str -        :param other_generation: The generation number for the other replica. -        :type other_generation: int -        :param other_transaction_id: The transaction id associated with the -                                     generation. -        :type other_transaction_id: str -        :param number_of_docs: The total amount of documents sent on this sync -                               session. -        :type number_of_docs: int -        :param doc_idx: The index of the current document being sent. -        :type doc_idx: int -        :param sync_id: The id of the current sync session. 
-        :type sync_id: str -        """ -        self.cache[other_replica_uid] = (other_generation, -                                         other_transaction_id) -        self._database._do_set_replica_gen_and_trans_id(other_replica_uid, -                                                        other_generation, -                                                        other_transaction_id) - -    def _force_doc_sync_conflict(self, doc): -        """ -        Add a conflict and force a document put. - -        :param doc: The document to be put. -        :type doc: CouchDocument -        """ -        my_doc = self._get_doc(doc.doc_id) -        self._prune_conflicts(doc, vectorclock.VectorClockRev(doc.rev)) -        doc.add_conflict(self._factory(doc.doc_id, my_doc.rev, -                                       my_doc.get_json())) -        doc.has_conflicts = True -        self._put_doc(my_doc, doc) - -    def resolve_doc(self, doc, conflicted_doc_revs): -        """ -        Mark a document as no longer conflicted. - -        We take the list of revisions that the client knows about that it is -        superseding. This may be a different list from the actual current -        conflicts, in which case only those are removed as conflicted.  This -        may fail if the conflict list is significantly different from the -        supplied information. (sync could have happened in the background from -        the time you GET_DOC_CONFLICTS until the point where you RESOLVE) - -        :param doc: A Document with the new content to be inserted. -        :type doc: CouchDocument -        :param conflicted_doc_revs: A list of revisions that the new content -                                    supersedes. -        :type conflicted_doc_revs: [str] - -        :raise MissingDesignDocError: Raised when tried to access a missing -                                      design document. 
-        :raise MissingDesignDocListFunctionError: Raised when trying to access -                                                  a missing list function on a -                                                  design document. -        :raise MissingDesignDocNamedViewError: Raised when trying to access a -                                               missing named view on a design -                                               document. -        :raise MissingDesignDocDeletedError: Raised when trying to access a -                                             deleted design document. -        :raise MissingDesignDocUnknownError: Raised when failed to access a -                                             design document for an yet -                                             unknown reason. -        """ -        cur_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) -        new_rev = self._ensure_maximal_rev(cur_doc.rev, -                                           conflicted_doc_revs) -        superseded_revs = set(conflicted_doc_revs) -        doc.rev = new_rev -        # this backend stores conflicts as properties of the documents, so we -        # have to copy these conflicts over to the document being updated. -        if cur_doc.rev in superseded_revs: -            # the newer doc version will supersede the one in the database, so -            # we copy conflicts before updating the backend. -            doc.set_conflicts(cur_doc.get_conflicts())  # copy conflicts over. -            doc.delete_conflicts(superseded_revs) -            self._put_doc(cur_doc, doc) -        else: -            # the newer doc version does not supersede the one in the -            # database, so we will add a conflict to the database and copy -            # those over to the document the user has in her hands. 
-            cur_doc.add_conflict(doc) -            cur_doc.delete_conflicts(superseded_revs) -            self._put_doc(cur_doc, cur_doc)  # just update conflicts -            # backend has been updated with current conflicts, now copy them -            # to the current document. -            doc.set_conflicts(cur_doc.get_conflicts()) - -    def _put_doc_if_newer(self, doc, save_conflict, replica_uid, replica_gen, -                          replica_trans_id='', number_of_docs=None, -                          doc_idx=None, sync_id=None): -        """ -        Insert/update document into the database with a given revision. - -        This api is used during synchronization operations. - -        If a document would conflict and save_conflict is set to True, the -        content will be selected as the 'current' content for doc.doc_id, -        even though doc.rev doesn't supersede the currently stored revision. -        The currently stored document will be added to the list of conflict -        alternatives for the given doc_id. - -        This forces the new content to be 'current' so that we get convergence -        after synchronizing, even if people don't resolve conflicts. Users can -        then notice that their content is out of date, update it, and -        synchronize again. (The alternative is that users could synchronize and -        think the data has propagated, but their local copy looks fine, and the -        remote copy is never updated again.) - -        :param doc: A document object -        :type doc: CouchDocument -        :param save_conflict: If this document is a conflict, do you want to -                              save it as a conflict, or just ignore it. -        :type save_conflict: bool -        :param replica_uid: A unique replica identifier. -        :type replica_uid: str -        :param replica_gen: The generation of the replica corresponding to the -                            this document. 
The replica arguments are optional, -                            but are used during synchronization. -        :type replica_gen: int -        :param replica_trans_id: The transaction_id associated with the -                                 generation. -        :type replica_trans_id: str -        :param number_of_docs: The total amount of documents sent on this sync -                               session. -        :type number_of_docs: int -        :param doc_idx: The index of the current document being sent. -        :type doc_idx: int -        :param sync_id: The id of the current sync session. -        :type sync_id: str - -        :return: (state, at_gen) -  If we don't have doc_id already, or if -                 doc_rev supersedes the existing document revision, then the -                 content will be inserted, and state is 'inserted'.  If -                 doc_rev is less than or equal to the existing revision, then -                 the put is ignored and state is respecitvely 'superseded' or -                 'converged'.  If doc_rev is not strictly superseded or -                 supersedes, then state is 'conflicted'. The document will not -                 be inserted if save_conflict is False.  For 'inserted' or -                 'converged', at_gen is the insertion/current generation. 
-        :rtype: (str, int) -        """ -        if not isinstance(doc, CouchDocument): -            doc = self._factory(doc.doc_id, doc.rev, doc.get_json()) -        my_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) -        if my_doc: -            doc.set_conflicts(my_doc.get_conflicts()) -        return CommonBackend._put_doc_if_newer(self, doc, save_conflict, -                                               replica_uid, replica_gen, -                                               replica_trans_id) - -    def _put_and_update_indexes(self, cur_doc, doc): -        self._put_doc(cur_doc, doc) - -    def get_docs(self, doc_ids, check_for_conflicts=True, -                 include_deleted=False): -        """ -        Get the JSON content for many documents. - -        :param doc_ids: A list of document identifiers or None for all. -        :type doc_ids: list -        :param check_for_conflicts: If set to False, then the conflict check -                                    will be skipped, and 'None' will be -                                    returned instead of True/False. -        :type check_for_conflicts: bool -        :param include_deleted: If set to True, deleted documents will be -                                returned with empty content. Otherwise deleted -                                documents will not be included in the results. -        :return: iterable giving the Document object for each document id -                 in matching doc_ids order. -        :rtype: iterable -        """ -        return self._database.get_docs(doc_ids, check_for_conflicts, -                                       include_deleted) - -    def _prune_conflicts(self, doc, doc_vcr): -        """ -        Prune conflicts that are older then the current document's revision, or -        whose content match to the current document's content. -        Originally in u1db.CommonBackend - -        :param doc: The document to have conflicts pruned. 
-        :type doc: CouchDocument -        :param doc_vcr: A vector clock representing the current document's -                        revision. -        :type doc_vcr: u1db.vectorclock.VectorClock -        """ -        if doc.has_conflicts: -            autoresolved = False -            c_revs_to_prune = [] -            for c_doc in doc._conflicts: -                c_vcr = vectorclock.VectorClockRev(c_doc.rev) -                if doc_vcr.is_newer(c_vcr): -                    c_revs_to_prune.append(c_doc.rev) -                elif doc.same_content_as(c_doc): -                    c_revs_to_prune.append(c_doc.rev) -                    doc_vcr.maximize(c_vcr) -                    autoresolved = True -            if autoresolved: -                doc_vcr.increment(self._replica_uid) -                doc.rev = doc_vcr.as_str() -            doc.delete_conflicts(c_revs_to_prune) - - -class CouchDatabase(object): -    """ -    Holds CouchDB related code. -    This class gives methods to encapsulate database operations and hide -    CouchDB details from backend code. -    """ - -    @classmethod -    def open_database(cls, url, create, ensure_ddocs=False): -        """ -        Open a U1DB database using CouchDB as backend. - -        :param url: the url of the database replica -        :type url: str -        :param create: should the replica be created if it does not exist? -        :type create: bool -        :param replica_uid: an optional unique replica identifier -        :type replica_uid: str -        :param ensure_ddocs: Ensure that the design docs exist on server. 
-        :type ensure_ddocs: bool - -        :return: the database instance -        :rtype: SoledadBackend -        """ -        # get database from url -        m = re.match('(^https?://[^/]+)/(.+)$', url) -        if not m: -            raise InvalidURLError -        url = m.group(1) -        dbname = m.group(2) -        with couch_server(url) as server: -            try: -                server[dbname] -            except ResourceNotFound: -                if not create: -                    raise DatabaseDoesNotExist() -                server.create(dbname) -        return cls( -            url, dbname, ensure_ddocs=ensure_ddocs) - -    def __init__(self, url, dbname, ensure_ddocs=True, -                 database_security=None): -        self._session = Session(timeout=COUCH_TIMEOUT) -        self._url = url -        self._dbname = dbname -        self._database = Database( -            urljoin(url, dbname), -            self._session) -        self._database.info() -        if ensure_ddocs: -            self.ensure_ddocs_on_db() -            self.ensure_security_ddoc(database_security) - -    def ensure_ddocs_on_db(self): -        """ -        Ensure that the design documents used by the backend exist on the -        couch database. -        """ -        for ddoc_name in ['docs', 'syncs', 'transactions']: -            try: -                self._database.resource('_design', -                                        ddoc_name, '_info').get_json() -            except ResourceNotFound: -                ddoc = json.loads( -                    binascii.a2b_base64( -                        getattr(ddocs, ddoc_name))) -                self._database.save(ddoc) - -    def ensure_security_ddoc(self, security_config=None): -        """ -        Make sure that only soledad user is able to access this database as -        an unprivileged member, meaning that administration access will -        be forbidden even inside an user database. 
-        The goal is to make sure that only the lowest access level is given -        to the unprivileged CouchDB user set on the server process. -        This is achieved by creating a _security design document, see: -        http://docs.couchdb.org/en/latest/api/database/security.html - -        :param database_security: security configuration parsed from conf file -        :type cache: dict -        """ -        security_config = security_config or {} -        security = self._database.resource.get_json('_security')[2] -        security['members'] = {'names': [], 'roles': []} -        security['members']['names'] = security_config.get('members', -                                                           ['soledad']) -        security['members']['roles'] = security_config.get('members_roles', []) -        security['admins'] = {'names': [], 'roles': []} -        security['admins']['names'] = security_config.get('admins', []) -        security['admins']['roles'] = security_config.get('admins_roles', []) -        self._database.resource.put_json('_security', body=security) - -    def delete_database(self): -        """ -        Delete a U1DB CouchDB database. -        """ -        with couch_server(self._url) as server: -            del(server[self._dbname]) - -    def set_replica_uid(self, replica_uid): -        """ -        Force the replica uid to be set. - -        :param replica_uid: The new replica uid. -        :type replica_uid: str -        """ -        try: -            # set on existent config document -            doc = self._database['u1db_config'] -            doc['replica_uid'] = replica_uid -        except ResourceNotFound: -            # or create the config document -            doc = { -                '_id': 'u1db_config', -                'replica_uid': replica_uid, -            } -        self._database.save(doc) - -    def get_replica_uid(self): -        """ -        Get the replica uid. - -        :return: The replica uid. 
-        :rtype: str -        """ -        try: -            # grab replica_uid from server -            doc = self._database['u1db_config'] -            replica_uid = doc['replica_uid'] -            return replica_uid -        except ResourceNotFound: -            # create a unique replica_uid -            replica_uid = uuid.uuid4().hex -            self.set_replica_uid(replica_uid) -            return replica_uid - -    def close(self): -        self._database = None - -    def get_all_docs(self, include_deleted=False): -        """ -        Get the JSON content for all documents in the database. - -        :param include_deleted: If set to True, deleted documents will be -                                returned with empty content. Otherwise deleted -                                documents will not be included in the results. -        :type include_deleted: bool - -        :return: (generation, [CouchDocument]) -            The current generation of the database, followed by a list of all -            the documents in the database. -        :rtype: (int, [CouchDocument]) -        """ - -        generation, _ = self._get_generation_info() -        results = list(self.get_docs(self._database, -                                     include_deleted=include_deleted)) -        return (generation, results) - -    def get_docs(self, doc_ids, check_for_conflicts=True, -                 include_deleted=False): -        """ -        Get the JSON content for many documents. - -        :param doc_ids: A list of document identifiers or None for all. -        :type doc_ids: list -        :param check_for_conflicts: If set to False, then the conflict check -                                    will be skipped, and 'None' will be -                                    returned instead of True/False. -        :type check_for_conflicts: bool -        :param include_deleted: If set to True, deleted documents will be -                                returned with empty content. 
Otherwise deleted -                                documents will not be included in the results. -        :return: iterable giving the Document object for each document id -                 in matching doc_ids order. -        :rtype: iterable -        """ -        # Workaround for: -        # -        #   http://bugs.python.org/issue7980 -        #   https://leap.se/code/issues/5449 -        # -        # python-couchdb uses time.strptime, which is not thread safe. In -        # order to avoid the problem described on the issues above, we preload -        # strptime here by evaluating the conversion of an arbitrary date. -        # This will not be needed when/if we switch from python-couchdb to -        # paisley. -        time.strptime('Mar 8 1917', '%b %d %Y') -        get_one = lambda doc_id: self._get_doc(doc_id, check_for_conflicts) -        docs = [THREAD_POOL.apply_async(get_one, [doc_id]) -                for doc_id in doc_ids] -        for doc in docs: -            doc = doc.get() -            if not doc or not include_deleted and doc.is_tombstone(): -                continue -            yield doc - -    def _get_doc(self, doc_id, check_for_conflicts=False): -        """ -        Extract the document from storage. - -        This can return None if the document doesn't exist. - -        :param doc_id: The unique document identifier -        :type doc_id: str -        :param check_for_conflicts: If set to False, then the conflict check -                                    will be skipped. -        :type check_for_conflicts: bool - -        :return: The document. 
-        :rtype: CouchDocument -        """ -        # get document with all attachments (u1db content and eventual -        # conflicts) -        try: -            result = \ -                self._database.resource(doc_id).get_json( -                    attachments=True)[2] -        except ResourceNotFound: -            return None -        return self.__parse_doc_from_couch(result, doc_id, check_for_conflicts) - -    def __parse_doc_from_couch(self, result, doc_id, -                               check_for_conflicts=False): -        # restrict to u1db documents -        if 'u1db_rev' not in result: -            return None -        doc = CouchDocument(doc_id, result['u1db_rev']) -        # set contents or make tombstone -        if '_attachments' not in result \ -                or 'u1db_content' not in result['_attachments']: -            doc.make_tombstone() -        else: -            doc.content = json.loads( -                binascii.a2b_base64( -                    result['_attachments']['u1db_content']['data'])) -        # determine if there are conflicts -        if check_for_conflicts \ -                and '_attachments' in result \ -                and 'u1db_conflicts' in result['_attachments']: -            doc.set_conflicts( -                self._build_conflicts( -                    doc.doc_id, -                    json.loads(binascii.a2b_base64( -                        result['_attachments']['u1db_conflicts']['data'])))) -        # store couch revision -        doc.couch_rev = result['_rev'] -        # store transactions -        doc.transactions = result['u1db_transactions'] -        return doc - -    def _build_conflicts(self, doc_id, attached_conflicts): -        """ -        Build the conflicted documents list from the conflicts attachment -        fetched from a couch document. - -        :param attached_conflicts: The document's conflicts as fetched from a -                                   couch document attachment. 
-        :type attached_conflicts: dict -        """ -        conflicts = [] -        for doc_rev, content in attached_conflicts: -            doc = CouchDocument(doc_id, doc_rev) -            if content is None: -                doc.make_tombstone() -            else: -                doc.content = content -            conflicts.append(doc) -        return conflicts - -    def _get_trans_id_for_gen(self, generation): -        """ -        Get the transaction id corresponding to a particular generation. - -        :param generation: The generation for which to get the transaction id. -        :type generation: int - -        :return: The transaction id for C{generation}. -        :rtype: str - -        :raise InvalidGeneration: Raised when the generation does not exist. -        :raise MissingDesignDocError: Raised when tried to access a missing -                                      design document. -        :raise MissingDesignDocListFunctionError: Raised when trying to access -                                                  a missing list function on a -                                                  design document. -        :raise MissingDesignDocNamedViewError: Raised when trying to access a -                                               missing named view on a design -                                               document. -        :raise MissingDesignDocDeletedError: Raised when trying to access a -                                             deleted design document. -        :raise MissingDesignDocUnknownError: Raised when failed to access a -                                             design document for an yet -                                             unknown reason. 
-        """ -        if generation == 0: -            return '' -        # query a couch list function -        ddoc_path = [ -            '_design', 'transactions', '_list', 'trans_id_for_gen', 'log' -        ] -        res = self._database.resource(*ddoc_path) -        try: -            response = res.get_json(gen=generation) -            if response[2] == {}: -                raise InvalidGeneration -            return response[2]['transaction_id'] -        except ResourceNotFound as e: -            raise_missing_design_doc_error(e, ddoc_path) -        except ServerError as e: -            raise_server_error(e, ddoc_path) - -    def _get_replica_gen_and_trans_id(self, other_replica_uid): -        """ -        Return the last known generation and transaction id for the other db -        replica. - -        When you do a synchronization with another replica, the Database keeps -        track of what generation the other database replica was at, and what -        the associated transaction id was.  This is used to determine what data -        needs to be sent, and if two databases are claiming to be the same -        replica. - -        :param other_replica_uid: The identifier for the other replica. -        :type other_replica_uid: str - -        :return: A tuple containing the generation and transaction id we -                 encountered during synchronization. If we've never -                 synchronized with the replica, this is (0, ''). 
-        :rtype: (int, str) -        """ -        doc_id = 'u1db_sync_%s' % other_replica_uid -        try: -            doc = self._database[doc_id] -        except ResourceNotFound: -            doc = { -                '_id': doc_id, -                'generation': 0, -                'transaction_id': '', -            } -            self._database.save(doc) -        result = doc['generation'], doc['transaction_id'] -        return result - -    def get_doc_conflicts(self, doc_id, couch_rev=None): -        """ -        Get the conflicted versions of a document. - -        If the C{couch_rev} parameter is not None, conflicts for a specific -        document's couch revision are returned. - -        :param couch_rev: The couch document revision. -        :type couch_rev: str - -        :return: A list of conflicted versions of the document. -        :rtype: list -        """ -        # request conflicts attachment from server -        params = {} -        conflicts = [] -        if couch_rev is not None: -            params['rev'] = couch_rev  # restric document's couch revision -        else: -            # TODO: move into resource logic! -            first_entry = self._get_doc(doc_id, check_for_conflicts=True) -            conflicts.append(first_entry) -        resource = self._database.resource(doc_id, 'u1db_conflicts') -        try: -            response = resource.get_json(**params) -            return conflicts + self._build_conflicts( -                doc_id, json.loads(response[2].read())) -        except ResourceNotFound: -            return [] - -    def _do_set_replica_gen_and_trans_id( -            self, other_replica_uid, other_generation, other_transaction_id, -            number_of_docs=None, doc_idx=None, sync_id=None): -        """ -        Set the last-known generation and transaction id for the other -        database replica. 
- -        We have just performed some synchronization, and we want to track what -        generation the other replica was at. See also -        _get_replica_gen_and_trans_id. - -        :param other_replica_uid: The U1DB identifier for the other replica. -        :type other_replica_uid: str -        :param other_generation: The generation number for the other replica. -        :type other_generation: int -        :param other_transaction_id: The transaction id associated with the -                                     generation. -        :type other_transaction_id: str -        :param number_of_docs: The total amount of documents sent on this sync -                               session. -        :type number_of_docs: int -        :param doc_idx: The index of the current document being sent. -        :type doc_idx: int -        :param sync_id: The id of the current sync session. -        :type sync_id: str -        """ -        doc_id = 'u1db_sync_%s' % other_replica_uid -        try: -            doc = self._database[doc_id] -        except ResourceNotFound: -            doc = {'_id': doc_id} -        doc['generation'] = other_generation -        doc['transaction_id'] = other_transaction_id -        self._database.save(doc) - -    def _get_transaction_log(self): -        """ -        This is only for the test suite, it is not part of the api. - -        :return: The complete transaction log. -        :rtype: [(str, str)] - -        :raise MissingDesignDocError: Raised when tried to access a missing -                                      design document. -        :raise MissingDesignDocListFunctionError: Raised when trying to access -                                                  a missing list function on a -                                                  design document. 
-        :raise MissingDesignDocNamedViewError: Raised when trying to access a -                                               missing named view on a design -                                               document. -        :raise MissingDesignDocDeletedError: Raised when trying to access a -                                             deleted design document. -        :raise MissingDesignDocUnknownError: Raised when failed to access a -                                             design document for an yet -                                             unknown reason. -        """ -        # query a couch view -        ddoc_path = ['_design', 'transactions', '_view', 'log'] -        res = self._database.resource(*ddoc_path) -        try: -            response = res.get_json() -            return map( -                lambda row: (row['id'], row['value']), -                response[2]['rows']) -        except ResourceNotFound as e: -            raise_missing_design_doc_error(e, ddoc_path) - -    def whats_changed(self, old_generation=0): -        """ -        Return a list of documents that have changed since old_generation. - -        :param old_generation: The generation of the database in the old -                               state. -        :type old_generation: int - -        :return: (generation, trans_id, [(doc_id, generation, trans_id),...]) -                 The current generation of the database, its associated -                 transaction id, and a list of of changed documents since -                 old_generation, represented by tuples with for each document -                 its doc_id and the generation and transaction id corresponding -                 to the last intervening change and sorted by generation (old -                 changes first) -        :rtype: (int, str, [(str, int, str)]) - -        :raise MissingDesignDocError: Raised when tried to access a missing -                                      design document. 
-        :raise MissingDesignDocListFunctionError: Raised when trying to access -                                                  a missing list function on a -                                                  design document. -        :raise MissingDesignDocNamedViewError: Raised when trying to access a -                                               missing named view on a design -                                               document. -        :raise MissingDesignDocDeletedError: Raised when trying to access a -                                             deleted design document. -        :raise MissingDesignDocUnknownError: Raised when failed to access a -                                             design document for an yet -                                             unknown reason. -        """ -        # query a couch list function -        ddoc_path = [ -            '_design', 'transactions', '_list', 'whats_changed', 'log' -        ] -        res = self._database.resource(*ddoc_path) -        try: -            response = res.get_json(old_gen=old_generation) -            results = map( -                lambda row: -                    (row['generation'], row['doc_id'], row['transaction_id']), -                response[2]['transactions']) -            results.reverse() -            cur_gen = old_generation -            seen = set() -            changes = [] -            newest_trans_id = '' -            for generation, doc_id, trans_id in results: -                if doc_id not in seen: -                    changes.append((doc_id, generation, trans_id)) -                    seen.add(doc_id) -            if changes: -                cur_gen = changes[0][1]  # max generation -                newest_trans_id = changes[0][2] -                changes.reverse() -            else: -                cur_gen, newest_trans_id = self._get_generation_info() - -            return cur_gen, newest_trans_id, changes -        except ResourceNotFound as e: -            
raise_missing_design_doc_error(e, ddoc_path) -        except ServerError as e: -            raise_server_error(e, ddoc_path) - -    def _get_generation_info(self): -        """ -        Return the current generation. - -        :return: A tuple containing the current generation and transaction id. -        :rtype: (int, str) - -        :raise MissingDesignDocError: Raised when tried to access a missing -                                      design document. -        :raise MissingDesignDocListFunctionError: Raised when trying to access -                                                  a missing list function on a -                                                  design document. -        :raise MissingDesignDocNamedViewError: Raised when trying to access a -                                               missing named view on a design -                                               document. -        :raise MissingDesignDocDeletedError: Raised when trying to access a -                                             deleted design document. -        :raise MissingDesignDocUnknownError: Raised when failed to access a -                                             design document for an yet -                                             unknown reason. -        """ -        # query a couch list function -        ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] -        res = self._database.resource(*ddoc_path) -        try: -            response = res.get_json() -            return (response[2]['generation'], response[2]['transaction_id']) -        except ResourceNotFound as e: -            raise_missing_design_doc_error(e, ddoc_path) -        except ServerError as e: -            raise_server_error(e, ddoc_path) - -    def save_document(self, old_doc, doc, transaction_id): -        """ -        Put the document in the Couch backend database. 
- -        Note that C{old_doc} must have been fetched with the parameter -        C{check_for_conflicts} equal to True, so we can properly update the -        new document using the conflict information from the old one. - -        :param old_doc: The old document version. -        :type old_doc: CouchDocument -        :param doc: The document to be put. -        :type doc: CouchDocument - -        :raise RevisionConflict: Raised when trying to update a document but -                                 couch revisions mismatch. -        :raise MissingDesignDocError: Raised when tried to access a missing -                                      design document. -        :raise MissingDesignDocListFunctionError: Raised when trying to access -                                                  a missing list function on a -                                                  design document. -        :raise MissingDesignDocNamedViewError: Raised when trying to access a -                                               missing named view on a design -                                               document. -        :raise MissingDesignDocDeletedError: Raised when trying to access a -                                             deleted design document. -        :raise MissingDesignDocUnknownError: Raised when failed to access a -                                             design document for an yet -                                             unknown reason. 
-        """ -        attachments = {}  # we save content and conflicts as attachments -        parts = []  # and we put it using couch's multipart PUT -        # save content as attachment -        if doc.is_tombstone() is False: -            content = doc.get_json() -            attachments['u1db_content'] = { -                'follows': True, -                'content_type': 'application/octet-stream', -                'length': len(content), -            } -            parts.append(content) -        # save conflicts as attachment -        if doc.has_conflicts is True: -            conflicts = json.dumps( -                map(lambda cdoc: (cdoc.rev, cdoc.content), -                    doc.get_conflicts())) -            attachments['u1db_conflicts'] = { -                'follows': True, -                'content_type': 'application/octet-stream', -                'length': len(conflicts), -            } -            parts.append(conflicts) -        # store old transactions, if any -        transactions = old_doc.transactions[:] if old_doc is not None else [] -        # create a new transaction id and timestamp it so the transaction log -        # is consistent when querying the database. -        transactions.append( -            # here we store milliseconds to keep consistent with javascript -            # Date.prototype.getTime() which was used before inside a couchdb -            # update handler. 
-            (int(time.time() * 1000), -             transaction_id)) -        # build the couch document -        couch_doc = { -            '_id': doc.doc_id, -            'u1db_rev': doc.rev, -            'u1db_transactions': transactions, -            '_attachments': attachments, -        } -        # if we are updating a doc we have to add the couch doc revision -        if old_doc is not None: -            couch_doc['_rev'] = old_doc.couch_rev -        # prepare the multipart PUT -        buf = StringIO() -        headers = {} -        envelope = MultipartWriter(buf, headers=headers, subtype='related') -        envelope.add('application/json', json.dumps(couch_doc)) -        for part in parts: -            envelope.add('application/octet-stream', part) -        envelope.close() -        # try to save and fail if there's a revision conflict -        try: -            resource = self._new_resource() -            resource.put_json( -                doc.doc_id, body=str(buf.getvalue()), headers=headers) -        except ResourceConflict: -            raise RevisionConflict() -        return transactions[-1][1] - -    def _new_resource(self, *path): -        """ -        Return a new resource for accessing a couch database. - -        :return: A resource for accessing a couch database. -        :rtype: couchdb.http.Resource -        """ -        # Workaround for: https://leap.se/code/issues/5448 -        url = couch_urljoin(self._database.resource.url, *path) -        resource = Resource(url, Session(timeout=COUCH_TIMEOUT)) -        resource.credentials = self._database.resource.credentials -        resource.headers = self._database.resource.headers.copy() -        return resource - - -class CouchSyncTarget(CommonSyncTarget): - -    """ -    Functionality for using a SoledadBackend as a synchronization target. 
-    """ - -    def get_sync_info(self, source_replica_uid): -        source_gen, source_trans_id = self._db._get_replica_gen_and_trans_id( -            source_replica_uid) -        my_gen, my_trans_id = self._db._get_generation_info() -        return ( -            self._db._replica_uid, my_gen, my_trans_id, source_gen, -            source_trans_id) - -    def record_sync_info(self, source_replica_uid, source_replica_generation, -                         source_replica_transaction_id): -        if self._trace_hook: -            self._trace_hook('record_sync_info') -        self._db._set_replica_gen_and_trans_id( -            source_replica_uid, source_replica_generation, -            source_replica_transaction_id) - - -def is_db_name_valid(name): -    """ -    Validate a user database using a regular expression. - -    :param name: database name. -    :type name: str - -    :return: boolean for name vailidity -    :rtype: bool -    """ -    db_name_regex = "^user-[a-f0-9]+$" -    return re.match(db_name_regex, name) is not None - - -class CouchServerState(ServerState): - -    """ -    Inteface of the WSGI server with the CouchDB backend. -    """ - -    def __init__(self, couch_url, create_cmd=None): -        """ -        Initialize the couch server state. - -        :param couch_url: The URL for the couch database. -        :type couch_url: str -        """ -        self.couch_url = couch_url -        self.create_cmd = create_cmd - -    def open_database(self, dbname): -        """ -        Open a couch database. - -        :param dbname: The name of the database to open. -        :type dbname: str - -        :return: The SoledadBackend object. 
-        :rtype: SoledadBackend -        """ -        url = urljoin(self.couch_url, dbname) -        db = SoledadBackend.open_database(url, create=False, -                                          ensure_ddocs=False) -        return db - -    def ensure_database(self, dbname): -        """ -        Ensure couch database exists. - -        :param dbname: The name of the database to ensure. -        :type dbname: str - -        :raise Unauthorized: If disabled or other error was raised. - -        :return: The SoledadBackend object and its replica_uid. -        :rtype: (SoledadBackend, str) -        """ -        if not self.create_cmd: -            raise Unauthorized() -        else: -            code, out = exec_validated_cmd(self.create_cmd, dbname, -                                           validator=is_db_name_valid) -            if code is not 0: -                logger.error(""" -                    Error while creating database (%s) with (%s) command. -                    Output: %s -                    Exit code: %d -                    """ % (dbname, self.create_cmd, out, code)) -                raise Unauthorized() -        db = self.open_database(dbname) -        return db, db.replica_uid - -    def delete_database(self, dbname): -        """ -        Delete couch database. - -        :param dbname: The name of the database to delete. -        :type dbname: str - -        :raise Unauthorized: Always, because Soledad server is not allowed to -                             delete databases. 
-        """ -        raise Unauthorized() diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py new file mode 100644 index 00000000..727f033f --- /dev/null +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -0,0 +1,746 @@ +# -*- coding: utf-8 -*- +# couch.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +"""A U1DB backend that uses CouchDB as its persistence layer.""" + + +import json +import re +import uuid +import binascii +import time + + +from StringIO import StringIO +from urlparse import urljoin +from contextlib import contextmanager +from multiprocessing.pool import ThreadPool + + +from couchdb.client import Server, Database +from couchdb.multipart import MultipartWriter +from couchdb.http import ( +    ResourceConflict, +    ResourceNotFound, +    ServerError, +    Session, +    urljoin as couch_urljoin, +    Resource, +) +from u1db.errors import ( +    DatabaseDoesNotExist, +    InvalidGeneration, +    RevisionConflict, +) +from u1db.remote import http_app + + +from leap.soledad.common import ddocs +from leap.soledad.common.errors import raise_server_error +from leap.soledad.common.errors import raise_missing_design_doc_error +from leap.soledad.common.errors import InvalidURLError +from leap.soledad.common.document import CouchDocument +from leap.soledad.common.backend 
COUCH_TIMEOUT = 120  # timeout for transfers between Soledad server and Couch


def list_users_dbs(couch_url):
    """
    List the names of all user databases on a CouchDB server.

    User databases are the ones whose name has the form 'user-{uuid4}'.

    :param couch_url: The couch url with needed credentials
    :type couch_url: str

    :return: The list of all database names from users.
    :rtype: [str]
    """
    with couch_server(couch_url) as server:
        return [dbname for dbname in server
                if dbname.startswith('user-')]


# monkey-patch the u1db http app to use CouchDocument
http_app.Document = CouchDocument


@contextmanager
def couch_server(url):
    """
    Provide a connection to a couch server and cleanup after use.

    For database creation and deletion we use an ephemeral connection to the
    couch server. That connection has to be properly closed, so we provide it
    as a context manager.

    :param url: The URL of the Couch server.
    :type url: str
    """
    yield Server(url=url, full_commit=False,
                 session=Session(timeout=COUCH_TIMEOUT))


THREAD_POOL = ThreadPool(20)
+        :type create: bool +        :param replica_uid: an optional unique replica identifier +        :type replica_uid: str +        :param ensure_ddocs: Ensure that the design docs exist on server. +        :type ensure_ddocs: bool + +        :return: the database instance +        :rtype: SoledadBackend +        """ +        # get database from url +        m = re.match('(^https?://[^/]+)/(.+)$', url) +        if not m: +            raise InvalidURLError +        url = m.group(1) +        dbname = m.group(2) +        with couch_server(url) as server: +            try: +                server[dbname] +            except ResourceNotFound: +                if not create: +                    raise DatabaseDoesNotExist() +                server.create(dbname) +        db = cls(url, +                 dbname, ensure_ddocs=ensure_ddocs, +                 database_security=database_security) +        return SoledadBackend( +            db, replica_uid=replica_uid) + +    def __init__(self, url, dbname, ensure_ddocs=True, +                 database_security=None): +        self._session = Session(timeout=COUCH_TIMEOUT) +        self._url = url +        self._dbname = dbname +        self._database = Database( +            urljoin(url, dbname), +            self._session) +        self._database.info() +        if ensure_ddocs: +            self.ensure_ddocs_on_db() +            self.ensure_security_ddoc(database_security) + +    def ensure_ddocs_on_db(self): +        """ +        Ensure that the design documents used by the backend exist on the +        couch database. 
+        """ +        for ddoc_name in ['docs', 'syncs', 'transactions']: +            try: +                self._database.resource('_design', +                                        ddoc_name, '_info').get_json() +            except ResourceNotFound: +                ddoc = json.loads( +                    binascii.a2b_base64( +                        getattr(ddocs, ddoc_name))) +                self._database.save(ddoc) + +    def ensure_security_ddoc(self, security_config=None): +        """ +        Make sure that only soledad user is able to access this database as +        an unprivileged member, meaning that administration access will +        be forbidden even inside an user database. +        The goal is to make sure that only the lowest access level is given +        to the unprivileged CouchDB user set on the server process. +        This is achieved by creating a _security design document, see: +        http://docs.couchdb.org/en/latest/api/database/security.html + +        :param database_security: security configuration parsed from conf file +        :type cache: dict +        """ +        security_config = security_config or {} +        security = self._database.resource.get_json('_security')[2] +        security['members'] = {'names': [], 'roles': []} +        security['members']['names'] = security_config.get('members', +                                                           ['soledad']) +        security['members']['roles'] = security_config.get('members_roles', []) +        security['admins'] = {'names': [], 'roles': []} +        security['admins']['names'] = security_config.get('admins', []) +        security['admins']['roles'] = security_config.get('admins_roles', []) +        self._database.resource.put_json('_security', body=security) + +    def delete_database(self): +        """ +        Delete a U1DB CouchDB database. 
+        """ +        with couch_server(self._url) as server: +            del(server[self._dbname]) + +    def set_replica_uid(self, replica_uid): +        """ +        Force the replica uid to be set. + +        :param replica_uid: The new replica uid. +        :type replica_uid: str +        """ +        try: +            # set on existent config document +            doc = self._database['u1db_config'] +            doc['replica_uid'] = replica_uid +        except ResourceNotFound: +            # or create the config document +            doc = { +                '_id': 'u1db_config', +                'replica_uid': replica_uid, +            } +        self._database.save(doc) + +    def get_replica_uid(self): +        """ +        Get the replica uid. + +        :return: The replica uid. +        :rtype: str +        """ +        try: +            # grab replica_uid from server +            doc = self._database['u1db_config'] +            replica_uid = doc['replica_uid'] +            return replica_uid +        except ResourceNotFound: +            # create a unique replica_uid +            replica_uid = uuid.uuid4().hex +            self.set_replica_uid(replica_uid) +            return replica_uid + +    def close(self): +        self._database = None + +    def get_all_docs(self, include_deleted=False): +        """ +        Get the JSON content for all documents in the database. + +        :param include_deleted: If set to True, deleted documents will be +                                returned with empty content. Otherwise deleted +                                documents will not be included in the results. +        :type include_deleted: bool + +        :return: (generation, [CouchDocument]) +            The current generation of the database, followed by a list of all +            the documents in the database. 
+        :rtype: (int, [CouchDocument]) +        """ + +        generation, _ = self._get_generation_info() +        results = list(self.get_docs(self._database, +                                     include_deleted=include_deleted)) +        return (generation, results) + +    def get_docs(self, doc_ids, check_for_conflicts=True, +                 include_deleted=False): +        """ +        Get the JSON content for many documents. + +        :param doc_ids: A list of document identifiers or None for all. +        :type doc_ids: list +        :param check_for_conflicts: If set to False, then the conflict check +                                    will be skipped, and 'None' will be +                                    returned instead of True/False. +        :type check_for_conflicts: bool +        :param include_deleted: If set to True, deleted documents will be +                                returned with empty content. Otherwise deleted +                                documents will not be included in the results. +        :return: iterable giving the Document object for each document id +                 in matching doc_ids order. +        :rtype: iterable +        """ +        # Workaround for: +        # +        #   http://bugs.python.org/issue7980 +        #   https://leap.se/code/issues/5449 +        # +        # python-couchdb uses time.strptime, which is not thread safe. In +        # order to avoid the problem described on the issues above, we preload +        # strptime here by evaluating the conversion of an arbitrary date. +        # This will not be needed when/if we switch from python-couchdb to +        # paisley. 
+        time.strptime('Mar 8 1917', '%b %d %Y') +        get_one = lambda doc_id: self._get_doc(doc_id, check_for_conflicts) +        docs = [THREAD_POOL.apply_async(get_one, [doc_id]) +                for doc_id in doc_ids] +        for doc in docs: +            doc = doc.get() +            if not doc or not include_deleted and doc.is_tombstone(): +                continue +            yield doc + +    def _get_doc(self, doc_id, check_for_conflicts=False): +        """ +        Extract the document from storage. + +        This can return None if the document doesn't exist. + +        :param doc_id: The unique document identifier +        :type doc_id: str +        :param check_for_conflicts: If set to False, then the conflict check +                                    will be skipped. +        :type check_for_conflicts: bool + +        :return: The document. +        :rtype: CouchDocument +        """ +        # get document with all attachments (u1db content and eventual +        # conflicts) +        try: +            result = \ +                self._database.resource(doc_id).get_json( +                    attachments=True)[2] +        except ResourceNotFound: +            return None +        return self.__parse_doc_from_couch(result, doc_id, check_for_conflicts) + +    def __parse_doc_from_couch(self, result, doc_id, +                               check_for_conflicts=False): +        # restrict to u1db documents +        if 'u1db_rev' not in result: +            return None +        doc = CouchDocument(doc_id, result['u1db_rev']) +        # set contents or make tombstone +        if '_attachments' not in result \ +                or 'u1db_content' not in result['_attachments']: +            doc.make_tombstone() +        else: +            doc.content = json.loads( +                binascii.a2b_base64( +                    result['_attachments']['u1db_content']['data'])) +        # determine if there are conflicts +        if check_for_conflicts \ +         
       and '_attachments' in result \ +                and 'u1db_conflicts' in result['_attachments']: +            doc.set_conflicts( +                self._build_conflicts( +                    doc.doc_id, +                    json.loads(binascii.a2b_base64( +                        result['_attachments']['u1db_conflicts']['data'])))) +        # store couch revision +        doc.couch_rev = result['_rev'] +        # store transactions +        doc.transactions = result['u1db_transactions'] +        return doc + +    def _build_conflicts(self, doc_id, attached_conflicts): +        """ +        Build the conflicted documents list from the conflicts attachment +        fetched from a couch document. + +        :param attached_conflicts: The document's conflicts as fetched from a +                                   couch document attachment. +        :type attached_conflicts: dict +        """ +        conflicts = [] +        for doc_rev, content in attached_conflicts: +            doc = CouchDocument(doc_id, doc_rev) +            if content is None: +                doc.make_tombstone() +            else: +                doc.content = content +            conflicts.append(doc) +        return conflicts + +    def _get_trans_id_for_gen(self, generation): +        """ +        Get the transaction id corresponding to a particular generation. + +        :param generation: The generation for which to get the transaction id. +        :type generation: int + +        :return: The transaction id for C{generation}. +        :rtype: str + +        :raise InvalidGeneration: Raised when the generation does not exist. +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. 
+        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. +        """ +        if generation == 0: +            return '' +        # query a couch list function +        ddoc_path = [ +            '_design', 'transactions', '_list', 'trans_id_for_gen', 'log' +        ] +        res = self._database.resource(*ddoc_path) +        try: +            response = res.get_json(gen=generation) +            if response[2] == {}: +                raise InvalidGeneration +            return response[2]['transaction_id'] +        except ResourceNotFound as e: +            raise_missing_design_doc_error(e, ddoc_path) +        except ServerError as e: +            raise_server_error(e, ddoc_path) + +    def _get_replica_gen_and_trans_id(self, other_replica_uid): +        """ +        Return the last known generation and transaction id for the other db +        replica. + +        When you do a synchronization with another replica, the Database keeps +        track of what generation the other database replica was at, and what +        the associated transaction id was.  This is used to determine what data +        needs to be sent, and if two databases are claiming to be the same +        replica. + +        :param other_replica_uid: The identifier for the other replica. +        :type other_replica_uid: str + +        :return: A tuple containing the generation and transaction id we +                 encountered during synchronization. 
If we've never +                 synchronized with the replica, this is (0, ''). +        :rtype: (int, str) +        """ +        doc_id = 'u1db_sync_%s' % other_replica_uid +        try: +            doc = self._database[doc_id] +        except ResourceNotFound: +            doc = { +                '_id': doc_id, +                'generation': 0, +                'transaction_id': '', +            } +            self._database.save(doc) +        result = doc['generation'], doc['transaction_id'] +        return result + +    def get_doc_conflicts(self, doc_id, couch_rev=None): +        """ +        Get the conflicted versions of a document. + +        If the C{couch_rev} parameter is not None, conflicts for a specific +        document's couch revision are returned. + +        :param couch_rev: The couch document revision. +        :type couch_rev: str + +        :return: A list of conflicted versions of the document. +        :rtype: list +        """ +        # request conflicts attachment from server +        params = {} +        conflicts = [] +        if couch_rev is not None: +            params['rev'] = couch_rev  # restric document's couch revision +        else: +            # TODO: move into resource logic! +            first_entry = self._get_doc(doc_id, check_for_conflicts=True) +            conflicts.append(first_entry) +        resource = self._database.resource(doc_id, 'u1db_conflicts') +        try: +            response = resource.get_json(**params) +            return conflicts + self._build_conflicts( +                doc_id, json.loads(response[2].read())) +        except ResourceNotFound: +            return [] + +    def _do_set_replica_gen_and_trans_id( +            self, other_replica_uid, other_generation, other_transaction_id, +            number_of_docs=None, doc_idx=None, sync_id=None): +        """ +        Set the last-known generation and transaction id for the other +        database replica. 
+ +        We have just performed some synchronization, and we want to track what +        generation the other replica was at. See also +        _get_replica_gen_and_trans_id. + +        :param other_replica_uid: The U1DB identifier for the other replica. +        :type other_replica_uid: str +        :param other_generation: The generation number for the other replica. +        :type other_generation: int +        :param other_transaction_id: The transaction id associated with the +                                     generation. +        :type other_transaction_id: str +        :param number_of_docs: The total amount of documents sent on this sync +                               session. +        :type number_of_docs: int +        :param doc_idx: The index of the current document being sent. +        :type doc_idx: int +        :param sync_id: The id of the current sync session. +        :type sync_id: str +        """ +        doc_id = 'u1db_sync_%s' % other_replica_uid +        try: +            doc = self._database[doc_id] +        except ResourceNotFound: +            doc = {'_id': doc_id} +        doc['generation'] = other_generation +        doc['transaction_id'] = other_transaction_id +        self._database.save(doc) + +    def _get_transaction_log(self): +        """ +        This is only for the test suite, it is not part of the api. + +        :return: The complete transaction log. +        :rtype: [(str, str)] + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. 
+        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. +        """ +        # query a couch view +        ddoc_path = ['_design', 'transactions', '_view', 'log'] +        res = self._database.resource(*ddoc_path) +        try: +            response = res.get_json() +            return map( +                lambda row: (row['id'], row['value']), +                response[2]['rows']) +        except ResourceNotFound as e: +            raise_missing_design_doc_error(e, ddoc_path) + +    def whats_changed(self, old_generation=0): +        """ +        Return a list of documents that have changed since old_generation. + +        :param old_generation: The generation of the database in the old +                               state. +        :type old_generation: int + +        :return: (generation, trans_id, [(doc_id, generation, trans_id),...]) +                 The current generation of the database, its associated +                 transaction id, and a list of of changed documents since +                 old_generation, represented by tuples with for each document +                 its doc_id and the generation and transaction id corresponding +                 to the last intervening change and sorted by generation (old +                 changes first) +        :rtype: (int, str, [(str, int, str)]) + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. 
+        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. +        """ +        # query a couch list function +        ddoc_path = [ +            '_design', 'transactions', '_list', 'whats_changed', 'log' +        ] +        res = self._database.resource(*ddoc_path) +        try: +            response = res.get_json(old_gen=old_generation) +            results = map( +                lambda row: +                    (row['generation'], row['doc_id'], row['transaction_id']), +                response[2]['transactions']) +            results.reverse() +            cur_gen = old_generation +            seen = set() +            changes = [] +            newest_trans_id = '' +            for generation, doc_id, trans_id in results: +                if doc_id not in seen: +                    changes.append((doc_id, generation, trans_id)) +                    seen.add(doc_id) +            if changes: +                cur_gen = changes[0][1]  # max generation +                newest_trans_id = changes[0][2] +                changes.reverse() +            else: +                cur_gen, newest_trans_id = self._get_generation_info() + +            return cur_gen, newest_trans_id, changes +        except ResourceNotFound as e: +            
raise_missing_design_doc_error(e, ddoc_path) +        except ServerError as e: +            raise_server_error(e, ddoc_path) + +    def _get_generation_info(self): +        """ +        Return the current generation. + +        :return: A tuple containing the current generation and transaction id. +        :rtype: (int, str) + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. +        """ +        # query a couch list function +        ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] +        res = self._database.resource(*ddoc_path) +        try: +            response = res.get_json() +            return (response[2]['generation'], response[2]['transaction_id']) +        except ResourceNotFound as e: +            raise_missing_design_doc_error(e, ddoc_path) +        except ServerError as e: +            raise_server_error(e, ddoc_path) + +    def save_document(self, old_doc, doc, transaction_id): +        """ +        Put the document in the Couch backend database. 
+ +        Note that C{old_doc} must have been fetched with the parameter +        C{check_for_conflicts} equal to True, so we can properly update the +        new document using the conflict information from the old one. + +        :param old_doc: The old document version. +        :type old_doc: CouchDocument +        :param doc: The document to be put. +        :type doc: CouchDocument + +        :raise RevisionConflict: Raised when trying to update a document but +                                 couch revisions mismatch. +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. 
+        """ +        attachments = {}  # we save content and conflicts as attachments +        parts = []  # and we put it using couch's multipart PUT +        # save content as attachment +        if doc.is_tombstone() is False: +            content = doc.get_json() +            attachments['u1db_content'] = { +                'follows': True, +                'content_type': 'application/octet-stream', +                'length': len(content), +            } +            parts.append(content) +        # save conflicts as attachment +        if doc.has_conflicts is True: +            conflicts = json.dumps( +                map(lambda cdoc: (cdoc.rev, cdoc.content), +                    doc.get_conflicts())) +            attachments['u1db_conflicts'] = { +                'follows': True, +                'content_type': 'application/octet-stream', +                'length': len(conflicts), +            } +            parts.append(conflicts) +        # store old transactions, if any +        transactions = old_doc.transactions[:] if old_doc is not None else [] +        # create a new transaction id and timestamp it so the transaction log +        # is consistent when querying the database. +        transactions.append( +            # here we store milliseconds to keep consistent with javascript +            # Date.prototype.getTime() which was used before inside a couchdb +            # update handler. 
+            (int(time.time() * 1000), +             transaction_id)) +        # build the couch document +        couch_doc = { +            '_id': doc.doc_id, +            'u1db_rev': doc.rev, +            'u1db_transactions': transactions, +            '_attachments': attachments, +        } +        # if we are updating a doc we have to add the couch doc revision +        if old_doc is not None: +            couch_doc['_rev'] = old_doc.couch_rev +        # prepare the multipart PUT +        buf = StringIO() +        headers = {} +        envelope = MultipartWriter(buf, headers=headers, subtype='related') +        envelope.add('application/json', json.dumps(couch_doc)) +        for part in parts: +            envelope.add('application/octet-stream', part) +        envelope.close() +        # try to save and fail if there's a revision conflict +        try: +            resource = self._new_resource() +            resource.put_json( +                doc.doc_id, body=str(buf.getvalue()), headers=headers) +        except ResourceConflict: +            raise RevisionConflict() +        return transactions[-1][1] + +    def _new_resource(self, *path): +        """ +        Return a new resource for accessing a couch database. + +        :return: A resource for accessing a couch database. 
+        :rtype: couchdb.http.Resource +        """ +        # Workaround for: https://leap.se/code/issues/5448 +        url = couch_urljoin(self._database.resource.url, *path) +        resource = Resource(url, Session(timeout=COUCH_TIMEOUT)) +        resource.credentials = self._database.resource.credentials +        resource.headers = self._database.resource.headers.copy() +        return resource diff --git a/common/src/leap/soledad/common/couch/state.py b/common/src/leap/soledad/common/couch/state.py new file mode 100644 index 00000000..39d49fa0 --- /dev/null +++ b/common/src/leap/soledad/common/couch/state.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +# state.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Server state using CouchDatabase as backend. +""" +import re +import logging +from urlparse import urljoin + +from u1db.remote.server_state import ServerState +from leap.soledad.common.command import exec_validated_cmd +from leap.soledad.common.couch import CouchDatabase +from u1db.errors import Unauthorized + + +logger = logging.getLogger(__name__) + + +def is_db_name_valid(name): +    """ +    Validate a user database using a regular expression. + +    :param name: database name. 
def is_db_name_valid(name):
    """
    Validate a user database name using a regular expression.

    :param name: database name.
    :type name: str

    :return: boolean for name validity
    :rtype: bool
    """
    db_name_regex = "^user-[a-f0-9]+$"
    return re.match(db_name_regex, name) is not None


class CouchServerState(ServerState):

    """
    Interface of the WSGI server with the CouchDB backend.
    """

    def __init__(self, couch_url, create_cmd=None):
        """
        Initialize the couch server state.

        :param couch_url: The URL for the couch database.
        :type couch_url: str
        :param create_cmd: Command for user database creation, or None to
                           disable database creation through this state.
        :type create_cmd: str
        """
        self.couch_url = couch_url
        self.create_cmd = create_cmd

    def open_database(self, dbname):
        """
        Open a couch database.

        :param dbname: The name of the database to open.
        :type dbname: str

        :return: The SoledadBackend object.
        :rtype: SoledadBackend
        """
        url = urljoin(self.couch_url, dbname)
        db = CouchDatabase.open_database(url, create=False, ensure_ddocs=False)
        return db

    def ensure_database(self, dbname):
        """
        Ensure couch database exists.

        :param dbname: The name of the database to ensure.
        :type dbname: str

        :raise Unauthorized: If disabled or other error was raised.

        :return: The SoledadBackend object and its replica_uid.
        :rtype: (SoledadBackend, str)
        """
        if not self.create_cmd:
            raise Unauthorized()
        else:
            code, out = exec_validated_cmd(self.create_cmd, dbname,
                                           validator=is_db_name_valid)
            # BUGFIX: exit codes are plain ints; `code is not 0` relied on
            # CPython small-int caching for correctness — use `!=` instead
            if code != 0:
                logger.error("""
                    Error while creating database (%s) with (%s) command.
                    Output: %s
                    Exit code: %d
                    """ % (dbname, self.create_cmd, out, code))
                raise Unauthorized()
        db = self.open_database(dbname)
        return db, db.replica_uid

    def delete_database(self, dbname):
        """
        Delete couch database.

        :param dbname: The name of the database to delete.
        :type dbname: str

        :raise Unauthorized: Always, because Soledad server is not allowed to
                             delete databases.
        """
        raise Unauthorized()
  ('test-%s' % uuid4().hex) @@ -78,7 +79,7 @@ class TestCouchBackendImpl(CouchDBTestCase):  def make_couch_database_for_test(test, replica_uid):      port = str(test.couch_port)      dbname = ('test-%s' % uuid4().hex) -    db = couch.SoledadBackend.open_database( +    db = couch.CouchDatabase.open_database(          urljoin('http://localhost:' + port, dbname),          create=True,          replica_uid=replica_uid or 'test', @@ -91,7 +92,7 @@ def copy_couch_database_for_test(test, db):      port = str(test.couch_port)      couch_url = 'http://localhost:' + port      new_dbname = db._dbname + '_copy' -    new_db = couch.SoledadBackend.open_database( +    new_db = couch.CouchDatabase.open_database(          urljoin(couch_url, new_dbname),          create=True,          replica_uid=db._replica_uid or 'test') @@ -529,89 +530,6 @@ class SoledadBackendSyncTargetTests(          self.st.record_sync_info('replica', 0, 'T-sid')          self.assertEqual(expected, called) - -# The following tests need that the database have an index, so we fake one. 
- -class IndexedSoledadBackend(couch.SoledadBackend): - -    def __init__(self, db, replica_uid=None): -        old_class.__init__(self, db, replica_uid=replica_uid) -        self._indexes = {} - -    def _put_doc(self, old_doc, doc): -        for index in self._indexes.itervalues(): -            if old_doc is not None and not old_doc.is_tombstone(): -                index.remove_json(old_doc.doc_id, old_doc.get_json()) -            if not doc.is_tombstone(): -                index.add_json(doc.doc_id, doc.get_json()) -        old_class._put_doc(self, old_doc, doc) - -    def create_index(self, index_name, *index_expressions): -        if index_name in self._indexes: -            if self._indexes[index_name]._definition == list( -                    index_expressions): -                return -            raise u1db_errors.IndexNameTakenError -        index = InMemoryIndex(index_name, list(index_expressions)) -        _, all_docs = self.get_all_docs() -        for doc in all_docs: -            index.add_json(doc.doc_id, doc.get_json()) -        self._indexes[index_name] = index - -    def delete_index(self, index_name): -        del self._indexes[index_name] - -    def list_indexes(self): -        definitions = [] -        for idx in self._indexes.itervalues(): -            definitions.append((idx._name, idx._definition)) -        return definitions - -    def get_from_index(self, index_name, *key_values): -        try: -            index = self._indexes[index_name] -        except KeyError: -            raise u1db_errors.IndexDoesNotExist -        doc_ids = index.lookup(key_values) -        result = [] -        for doc_id in doc_ids: -            result.append(self._get_doc(doc_id, check_for_conflicts=True)) -        return result - -    def get_range_from_index(self, index_name, start_value=None, -                             end_value=None): -        """Return all documents with key values in the specified range.""" -        try: -            index = 
self._indexes[index_name] -        except KeyError: -            raise u1db_errors.IndexDoesNotExist -        if isinstance(start_value, basestring): -            start_value = (start_value,) -        if isinstance(end_value, basestring): -            end_value = (end_value,) -        doc_ids = index.lookup_range(start_value, end_value) -        result = [] -        for doc_id in doc_ids: -            result.append(self._get_doc(doc_id, check_for_conflicts=True)) -        return result - -    def get_index_keys(self, index_name): -        try: -            index = self._indexes[index_name] -        except KeyError: -            raise u1db_errors.IndexDoesNotExist -        keys = index.keys() -        # XXX inefficiency warning -        return list(set([tuple(key.split('\x01')) for key in keys])) - - -# monkey patch SoledadBackend (once) to include virtual indexes -if getattr(couch.SoledadBackend, '_old_class', None) is None: -    old_class = couch.SoledadBackend -    IndexedSoledadBackend._old_class = old_class -    couch.SoledadBackend = IndexedSoledadBackend - -  sync_scenarios = []  for name, scenario in COUCH_SCENARIOS:      scenario = dict(scenario) @@ -921,7 +839,6 @@ class SoledadBackendSyncTests(          self.db1 = self.create_database('test1', 'source')          self.db2 = self.create_database('test2', 'target')          doc = self.db2.create_doc_from_json(simple_doc) -        self.db1.create_index('test-idx', 'key')          self.assertEqual(0, self.sync(self.db1, self.db2))          self.assertGetDoc(self.db1, doc.doc_id, doc.rev, simple_doc, False)          self.assertEqual(1, self.db1._get_replica_gen_and_trans_id('test2')[0]) @@ -931,7 +848,7 @@ class SoledadBackendSyncTests(              {'receive': {'docs': [], 'last_known_gen': 0},               'return': {'docs': [(doc.doc_id, doc.rev)],                          'last_gen': 1}}) -        self.assertEqual([doc], self.db1.get_from_index('test-idx', 'value')) +        self.assertGetDoc(self.db2, 
doc.doc_id, doc.rev, simple_doc, False)      def test_sync_pulling_doesnt_update_other_if_changed(self):          self.db1 = self.create_database('test1', 'source') @@ -1020,7 +937,7 @@ class SoledadBackendSyncTests(          doc1 = self.db1.create_doc_from_json(simple_doc)          doc_id = doc1.doc_id          doc1_rev = doc1.rev -        self.db1.create_index('test-idx', 'key') +        # self.db1.create_index('test-idx', 'key')          new_doc = '{"key": "altval"}'          doc2 = self.db2.create_doc_from_json(new_doc, doc_id=doc_id)          doc2_rev = doc2.rev @@ -1036,18 +953,18 @@ class SoledadBackendSyncTests(          self.assertTransactionLog([doc_id, doc_id], self.db1)          self.assertGetDoc(self.db1, doc_id, doc2_rev, new_doc, True)          self.assertGetDoc(self.db2, doc_id, doc2_rev, new_doc, False) -        from_idx = self.db1.get_from_index('test-idx', 'altval')[0] -        self.assertEqual(doc2.doc_id, from_idx.doc_id) -        self.assertEqual(doc2.rev, from_idx.rev) -        self.assertTrue(from_idx.has_conflicts) -        self.assertEqual([], self.db1.get_from_index('test-idx', 'value')) +        # soledad doesnt support index due to encryption +        # from_idx = self.db1.get_from_index('test-idx', 'altval')[0] +        # self.assertEqual(doc2.doc_id, from_idx.doc_id) +        # self.assertEqual(doc2.rev, from_idx.rev) +        # self.assertTrue(from_idx.has_conflicts) +        # self.assertEqual([], self.db1.get_from_index('test-idx', 'value'))      def test_sync_sees_remote_delete_conflicted(self):          self.db1 = self.create_database('test1', 'source')          self.db2 = self.create_database('test2', 'target')          doc1 = self.db1.create_doc_from_json(simple_doc)          doc_id = doc1.doc_id -        self.db1.create_index('test-idx', 'key')          self.sync(self.db1, self.db2)          doc2 = self.make_document(doc1.doc_id, doc1.rev, doc1.get_json())          new_doc = '{"key": "altval"}' @@ -1067,7 +984,6 @@ class 
SoledadBackendSyncTests(          self.assertGetDocIncludeDeleted(self.db1, doc_id, doc2.rev, None, True)          self.assertGetDocIncludeDeleted(              self.db2, doc_id, doc2.rev, None, False) -        self.assertEqual([], self.db1.get_from_index('test-idx', 'value'))      def test_sync_local_race_conflicted(self):          self.db1 = self.create_database('test1', 'source') @@ -1075,7 +991,7 @@ class SoledadBackendSyncTests(          doc = self.db1.create_doc_from_json(simple_doc)          doc_id = doc.doc_id          doc1_rev = doc.rev -        self.db1.create_index('test-idx', 'key') +        # self.db1.create_index('test-idx', 'key')          self.sync(self.db1, self.db2)          content1 = '{"key": "localval"}'          content2 = '{"key": "altval"}' @@ -1094,21 +1010,22 @@ class SoledadBackendSyncTests(          self.sync(self.db1, self.db2, trace_hook=after_whatschanged)          self.assertEqual([True], triggered)          self.assertGetDoc(self.db1, doc_id, doc2_rev2, content2, True) -        from_idx = self.db1.get_from_index('test-idx', 'altval')[0] -        self.assertEqual(doc.doc_id, from_idx.doc_id) -        self.assertEqual(doc.rev, from_idx.rev) -        self.assertTrue(from_idx.has_conflicts) -        self.assertEqual([], self.db1.get_from_index('test-idx', 'value')) -        self.assertEqual([], self.db1.get_from_index('test-idx', 'localval')) +        # soledad doesnt support indexing due to encryption +        # from_idx = self.db1.get_from_index('test-idx', 'altval')[0] +        # self.assertEqual(doc.doc_id, from_idx.doc_id) +        # self.assertEqual(doc.rev, from_idx.rev) +        # self.assertTrue(from_idx.has_conflicts) +        # self.assertEqual([], self.db1.get_from_index('test-idx', 'value')) +        # self.assertEqual([], self.db1.get_from_index('test-idx', 'localval'))      def test_sync_propagates_deletes(self):          self.db1 = self.create_database('test1', 'source')          self.db2 = self.create_database('test2', 
'both')          doc1 = self.db1.create_doc_from_json(simple_doc)          doc_id = doc1.doc_id -        self.db1.create_index('test-idx', 'key') +        # self.db1.create_index('test-idx', 'key')          self.sync(self.db1, self.db2) -        self.db2.create_index('test-idx', 'key') +        # self.db2.create_index('test-idx', 'key')          self.db3 = self.create_database('test3', 'target')          self.sync(self.db1, self.db3)          self.db1.delete_doc(doc1) @@ -1124,8 +1041,8 @@ class SoledadBackendSyncTests(              self.db1, doc_id, deleted_rev, None, False)          self.assertGetDocIncludeDeleted(              self.db2, doc_id, deleted_rev, None, False) -        self.assertEqual([], self.db1.get_from_index('test-idx', 'value')) -        self.assertEqual([], self.db2.get_from_index('test-idx', 'value')) +        # self.assertEqual([], self.db1.get_from_index('test-idx', 'value')) +        # self.assertEqual([], self.db2.get_from_index('test-idx', 'value'))          self.sync(self.db2, self.db3)          self.assertLastExchangeLog(              self.db3, @@ -1324,9 +1241,11 @@ class SoledadBackendExceptionsTests(CouchDBTestCase):      def create_db(self, ensure=True, dbname=None):          if not dbname:              dbname = ('test-%s' % uuid4().hex) -        self.db = couch.CouchDatabase.open_database( -            urljoin('http://127.0.0.1:%d' % self.couch_port, dbname), -            create=True, +        if dbname not in self.couch_server: +            self.couch_server.create(dbname) +        self.db = couch.CouchDatabase( +            ('http://127.0.0.1:%d' % self.couch_port), +            dbname,              ensure_ddocs=ensure)      def tearDown(self): @@ -1520,14 +1439,14 @@ class SoledadBackendExceptionsTests(CouchDBTestCase):  class DatabaseNameValidationTest(unittest.TestCase):      def test_database_name_validation(self): -        self.assertFalse(couch.is_db_name_valid("user-deadbeef | cat /secret")) -        
self.assertTrue(couch.is_db_name_valid("user-cafe1337")) +        self.assertFalse(couch.state.is_db_name_valid("user-deadbeef | cat /secret")) +        self.assertTrue(couch.state.is_db_name_valid("user-cafe1337"))  class CommandBasedDBCreationTest(unittest.TestCase):      def test_ensure_db_using_custom_command(self): -        state = couch.CouchServerState("url", create_cmd="echo") +        state = couch.state.CouchServerState("url", create_cmd="echo")          mock_db = Mock()          mock_db.replica_uid = 'replica_uid'          state.open_database = Mock(return_value=mock_db) @@ -1536,11 +1455,11 @@ class CommandBasedDBCreationTest(unittest.TestCase):          self.assertEquals(mock_db.replica_uid, replica_uid)      def test_raises_unauthorized_on_failure(self): -        state = couch.CouchServerState("url", create_cmd="inexistent") +        state = couch.state.CouchServerState("url", create_cmd="inexistent")          self.assertRaises(u1db_errors.Unauthorized,                            state.ensure_database, "user-1337")      def test_raises_unauthorized_by_default(self): -        state = couch.CouchServerState("url") +        state = couch.state.CouchServerState("url")          self.assertRaises(u1db_errors.Unauthorized,                            state.ensure_database, "user-1337") diff --git a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py index 2c2daf05..8cd3ae08 100644 --- a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py +++ b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py @@ -26,7 +26,8 @@ from twisted.internet import defer  from uuid import uuid4  from leap.soledad.client import Soledad -from leap.soledad.common.couch import SoledadBackend, CouchServerState +from leap.soledad.common.couch.state import CouchServerState +from leap.soledad.common.couch import CouchDatabase  from 
leap.soledad.common.tests.util import (      make_token_soledad_app, @@ -86,7 +87,7 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer):          TestCaseWithServer.setUp(self)          CouchDBTestCase.setUp(self)          self.user = ('user-%s' % uuid4().hex) -        self.db = SoledadBackend.open_database( +        self.db = CouchDatabase.open_database(              urljoin(self.couch_url, 'user-' + self.user),              create=True,              replica_uid='replica', diff --git a/common/src/leap/soledad/common/tests/test_server.py b/common/src/leap/soledad/common/tests/test_server.py index 5810ce31..d75275d6 100644 --- a/common/src/leap/soledad/common/tests/test_server.py +++ b/common/src/leap/soledad/common/tests/test_server.py @@ -29,10 +29,8 @@ from urlparse import urljoin  from twisted.internet import defer  from twisted.trial import unittest -from leap.soledad.common.couch import ( -    CouchServerState, -    SoledadBackend, -) +from leap.soledad.common.couch.state import CouchServerState +from leap.soledad.common.couch import CouchDatabase  from leap.soledad.common.tests.u1db_tests import TestCaseWithServer  from leap.soledad.common.tests.test_couch import CouchDBTestCase  from leap.soledad.common.tests.util import ( @@ -358,7 +356,7 @@ class EncryptedSyncTestCase(              passphrase=passphrase)          # ensure remote db exists before syncing -        db = SoledadBackend.open_database( +        db = CouchDatabase.open_database(              urljoin(self.couch_url, 'user-' + user),              create=True,              ensure_ddocs=True) @@ -488,7 +486,7 @@ class LockResourceTestCase(          self.tempdir = tempfile.mkdtemp(prefix="leap_tests-")          TestCaseWithServer.setUp(self)          # create the databases -        db = SoledadBackend.open_database( +        db = CouchDatabase.open_database(              urljoin(self.couch_url, ('shared-%s' % (uuid4().hex))),              create=True,              ensure_ddocs=True) 
diff --git a/common/src/leap/soledad/common/tests/test_sync_mutex.py b/common/src/leap/soledad/common/tests/test_sync_mutex.py index 35cecb8f..973a8587 100644 --- a/common/src/leap/soledad/common/tests/test_sync_mutex.py +++ b/common/src/leap/soledad/common/tests/test_sync_mutex.py @@ -33,7 +33,8 @@ from twisted.internet import defer  from leap.soledad.client.sync import SoledadSynchronizer -from leap.soledad.common import couch +from leap.soledad.common.couch.state import CouchServerState +from leap.soledad.common.couch import CouchDatabase  from leap.soledad.common.tests.u1db_tests import TestCaseWithServer  from leap.soledad.common.tests.test_couch import CouchDBTestCase @@ -84,7 +85,7 @@ class TestSyncMutex(      sync_target = soledad_sync_target      def make_app(self): -        self.request_state = couch.CouchServerState(self.couch_url) +        self.request_state = CouchServerState(self.couch_url)          return self.make_app_with_state(self.request_state)      def setUp(self): @@ -102,7 +103,7 @@ class TestSyncMutex(          self.startServer()          # ensure remote db exists before syncing -        db = couch.SoledadBackend.open_database( +        db = CouchDatabase.open_database(              urljoin(self.couch_url, 'user-' + self.user),              create=True,              ensure_ddocs=True) diff --git a/common/src/leap/soledad/common/tests/test_sync_target.py b/common/src/leap/soledad/common/tests/test_sync_target.py index 0cf31c3f..f25e84dd 100644 --- a/common/src/leap/soledad/common/tests/test_sync_target.py +++ b/common/src/leap/soledad/common/tests/test_sync_target.py @@ -29,7 +29,6 @@ from uuid import uuid4  from testscenarios import TestWithScenarios  from twisted.internet import defer -from urlparse import urljoin  from leap.soledad.client import http_target as target  from leap.soledad.client import crypto @@ -37,7 +36,6 @@ from leap.soledad.client.sqlcipher import SQLCipherU1DBSync  from leap.soledad.client.sqlcipher import 
SQLCipherOptions  from leap.soledad.client.sqlcipher import SQLCipherDatabase -from leap.soledad.common import couch  from leap.soledad.common.document import SoledadDocument  from leap.soledad.common.tests import u1db_tests as tests @@ -265,9 +263,9 @@ class TestSoledadSyncTarget(                                       replica_trans_id=replica_trans_id,                                       number_of_docs=number_of_docs,                                       doc_idx=doc_idx, sync_id=sync_id) -        from leap.soledad.common.tests.test_couch import IndexedSoledadBackend +        from leap.soledad.common.backend import SoledadBackend          self.patch( -            IndexedSoledadBackend, '_put_doc_if_newer', bomb_put_doc_if_newer) +            SoledadBackend, '_put_doc_if_newer', bomb_put_doc_if_newer)          remote_target = self.getSyncTarget(              source_replica_uid='replica')          other_changes = [] diff --git a/common/src/leap/soledad/common/tests/util.py b/common/src/leap/soledad/common/tests/util.py index bfd06856..f7f9ebd0 100644 --- a/common/src/leap/soledad/common/tests/util.py +++ b/common/src/leap/soledad/common/tests/util.py @@ -27,7 +27,6 @@ import shutil  import random  import string  import u1db -import traceback  import couchdb  from uuid import uuid4 @@ -37,17 +36,17 @@ from StringIO import StringIO  from pysqlcipher import dbapi2  from u1db import sync -from u1db.errors import DatabaseDoesNotExist  from u1db.remote import http_database  from twisted.trial import unittest -from leap.common.files import mkdir_p  from leap.common.testing.basetest import BaseLeapTest  from leap.soledad.common import soledad_assert  from leap.soledad.common.document import SoledadDocument -from leap.soledad.common.couch import SoledadBackend, CouchServerState +from leap.soledad.common.couch import CouchDatabase +from leap.soledad.common.couch.state import CouchServerState +  from leap.soledad.common.crypto import ENC_SCHEME_KEY  from leap.soledad.client 
import Soledad @@ -379,7 +378,7 @@ class CouchServerStateForTests(CouchServerState):          Create db and append to a list, allowing test to close it later          """          dbname = dbname or ('test-%s' % uuid4().hex) -        db = SoledadBackend.open_database( +        db = CouchDatabase.open_database(              urljoin(self.couch_url, dbname),              True,              replica_uid=replica_uid or 'test', diff --git a/server/pkg/create-user-db b/server/pkg/create-user-db index a8ba3833..54856643 100755 --- a/server/pkg/create-user-db +++ b/server/pkg/create-user-db @@ -19,8 +19,8 @@ import os  import sys  import netrc  import argparse -from leap.soledad.common.couch import SoledadBackend -from leap.soledad.common.couch import is_db_name_valid +from leap.soledad.common.couch import CouchDatabase +from leap.soledad.common.couch.state import is_db_name_valid  from leap.soledad.common.couch import list_users_dbs  from leap.soledad.server import load_configuration @@ -69,7 +69,7 @@ def ensure_database(dbname):          sys.exit(1)      url = url_for_db(dbname)      db_security = CONF['database-security'] -    db = SoledadBackend.open_database(url=url, create=True, +    db = CouchDatabase.open_database(url=url, create=True,                                       replica_uid=None, ensure_ddocs=True,                                       database_security=db_security)      print ('success! 
Ensured that database %s exists, with replica_uid: %s' % diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index 618ccb2b..00e1e9fb 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -104,7 +104,7 @@ from leap.soledad.server.sync import (  )  from leap.soledad.common import SHARED_DB_NAME -from leap.soledad.common.couch import CouchServerState +from leap.soledad.common.couch.state import CouchServerState  # ----------------------------------------------------------------------------  # Soledad WSGI application | 
