diff options
Diffstat (limited to 'common/src/leap/soledad/common/couch.py')
-rw-r--r-- | common/src/leap/soledad/common/couch.py | 362 |
1 files changed, 271 insertions, 91 deletions
diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 9642e8f3..187d3035 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -18,19 +18,19 @@ """A U1DB backend that uses CouchDB as its persistence layer.""" -import uuid import re import simplejson as json +import socket +import logging -from base64 import b64encode, b64decode from u1db import errors from u1db.sync import Synchronizer from u1db.backends.inmemory import InMemoryIndex from u1db.remote.server_state import ServerState from u1db.errors import DatabaseDoesNotExist from couchdb.client import Server, Document as CouchDocument -from couchdb.http import ResourceNotFound +from couchdb.http import ResourceNotFound, Unauthorized from leap.soledad.common.objectstore import ( @@ -39,28 +39,155 @@ from leap.soledad.common.objectstore import ( ) +logger = logging.getLogger(__name__) + + class InvalidURLError(Exception): """ Exception raised when Soledad encounters a malformed URL. """ +def persistent_class(cls): + """ + Decorator that modifies a class to ensure u1db metadata persists on + underlying storage. + + @param cls: The class that will be modified. + @type cls: type + """ + + def _create_persistent_method(old_method_name, key, load_method_name, + dump_method_name, store): + """ + Create a persistent method to replace C{old_method_name}. + + The new method will load C{key} using C{load_method_name} and stores + it using C{dump_method_name} depending on the value of C{store}. + """ + # get methods + old_method = getattr(cls, old_method_name) + load_method = getattr(cls, load_method_name) \ + if load_method_name is not None \ + else lambda self, data: setattr(self, key, data) + dump_method = getattr(cls, dump_method_name) \ + if dump_method_name is not None \ + else lambda self: getattr(self, key) + + def _new_method(self, *args, **kwargs): + # get u1db data from couch db + doc = self._get_doc('%s%s' % + (self.U1DB_DATA_DOC_ID_PREFIX, key)) + load_method(self, doc.content['content']) + # run old method + retval = old_method(self, *args, **kwargs) + # store u1db data on couch + if store: + doc.content = {'content': dump_method(self)} + self._put_doc(doc) + return retval + + return _new_method + + # ensure the class has a persistency map + if not hasattr(cls, 'PERSISTENCY_MAP'): + logger.error('Class %s has no PERSISTENCY_MAP attribute, skipping ' + 'persistent methods substitution.' % cls) + return cls + # replace old methods with new persistent ones + for key, ((load_method_name, dump_method_name), + persistent_methods) in cls.PERSISTENCY_MAP.iteritems(): + for (method_name, store) in persistent_methods: + setattr(cls, method_name, + _create_persistent_method( + method_name, + key, + load_method_name, + dump_method_name, + store)) + return cls + + +@persistent_class class CouchDatabase(ObjectStoreDatabase): """ A U1DB backend that uses Couch as its persistence layer. """ - U1DB_TRANSACTION_LOG_KEY = 'transaction_log' - U1DB_CONFLICTS_KEY = 'conflicts' - U1DB_OTHER_GENERATIONS_KEY = 'other_generations' - U1DB_INDEXES_KEY = 'indexes' - U1DB_REPLICA_UID_KEY = 'replica_uid' + U1DB_TRANSACTION_LOG_KEY = '_transaction_log' + U1DB_CONFLICTS_KEY = '_conflicts' + U1DB_OTHER_GENERATIONS_KEY = '_other_generations' + U1DB_INDEXES_KEY = '_indexes' + U1DB_REPLICA_UID_KEY = '_replica_uid' + + U1DB_DATA_KEYS = [ + U1DB_TRANSACTION_LOG_KEY, + U1DB_CONFLICTS_KEY, + U1DB_OTHER_GENERATIONS_KEY, + U1DB_INDEXES_KEY, + U1DB_REPLICA_UID_KEY, + ] COUCH_ID_KEY = '_id' COUCH_REV_KEY = '_rev' COUCH_U1DB_ATTACHMENT_KEY = 'u1db_json' COUCH_U1DB_REV_KEY = 'u1db_rev' + # the following map describes information about methods usage of + # properties that have to persist on the underlying database. The format + # of the map is assumed to be: + # + # { + # 'property_name': [ + # ('property_load_method_name', 'property_dump_method_name'), + # [('method_1_name', bool), + # ... + # ('method_N_name', bool)]], + # ... + # } + # + # where the booleans indicate if the property should be stored after + # each method execution (i.e. if the method alters the property). Property + # load/dump methods will be run after/before properties are read/written + # to the underlying db. + PERSISTENCY_MAP = { + U1DB_TRANSACTION_LOG_KEY: [ + ('_load_transaction_log_from_json', None), + [('_get_transaction_log', False), + ('_get_generation', False), + ('_get_generation_info', False), + ('_get_trans_id_for_gen', False), + ('whats_changed', False), + ('_put_and_update_indexes', True)]], + U1DB_CONFLICTS_KEY: [ + (None, None), + [('_has_conflicts', False), + ('get_doc_conflicts', False), + ('_prune_conflicts', False), + ('resolve_doc', False), + ('_replace_conflicts', True), + ('_force_doc_sync_conflict', True)]], + U1DB_OTHER_GENERATIONS_KEY: [ + ('_load_other_generations_from_json', None), + [('_get_replica_gen_and_trans_id', False), + ('_do_set_replica_gen_and_trans_id', True)]], + U1DB_INDEXES_KEY: [ + ('_load_indexes_from_json', '_dump_indexes_as_json'), + [('list_indexes', False), + ('get_from_index', False), + ('get_range_from_index', False), + ('get_index_keys', False), + ('_put_and_update_indexes', True), + ('create_index', True), + ('delete_index', True)]], + U1DB_REPLICA_UID_KEY: [ + (None, None), + [('_allocate_doc_rev', False), + ('_put_doc_if_newer', False), + ('_ensure_maximal_rev', False), + ('_prune_conflicts', False), + ('_set_replica_uid', True)]]} + @classmethod def open_database(cls, url, create): """ @@ -104,9 +231,11 @@ class CouchDatabase(ObjectStoreDatabase): @param session: an http.Session instance or None for a default session @type session: http.Session """ + # save params self._url = url self._full_commit = full_commit self._session = session + # configure couch self._server = Server(url=self._url, full_commit=self._full_commit, session=self._session) @@ -174,7 +303,7 @@ class CouchDatabase(ObjectStoreDatabase): generation = self._get_generation() results = [] for doc_id in self._database: - if doc_id == self.U1DB_DATA_DOC_ID: + if doc_id.startswith(self.U1DB_DATA_DOC_ID_PREFIX): continue doc = self._get_doc(doc_id, check_for_conflicts=True) if doc.content is None and not include_deleted: @@ -241,14 +370,12 @@ class CouchDatabase(ObjectStoreDatabase): raise errors.IndexNameTakenError index = InMemoryIndex(index_name, list(index_expressions)) for doc_id in self._database: - if doc_id == self.U1DB_DATA_DOC_ID: # skip special file - continue + if doc_id.startswith(self.U1DB_DATA_DOC_ID_PREFIX): + continue # skip special files doc = self._get_doc(doc_id) if doc.content is not None: index.add_json(doc_id, doc.get_json()) self._indexes[index_name] = index - # save data in object store - self._store_u1db_data() def close(self): """ @@ -290,76 +417,25 @@ class CouchDatabase(ObjectStoreDatabase): def _init_u1db_data(self): """ - Initialize U1DB info data structure in the couch db. + Initialize u1db configuration data on backend storage. A U1DB database needs to keep track of all database transactions, document conflicts, the generation of other replicas it has seen, indexes created by users and so on. - In this implementation, all this information is stored in a special - document stored in the couch db with id equals to - CouchDatabse.U1DB_DATA_DOC_ID. - - This method initializes the document that will hold such information. - """ - if self._replica_uid is None: - self._replica_uid = uuid.uuid4().hex - # TODO: prevent user from overwriting a document with the same doc_id - # as this one. - doc = self._factory(doc_id=self.U1DB_DATA_DOC_ID) - doc.content = { - self.U1DB_TRANSACTION_LOG_KEY: b64encode(json.dumps([])), - self.U1DB_CONFLICTS_KEY: b64encode(json.dumps({})), - self.U1DB_OTHER_GENERATIONS_KEY: b64encode(json.dumps({})), - self.U1DB_INDEXES_KEY: b64encode(json.dumps({})), - self.U1DB_REPLICA_UID_KEY: b64encode(self._replica_uid), - } - self._put_doc(doc) - - def _fetch_u1db_data(self): - """ - Fetch U1DB info from the couch db. - - See C{_init_u1db_data} documentation. - """ - # retrieve u1db data from couch db - cdoc = self._database.get(self.U1DB_DATA_DOC_ID) - jsonstr = self._database.get_attachment( - cdoc, self.COUCH_U1DB_ATTACHMENT_KEY).read() - content = json.loads(jsonstr) - # set u1db database info - self._transaction_log = json.loads( - b64decode(content[self.U1DB_TRANSACTION_LOG_KEY])) - self._conflicts = json.loads( - b64decode(content[self.U1DB_CONFLICTS_KEY])) - self._other_generations = json.loads( - b64decode(content[self.U1DB_OTHER_GENERATIONS_KEY])) - self._indexes = self._load_indexes_from_json( - b64decode(content[self.U1DB_INDEXES_KEY])) - self._replica_uid = b64decode(content[self.U1DB_REPLICA_UID_KEY]) - # save couch _rev - self._couch_rev = cdoc[self.COUCH_REV_KEY] - - def _store_u1db_data(self): - """ - Store U1DB info in the couch db. - - See C{_init_u1db_data} documentation. - """ - doc = self._factory(doc_id=self.U1DB_DATA_DOC_ID) - doc.content = { - # Here, the b64 encode ensures that document content - # does not cause strange behaviour in couchdb because - # of encoding. - self.U1DB_TRANSACTION_LOG_KEY: - b64encode(json.dumps(self._transaction_log)), - self.U1DB_CONFLICTS_KEY: b64encode(json.dumps(self._conflicts)), - self.U1DB_OTHER_GENERATIONS_KEY: - b64encode(json.dumps(self._other_generations)), - self.U1DB_INDEXES_KEY: b64encode(self._dump_indexes_as_json()), - self.U1DB_REPLICA_UID_KEY: b64encode(self._replica_uid), - self.COUCH_REV_KEY: self._couch_rev} - self._put_doc(doc) + In this implementation, all this information is stored in special + documents stored in the underlying with doc_id prefix equal to + U1DB_DATA_DOC_ID_PREFIX. Those documents ids are reserved: put_doc(), + get_doc() and delete_doc() will not allow documents with a doc_id with + that prefix to be accessed or modified. + """ + for key in self.U1DB_DATA_KEYS: + doc_id = '%s%s' % (self.U1DB_DATA_DOC_ID_PREFIX, key) + doc = self._get_doc(doc_id) + if doc is None: + doc = self._factory(doc_id) + doc.content = {'content': getattr(self, key)} + self._put_doc(doc) #------------------------------------------------------------------------- # Couch specific methods @@ -377,7 +453,7 @@ class CouchDatabase(ObjectStoreDatabase): def _dump_indexes_as_json(self): """ - Dump index definitions as JSON string. + Dump index definitions as JSON. """ indexes = {} for name, idx in self._indexes.iteritems(): @@ -385,31 +461,60 @@ class CouchDatabase(ObjectStoreDatabase): for attr in [self.INDEX_NAME_KEY, self.INDEX_DEFINITION_KEY, self.INDEX_VALUES_KEY]: indexes[name][attr] = getattr(idx, '_' + attr) - return json.dumps(indexes) + return indexes def _load_indexes_from_json(self, indexes): """ - Load index definitions from JSON string. + Load index definitions from stored JSON. - @param indexes: A JSON serialization of a list of [('index-name', - ['field', 'field2'])]. + @param indexes: A JSON representation of indexes as + [('index-name', ['field', 'field2', ...]), ...]. @type indexes: str - - @return: A dictionary with the index definitions. - @rtype: dict """ - dict = {} - for name, idx_dict in json.loads(indexes).iteritems(): + self._indexes = {} + for name, idx_dict in indexes.iteritems(): idx = InMemoryIndex(name, idx_dict[self.INDEX_DEFINITION_KEY]) idx._values = idx_dict[self.INDEX_VALUES_KEY] - dict[name] = idx - return dict + self._indexes[name] = idx + + def _load_transaction_log_from_json(self, transaction_log): + """ + Load transaction log from stored JSON. + + @param transaction_log: A JSON representation of transaction_log as + [('generation', 'transaction_id'), ...]. + @type transaction_log: list + """ + self._transaction_log = [] + for gen, trans_id in transaction_log: + self._transaction_log.append((gen, trans_id)) + + def _load_other_generations_from_json(self, other_generations): + """ + Load other generations from stored JSON. + + @param other_generations: A JSON representation of other_generations + as {'replica_uid': ('generation', 'transaction_id'), ...}. + @type other_generations: dict + """ + self._other_generations = {} + for replica_uid, [gen, trans_id] in other_generations.iteritems(): + self._other_generations[replica_uid] = (gen, trans_id) class CouchSyncTarget(ObjectStoreSyncTarget): """ Functionality for using a CouchDatabase as a synchronization target. """ + pass + + +class NotEnoughCouchPermissions(Exception): + """ + Raised when failing to assert for enough permissions on underlying Couch + Database. + """ + pass class CouchServerState(ServerState): @@ -417,8 +522,83 @@ class CouchServerState(ServerState): Inteface of the WSGI server with the CouchDB backend. """ - def __init__(self, couch_url): + def __init__(self, couch_url, shared_db_name, tokens_db_name, + user_db_prefix): + """ + Initialize the couch server state. + + @param couch_url: The URL for the couch database. + @type couch_url: str + @param shared_db_name: The name of the shared database. + @type shared_db_name: str + @param tokens_db_name: The name of the tokens database. + @type tokens_db_name: str + @param user_db_prefix: The prefix for user database names. + @type user_db_prefix: str + """ self._couch_url = couch_url + self._shared_db_name = shared_db_name + self._tokens_db_name = tokens_db_name + self._user_db_prefix = user_db_prefix + try: + self._check_couch_permissions() + except NotEnoughCouchPermissions: + logger.error("Not enough permissions on underlying couch " + "database (%s)." % self._couch_url) + except (socket.error, socket.gaierror, socket.herror, + socket.timeout), e: + logger.error("Socket problem while trying to reach underlying " + "couch database: (%s, %s)." % + (self._couch_url, e)) + + def _check_couch_permissions(self): + """ + Assert that Soledad Server has enough permissions on the underlying couch + database. + + Soledad Server has to be able to do the following in the couch server: + + * Create, read and write from/to 'shared' db. + * Create, read and write from/to 'user-<anything>' dbs. + * Read from 'tokens' db. + + This function tries to perform the actions above using the "low level" + couch library to ensure that Soledad Server can do everything it needs on + the underlying couch database. + + @param couch_url: The URL of the couch database. + @type couch_url: str + + @raise NotEnoughCouchPermissions: Raised in case there are not enough + permissions to read/write/create the needed couch databases. + @rtype: bool + """ + + def _open_couch_db(dbname): + server = Server(url=self._couch_url) + try: + server[dbname] + except ResourceNotFound: + server.create(dbname) + return server[dbname] + + def _create_delete_test_doc(db): + doc_id, _ = db.save({'test': 'document'}) + doc = db[doc_id] + db.delete(doc) + + try: + # test read/write auth for shared db + _create_delete_test_doc( + _open_couch_db(self._shared_db_name)) + # test read/write auth for user-<something> db + _create_delete_test_doc( + _open_couch_db('%stest-db' % self._user_db_prefix)) + # test read auth for tokens db + tokensdb = _open_couch_db(self._tokens_db_name) + tokensdb.info() + except Unauthorized: + raise NotEnoughCouchPermissions(self._couch_url) def open_database(self, dbname): """ |