From 3306ea69ee33cdafa8803f9c717b60b9f3d3b4a0 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 25 Oct 2017 15:58:56 -0200 Subject: [feat] improve speed of server startup To avoid corrupting data, Soledad Server checks all user databases during startup to make sure all of them use the correct schema version. This was done synchronously, so when there are many databases startup would take a long time. This commit makes that verification asynchronous, thus speeding up server startup. --- src/leap/soledad/common/couch/state.py | 133 ++++++++++++++++++++++++--------- 1 file changed, 99 insertions(+), 34 deletions(-) (limited to 'src/leap/soledad/common') diff --git a/src/leap/soledad/common/couch/state.py b/src/leap/soledad/common/couch/state.py index 8cbe0934..38098e3f 100644 --- a/src/leap/soledad/common/couch/state.py +++ b/src/leap/soledad/common/couch/state.py @@ -17,10 +17,15 @@ """ Server state using CouchDatabase as backend. """ -import couchdb import re +import os +import treq from six.moves.urllib.parse import urljoin +from twisted.internet import defer +from urlparse import urlsplit + +from twisted.internet import reactor from leap.soledad.common.log import getLogger from leap.soledad.common.couch import CouchDatabase @@ -37,6 +42,98 @@ from leap.soledad.common.errors import MissingCouchConfigDocumentError logger = getLogger(__name__) +# +# Database schema version verification +# + +@defer.inlineCallbacks +def _check_db_schema_version(url, db, auth, agent=None): + """ + Check if the schema version is up to date for a given database. + + :param url: the server base URL. + :type url: str + :param db: the database name. + :type db: str + :param auth: a tuple with (username, password) for acessing CouchDB. + :type auth: tuple(str, str) + :param agent: an optional agent for doing requests, used in tests. + :type agent: twisted.web.client.Agent + + :raise MissingCouchConfigDocumentError: raised when a database is not empty + but has no config document in it. + + :raise WrongCouchSchemaVersionError: raised when a config document was + found but the schema version is + different from what is expected. + """ + # if there are documents, ensure that a config doc exists + db_url = urljoin(url, '%s/' % db) + config_doc_url = urljoin(db_url, CONFIG_DOC_ID) + res = yield treq.get(config_doc_url, auth=auth, agent=agent) + + if res.code != 200 and res.code != 404: + raise Exception("Unexpected HTTP response code: %d" % res.code) + + elif res.code == 404: + res = yield treq.get(urljoin(db_url, '_all_docs'), auth=auth, + params={'limit': 1}, agent=agent) + docs = yield res.json() + if docs['total_rows'] != 0: + logger.error( + "Missing couch config document in database %s" % db) + raise MissingCouchConfigDocumentError(db) + + elif res.code == 200: + config_doc = yield res.json() + if config_doc[SCHEMA_VERSION_KEY] != SCHEMA_VERSION: + logger.error( + "Unsupported database schema in database %s" % db) + raise WrongCouchSchemaVersionError(db) + + +def _stop(failure, reactor): + exception = failure.value.subFailure.value + logger.error("Failure while checking schema versions: %r - %s" + % (exception, exception.message)) + reactor.addSystemEventTrigger('after', 'shutdown', os._exit, 1) + reactor.stop() + + +@defer.inlineCallbacks +def check_schema_versions(couch_url, agent=None, reactor=reactor): + """ + Check that all user databases use the correct couch schema. + + :param couch_url: The URL for the couch database. + :type couch_url: str + :param agent: an optional agent for doing requests, used in tests. + :type agent: twisted.web.client.Agent + :param reactor: an optional reactor for stopping in case of errors, used + in tests. + :type reactor: twisted.internet.base.ReactorBase + """ + url = urlsplit(couch_url) + auth = (url.username, url.password) if url.username else None + url = "%s://%s:%d" % (url.scheme, url.hostname, url.port) + res = yield treq.get(urljoin(url, '_all_dbs'), auth=auth, agent=agent) + dbs = yield res.json() + deferreds = [] + semaphore = defer.DeferredSemaphore(20) + for db in dbs: + if not db.startswith('user-'): + continue + d = semaphore.run(_check_db_schema_version, url, db, auth, agent=agent) + deferreds.append(d) + d = defer.gatherResults(deferreds, consumeErrors=True) + d.addErrback(_stop, reactor=reactor) + yield d + + +# +# CouchDB Server state +# + def is_db_name_valid(name): """ Validate a user database using a regular expression. @@ -57,8 +154,7 @@ class CouchServerState(ServerState): Inteface of the WSGI server with the CouchDB backend. """ - def __init__(self, couch_url, create_cmd=None, - check_schema_versions=False): + def __init__(self, couch_url, create_cmd=None): """ Initialize the couch server state. @@ -69,40 +165,9 @@ class CouchServerState(ServerState): name and should access CouchDB with necessary privileges, which server lacks for security reasons. :type create_cmd: str - :param check_schema_versions: Whether to check couch schema version of - user dbs. Set to False as this is only - intended to run once during start-up. - :type check_schema_versions: bool """ self.couch_url = couch_url self.create_cmd = create_cmd - if check_schema_versions: - self._check_schema_versions() - - def _check_schema_versions(self): - """ - Check that all user databases use the correct couch schema. - """ - server = couchdb.client.Server(self.couch_url) - for dbname in server: - if not dbname.startswith('user-'): - continue - db = server[dbname] - - # if there are documents, ensure that a config doc exists - config_doc = db.get(CONFIG_DOC_ID) - if config_doc: - if config_doc[SCHEMA_VERSION_KEY] != SCHEMA_VERSION: - logger.error( - "Unsupported database schema in database %s" % dbname) - raise WrongCouchSchemaVersionError(dbname) - else: - result = db.view('_all_docs', limit=1) - if result.total_rows != 0: - logger.error( - "Missing couch config document in database %s" - % dbname) - raise MissingCouchConfigDocumentError(dbname) def open_database(self, dbname): """ -- cgit v1.2.3