diff options
author | drebs <drebs@riseup.net> | 2017-10-25 15:58:56 -0200 |
---|---|---|
committer | drebs <drebs@riseup.net> | 2017-10-31 16:52:59 -0200 |
commit | 3306ea69ee33cdafa8803f9c717b60b9f3d3b4a0 (patch) | |
tree | 3897a29e0d7c2fddc262eca71779e46bae43313c | |
parent | f1c010fc4119d6348d0bd823e4a9f6da310149bd (diff) |
[feat] improve speed of server startup
To avoid corrupting data, Soledad Server checks all user databases
during startup to make sure all of them use the correct schema version.
This was done synchronously, so when there were many databases, startup
took a long time. This commit makes that verification
asynchronous, thus speeding up server startup.
-rw-r--r-- | src/leap/soledad/common/couch/state.py | 133 | ||||
-rw-r--r-- | src/leap/soledad/server/_wsgi.py | 3 | ||||
-rw-r--r-- | src/leap/soledad/server/entrypoints.py | 2 | ||||
-rw-r--r-- | tests/couch/test_command.py | 7 | ||||
-rw-r--r-- | tests/couch/test_state.py | 47 |
5 files changed, 145 insertions, 47 deletions
diff --git a/src/leap/soledad/common/couch/state.py b/src/leap/soledad/common/couch/state.py index 8cbe0934..38098e3f 100644 --- a/src/leap/soledad/common/couch/state.py +++ b/src/leap/soledad/common/couch/state.py @@ -17,10 +17,15 @@ """ Server state using CouchDatabase as backend. """ -import couchdb import re +import os +import treq from six.moves.urllib.parse import urljoin +from twisted.internet import defer +from urlparse import urlsplit + +from twisted.internet import reactor from leap.soledad.common.log import getLogger from leap.soledad.common.couch import CouchDatabase @@ -37,6 +42,98 @@ from leap.soledad.common.errors import MissingCouchConfigDocumentError logger = getLogger(__name__) +# +# Database schema version verification +# + +@defer.inlineCallbacks +def _check_db_schema_version(url, db, auth, agent=None): + """ + Check if the schema version is up to date for a given database. + + :param url: the server base URL. + :type url: str + :param db: the database name. + :type db: str + :param auth: a tuple with (username, password) for acessing CouchDB. + :type auth: tuple(str, str) + :param agent: an optional agent for doing requests, used in tests. + :type agent: twisted.web.client.Agent + + :raise MissingCouchConfigDocumentError: raised when a database is not empty + but has no config document in it. + + :raise WrongCouchSchemaVersionError: raised when a config document was + found but the schema version is + different from what is expected. 
+ """ + # if there are documents, ensure that a config doc exists + db_url = urljoin(url, '%s/' % db) + config_doc_url = urljoin(db_url, CONFIG_DOC_ID) + res = yield treq.get(config_doc_url, auth=auth, agent=agent) + + if res.code != 200 and res.code != 404: + raise Exception("Unexpected HTTP response code: %d" % res.code) + + elif res.code == 404: + res = yield treq.get(urljoin(db_url, '_all_docs'), auth=auth, + params={'limit': 1}, agent=agent) + docs = yield res.json() + if docs['total_rows'] != 0: + logger.error( + "Missing couch config document in database %s" % db) + raise MissingCouchConfigDocumentError(db) + + elif res.code == 200: + config_doc = yield res.json() + if config_doc[SCHEMA_VERSION_KEY] != SCHEMA_VERSION: + logger.error( + "Unsupported database schema in database %s" % db) + raise WrongCouchSchemaVersionError(db) + + +def _stop(failure, reactor): + exception = failure.value.subFailure.value + logger.error("Failure while checking schema versions: %r - %s" + % (exception, exception.message)) + reactor.addSystemEventTrigger('after', 'shutdown', os._exit, 1) + reactor.stop() + + +@defer.inlineCallbacks +def check_schema_versions(couch_url, agent=None, reactor=reactor): + """ + Check that all user databases use the correct couch schema. + + :param couch_url: The URL for the couch database. + :type couch_url: str + :param agent: an optional agent for doing requests, used in tests. + :type agent: twisted.web.client.Agent + :param reactor: an optional reactor for stopping in case of errors, used + in tests. 
+ :type reactor: twisted.internet.base.ReactorBase + """ + url = urlsplit(couch_url) + auth = (url.username, url.password) if url.username else None + url = "%s://%s:%d" % (url.scheme, url.hostname, url.port) + res = yield treq.get(urljoin(url, '_all_dbs'), auth=auth, agent=agent) + dbs = yield res.json() + deferreds = [] + semaphore = defer.DeferredSemaphore(20) + for db in dbs: + if not db.startswith('user-'): + continue + d = semaphore.run(_check_db_schema_version, url, db, auth, agent=agent) + deferreds.append(d) + d = defer.gatherResults(deferreds, consumeErrors=True) + d.addErrback(_stop, reactor=reactor) + yield d + + +# +# CouchDB Server state +# + def is_db_name_valid(name): """ Validate a user database using a regular expression. @@ -57,8 +154,7 @@ class CouchServerState(ServerState): Inteface of the WSGI server with the CouchDB backend. """ - def __init__(self, couch_url, create_cmd=None, - check_schema_versions=False): + def __init__(self, couch_url, create_cmd=None): """ Initialize the couch server state. @@ -69,40 +165,9 @@ class CouchServerState(ServerState): name and should access CouchDB with necessary privileges, which server lacks for security reasons. :type create_cmd: str - :param check_schema_versions: Whether to check couch schema version of - user dbs. Set to False as this is only - intended to run once during start-up. - :type check_schema_versions: bool """ self.couch_url = couch_url self.create_cmd = create_cmd - if check_schema_versions: - self._check_schema_versions() - - def _check_schema_versions(self): - """ - Check that all user databases use the correct couch schema. 
- """ - server = couchdb.client.Server(self.couch_url) - for dbname in server: - if not dbname.startswith('user-'): - continue - db = server[dbname] - - # if there are documents, ensure that a config doc exists - config_doc = db.get(CONFIG_DOC_ID) - if config_doc: - if config_doc[SCHEMA_VERSION_KEY] != SCHEMA_VERSION: - logger.error( - "Unsupported database schema in database %s" % dbname) - raise WrongCouchSchemaVersionError(dbname) - else: - result = db.view('_all_docs', limit=1) - if result.total_rows != 0: - logger.error( - "Missing couch config document in database %s" - % dbname) - raise MissingCouchConfigDocumentError(dbname) def open_database(self, dbname): """ diff --git a/src/leap/soledad/server/_wsgi.py b/src/leap/soledad/server/_wsgi.py index f1b0018d..510cb7b9 100644 --- a/src/leap/soledad/server/_wsgi.py +++ b/src/leap/soledad/server/_wsgi.py @@ -33,8 +33,7 @@ __all__ = ['init_couch_state', 'get_sync_resource'] def _get_couch_state(conf): - state = CouchServerState(conf['couch_url'], create_cmd=conf['create_cmd'], - check_schema_versions=True) + state = CouchServerState(conf['couch_url'], create_cmd=conf['create_cmd']) SoledadBackend.BATCH_SUPPORT = conf.get('batching', False) return state diff --git a/src/leap/soledad/server/entrypoints.py b/src/leap/soledad/server/entrypoints.py index 7d18ca58..9bccbcf4 100644 --- a/src/leap/soledad/server/entrypoints.py +++ b/src/leap/soledad/server/entrypoints.py @@ -26,6 +26,7 @@ from twisted.internet import reactor from twisted.python import threadpool from twisted.logger import Logger +from ..common.couch.state import check_schema_versions from .auth import localPortal, publicPortal from .session import SoledadSession from ._config import get_config @@ -73,4 +74,5 @@ def check_conf(): reactor.callWhenRunning(check_conf) +reactor.callWhenRunning(check_schema_versions, conf['couch_url']) reactor.callWhenRunning(init_couch_state, conf) diff --git a/tests/couch/test_command.py b/tests/couch/test_command.py index 
9fb2c153..52719065 100644 --- a/tests/couch/test_command.py +++ b/tests/couch/test_command.py @@ -10,7 +10,7 @@ class CommandBasedDBCreationTest(unittest.TestCase): def test_ensure_db_using_custom_command(self): state = couch_state.CouchServerState( - "url", create_cmd="/bin/echo", check_schema_versions=False) + "url", create_cmd="/bin/echo") mock_db = Mock() mock_db.replica_uid = 'replica_uid' state.open_database = Mock(return_value=mock_db) @@ -20,12 +20,11 @@ class CommandBasedDBCreationTest(unittest.TestCase): def test_raises_unauthorized_on_failure(self): state = couch_state.CouchServerState( - "url", create_cmd="inexistent", check_schema_versions=False) + "url", create_cmd="inexistent") self.assertRaises(u1db_errors.Unauthorized, state.ensure_database, "user-1337") def test_raises_unauthorized_by_default(self): - state = couch_state.CouchServerState("url", - check_schema_versions=False) + state = couch_state.CouchServerState("url") self.assertRaises(u1db_errors.Unauthorized, state.ensure_database, "user-1337") diff --git a/tests/couch/test_state.py b/tests/couch/test_state.py index e5ac3704..673d9c41 100644 --- a/tests/couch/test_state.py +++ b/tests/couch/test_state.py @@ -1,14 +1,21 @@ +import mock import pytest + from leap.soledad.common.couch import CONFIG_DOC_ID from leap.soledad.common.couch import SCHEMA_VERSION from leap.soledad.common.couch import SCHEMA_VERSION_KEY -from leap.soledad.common.couch.state import CouchServerState +from leap.soledad.common.couch.state import _check_db_schema_version +from leap.soledad.common.couch.state import check_schema_versions from uuid import uuid4 from leap.soledad.common.errors import WrongCouchSchemaVersionError from leap.soledad.common.errors import MissingCouchConfigDocumentError from test_soledad.util import CouchDBTestCase +from twisted.internet import defer +from twisted.internet import reactor +from twisted.web.client import HTTPConnectionPool, Agent + class CouchDesignDocsTests(CouchDBTestCase): @@ -16,17 
+23,43 @@ class CouchDesignDocsTests(CouchDBTestCase): CouchDBTestCase.setUp(self) self.db = self.couch_server.create('user-' + uuid4().hex) self.addCleanup(self.delete_db, self.db.name) + self.pool = HTTPConnectionPool(reactor, persistent=False) + self.agent = Agent(reactor, pool=self.pool) + + @defer.inlineCallbacks + def tearDown(self): + yield self.pool.closeCachedConnections() - def test_wrong_couch_version_raises(self): + @defer.inlineCallbacks + def test__check_db_schema_version_wrong_schema_version_raises(self): wrong_schema_version = SCHEMA_VERSION + 1 self.db.create( {'_id': CONFIG_DOC_ID, SCHEMA_VERSION_KEY: wrong_schema_version}) with pytest.raises(WrongCouchSchemaVersionError): - CouchServerState(self.couch_url, create_cmd='/bin/echo', - check_schema_versions=True) + yield _check_db_schema_version( + self.couch_url, self.db.name, None, agent=self.agent) - def test_missing_config_doc_raises(self): + @defer.inlineCallbacks + def test_check_schema_versions_wrong_schema_version_stops_reactor(self): + wrong_schema_version = SCHEMA_VERSION + 1 + self.db.create( + {'_id': CONFIG_DOC_ID, SCHEMA_VERSION_KEY: wrong_schema_version}) + mocked_reactor = mock.Mock() + yield check_schema_versions( + self.couch_url, agent=self.agent, reactor=mocked_reactor) + self.assertTrue(mocked_reactor.stop.call_count == 1) + + @defer.inlineCallbacks + def test__check_db_schema_version_missing_config_doc_raises(self): self.db.create({}) with pytest.raises(MissingCouchConfigDocumentError): - CouchServerState(self.couch_url, create_cmd='/bin/echo', - check_schema_versions=True) + yield _check_db_schema_version( + self.couch_url, self.db.name, None, agent=self.agent) + + @defer.inlineCallbacks + def test_check_schema_versions_missing_config_doc_stops_reactor(self): + self.db.create({}) + mocked_reactor = mock.Mock() + yield check_schema_versions( + self.couch_url, agent=self.agent, reactor=mocked_reactor) + self.assertTrue(mocked_reactor.stop.call_count == 1) |