diff options
| author | drebs <drebs@riseup.net> | 2017-10-25 15:58:56 -0200 | 
|---|---|---|
| committer | drebs <drebs@riseup.net> | 2017-10-31 16:52:59 -0200 | 
| commit | 3306ea69ee33cdafa8803f9c717b60b9f3d3b4a0 (patch) | |
| tree | 3897a29e0d7c2fddc262eca71779e46bae43313c | |
| parent | f1c010fc4119d6348d0bd823e4a9f6da310149bd (diff) | |
[feat] improve speed of server startup
To avoid corrupting data, Soledad Server checks all user databases
during startup to make sure all of them use the correct schema version.
This was done synchronously, so when there are many databases startup
would take a long time. This commit makes that verification
asynchronous, thus speeding up server startup.
| -rw-r--r-- | src/leap/soledad/common/couch/state.py | 133 | ||||
| -rw-r--r-- | src/leap/soledad/server/_wsgi.py | 3 | ||||
| -rw-r--r-- | src/leap/soledad/server/entrypoints.py | 2 | ||||
| -rw-r--r-- | tests/couch/test_command.py | 7 | ||||
| -rw-r--r-- | tests/couch/test_state.py | 47 | 
5 files changed, 145 insertions, 47 deletions
| diff --git a/src/leap/soledad/common/couch/state.py b/src/leap/soledad/common/couch/state.py index 8cbe0934..38098e3f 100644 --- a/src/leap/soledad/common/couch/state.py +++ b/src/leap/soledad/common/couch/state.py @@ -17,10 +17,15 @@  """  Server state using CouchDatabase as backend.  """ -import couchdb  import re +import os +import treq  from six.moves.urllib.parse import urljoin +from twisted.internet import defer +from urlparse import urlsplit + +from twisted.internet import reactor  from leap.soledad.common.log import getLogger  from leap.soledad.common.couch import CouchDatabase @@ -37,6 +42,98 @@ from leap.soledad.common.errors import MissingCouchConfigDocumentError  logger = getLogger(__name__) +# +# Database schema version verification +# + +@defer.inlineCallbacks +def _check_db_schema_version(url, db, auth, agent=None): +    """ +    Check if the schema version is up to date for a given database. + +    :param url: the server base URL. +    :type url: str +    :param db: the database name. +    :type db: str +    :param auth: a tuple with (username, password) for acessing CouchDB. +    :type auth: tuple(str, str) +    :param agent: an optional agent for doing requests, used in tests. +    :type agent: twisted.web.client.Agent + +    :raise MissingCouchConfigDocumentError: raised when a database is not empty +                                            but has no config document in it. + +    :raise WrongCouchSchemaVersionError: raised when a config document was +                                         found but the schema version is +                                         different from what is expected. +    """ +    # if there are documents, ensure that a config doc exists +    db_url = urljoin(url, '%s/' % db) +    config_doc_url = urljoin(db_url, CONFIG_DOC_ID) +    res = yield treq.get(config_doc_url, auth=auth, agent=agent) + +    if res.code != 200 and res.code != 404: +        raise Exception("Unexpected HTTP response code: %d" % res.code) + +    elif res.code == 404: +        res = yield treq.get(urljoin(db_url, '_all_docs'), auth=auth, +                             params={'limit': 1}, agent=agent) +        docs = yield res.json() +        if docs['total_rows'] != 0: +            logger.error( +                "Missing couch config document in database %s" % db) +            raise MissingCouchConfigDocumentError(db) + +    elif res.code == 200: +        config_doc = yield res.json() +        if config_doc[SCHEMA_VERSION_KEY] != SCHEMA_VERSION: +            logger.error( +                "Unsupported database schema in database %s" % db) +            raise WrongCouchSchemaVersionError(db) + + +def _stop(failure, reactor): +    exception = failure.value.subFailure.value +    logger.error("Failure while checking schema versions: %r - %s" +                 % (exception, exception.message)) +    reactor.addSystemEventTrigger('after', 'shutdown', os._exit, 1) +    reactor.stop() + + +@defer.inlineCallbacks +def check_schema_versions(couch_url, agent=None, reactor=reactor): +    """ +    Check that all user databases use the correct couch schema. + +    :param couch_url: The URL for the couch database. +    :type couch_url: str +    :param agent: an optional agent for doing requests, used in tests. +    :type agent: twisted.web.client.Agent +    :param reactor: an optional reactor for stopping in case of errors, used +                    in tests. +    :type reactor: twisted.internet.base.ReactorBase +    """ +    url = urlsplit(couch_url) +    auth = (url.username, url.password) if url.username else None +    url = "%s://%s:%d" % (url.scheme, url.hostname, url.port) +    res = yield treq.get(urljoin(url, '_all_dbs'), auth=auth, agent=agent) +    dbs = yield res.json() +    deferreds = [] +    semaphore = defer.DeferredSemaphore(20) +    for db in dbs: +        if not db.startswith('user-'): +            continue +        d = semaphore.run(_check_db_schema_version, url, db, auth, agent=agent) +        deferreds.append(d) +    d = defer.gatherResults(deferreds, consumeErrors=True) +    d.addErrback(_stop, reactor=reactor) +    yield d + + +# +# CouchDB Server state +# +  def is_db_name_valid(name):      """      Validate a user database using a regular expression. @@ -57,8 +154,7 @@ class CouchServerState(ServerState):      Inteface of the WSGI server with the CouchDB backend.      """ -    def __init__(self, couch_url, create_cmd=None, -                 check_schema_versions=False): +    def __init__(self, couch_url, create_cmd=None):          """          Initialize the couch server state. @@ -69,40 +165,9 @@ class CouchServerState(ServerState):                             name and should access CouchDB with necessary                             privileges, which server lacks for security reasons.          :type create_cmd: str -        :param check_schema_versions: Whether to check couch schema version of -                                      user dbs. Set to False as this is only -                                      intended to run once during start-up. -        :type check_schema_versions: bool          """          self.couch_url = couch_url          self.create_cmd = create_cmd -        if check_schema_versions: -            self._check_schema_versions() - -    def _check_schema_versions(self): -        """ -        Check that all user databases use the correct couch schema. -        """ -        server = couchdb.client.Server(self.couch_url) -        for dbname in server: -            if not dbname.startswith('user-'): -                continue -            db = server[dbname] - -            # if there are documents, ensure that a config doc exists -            config_doc = db.get(CONFIG_DOC_ID) -            if config_doc: -                if config_doc[SCHEMA_VERSION_KEY] != SCHEMA_VERSION: -                    logger.error( -                        "Unsupported database schema in database %s" % dbname) -                    raise WrongCouchSchemaVersionError(dbname) -            else: -                result = db.view('_all_docs', limit=1) -                if result.total_rows != 0: -                    logger.error( -                        "Missing couch config document in database %s" -                        % dbname) -                    raise MissingCouchConfigDocumentError(dbname)      def open_database(self, dbname):          """ diff --git a/src/leap/soledad/server/_wsgi.py b/src/leap/soledad/server/_wsgi.py index f1b0018d..510cb7b9 100644 --- a/src/leap/soledad/server/_wsgi.py +++ b/src/leap/soledad/server/_wsgi.py @@ -33,8 +33,7 @@ __all__ = ['init_couch_state', 'get_sync_resource']  def _get_couch_state(conf): -    state = CouchServerState(conf['couch_url'], create_cmd=conf['create_cmd'], -                             check_schema_versions=True) +    state = CouchServerState(conf['couch_url'], create_cmd=conf['create_cmd'])      SoledadBackend.BATCH_SUPPORT = conf.get('batching', False)      return state diff --git a/src/leap/soledad/server/entrypoints.py b/src/leap/soledad/server/entrypoints.py index 7d18ca58..9bccbcf4 100644 --- a/src/leap/soledad/server/entrypoints.py +++ b/src/leap/soledad/server/entrypoints.py @@ -26,6 +26,7 @@ from twisted.internet import reactor  from twisted.python import threadpool  from twisted.logger import Logger +from ..common.couch.state import check_schema_versions  from .auth import localPortal, publicPortal  from .session import SoledadSession  from ._config import get_config @@ -73,4 +74,5 @@ def check_conf():  reactor.callWhenRunning(check_conf) +reactor.callWhenRunning(check_schema_versions, conf['couch_url'])  reactor.callWhenRunning(init_couch_state, conf) diff --git a/tests/couch/test_command.py b/tests/couch/test_command.py index 9fb2c153..52719065 100644 --- a/tests/couch/test_command.py +++ b/tests/couch/test_command.py @@ -10,7 +10,7 @@ class CommandBasedDBCreationTest(unittest.TestCase):      def test_ensure_db_using_custom_command(self):          state = couch_state.CouchServerState( -            "url", create_cmd="/bin/echo", check_schema_versions=False) +            "url", create_cmd="/bin/echo")          mock_db = Mock()          mock_db.replica_uid = 'replica_uid'          state.open_database = Mock(return_value=mock_db) @@ -20,12 +20,11 @@ class CommandBasedDBCreationTest(unittest.TestCase):      def test_raises_unauthorized_on_failure(self):          state = couch_state.CouchServerState( -            "url", create_cmd="inexistent", check_schema_versions=False) +            "url", create_cmd="inexistent")          self.assertRaises(u1db_errors.Unauthorized,                            state.ensure_database, "user-1337")      def test_raises_unauthorized_by_default(self): -        state = couch_state.CouchServerState("url", -                                             check_schema_versions=False) +        state = couch_state.CouchServerState("url")          self.assertRaises(u1db_errors.Unauthorized,                            state.ensure_database, "user-1337") diff --git a/tests/couch/test_state.py b/tests/couch/test_state.py index e5ac3704..673d9c41 100644 --- a/tests/couch/test_state.py +++ b/tests/couch/test_state.py @@ -1,14 +1,21 @@ +import mock  import pytest +  from leap.soledad.common.couch import CONFIG_DOC_ID  from leap.soledad.common.couch import SCHEMA_VERSION  from leap.soledad.common.couch import SCHEMA_VERSION_KEY -from leap.soledad.common.couch.state import CouchServerState +from leap.soledad.common.couch.state import _check_db_schema_version +from leap.soledad.common.couch.state import check_schema_versions  from uuid import uuid4  from leap.soledad.common.errors import WrongCouchSchemaVersionError  from leap.soledad.common.errors import MissingCouchConfigDocumentError  from test_soledad.util import CouchDBTestCase +from twisted.internet import defer +from twisted.internet import reactor +from twisted.web.client import HTTPConnectionPool, Agent +  class CouchDesignDocsTests(CouchDBTestCase): @@ -16,17 +23,43 @@ class CouchDesignDocsTests(CouchDBTestCase):          CouchDBTestCase.setUp(self)          self.db = self.couch_server.create('user-' + uuid4().hex)          self.addCleanup(self.delete_db, self.db.name) +        self.pool = HTTPConnectionPool(reactor, persistent=False) +        self.agent = Agent(reactor, pool=self.pool) + +    @defer.inlineCallbacks +    def tearDown(self): +        yield self.pool.closeCachedConnections() -    def test_wrong_couch_version_raises(self): +    @defer.inlineCallbacks +    def test__check_db_schema_version_wrong_schema_version_raises(self):          wrong_schema_version = SCHEMA_VERSION + 1          self.db.create(              {'_id': CONFIG_DOC_ID, SCHEMA_VERSION_KEY: wrong_schema_version})          with pytest.raises(WrongCouchSchemaVersionError): -            CouchServerState(self.couch_url, create_cmd='/bin/echo', -                             check_schema_versions=True) +            yield _check_db_schema_version( +                self.couch_url, self.db.name, None, agent=self.agent) -    def test_missing_config_doc_raises(self): +    @defer.inlineCallbacks +    def test_check_schema_versions_wrong_schema_version_stops_reactor(self): +        wrong_schema_version = SCHEMA_VERSION + 1 +        self.db.create( +            {'_id': CONFIG_DOC_ID, SCHEMA_VERSION_KEY: wrong_schema_version}) +        mocked_reactor = mock.Mock() +        yield check_schema_versions( +            self.couch_url, agent=self.agent, reactor=mocked_reactor) +        self.assertTrue(mocked_reactor.stop.call_count == 1) + +    @defer.inlineCallbacks +    def test__check_db_schema_version_missing_config_doc_raises(self):          self.db.create({})          with pytest.raises(MissingCouchConfigDocumentError): -            CouchServerState(self.couch_url, create_cmd='/bin/echo', -                             check_schema_versions=True) +            yield _check_db_schema_version( +                self.couch_url, self.db.name, None, agent=self.agent) + +    @defer.inlineCallbacks +    def test_check_schema_versions_missing_config_doc_stops_reactor(self): +        self.db.create({}) +        mocked_reactor = mock.Mock() +        yield check_schema_versions( +            self.couch_url, agent=self.agent, reactor=mocked_reactor) +        self.assertTrue(mocked_reactor.stop.call_count == 1) | 
