summaryrefslogtreecommitdiff
path: root/src/leap/soledad
diff options
context:
space:
mode:
authordrebs <drebs@riseup.net>2017-10-25 15:58:56 -0200
committerdrebs <drebs@riseup.net>2017-10-31 16:52:59 -0200
commit3306ea69ee33cdafa8803f9c717b60b9f3d3b4a0 (patch)
tree3897a29e0d7c2fddc262eca71779e46bae43313c /src/leap/soledad
parentf1c010fc4119d6348d0bd823e4a9f6da310149bd (diff)
[feat] improve speed of server startup
To avoid corrupting data, Soledad Server checks all user databases during startup to make sure all of them use the correct schema version. This was done synchronously, so when there are many databases startup would take a long time. This commit makes that verification asynchronous, thus speeding up server startup.
Diffstat (limited to 'src/leap/soledad')
-rw-r--r--src/leap/soledad/common/couch/state.py133
-rw-r--r--src/leap/soledad/server/_wsgi.py3
-rw-r--r--src/leap/soledad/server/entrypoints.py2
3 files changed, 102 insertions, 36 deletions
diff --git a/src/leap/soledad/common/couch/state.py b/src/leap/soledad/common/couch/state.py
index 8cbe0934..38098e3f 100644
--- a/src/leap/soledad/common/couch/state.py
+++ b/src/leap/soledad/common/couch/state.py
@@ -17,10 +17,15 @@
"""
Server state using CouchDatabase as backend.
"""
-import couchdb
import re
+import os
+import treq
from six.moves.urllib.parse import urljoin
+from twisted.internet import defer
+from urlparse import urlsplit
+
+from twisted.internet import reactor
from leap.soledad.common.log import getLogger
from leap.soledad.common.couch import CouchDatabase
@@ -37,6 +42,98 @@ from leap.soledad.common.errors import MissingCouchConfigDocumentError
logger = getLogger(__name__)
+#
+# Database schema version verification
+#
+
+@defer.inlineCallbacks
+def _check_db_schema_version(url, db, auth, agent=None):
+ """
+ Check if the schema version is up to date for a given database.
+
+ :param url: the server base URL.
+ :type url: str
+ :param db: the database name.
+ :type db: str
+ :param auth: a tuple with (username, password) for acessing CouchDB.
+ :type auth: tuple(str, str)
+ :param agent: an optional agent for doing requests, used in tests.
+ :type agent: twisted.web.client.Agent
+
+ :raise MissingCouchConfigDocumentError: raised when a database is not empty
+ but has no config document in it.
+
+ :raise WrongCouchSchemaVersionError: raised when a config document was
+ found but the schema version is
+ different from what is expected.
+ """
+ # if there are documents, ensure that a config doc exists
+ db_url = urljoin(url, '%s/' % db)
+ config_doc_url = urljoin(db_url, CONFIG_DOC_ID)
+ res = yield treq.get(config_doc_url, auth=auth, agent=agent)
+
+ if res.code != 200 and res.code != 404:
+ raise Exception("Unexpected HTTP response code: %d" % res.code)
+
+ elif res.code == 404:
+ res = yield treq.get(urljoin(db_url, '_all_docs'), auth=auth,
+ params={'limit': 1}, agent=agent)
+ docs = yield res.json()
+ if docs['total_rows'] != 0:
+ logger.error(
+ "Missing couch config document in database %s" % db)
+ raise MissingCouchConfigDocumentError(db)
+
+ elif res.code == 200:
+ config_doc = yield res.json()
+ if config_doc[SCHEMA_VERSION_KEY] != SCHEMA_VERSION:
+ logger.error(
+ "Unsupported database schema in database %s" % db)
+ raise WrongCouchSchemaVersionError(db)
+
+
+def _stop(failure, reactor):
+ exception = failure.value.subFailure.value
+ logger.error("Failure while checking schema versions: %r - %s"
+ % (exception, exception.message))
+ reactor.addSystemEventTrigger('after', 'shutdown', os._exit, 1)
+ reactor.stop()
+
+
+@defer.inlineCallbacks
+def check_schema_versions(couch_url, agent=None, reactor=reactor):
+ """
+ Check that all user databases use the correct couch schema.
+
+ :param couch_url: The URL for the couch database.
+ :type couch_url: str
+ :param agent: an optional agent for doing requests, used in tests.
+ :type agent: twisted.web.client.Agent
+ :param reactor: an optional reactor for stopping in case of errors, used
+ in tests.
+ :type reactor: twisted.internet.base.ReactorBase
+ """
+ url = urlsplit(couch_url)
+ auth = (url.username, url.password) if url.username else None
+ url = "%s://%s:%d" % (url.scheme, url.hostname, url.port)
+ res = yield treq.get(urljoin(url, '_all_dbs'), auth=auth, agent=agent)
+ dbs = yield res.json()
+ deferreds = []
+ semaphore = defer.DeferredSemaphore(20)
+ for db in dbs:
+ if not db.startswith('user-'):
+ continue
+ d = semaphore.run(_check_db_schema_version, url, db, auth, agent=agent)
+ deferreds.append(d)
+ d = defer.gatherResults(deferreds, consumeErrors=True)
+ d.addErrback(_stop, reactor=reactor)
+ yield d
+
+
+#
+# CouchDB Server state
+#
+
def is_db_name_valid(name):
"""
Validate a user database using a regular expression.
@@ -57,8 +154,7 @@ class CouchServerState(ServerState):
Inteface of the WSGI server with the CouchDB backend.
"""
- def __init__(self, couch_url, create_cmd=None,
- check_schema_versions=False):
+ def __init__(self, couch_url, create_cmd=None):
"""
Initialize the couch server state.
@@ -69,40 +165,9 @@ class CouchServerState(ServerState):
name and should access CouchDB with necessary
privileges, which server lacks for security reasons.
:type create_cmd: str
- :param check_schema_versions: Whether to check couch schema version of
- user dbs. Set to False as this is only
- intended to run once during start-up.
- :type check_schema_versions: bool
"""
self.couch_url = couch_url
self.create_cmd = create_cmd
- if check_schema_versions:
- self._check_schema_versions()
-
- def _check_schema_versions(self):
- """
- Check that all user databases use the correct couch schema.
- """
- server = couchdb.client.Server(self.couch_url)
- for dbname in server:
- if not dbname.startswith('user-'):
- continue
- db = server[dbname]
-
- # if there are documents, ensure that a config doc exists
- config_doc = db.get(CONFIG_DOC_ID)
- if config_doc:
- if config_doc[SCHEMA_VERSION_KEY] != SCHEMA_VERSION:
- logger.error(
- "Unsupported database schema in database %s" % dbname)
- raise WrongCouchSchemaVersionError(dbname)
- else:
- result = db.view('_all_docs', limit=1)
- if result.total_rows != 0:
- logger.error(
- "Missing couch config document in database %s"
- % dbname)
- raise MissingCouchConfigDocumentError(dbname)
def open_database(self, dbname):
"""
diff --git a/src/leap/soledad/server/_wsgi.py b/src/leap/soledad/server/_wsgi.py
index f1b0018d..510cb7b9 100644
--- a/src/leap/soledad/server/_wsgi.py
+++ b/src/leap/soledad/server/_wsgi.py
@@ -33,8 +33,7 @@ __all__ = ['init_couch_state', 'get_sync_resource']
def _get_couch_state(conf):
- state = CouchServerState(conf['couch_url'], create_cmd=conf['create_cmd'],
- check_schema_versions=True)
+ state = CouchServerState(conf['couch_url'], create_cmd=conf['create_cmd'])
SoledadBackend.BATCH_SUPPORT = conf.get('batching', False)
return state
diff --git a/src/leap/soledad/server/entrypoints.py b/src/leap/soledad/server/entrypoints.py
index 7d18ca58..9bccbcf4 100644
--- a/src/leap/soledad/server/entrypoints.py
+++ b/src/leap/soledad/server/entrypoints.py
@@ -26,6 +26,7 @@ from twisted.internet import reactor
from twisted.python import threadpool
from twisted.logger import Logger
+from ..common.couch.state import check_schema_versions
from .auth import localPortal, publicPortal
from .session import SoledadSession
from ._config import get_config
@@ -73,4 +74,5 @@ def check_conf():
reactor.callWhenRunning(check_conf)
+reactor.callWhenRunning(check_schema_versions, conf['couch_url'])
reactor.callWhenRunning(init_couch_state, conf)