Diffstat (limited to 'server/src/leap')
-rw-r--r--   server/src/leap/soledad/server/__init__.py |  63
-rw-r--r--   server/src/leap/soledad/server/_version.py | 489
-rw-r--r--   server/src/leap/soledad/server/auth.py     |  55
-rw-r--r--   server/src/leap/soledad/server/caching.py  |  32
-rw-r--r--   server/src/leap/soledad/server/state.py    | 141
-rw-r--r--   server/src/leap/soledad/server/sync.py     | 192
6 files changed, 713 insertions(+), 259 deletions(-)
diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index 1b795016..22894dac 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -94,12 +94,6 @@ from u1db.remote import http_app, utils from ._version import get_versions -# Keep OpenSSL's tsafe before importing Twisted submodules so we can put -# it back if Twisted==12.0.0 messes with it. -from OpenSSL import tsafe - -from twisted import version - from leap.soledad.server.auth import SoledadTokenAuthMiddleware from leap.soledad.server.gzip_middleware import GzipMiddleware from leap.soledad.server.lock_resource import LockResource @@ -110,14 +104,8 @@ from leap.soledad.server.sync import ( ) from leap.soledad.common import SHARED_DB_NAME -from leap.soledad.common.couch import CouchServerState - -old_tsafe = tsafe - -if version.base() == "12.0.0": - # Put OpenSSL's tsafe back into place. This can probably be removed if we - # come to use Twisted>=12.3.0. - sys.modules['OpenSSL.tsafe'] = old_tsafe +from leap.soledad.common.backend import SoledadBackend +from leap.soledad.common.couch.state import CouchServerState # ---------------------------------------------------------------------------- # Soledad WSGI application @@ -272,6 +260,21 @@ http_app.HTTPInvocationByMethodWithBody = HTTPInvocationByMethodWithBody # ---------------------------------------------------------------------------- # Auxiliary functions # ---------------------------------------------------------------------------- +CONFIG_DEFAULTS = { + 'soledad-server': { + 'couch_url': 'http://localhost:5984', + 'create_cmd': None, + 'admin_netrc': '/etc/couchdb/couchdb-admin.netrc', + 'batching': False + }, + 'database-security': { + 'members': ['soledad'], + 'members_roles': [], + 'admins': [], + 'admins_roles': [] + } +} + def load_configuration(file_path): """ @@ -283,18 +286,26 @@ def load_configuration(file_path): @return: A dictionary with the configuration. @rtype: dict """ - conf = { - 'couch_url': 'http://localhost:5984', - } - config = configparser.ConfigParser() + defaults = dict(CONFIG_DEFAULTS) + config = configparser.SafeConfigParser() config.read(file_path) - if 'soledad-server' in config: - for key in conf: - if key in config['soledad-server']: - conf[key] = config['soledad-server'][key] + for section in defaults: + if not config.has_section(section): + continue + for key, value in defaults[section].items(): + if not config.has_option(section, key): + continue + elif type(value) == bool: + defaults[section][key] = config.getboolean(section, key) + elif type(value) == list: + values = config.get(section, key).split(',') + values = [v.strip() for v in values] + defaults[section][key] = values + else: + defaults[section][key] = config.get(section, key) # TODO: implement basic parsing/sanitization of options comming from # config file. 
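Note on the configuration change above: load_configuration() now starts from CONFIG_DEFAULTS and only overrides options that actually appear in the file, coercing booleans with getboolean() and splitting comma-separated values into lists. A minimal, self-contained sketch of that behaviour, assuming Python 2's ConfigParser module; the config path below is illustrative, not the server's:

    from ConfigParser import SafeConfigParser

    # hypothetical defaults, mirroring the shape of CONFIG_DEFAULTS
    defaults = {
        'soledad-server': {
            'couch_url': 'http://localhost:5984',
            'batching': False,
        },
        'database-security': {
            'members': ['soledad'],
        },
    }

    parser = SafeConfigParser()
    parser.read('/tmp/example-soledad-server.conf')  # illustrative path

    for section, options in defaults.items():
        if not parser.has_section(section):
            continue
        for key, value in options.items():
            if not parser.has_option(section, key):
                continue  # keep the default
            if isinstance(value, bool):
                options[key] = parser.getboolean(section, key)
            elif isinstance(value, list):
                options[key] = [v.strip()
                                for v in parser.get(section, key).split(',')]
            else:
                options[key] = parser.get(section, key)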
- return conf + return defaults # ---------------------------------------------------------------------------- @@ -302,8 +313,10 @@ def load_configuration(file_path): # ---------------------------------------------------------------------------- def application(environ, start_response): - conf = load_configuration('/etc/leap/soledad-server.conf') - state = CouchServerState(conf['couch_url']) + conf = load_configuration('/etc/soledad/soledad-server.conf') + conf = conf['soledad-server'] + state = CouchServerState(conf['couch_url'], create_cmd=conf['create_cmd']) + SoledadBackend.BATCH_SUPPORT = conf['batching'] # WSGI application that may be used by `twistd -web` application = GzipMiddleware( SoledadTokenAuthMiddleware(SoledadApp(state))) diff --git a/server/src/leap/soledad/server/_version.py b/server/src/leap/soledad/server/_version.py index 62315c76..8c27440f 100644 --- a/server/src/leap/soledad/server/_version.py +++ b/server/src/leap/soledad/server/_version.py @@ -1,13 +1,484 @@ -# This file was generated by the `freeze_debianver` command in setup.py -# Using 'versioneer.py' (0.7+) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. -version_version = '0.7.4' -version_full = '49fd07cde3b1f50dcce85d4e9fcdfc6196f484c4' +# This file is released into the public domain. Generated by +# versioneer-0.16 (https://github.com/warner/python-versioneer) +"""Git implementation of _version.py.""" -def get_versions(default={}, verbose=False): - return {'version': version_version, 'full': version_full} +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + keywords = {"refnames": git_refnames, "full": git_full} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "None" + cfg.versionfile_source = "src/leap/soledad/server/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + return None + return stdout + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes + both the project name and a version string. + """ + dirname = os.path.basename(root) + if not dirname.startswith(parentdir_prefix): + if verbose: + print("guessing rootdir is '%s', but '%s' doesn't start with " + "prefix '%s'" % (root, dirname, parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None} + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs-tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags"} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + if not os.path.exists(os.path.join(root, ".git")): + if verbose: + print("no .git in %s" % root) + raise NotThisMethod("no .git directory") + + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%s*" % tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Eexceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"]} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. 
If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for i in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree"} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version"} diff --git a/server/src/leap/soledad/server/auth.py b/server/src/leap/soledad/server/auth.py index 02b54cca..ccbd6fbd 100644 --- a/server/src/leap/soledad/server/auth.py +++ b/server/src/leap/soledad/server/auth.py @@ -21,20 +21,16 @@ Authentication facilities for Soledad Server. """ -import time import httplib import json from u1db import DBNAME_CONSTRAINTS, errors as u1db_errors from abc import ABCMeta, abstractmethod from routes.mapper import Mapper -from couchdb.client import Server from twisted.python import log -from hashlib import sha512 from leap.soledad.common import SHARED_DB_NAME from leap.soledad.common import USER_DB_PREFIX -from leap.soledad.common.errors import InvalidAuthTokenError class URLToAuthorization(object): @@ -351,14 +347,12 @@ class SoledadTokenAuthMiddleware(SoledadAuthMiddleware): Token based authentication. """ - TOKENS_DB_PREFIX = "tokens_" - TOKENS_DB_EXPIRE = 30 * 24 * 3600 # 30 days in seconds - TOKENS_TYPE_KEY = "type" - TOKENS_TYPE_DEF = "Token" - TOKENS_USER_ID_KEY = "user_id" - TOKEN_AUTH_ERROR_STRING = "Incorrect address or token." + def __init__(self, app): + self._state = app.state + super(SoledadTokenAuthMiddleware, self).__init__(app) + def _verify_authentication_scheme(self, scheme): """ Verify if authentication scheme is valid. @@ -391,50 +385,11 @@ class SoledadTokenAuthMiddleware(SoledadAuthMiddleware): """ token = auth_data # we expect a cleartext token at this point try: - return self._verify_token_in_couch(uuid, token) - except InvalidAuthTokenError: - raise + return self._state.verify_token(uuid, token) except Exception as e: log.err(e) return False - def _verify_token_in_couch(self, uuid, token): - """ - Query couchdb to decide if C{token} is valid for C{uuid}. - - @param uuid: The user uuid. - @type uuid: str - @param token: The token. - @type token: str - - @raise InvalidAuthTokenError: Raised when token received from user is - either missing in the tokens db or is - invalid. - """ - server = Server(url=self._app.state.couch_url) - # the tokens db rotates every 30 days, and the current db name is - # "tokens_NNN", where NNN is the number of seconds since epoch divided - # by the rotate period in seconds. When rotating, old and new tokens - # db coexist during a certain window of time and valid tokens are - # replicated from the old db to the new one. 
See: - # https://leap.se/code/issues/6785 - dbname = self.TOKENS_DB_PREFIX + \ - str(int(time.time() / self.TOKENS_DB_EXPIRE)) - db = server[dbname] - # lookup key is a hash of the token to prevent timing attacks. - token = db.get(sha512(token).hexdigest()) - if token is None: - raise InvalidAuthTokenError() - # we compare uuid hashes to avoid possible timing attacks that - # might exploit python's builtin comparison operator behaviour, - # which fails immediatelly when non-matching bytes are found. - couch_uuid_hash = sha512(token[self.TOKENS_USER_ID_KEY]).digest() - req_uuid_hash = sha512(uuid).digest() - if token[self.TOKENS_TYPE_KEY] != self.TOKENS_TYPE_DEF \ - or couch_uuid_hash != req_uuid_hash: - raise InvalidAuthTokenError() - return True - def _get_auth_error_string(self): """ Get the error string for token auth. diff --git a/server/src/leap/soledad/server/caching.py b/server/src/leap/soledad/server/caching.py new file mode 100644 index 00000000..9a049a39 --- /dev/null +++ b/server/src/leap/soledad/server/caching.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# caching.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Server side caching. Using beaker for now. +""" +from beaker.cache import CacheManager + + +def setup_caching(): + _cache_manager = CacheManager(type='memory') + return _cache_manager + + +_cache_manager = setup_caching() + + +def get_cache_for(key, expire=3600): + return _cache_manager.get_cache(key, expire=expire) diff --git a/server/src/leap/soledad/server/state.py b/server/src/leap/soledad/server/state.py new file mode 100644 index 00000000..f269b77e --- /dev/null +++ b/server/src/leap/soledad/server/state.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- +# state.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Server side synchronization infrastructure. +""" +from leap.soledad.server import caching + + +class ServerSyncState(object): + """ + The state of one sync session, as stored on backend server. + + On server side, the ongoing syncs metadata is maintained in + a caching layer. + """ + + def __init__(self, source_replica_uid, sync_id): + """ + Initialize the sync state object. 
+ + :param sync_id: The id of current sync + :type sync_id: str + :param source_replica_uid: The source replica uid + :type source_replica_uid: str + """ + self._source_replica_uid = source_replica_uid + self._sync_id = sync_id + caching_key = source_replica_uid + sync_id + self._storage = caching.get_cache_for(caching_key) + + def _put_dict_info(self, key, value): + """ + Put some information about the sync state. + + :param key: The key for the info to be put. + :type key: str + :param value: The value for the info to be put. + :type value: str + """ + if key not in self._storage: + self._storage[key] = [] + info_list = self._storage.get(key) + info_list.append(value) + self._storage[key] = info_list + + def put_seen_id(self, seen_id, gen): + """ + Put one seen id on the sync state. + + :param seen_id: The doc_id of a document seen during sync. + :type seen_id: str + :param gen: The corresponding db generation. + :type gen: int + """ + self._put_dict_info( + 'seen_id', + (seen_id, gen)) + + def seen_ids(self): + """ + Return all document ids seen during the sync. + + :return: A dict with doc ids seen during the sync. + :rtype: dict + """ + if 'seen_id' in self._storage: + seen_ids = self._storage.get('seen_id') + else: + seen_ids = [] + return dict(seen_ids) + + def put_changes_to_return(self, gen, trans_id, changes_to_return): + """ + Put the calculated changes to return in the backend sync state. + + :param gen: The target database generation that will be synced. + :type gen: int + :param trans_id: The target database transaction id that will be + synced. + :type trans_id: str + :param changes_to_return: A list of tuples with the changes to be + returned during the sync process. + :type changes_to_return: list + """ + self._put_dict_info( + 'changes_to_return', + { + 'gen': gen, + 'trans_id': trans_id, + 'changes_to_return': changes_to_return, + } + ) + + def sync_info(self): + """ + Return information about the current sync state. + + :return: The generation and transaction id of the target database + which will be synced, and the number of documents to return, + or a tuple of Nones if those have not already been sent to + server. + :rtype: tuple + """ + gen = trans_id = number_of_changes = None + if 'changes_to_return' in self._storage: + info = self._storage.get('changes_to_return')[0] + gen = info['gen'] + trans_id = info['trans_id'] + number_of_changes = len(info['changes_to_return']) + return gen, trans_id, number_of_changes + + def next_change_to_return(self, received): + """ + Return the next change to be returned to the source syncing replica. + + :param received: How many documents the source replica has already + received during the current sync process. + :type received: int + """ + gen = trans_id = next_change_to_return = None + if 'changes_to_return' in self._storage: + info = self._storage.get('changes_to_return')[0] + gen = info['gen'] + trans_id = info['trans_id'] + if received < len(info['changes_to_return']): + next_change_to_return = (info['changes_to_return'][received]) + return gen, trans_id, next_change_to_return diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 18c4ee40..96f65912 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -17,189 +17,22 @@ """ Server side synchronization infrastructure. 
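The cache-backed ServerSyncState above replaces the old couch design-document bookkeeping with a per-(source replica, sync id) namespace obtained from caching.get_cache_for(). A hypothetical usage sketch, assuming the leap.soledad.server package and beaker are importable; the ids and generations are made up:

    from leap.soledad.server.state import ServerSyncState

    state = ServerSyncState('source-replica-uid', 'sync-123')

    # while receiving documents from the source, remember what was seen
    state.put_seen_id('doc-a', 7)
    state.put_seen_id('doc-b', 8)
    assert state.seen_ids() == {'doc-a': 7, 'doc-b': 8}

    # after computing what the target must send back, store it once...
    state.put_changes_to_return(
        gen=10, trans_id='trans-10',
        changes_to_return=[('doc-c', 9, 'trans-9')])

    # ...and replay it incrementally as the source acknowledges documents
    gen, trans_id, number_of_changes = state.sync_info()
    gen, trans_id, next_change = state.next_change_to_return(received=0)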
""" -import json - -from leap.soledad.common.couch import CouchDatabase from u1db import sync, Document from u1db.remote import http_app +from leap.soledad.server.state import ServerSyncState +from leap.soledad.server.caching import get_cache_for MAX_REQUEST_SIZE = 200 # in Mb MAX_ENTRY_SIZE = 200 # in Mb -class ServerSyncState(object): - """ - The state of one sync session, as stored on backend server. - - This object performes queries to distinct design documents: - - _design/syncs/_update/state - _design/syncs/_view/state - _design/syncs/_view/seen_ids - _design/syncs/_view/changes_to_return - - On server side, the ongoing syncs metadata is maintained in a document - called 'u1db_sync_state'. - """ - - def __init__(self, db, source_replica_uid, sync_id): - """ - Initialize the sync state object. - - :param db: The target syncing database. - :type db: CouchDatabase. - :param source_replica_uid: CouchDatabase - :type source_replica_uid: str - """ - self._db = db - self._source_replica_uid = source_replica_uid - self._sync_id = sync_id - - def _key(self, key): - """ - Format a key to be used on couch views. - - :param key: The lookup key. - :type key: json serializable object - - :return: The properly formatted key. - :rtype: str - """ - return json.dumps(key, separators=(',', ':')) - - def _put_info(self, key, value): - """ - Put some information on the sync state document. - - This method works in conjunction with the - _design/syncs/_update/state update handler couch backend. - - :param key: The key for the info to be put. - :type key: str - :param value: The value for the info to be put. - :type value: str - """ - ddoc_path = [ - '_design', 'syncs', '_update', 'state', - 'u1db_sync_state'] - res = self._db._database.resource(*ddoc_path) - with CouchDatabase.sync_info_lock[self._db.replica_uid]: - res.put_json( - body={ - 'sync_id': self._sync_id, - 'source_replica_uid': self._source_replica_uid, - key: value, - }, - headers={'content-type': 'application/json'}) - - def put_seen_id(self, seen_id, gen): - """ - Put one seen id on the sync state document. - - :param seen_id: The doc_id of a document seen during sync. - :type seen_id: str - :param gen: The corresponding db generation for that document. - :type gen: int - """ - self._put_info( - 'seen_id', - [seen_id, gen]) - - def seen_ids(self): - """ - Return all document ids seen during the sync. - - :return: A list with doc ids seen during the sync. - :rtype: list - """ - ddoc_path = ['_design', 'syncs', '_view', 'seen_ids'] - resource = self._db._database.resource(*ddoc_path) - response = resource.get_json( - key=self._key([self._source_replica_uid, self._sync_id])) - data = response[2] - if data['rows']: - entry = data['rows'].pop() - return entry['value']['seen_ids'] - return [] - - def put_changes_to_return(self, gen, trans_id, changes_to_return): - """ - Put the calculated changes to return in the backend sync state - document. - - :param gen: The target database generation that will be synced. - :type gen: int - :param trans_id: The target database transaction id that will be - synced. - :type trans_id: str - :param changes_to_return: A list of tuples with the changes to be - returned during the sync process. - :type changes_to_return: list - """ - self._put_info( - 'changes_to_return', - { - 'gen': gen, - 'trans_id': trans_id, - 'changes_to_return': changes_to_return, - } - ) - - def sync_info(self): - """ - Return information about the current sync state. 
- - :return: The generation and transaction id of the target database - which will be synced, and the number of documents to return, - or a tuple of Nones if those have not already been sent to - server. - :rtype: tuple - """ - ddoc_path = ['_design', 'syncs', '_view', 'state'] - resource = self._db._database.resource(*ddoc_path) - response = resource.get_json( - key=self._key([self._source_replica_uid, self._sync_id])) - data = response[2] - gen = None - trans_id = None - number_of_changes = None - if data['rows'] and data['rows'][0]['value'] is not None: - value = data['rows'][0]['value'] - gen = value['gen'] - trans_id = value['trans_id'] - number_of_changes = value['number_of_changes'] - return gen, trans_id, number_of_changes - - def next_change_to_return(self, received): - """ - Return the next change to be returned to the source syncing replica. - - :param received: How many documents the source replica has already - received during the current sync process. - :type received: int - """ - ddoc_path = ['_design', 'syncs', '_view', 'changes_to_return'] - resource = self._db._database.resource(*ddoc_path) - response = resource.get_json( - key=self._key( - [self._source_replica_uid, self._sync_id, received])) - data = response[2] - if not data['rows']: - return None, None, None - value = data['rows'][0]['value'] - gen = value['gen'] - trans_id = value['trans_id'] - next_change_to_return = value['next_change_to_return'] - return gen, trans_id, tuple(next_change_to_return) - - class SyncExchange(sync.SyncExchange): def __init__(self, db, source_replica_uid, last_known_generation, sync_id): """ :param db: The target syncing database. - :type db: CouchDatabase + :type db: SoledadBackend :param source_replica_uid: The uid of the source syncing replica. 
:type source_replica_uid: str :param last_known_generation: The last target replica generation the @@ -216,8 +49,7 @@ class SyncExchange(sync.SyncExchange): self.new_trans_id = None self._trace_hook = None # recover sync state - self._sync_state = ServerSyncState( - self._db, self.source_replica_uid, sync_id) + self._sync_state = ServerSyncState(self.source_replica_uid, sync_id) def find_changes_to_return(self, received): """ @@ -280,6 +112,14 @@ class SyncExchange(sync.SyncExchange): doc = self._db.get_doc(changed_doc_id, include_deleted=True) return_doc_cb(doc, gen, trans_id) + def batched_insert_from_source(self, entries, sync_id): + self._db.batch_start() + for entry in entries: + doc, gen, trans_id, number_of_docs, doc_idx = entry + self.insert_doc_from_source(doc, gen, trans_id, number_of_docs, + doc_idx, sync_id) + self._db.batch_end() + def insert_doc_from_source( self, doc, source_gen, trans_id, number_of_docs=None, doc_idx=None, sync_id=None): @@ -353,10 +193,12 @@ class SyncResource(http_app.SyncResource): :type ensure: bool """ # create or open the database + cache = get_cache_for('db-' + sync_id + self.dbname, expire=120) if ensure: db, self.replica_uid = self.state.ensure_database(self.dbname) else: db = self.state.open_database(self.dbname) + db.init_caching(cache) # validate the information the client has about server replica db.validate_gen_and_trans_id( last_known_generation, last_known_trans_id) @@ -364,6 +206,7 @@ class SyncResource(http_app.SyncResource): self.sync_exch = self.sync_exchange_class( db, self.source_replica_uid, last_known_generation, sync_id) self._sync_id = sync_id + self._staging = [] @http_app.http_method(content_as_args=True) def post_put( @@ -391,9 +234,7 @@ class SyncResource(http_app.SyncResource): :type doc_idx: int """ doc = Document(id, rev, content) - self.sync_exch.insert_doc_from_source( - doc, gen, trans_id, number_of_docs=number_of_docs, - doc_idx=doc_idx, sync_id=self._sync_id) + self._staging.append((doc, gen, trans_id, number_of_docs, doc_idx)) @http_app.http_method(received=int, content_as_args=True) def post_get(self, received): @@ -432,6 +273,7 @@ class SyncResource(http_app.SyncResource): Return the current generation and transaction_id after inserting one incoming document. """ + self.sync_exch.batched_insert_from_source(self._staging, self._sync_id) self.responder.content_type = 'application/x-soledad-sync-response' self.responder.start_response(200) self.responder.start_stream(), |
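In the sync.py change above, post_put() now only appends each incoming document to a staging list, and the staged entries are written in a single couch batch when the response is produced; SoledadBackend.BATCH_SUPPORT is switched on by the new 'batching' config option. The core of that pattern, reduced to a standalone sketch with illustrative parameter names:

    def batched_insert(db, staged, insert_doc_from_source, sync_id):
        # one couch batch per sync session instead of one write per document
        db.batch_start()
        for doc, gen, trans_id, number_of_docs, doc_idx in staged:
            insert_doc_from_source(doc, gen, trans_id,
                                   number_of_docs, doc_idx, sync_id)
        db.batch_end()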