Diffstat (limited to 'common/src/leap/soledad')
24 files changed, 2802 insertions, 2031 deletions
diff --git a/common/src/leap/soledad/common/__init__.py b/common/src/leap/soledad/common/__init__.py index 1ba6ab89..d7f6929c 100644 --- a/common/src/leap/soledad/common/__init__.py +++ b/common/src/leap/soledad/common/__init__.py @@ -47,3 +47,7 @@ __all__ = [ "soledad_assert_type", "__version__", ] + +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions diff --git a/common/src/leap/soledad/common/_version.py b/common/src/leap/soledad/common/_version.py index 62315c76..1168a88d 100644 --- a/common/src/leap/soledad/common/_version.py +++ b/common/src/leap/soledad/common/_version.py @@ -1,13 +1,484 @@ -# This file was generated by the `freeze_debianver` command in setup.py -# Using 'versioneer.py' (0.7+) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. -version_version = '0.7.4' -version_full = '49fd07cde3b1f50dcce85d4e9fcdfc6196f484c4' +# This file is released into the public domain. Generated by +# versioneer-0.16 (https://github.com/warner/python-versioneer) +"""Git implementation of _version.py.""" -def get_versions(default={}, verbose=False): - return {'version': version_version, 'full': version_full} +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
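As an aside: in a tarball produced by "git archive", git's export-subst mechanism expands the $Format:...$ placeholders below in place. A minimal sketch of what the expanded keywords could look like and how a tag falls out of them — the refnames value is invented for illustration, while the hash is the one from the frozen _version.py that this diff removes:

    # hypothetical expanded keywords inside an archived _version.py
    git_refnames = " (HEAD, tag: 0.7.4, master)"
    git_full = "49fd07cde3b1f50dcce85d4e9fcdfc6196f484c4"

    # git_versions_from_keywords() below strips the parentheses, keeps
    # the "tag: "-prefixed entries, and would report version "0.7.4"
    refs = [r.strip() for r in git_refnames.strip().strip("()").split(",")]
    tags = [r[len("tag: "):] for r in refs if r.startswith("tag: ")]
    assert tags == ["0.7.4"]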
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + keywords = {"refnames": git_refnames, "full": git_full} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "None" + cfg.versionfile_source = "src/leap/soledad/common/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + return None + return stdout + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes + both the project name and a version string. + """ + dirname = os.path.basename(root) + if not dirname.startswith(parentdir_prefix): + if verbose: + print("guessing rootdir is '%s', but '%s' doesn't start with " + "prefix '%s'" % (root, dirname, parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None} + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
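To illustrate the grep-style route the comment above describes, the same regexp can be exercised standalone on one frozen line, with no import of the module (the sample value is the revision id from the _version.py this diff removes):

    import re

    line = 'git_full = "49fd07cde3b1f50dcce85d4e9fcdfc6196f484c4"'
    mo = re.search(r'=\s*"(.*)"', line)
    full = mo.group(1) if mo else None  # -> "49fd07cde3b1f5..."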
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs-tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags"} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
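The 'git describe' output parsed here takes one of two shapes: something like "0.7.4-25-g1f2e3d4" (closest tag, commits since it, g-prefixed short hash) or a bare "1f2e3d4" when no tag matches, optionally ending in "-dirty". A small sketch of the parse performed below, with invented sample values:

    import re

    describe = "0.7.4-25-g1f2e3d4-dirty"
    dirty = describe.endswith("-dirty")
    if dirty:
        describe = describe[:describe.rindex("-dirty")]
    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', describe)
    # -> tag "0.7.4", distance 25, short hash "1f2e3d4"; with dirty=True
    # these pieces render in pep440 style as "0.7.4+25.g1f2e3d4.dirty"
    assert (mo.group(1), int(mo.group(2)), mo.group(3)) == ("0.7.4", 25, "1f2e3d4")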
+ """ + if not os.path.exists(os.path.join(root, ".git")): + if verbose: + print("no .git in %s" % root) + raise NotThisMethod("no .git directory") + + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%s*" % tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post.devDISTANCE
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += ".post.dev%d" % pieces["distance"]
+    else:
+        # exception #1
+        rendered = "0.post.dev%d" % pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+    The ".dev0" means dirty. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyways.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+    return rendered
+
+
+def render_pep440_old(pieces):
+    """TAG[.postDISTANCE[.dev0]] .
+
+    The ".dev0" means dirty.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+    The distance/hash is unconditional.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render(pieces, style):
+    """Render the given version pieces into the requested style."""
+    if pieces["error"]:
+        return {"version": "unknown",
+                "full-revisionid": pieces.get("long"),
+                "dirty": None,
+                "error": pieces["error"]}
+
+    if not style or style == "default":
+        style = "pep440"  # the default
+
+    if style == "pep440":
+        rendered = render_pep440(pieces)
+    elif style == "pep440-pre":
+        rendered = render_pep440_pre(pieces)
+    elif style == "pep440-post":
+        rendered = render_pep440_post(pieces)
+    elif style == "pep440-old":
+        rendered = render_pep440_old(pieces)
+    elif style == "git-describe":
+        rendered = render_git_describe(pieces)
+    elif style == "git-describe-long":
+        rendered = render_git_describe_long(pieces)
+    else:
+        raise ValueError("unknown style '%s'" % style)
+
+    return {"version": rendered, "full-revisionid": pieces["long"],
+            "dirty": pieces["dirty"], "error": None}
+
+
+def get_versions():
+    """Get version information or return default if unable to do so."""
+    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE.
If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for i in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree"} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version"} diff --git a/common/src/leap/soledad/common/backend.py b/common/src/leap/soledad/common/backend.py new file mode 100644 index 00000000..53426fb5 --- /dev/null +++ b/common/src/leap/soledad/common/backend.py @@ -0,0 +1,642 @@ +# -*- coding: utf-8 -*- +# backend.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +"""A U1DB generic backend.""" + + +from u1db import vectorclock +from u1db.errors import ( + RevisionConflict, + InvalidDocId, + ConflictedDoc, + DocumentDoesNotExist, + DocumentAlreadyDeleted, +) +from u1db.backends import CommonBackend +from u1db.backends import CommonSyncTarget +from leap.soledad.common.document import ServerDocument + + +class SoledadBackend(CommonBackend): + BATCH_SUPPORT = False + + """ + A U1DB backend implementation. + """ + + def __init__(self, database, replica_uid=None): + """ + Create a new backend. 
+ + :param database: the database implementation + :type database: Database + :param replica_uid: an optional unique replica identifier + :type replica_uid: str + """ + # save params + self._factory = ServerDocument + self._real_replica_uid = None + self._cache = None + self._dbname = database._dbname + self._database = database + self.batching = False + if replica_uid is not None: + self._set_replica_uid(replica_uid) + + def batch_start(self): + if not self.BATCH_SUPPORT: + return + self.batching = True + self.after_batch_callbacks = {} + self._database.batch_start() + if not self._cache: + # batching needs cache + self._cache = {} + self._get_generation() # warm up gen info + + def batch_end(self): + if not self.BATCH_SUPPORT: + return + self.batching = False + self._database.batch_end() + for name in self.after_batch_callbacks: + self.after_batch_callbacks[name]() + self.after_batch_callbacks = None + + @property + def cache(self): + if self._cache is not None: + return self._cache + else: + return {} + + def init_caching(self, cache): + """ + Start using cache by setting internal _cache attribute. + + :param cache: the cache instance, anything that behaves like a dict + :type cache: dict + """ + self._cache = cache + + def get_sync_target(self): + """ + Return a SyncTarget object, for another u1db to synchronize with. + + :return: The sync target. + :rtype: SoledadSyncTarget + """ + return SoledadSyncTarget(self) + + def delete_database(self): + """ + Delete a U1DB database. + """ + self._database.delete_database() + + def close(self): + """ + Release any resources associated with this database. + + :return: True if db was succesfully closed. + :rtype: bool + """ + self._database.close() + return True + + def __del__(self): + """ + Close the database upon garbage collection. + """ + self.close() + + def _set_replica_uid(self, replica_uid): + """ + Force the replica uid to be set. + + :param replica_uid: The new replica uid. + :type replica_uid: str + """ + self._database.set_replica_uid(replica_uid) + self._real_replica_uid = replica_uid + self.cache['replica_uid'] = self._real_replica_uid + + def _get_replica_uid(self): + """ + Get the replica uid. + + :return: The replica uid. + :rtype: str + """ + if self._real_replica_uid is not None: + self.cache['replica_uid'] = self._real_replica_uid + return self._real_replica_uid + if 'replica_uid' in self.cache: + return self.cache['replica_uid'] + self._real_replica_uid = self._database.get_replica_uid() + self._set_replica_uid(self._real_replica_uid) + return self._real_replica_uid + + _replica_uid = property(_get_replica_uid, _set_replica_uid) + + replica_uid = property(_get_replica_uid) + + def _get_generation(self): + """ + Return the current generation. + + :return: The current generation. + :rtype: int + + :raise SoledadError: Raised by database on operation failure + """ + return self._get_generation_info()[0] + + def _get_generation_info(self): + """ + Return the current generation. + + :return: A tuple containing the current generation and transaction id. + :rtype: (int, str) + + :raise SoledadError: Raised by database on operation failure + """ + cur_gen, newest_trans_id = self._database.get_generation_info() + return (cur_gen, newest_trans_id) + + def _get_trans_id_for_gen(self, generation): + """ + Get the transaction id corresponding to a particular generation. + + :param generation: The generation for which to get the transaction id. + :type generation: int + + :return: The transaction id for C{generation}. 
+ :rtype: str + + :raise InvalidGeneration: Raised when the generation does not exist. + + """ + return self._database.get_trans_id_for_gen(generation) + + def _get_transaction_log(self): + """ + This is only for the test suite, it is not part of the api. + + :return: The complete transaction log. + :rtype: [(str, str)] + + """ + return self._database.get_transaction_log() + + def _get_doc(self, doc_id, check_for_conflicts=False): + """ + Extract the document from storage. + + This can return None if the document doesn't exist. + + :param doc_id: The unique document identifier + :type doc_id: str + :param check_for_conflicts: If set to False, then the conflict check + will be skipped. + :type check_for_conflicts: bool + + :return: The document. + :rtype: ServerDocument + """ + return self._database.get_doc(doc_id, check_for_conflicts) + + def get_doc(self, doc_id, include_deleted=False): + """ + Get the JSON string for the given document. + + :param doc_id: The unique document identifier + :type doc_id: str + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise asking for a deleted + document will return None. + :type include_deleted: bool + + :return: A document object. + :rtype: ServerDocument. + """ + doc = self._get_doc(doc_id, check_for_conflicts=True) + if doc is None: + return None + if doc.is_tombstone() and not include_deleted: + return None + return doc + + def get_all_docs(self, include_deleted=False): + """ + Get the JSON content for all documents in the database. + + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted + documents will not be included in the results. + :type include_deleted: bool + + :return: (generation, [ServerDocument]) + The current generation of the database, followed by a list of all + the documents in the database. + :rtype: (int, [ServerDocument]) + """ + return self._database.get_all_docs(include_deleted) + + def _put_doc(self, old_doc, doc): + """ + Put the document in the backend database. + + Note that C{old_doc} must have been fetched with the parameter + C{check_for_conflicts} equal to True, so we can properly update the + new document using the conflict information from the old one. + + :param old_doc: The old document version. + :type old_doc: ServerDocument + :param doc: The document to be put. + :type doc: ServerDocument + """ + self._database.save_document(old_doc, doc, + self._allocate_transaction_id()) + + def put_doc(self, doc): + """ + Update a document. + + If the document currently has conflicts, put will fail. + If the database specifies a maximum document size and the document + exceeds it, put will fail and raise a DocumentTooBig exception. + + :param doc: A Document with new content. + :return: new_doc_rev - The new revision identifier for the document. + The Document object will also be updated. + + :raise InvalidDocId: Raised if the document's id is invalid. + :raise DocumentTooBig: Raised if the document size is too big. + :raise ConflictedDoc: Raised if the document has conflicts. 
+ """ + if doc.doc_id is None: + raise InvalidDocId() + self._check_doc_id(doc.doc_id) + self._check_doc_size(doc) + old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) + if old_doc and old_doc.has_conflicts: + raise ConflictedDoc() + if old_doc and doc.rev is None and old_doc.is_tombstone(): + new_rev = self._allocate_doc_rev(old_doc.rev) + else: + if old_doc is not None: + if old_doc.rev != doc.rev: + raise RevisionConflict() + else: + if doc.rev is not None: + raise RevisionConflict() + new_rev = self._allocate_doc_rev(doc.rev) + doc.rev = new_rev + self._put_doc(old_doc, doc) + return new_rev + + def whats_changed(self, old_generation=0): + """ + Return a list of documents that have changed since old_generation. + + :param old_generation: The generation of the database in the old + state. + :type old_generation: int + + :return: (generation, trans_id, [(doc_id, generation, trans_id),...]) + The current generation of the database, its associated + transaction id, and a list of of changed documents since + old_generation, represented by tuples with for each document + its doc_id and the generation and transaction id corresponding + to the last intervening change and sorted by generation (old + changes first) + :rtype: (int, str, [(str, int, str)]) + """ + return self._database.whats_changed(old_generation) + + def delete_doc(self, doc): + """ + Mark a document as deleted. + + Will abort if the current revision doesn't match doc.rev. + This will also set doc.content to None. + + :param doc: The document to mark as deleted. + :type doc: ServerDocument. + + :raise DocumentDoesNotExist: Raised if the document does not + exist. + :raise RevisionConflict: Raised if the revisions do not match. + :raise DocumentAlreadyDeleted: Raised if the document is + already deleted. + :raise ConflictedDoc: Raised if the doc has conflicts. + """ + old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) + if old_doc is None: + raise DocumentDoesNotExist + if old_doc.rev != doc.rev: + raise RevisionConflict() + if old_doc.is_tombstone(): + raise DocumentAlreadyDeleted + if old_doc.has_conflicts: + raise ConflictedDoc() + new_rev = self._allocate_doc_rev(doc.rev) + doc.rev = new_rev + doc.make_tombstone() + self._put_doc(old_doc, doc) + return new_rev + + def get_doc_conflicts(self, doc_id): + """ + Get the conflicted versions of a document. + + :param doc_id: The document id. + :type doc_id: str + + :return: A list of conflicted versions of the document. + :rtype: list + """ + return self._database.get_doc_conflicts(doc_id) + + def _get_replica_gen_and_trans_id(self, other_replica_uid): + """ + Return the last known generation and transaction id for the other db + replica. + + When you do a synchronization with another replica, the Database keeps + track of what generation the other database replica was at, and what + the associated transaction id was. This is used to determine what data + needs to be sent, and if two databases are claiming to be the same + replica. + + :param other_replica_uid: The identifier for the other replica. + :type other_replica_uid: str + + :return: A tuple containing the generation and transaction id we + encountered during synchronization. If we've never + synchronized with the replica, this is (0, ''). 
+ :rtype: (int, str) + """ + if other_replica_uid in self.cache: + return self.cache[other_replica_uid] + gen, trans_id = \ + self._database.get_replica_gen_and_trans_id(other_replica_uid) + self.cache[other_replica_uid] = (gen, trans_id) + return (gen, trans_id) + + def _set_replica_gen_and_trans_id(self, other_replica_uid, + other_generation, other_transaction_id): + """ + Set the last-known generation and transaction id for the other + database replica. + + We have just performed some synchronization, and we want to track what + generation the other replica was at. See also + _get_replica_gen_and_trans_id. + + :param other_replica_uid: The U1DB identifier for the other replica. + :type other_replica_uid: str + :param other_generation: The generation number for the other replica. + :type other_generation: int + :param other_transaction_id: The transaction id associated with the + generation. + :type other_transaction_id: str + """ + if other_replica_uid is not None and other_generation is not None: + self.cache[other_replica_uid] = (other_generation, + other_transaction_id) + self._database.set_replica_gen_and_trans_id(other_replica_uid, + other_generation, + other_transaction_id) + + def _do_set_replica_gen_and_trans_id( + self, other_replica_uid, other_generation, other_transaction_id): + """ + _put_doc_if_newer from super class is calling it. So we declare this. + + :param other_replica_uid: The U1DB identifier for the other replica. + :type other_replica_uid: str + :param other_generation: The generation number for the other replica. + :type other_generation: int + :param other_transaction_id: The transaction id associated with the + generation. + :type other_transaction_id: str + """ + function = self._set_replica_gen_and_trans_id + args = [other_replica_uid, other_generation, other_transaction_id] + callback = lambda: function(*args) + if self.batching: + self.after_batch_callbacks['set_source_info'] = callback + else: + callback() + + def _force_doc_sync_conflict(self, doc): + """ + Add a conflict and force a document put. + + :param doc: The document to be put. + :type doc: ServerDocument + """ + my_doc = self._get_doc(doc.doc_id) + self._prune_conflicts(doc, vectorclock.VectorClockRev(doc.rev)) + doc.add_conflict(self._factory(doc.doc_id, my_doc.rev, + my_doc.get_json())) + doc.has_conflicts = True + self._put_doc(my_doc, doc) + + def resolve_doc(self, doc, conflicted_doc_revs): + """ + Mark a document as no longer conflicted. + + We take the list of revisions that the client knows about that it is + superseding. This may be a different list from the actual current + conflicts, in which case only those are removed as conflicted. This + may fail if the conflict list is significantly different from the + supplied information. (sync could have happened in the background from + the time you GET_DOC_CONFLICTS until the point where you RESOLVE) + + :param doc: A Document with the new content to be inserted. + :type doc: ServerDocument + :param conflicted_doc_revs: A list of revisions that the new content + supersedes. + :type conflicted_doc_revs: [str] + + :raise SoledadError: Raised by database on operation failure + """ + cur_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) + new_rev = self._ensure_maximal_rev(cur_doc.rev, + conflicted_doc_revs) + superseded_revs = set(conflicted_doc_revs) + doc.rev = new_rev + # this backend stores conflicts as properties of the documents, so we + # have to copy these conflicts over to the document being updated. 
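Before the branch below, a caller's-eye sketch may help; roughly, resolution is driven through the public resolve_doc() API like this (the document id and merged content are invented; the method names are the ones defined in this class):

    # hypothetical resolution round-trip against a SoledadBackend `db`
    conflicted = db.get_doc_conflicts("some-doc-id")   # current + conflicts
    doc = db.get_doc("some-doc-id")
    doc.set_json('{"merged": true}')                   # hand-merged content
    db.resolve_doc(doc, [d.rev for d in conflicted])   # supersede them all
    # any conflict revision not listed above survives, either kept on the
    # stored document or copied back onto `doc`, as the code below shows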
+        if cur_doc.rev in superseded_revs:
+            # the newer doc version will supersede the one in the database, so
+            # we copy conflicts before updating the backend.
+            doc.set_conflicts(cur_doc.get_conflicts())  # copy conflicts over.
+            doc.delete_conflicts(superseded_revs)
+            self._put_doc(cur_doc, doc)
+        else:
+            # the newer doc version does not supersede the one in the
+            # database, so we will add a conflict to the database and copy
+            # those over to the document the user has in her hands.
+            cur_doc.add_conflict(doc)
+            cur_doc.delete_conflicts(superseded_revs)
+            self._put_doc(cur_doc, cur_doc)  # just update conflicts
+            # backend has been updated with current conflicts, now copy them
+            # to the current document.
+            doc.set_conflicts(cur_doc.get_conflicts())
+
+    def _put_doc_if_newer(self, doc, save_conflict, replica_uid, replica_gen,
+                          replica_trans_id='', number_of_docs=None,
+                          doc_idx=None, sync_id=None):
+        """
+        Insert/update document into the database with a given revision.
+
+        This API is used during synchronization operations.
+
+        If a document would conflict and save_conflict is set to True, the
+        content will be selected as the 'current' content for doc.doc_id,
+        even though doc.rev doesn't supersede the currently stored revision.
+        The currently stored document will be added to the list of conflict
+        alternatives for the given doc_id.
+
+        This forces the new content to be 'current' so that we get convergence
+        after synchronizing, even if people don't resolve conflicts. Users can
+        then notice that their content is out of date, update it, and
+        synchronize again. (The alternative is that users could synchronize and
+        think the data has propagated, but their local copy looks fine, and the
+        remote copy is never updated again.)
+
+        :param doc: A document object
+        :type doc: ServerDocument
+        :param save_conflict: If this document is a conflict, do you want to
+                              save it as a conflict, or just ignore it.
+        :type save_conflict: bool
+        :param replica_uid: A unique replica identifier.
+        :type replica_uid: str
+        :param replica_gen: The generation of the replica corresponding to
+                            this document. The replica arguments are optional,
+                            but are used during synchronization.
+        :type replica_gen: int
+        :param replica_trans_id: The transaction_id associated with the
+                                 generation.
+        :type replica_trans_id: str
+        :param number_of_docs: The total number of documents sent on this sync
+                               session.
+        :type number_of_docs: int
+        :param doc_idx: The index of the current document being sent.
+        :type doc_idx: int
+        :param sync_id: The id of the current sync session.
+        :type sync_id: str
+
+        :return: (state, at_gen) - If we don't have doc_id already, or if
+                 doc_rev supersedes the existing document revision, then the
+                 content will be inserted, and state is 'inserted'. If
+                 doc_rev is less than or equal to the existing revision, then
+                 the put is ignored and state is respectively 'superseded' or
+                 'converged'. If doc_rev is not strictly superseded or
+                 supersedes, then state is 'conflicted'. The document will not
+                 be inserted if save_conflict is False. For 'inserted' or
+                 'converged', at_gen is the insertion/current generation.
+        :rtype: (str, int)
+        """
+        if not isinstance(doc, ServerDocument):
+            doc = self._factory(doc.doc_id, doc.rev, doc.get_json())
+        my_doc = self._get_doc(doc.doc_id, check_for_conflicts=True)
+        if my_doc:
+            doc.set_conflicts(my_doc.get_conflicts())
+        return CommonBackend._put_doc_if_newer(self, doc, save_conflict,
+                                               replica_uid, replica_gen,
+                                               replica_trans_id)
+
+    def _put_and_update_indexes(self, cur_doc, doc):
+        self._put_doc(cur_doc, doc)
+
+    def get_docs(self, doc_ids, check_for_conflicts=True,
+                 include_deleted=False):
+        """
+        Get the JSON content for many documents.
+
+        :param doc_ids: A list of document identifiers or None for all.
+        :type doc_ids: list
+        :param check_for_conflicts: If set to False, then the conflict check
+                                    will be skipped, and 'None' will be
+                                    returned instead of True/False.
+        :type check_for_conflicts: bool
+        :param include_deleted: If set to True, deleted documents will be
+                                returned with empty content. Otherwise deleted
+                                documents will not be included in the results.
+        :return: iterable giving the Document object for each document id
+                 in matching doc_ids order.
+        :rtype: iterable
+        """
+        return self._database.get_docs(doc_ids, check_for_conflicts,
+                                       include_deleted)
+
+    def _prune_conflicts(self, doc, doc_vcr):
+        """
+        Prune conflicts that are older than the current document's revision,
+        or whose content matches the current document's content.
+        Originally in u1db.CommonBackend.
+
+        :param doc: The document to have conflicts pruned.
+        :type doc: ServerDocument
+        :param doc_vcr: A vector clock representing the current document's
+                        revision.
+        :type doc_vcr: u1db.vectorclock.VectorClock
+        """
+        if doc.has_conflicts:
+            autoresolved = False
+            c_revs_to_prune = []
+            for c_doc in doc._conflicts:
+                c_vcr = vectorclock.VectorClockRev(c_doc.rev)
+                if doc_vcr.is_newer(c_vcr):
+                    c_revs_to_prune.append(c_doc.rev)
+                elif doc.same_content_as(c_doc):
+                    c_revs_to_prune.append(c_doc.rev)
+                    doc_vcr.maximize(c_vcr)
+                    autoresolved = True
+            if autoresolved:
+                doc_vcr.increment(self._replica_uid)
+                doc.rev = doc_vcr.as_str()
+            doc.delete_conflicts(c_revs_to_prune)
+
+
+class SoledadSyncTarget(CommonSyncTarget):
+
+    """
+    Functionality for using a SoledadBackend as a synchronization target.
+    """
+
+    def get_sync_info(self, source_replica_uid):
+        source_gen, source_trans_id = self._db._get_replica_gen_and_trans_id(
+            source_replica_uid)
+        my_gen, my_trans_id = self._db._get_generation_info()
+        return (
+            self._db._replica_uid, my_gen, my_trans_id, source_gen,
+            source_trans_id)
+
+    def record_sync_info(self, source_replica_uid, source_replica_generation,
+                         source_replica_transaction_id):
+        if self._trace_hook:
+            self._trace_hook('record_sync_info')
+        self._db._set_replica_gen_and_trans_id(
+            source_replica_uid, source_replica_generation,
+            source_replica_transaction_id)
diff --git a/common/src/leap/soledad/common/command.py b/common/src/leap/soledad/common/command.py
new file mode 100644
index 00000000..811bf135
--- /dev/null
+++ b/common/src/leap/soledad/common/command.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+# command.py
+# Copyright (C) 2015 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+"""
+Utility to sanitize and run shell commands.
+"""
+
+
+import subprocess
+
+
+def exec_validated_cmd(cmd, argument, validator=None):
+    """
+    Executes cmd, validating argument with a validator function.
+
+    :param cmd: command.
+    :type cmd: str
+    :param argument: argument.
+    :type argument: str
+    :param validator: optional function to validate argument
+    :type validator: function
+
+    :return: exit code and stdout or stderr (if code != 0)
+    :rtype: (int, str)
+    """
+    if validator and not validator(argument):
+        return 1, "invalid argument"
+    command = cmd.split(' ')
+    command.append(argument)
+    try:
+        process = subprocess.Popen(command, stdout=subprocess.PIPE,
+                                   stderr=subprocess.PIPE)
+    except OSError, e:
+        return 1, e
+    (out, err) = process.communicate()
+    code = process.wait()
+    if code is not 0:
+        return code, err
+    else:
+        return code, out
diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py
deleted file mode 100644
index 1c762036..00000000
--- a/common/src/leap/soledad/common/couch.py
+++ /dev/null
@@ -1,1546 +0,0 @@
-# -*- coding: utf-8 -*-
-# couch.py
-# Copyright (C) 2013 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-"""A U1DB backend that uses CouchDB as its persistence layer."""
-
-
-import json
-import re
-import uuid
-import logging
-import binascii
-import time
-import sys
-import threading
-
-
-from StringIO import StringIO
-from collections import defaultdict
-from urlparse import urljoin
-from contextlib import contextmanager
-
-
-from couchdb.client import Server, Database
-from couchdb.http import (
-    ResourceConflict,
-    ResourceNotFound,
-    ServerError,
-    Session,
-    urljoin as couch_urljoin,
-    Resource,
-)
-from u1db import vectorclock
-from u1db.errors import (
-    DatabaseDoesNotExist,
-    InvalidGeneration,
-    RevisionConflict,
-    InvalidDocId,
-    ConflictedDoc,
-    DocumentDoesNotExist,
-    DocumentAlreadyDeleted,
-    Unauthorized,
-)
-from u1db.backends import CommonBackend, CommonSyncTarget
-from u1db.remote import http_app
-from u1db.remote.server_state import ServerState
-
-
-from leap.soledad.common import ddocs, errors
-from leap.soledad.common.document import SoledadDocument
-
-
-logger = logging.getLogger(__name__)
-
-
-COUCH_TIMEOUT = 120  # timeout for transfers between Soledad server and Couch
-
-
-class InvalidURLError(Exception):
-
-    """
-    Exception raised when Soledad encounters a malformed URL.
-    """
-
-
-class CouchDocument(SoledadDocument):
-
-    """
-    This is the document used for maintaining the Couch backend.
- - A CouchDocument can fetch and manipulate conflicts and also holds a - reference to the couch document revision. This data is used to ensure an - atomic and consistent update of the database. - """ - - def __init__(self, doc_id=None, rev=None, json='{}', has_conflicts=False): - """ - Container for handling a document that is stored in couch backend. - - :param doc_id: The unique document identifier. - :type doc_id: str - :param rev: The revision identifier of the document. - :type rev: str - :param json: The JSON string for this document. - :type json: str - :param has_conflicts: Boolean indicating if this document has conflicts - :type has_conflicts: bool - """ - SoledadDocument.__init__(self, doc_id, rev, json, has_conflicts) - self.couch_rev = None - self.transactions = None - - def get_conflicts(self): - """ - Get the conflicted versions of the document. - - :return: The conflicted versions of the document. - :rtype: [CouchDocument] - """ - return self._conflicts - - def set_conflicts(self, conflicts): - """ - Set the conflicted versions of the document. - - :param conflicts: The conflicted versions of the document. - :type conflicts: list - """ - self._conflicts = conflicts - self.has_conflicts = len(self._conflicts) > 0 - - def add_conflict(self, doc): - """ - Add a conflict to this document. - - :param doc: The conflicted version to be added. - :type doc: CouchDocument - """ - if self._conflicts is None: - raise Exception("Fetch conflicts first!") - self._conflicts.append(doc) - self.has_conflicts = len(self._conflicts) > 0 - - def delete_conflicts(self, conflict_revs): - """ - Delete conflicted versions of this document. - - :param conflict_revs: The conflicted revisions to be deleted. - :type conflict_revs: [str] - """ - if self._conflicts is None: - raise Exception("Fetch conflicts first!") - self._conflicts = filter( - lambda doc: doc.rev not in conflict_revs, - self._conflicts) - self.has_conflicts = len(self._conflicts) > 0 - - def update(self, new_doc): - # update info - self.rev = new_doc.rev - if new_doc.is_tombstone(): - self.is_tombstone() - else: - self.content = new_doc.content - self.has_conflicts = new_doc.has_conflicts - - def prune_conflicts(self, doc_vcr, autoresolved_increment): - """ - Prune conflicts that are older then the current document's revision, or - whose content match to the current document's content. - Originally in u1db.CommonBackend - - :param doc: The document to have conflicts pruned. - :type doc: CouchDocument - :param doc_vcr: A vector clock representing the current document's - revision. - :type doc_vcr: u1db.vectorclock.VectorClock - """ - if self.has_conflicts: - autoresolved = False - c_revs_to_prune = [] - for c_doc in self._conflicts: - c_vcr = vectorclock.VectorClockRev(c_doc.rev) - if doc_vcr.is_newer(c_vcr): - c_revs_to_prune.append(c_doc.rev) - elif self.same_content_as(c_doc): - c_revs_to_prune.append(c_doc.rev) - doc_vcr.maximize(c_vcr) - autoresolved = True - if autoresolved: - doc_vcr.increment(autoresolved_increment) - self.rev = doc_vcr.as_str() - self.delete_conflicts(c_revs_to_prune) - - -# monkey-patch the u1db http app to use CouchDocument -http_app.Document = CouchDocument - - -def raise_missing_design_doc_error(exc, ddoc_path): - """ - Raise an appropriate exception when catching a ResourceNotFound when - accessing a design document. - - :param exc: The exception cought. - :type exc: ResourceNotFound - :param ddoc_path: A list representing the requested path. 
- :type ddoc_path: list - - :raise MissingDesignDocError: Raised when tried to access a missing design - document. - :raise MissingDesignDocListFunctionError: Raised when trying to access a - missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a design - document for an yet unknown reason. - """ - path = "".join(ddoc_path) - if exc.message[1] == 'missing': - raise errors.MissingDesignDocError(path) - elif exc.message[1] == 'missing function' or \ - exc.message[1].startswith('missing lists function'): - raise errors.MissingDesignDocListFunctionError(path) - elif exc.message[1] == 'missing_named_view': - raise errors.MissingDesignDocNamedViewError(path) - elif exc.message[1] == 'deleted': - raise errors.MissingDesignDocDeletedError(path) - # other errors are unknown for now - raise errors.DesignDocUnknownError("%s: %s" % (path, str(exc.message))) - - -def raise_server_error(exc, ddoc_path): - """ - Raise an appropriate exception when catching a ServerError when - accessing a design document. - - :param exc: The exception cought. - :type exc: ResourceNotFound - :param ddoc_path: A list representing the requested path. - :type ddoc_path: list - - :raise MissingDesignDocListFunctionError: Raised when trying to access a - missing list function on a - design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a design - document for an yet unknown reason. - """ - path = "".join(ddoc_path) - msg = exc.message[1][0] - if msg == 'unnamed_error': - raise errors.MissingDesignDocListFunctionError(path) - elif msg == 'TypeError': - if 'point is undefined' in exc.message[1][1]: - raise errors.MissingDesignDocListFunctionError - # other errors are unknown for now - raise errors.DesignDocUnknownError(path) - - -class MultipartWriter(object): - - """ - A multipart writer adapted from python-couchdb's one so we can PUT - documents using couch's multipart PUT. - - This stripped down version does not allow for nested structures, and - contains only the essential things we need to PUT SoledadDocuments to the - couch backend. - """ - - CRLF = '\r\n' - - def __init__(self, fileobj, headers=None, boundary=None): - """ - Initialize the multipart writer. - """ - self.fileobj = fileobj - if boundary is None: - boundary = self._make_boundary() - self._boundary = boundary - self._build_headers('related', headers) - - def add(self, mimetype, content, headers={}): - """ - Add a part to the multipart stream. - """ - self.fileobj.write('--') - self.fileobj.write(self._boundary) - self.fileobj.write(self.CRLF) - headers['Content-Type'] = mimetype - self._write_headers(headers) - if content: - # XXX: throw an exception if a boundary appears in the content?? - self.fileobj.write(content) - self.fileobj.write(self.CRLF) - - def close(self): - """ - Close the multipart stream. - """ - self.fileobj.write('--') - self.fileobj.write(self._boundary) - # be careful not to have anything after '--', otherwise old couch - # versions (including bigcouch) will fail. - self.fileobj.write('--') - - def _make_boundary(self): - """ - Create a boundary to discern multi parts. 
- """ - try: - from uuid import uuid4 - return '==' + uuid4().hex + '==' - except ImportError: - from random import randrange - token = randrange(sys.maxint) - format = '%%0%dd' % len(repr(sys.maxint - 1)) - return '===============' + (format % token) + '==' - - def _write_headers(self, headers): - """ - Write a part header in the buffer stream. - """ - if headers: - for name in sorted(headers.keys()): - value = headers[name] - self.fileobj.write(name) - self.fileobj.write(': ') - self.fileobj.write(value) - self.fileobj.write(self.CRLF) - self.fileobj.write(self.CRLF) - - def _build_headers(self, subtype, headers): - """ - Build the main headers of the multipart stream. - - This is here so we can send headers separete from content using - python-couchdb API. - """ - self.headers = {} - self.headers['Content-Type'] = 'multipart/%s; boundary="%s"' % \ - (subtype, self._boundary) - if headers: - for name in sorted(headers.keys()): - value = headers[name] - self.headers[name] = value - - -@contextmanager -def couch_server(url): - """ - Provide a connection to a couch server and cleanup after use. - - For database creation and deletion we use an ephemeral connection to the - couch server. That connection has to be properly closed, so we provide it - as a context manager. - - :param url: The URL of the Couch server. - :type url: str - """ - session = Session(timeout=COUCH_TIMEOUT) - server = Server(url=url, session=session) - yield server - - -class CouchDatabase(CommonBackend): - - """ - A U1DB implementation that uses CouchDB as its persistence layer. - """ - - # We spawn threads to parallelize the CouchDatabase.get_docs() method - MAX_GET_DOCS_THREADS = 20 - - update_handler_lock = defaultdict(threading.Lock) - sync_info_lock = defaultdict(threading.Lock) - - class _GetDocThread(threading.Thread): - - """ - A thread that gets a document from a database. - - TODO: switch this for a twisted deferred to thread. This depends on - replacing python-couchdb for paisley in this module. - """ - - def __init__(self, db, doc_id, check_for_conflicts, - release_fun): - """ - :param db: The database from where to get the document. - :type db: CouchDatabase - :param doc_id: The doc_id of the document to be retrieved. - :type doc_id: str - :param check_for_conflicts: Whether the get_doc() method should - check for existing conflicts. - :type check_for_conflicts: bool - :param release_fun: A function that releases a semaphore, to be - called after the document is fetched. - :type release_fun: function - """ - threading.Thread.__init__(self) - self._db = db - self._doc_id = doc_id - self._check_for_conflicts = check_for_conflicts - self._release_fun = release_fun - self._doc = None - - def run(self): - """ - Fetch the document, store it as a property, and call the release - function. - """ - self._doc = self._db._get_doc( - self._doc_id, self._check_for_conflicts) - self._release_fun() - - @classmethod - def open_database(cls, url, create, replica_uid=None, ensure_ddocs=False): - """ - Open a U1DB database using CouchDB as backend. - - :param url: the url of the database replica - :type url: str - :param create: should the replica be created if it does not exist? - :type create: bool - :param replica_uid: an optional unique replica identifier - :type replica_uid: str - :param ensure_ddocs: Ensure that the design docs exist on server. 
- :type ensure_ddocs: bool - - :return: the database instance - :rtype: CouchDatabase - """ - # get database from url - m = re.match('(^https?://[^/]+)/(.+)$', url) - if not m: - raise InvalidURLError - url = m.group(1) - dbname = m.group(2) - with couch_server(url) as server: - try: - server[dbname] - except ResourceNotFound: - if not create: - raise DatabaseDoesNotExist() - server.create(dbname) - return cls( - url, dbname, replica_uid=replica_uid, ensure_ddocs=ensure_ddocs) - - def __init__(self, url, dbname, replica_uid=None, ensure_ddocs=True): - """ - Create a new Couch data container. - - :param url: the url of the couch database - :type url: str - :param dbname: the database name - :type dbname: str - :param replica_uid: an optional unique replica identifier - :type replica_uid: str - :param ensure_ddocs: Ensure that the design docs exist on server. - :type ensure_ddocs: bool - """ - # save params - self._url = url - self._session = Session(timeout=COUCH_TIMEOUT) - self._factory = CouchDocument - self._real_replica_uid = None - # configure couch - self._dbname = dbname - self._database = Database( - urljoin(self._url, self._dbname), - self._session) - try: - self._database.info() - except ResourceNotFound: - raise DatabaseDoesNotExist() - if replica_uid is not None: - self._set_replica_uid(replica_uid) - if ensure_ddocs: - self.ensure_ddocs_on_db() - # initialize a thread pool for parallelizing get_docs() - self._sem_pool = threading.BoundedSemaphore( - value=self.MAX_GET_DOCS_THREADS) - - def ensure_ddocs_on_db(self): - """ - Ensure that the design documents used by the backend exist on the - couch database. - """ - for ddoc_name in ['docs', 'syncs', 'transactions']: - try: - self._database.info(ddoc_name) - except ResourceNotFound: - ddoc = json.loads( - binascii.a2b_base64( - getattr(ddocs, ddoc_name))) - self._database.save(ddoc) - - def get_sync_target(self): - """ - Return a SyncTarget object, for another u1db to synchronize with. - - :return: The sync target. - :rtype: CouchSyncTarget - """ - return CouchSyncTarget(self) - - def delete_database(self): - """ - Delete a U1DB CouchDB database. - """ - with couch_server(self._url) as server: - del(server[self._dbname]) - - def close(self): - """ - Release any resources associated with this database. - - :return: True if db was succesfully closed. - :rtype: bool - """ - self._url = None - self._full_commit = None - self._session = None - self._database = None - return True - - def __del__(self): - """ - Close the database upon garbage collection. - """ - self.close() - - def _set_replica_uid(self, replica_uid): - """ - Force the replica uid to be set. - - :param replica_uid: The new replica uid. - :type replica_uid: str - """ - try: - # set on existent config document - doc = self._database['u1db_config'] - doc['replica_uid'] = replica_uid - except ResourceNotFound: - # or create the config document - doc = { - '_id': 'u1db_config', - 'replica_uid': replica_uid, - } - self._database.save(doc) - self._real_replica_uid = replica_uid - - def _get_replica_uid(self): - """ - Get the replica uid. - - :return: The replica uid. 
- :rtype: str - """ - if self._real_replica_uid is not None: - return self._real_replica_uid - try: - # grab replica_uid from server - doc = self._database['u1db_config'] - self._real_replica_uid = doc['replica_uid'] - return self._real_replica_uid - except ResourceNotFound: - # create a unique replica_uid - self._real_replica_uid = uuid.uuid4().hex - self._set_replica_uid(self._real_replica_uid) - return self._real_replica_uid - - _replica_uid = property(_get_replica_uid, _set_replica_uid) - - replica_uid = property(_get_replica_uid) - - def _get_generation(self): - """ - Return the current generation. - - :return: The current generation. - :rtype: int - - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. - """ - # query a couch list function - ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] - res = self._database.resource(*ddoc_path) - try: - response = res.get_json() - return response[2]['generation'] - except ResourceNotFound as e: - raise_missing_design_doc_error(e, ddoc_path) - except ServerError as e: - raise_server_error(e, ddoc_path) - - def _get_generation_info(self): - """ - Return the current generation. - - :return: A tuple containing the current generation and transaction id. - :rtype: (int, str) - - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. - """ - # query a couch list function - ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] - res = self._database.resource(*ddoc_path) - try: - response = res.get_json() - return (response[2]['generation'], response[2]['transaction_id']) - except ResourceNotFound as e: - raise_missing_design_doc_error(e, ddoc_path) - except ServerError as e: - raise_server_error(e, ddoc_path) - - def _get_trans_id_for_gen(self, generation): - """ - Get the transaction id corresponding to a particular generation. - - :param generation: The generation for which to get the transaction id. - :type generation: int - - :return: The transaction id for C{generation}. - :rtype: str - - :raise InvalidGeneration: Raised when the generation does not exist. - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. 
- :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. - """ - if generation == 0: - return '' - # query a couch list function - ddoc_path = [ - '_design', 'transactions', '_list', 'trans_id_for_gen', 'log' - ] - res = self._database.resource(*ddoc_path) - try: - response = res.get_json(gen=generation) - if response[2] == {}: - raise InvalidGeneration - return response[2]['transaction_id'] - except ResourceNotFound as e: - raise_missing_design_doc_error(e, ddoc_path) - except ServerError as e: - raise_server_error(e, ddoc_path) - - def _get_transaction_log(self): - """ - This is only for the test suite, it is not part of the api. - - :return: The complete transaction log. - :rtype: [(str, str)] - - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. - """ - # query a couch view - ddoc_path = ['_design', 'transactions', '_view', 'log'] - res = self._database.resource(*ddoc_path) - try: - response = res.get_json() - return map( - lambda row: (row['id'], row['value']), - response[2]['rows']) - except ResourceNotFound as e: - raise_missing_design_doc_error(e, ddoc_path) - - def _get_doc(self, doc_id, check_for_conflicts=False): - """ - Extract the document from storage. - - This can return None if the document doesn't exist. - - :param doc_id: The unique document identifier - :type doc_id: str - :param check_for_conflicts: If set to False, then the conflict check - will be skipped. - :type check_for_conflicts: bool - - :return: The document. - :rtype: CouchDocument - """ - # get document with all attachments (u1db content and eventual - # conflicts) - try: - result = \ - self._database.resource(doc_id).get_json( - attachments=True)[2] - except ResourceNotFound: - return None - # restrict to u1db documents - if 'u1db_rev' not in result: - return None - doc = self._factory(doc_id, result['u1db_rev']) - # set contents or make tombstone - if '_attachments' not in result \ - or 'u1db_content' not in result['_attachments']: - doc.make_tombstone() - else: - doc.content = json.loads( - binascii.a2b_base64( - result['_attachments']['u1db_content']['data'])) - # determine if there are conflicts - if check_for_conflicts \ - and '_attachments' in result \ - and 'u1db_conflicts' in result['_attachments']: - doc.set_conflicts( - self._build_conflicts( - doc.doc_id, - json.loads(binascii.a2b_base64( - result['_attachments']['u1db_conflicts']['data'])))) - # store couch revision - doc.couch_rev = result['_rev'] - # store transactions - doc.transactions = result['u1db_transactions'] - return doc - - def get_doc(self, doc_id, include_deleted=False): - """ - Get the JSON string for the given document. - - :param doc_id: The unique document identifier - :type doc_id: str - :param include_deleted: If set to True, deleted documents will be - returned with empty content. Otherwise asking for a deleted - document will return None. - :type include_deleted: bool - - :return: A document object. - :rtype: CouchDocument. 
- """ - doc = self._get_doc(doc_id, check_for_conflicts=True) - if doc is None: - return None - if doc.is_tombstone() and not include_deleted: - return None - return doc - - def get_all_docs(self, include_deleted=False): - """ - Get the JSON content for all documents in the database. - - :param include_deleted: If set to True, deleted documents will be - returned with empty content. Otherwise deleted - documents will not be included in the results. - :type include_deleted: bool - - :return: (generation, [CouchDocument]) - The current generation of the database, followed by a list of all - the documents in the database. - :rtype: (int, [CouchDocument]) - """ - - generation = self._get_generation() - results = [] - for row in self._database.view('_all_docs'): - doc = self.get_doc(row.id, include_deleted=include_deleted) - if doc is not None: - results.append(doc) - return (generation, results) - - def _put_doc(self, old_doc, doc): - """ - Put the document in the Couch backend database. - - Note that C{old_doc} must have been fetched with the parameter - C{check_for_conflicts} equal to True, so we can properly update the - new document using the conflict information from the old one. - - :param old_doc: The old document version. - :type old_doc: CouchDocument - :param doc: The document to be put. - :type doc: CouchDocument - - :raise RevisionConflict: Raised when trying to update a document but - couch revisions mismatch. - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. - """ - attachments = {} # we save content and conflicts as attachments - parts = [] # and we put it using couch's multipart PUT - # save content as attachment - if doc.is_tombstone() is False: - content = doc.get_json() - attachments['u1db_content'] = { - 'follows': True, - 'content_type': 'application/octet-stream', - 'length': len(content), - } - parts.append(content) - # save conflicts as attachment - if doc.has_conflicts is True: - conflicts = json.dumps( - map(lambda cdoc: (cdoc.rev, cdoc.content), - doc.get_conflicts())) - attachments['u1db_conflicts'] = { - 'follows': True, - 'content_type': 'application/octet-stream', - 'length': len(conflicts), - } - parts.append(conflicts) - # store old transactions, if any - transactions = old_doc.transactions[:] if old_doc is not None else [] - # create a new transaction id and timestamp it so the transaction log - # is consistent when querying the database. - transactions.append( - # here we store milliseconds to keep consistent with javascript - # Date.prototype.getTime() which was used before inside a couchdb - # update handler. 
- (int(time.time() * 1000), - self._allocate_transaction_id())) - # build the couch document - couch_doc = { - '_id': doc.doc_id, - 'u1db_rev': doc.rev, - 'u1db_transactions': transactions, - '_attachments': attachments, - } - # if we are updating a doc we have to add the couch doc revision - if old_doc is not None: - couch_doc['_rev'] = old_doc.couch_rev - # prepare the multipart PUT - buf = StringIO() - envelope = MultipartWriter(buf) - envelope.add('application/json', json.dumps(couch_doc)) - for part in parts: - envelope.add('application/octet-stream', part) - envelope.close() - # try to save and fail if there's a revision conflict - try: - resource = self._new_resource() - resource.put_json( - doc.doc_id, body=buf.getvalue(), headers=envelope.headers) - except ResourceConflict: - raise RevisionConflict() - - def put_doc(self, doc): - """ - Update a document. - - If the document currently has conflicts, put will fail. - If the database specifies a maximum document size and the document - exceeds it, put will fail and raise a DocumentTooBig exception. - - :param doc: A Document with new content. - :return: new_doc_rev - The new revision identifier for the document. - The Document object will also be updated. - - :raise InvalidDocId: Raised if the document's id is invalid. - :raise DocumentTooBig: Raised if the document size is too big. - :raise ConflictedDoc: Raised if the document has conflicts. - """ - if doc.doc_id is None: - raise InvalidDocId() - self._check_doc_id(doc.doc_id) - self._check_doc_size(doc) - old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) - if old_doc and old_doc.has_conflicts: - raise ConflictedDoc() - if old_doc and doc.rev is None and old_doc.is_tombstone(): - new_rev = self._allocate_doc_rev(old_doc.rev) - else: - if old_doc is not None: - if old_doc.rev != doc.rev: - raise RevisionConflict() - else: - if doc.rev is not None: - raise RevisionConflict() - new_rev = self._allocate_doc_rev(doc.rev) - doc.rev = new_rev - self._put_doc(old_doc, doc) - return new_rev - - def whats_changed(self, old_generation=0): - """ - Return a list of documents that have changed since old_generation. - - :param old_generation: The generation of the database in the old - state. - :type old_generation: int - - :return: (generation, trans_id, [(doc_id, generation, trans_id),...]) - The current generation of the database, its associated - transaction id, and a list of of changed documents since - old_generation, represented by tuples with for each document - its doc_id and the generation and transaction id corresponding - to the last intervening change and sorted by generation (old - changes first) - :rtype: (int, str, [(str, int, str)]) - - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. 
- """ - # query a couch list function - ddoc_path = [ - '_design', 'transactions', '_list', 'whats_changed', 'log' - ] - res = self._database.resource(*ddoc_path) - try: - response = res.get_json(old_gen=old_generation) - results = map( - lambda row: - (row['generation'], row['doc_id'], row['transaction_id']), - response[2]['transactions']) - results.reverse() - cur_gen = old_generation - seen = set() - changes = [] - newest_trans_id = '' - for generation, doc_id, trans_id in results: - if doc_id not in seen: - changes.append((doc_id, generation, trans_id)) - seen.add(doc_id) - if changes: - cur_gen = changes[0][1] # max generation - newest_trans_id = changes[0][2] - changes.reverse() - else: - cur_gen, newest_trans_id = self._get_generation_info() - - return cur_gen, newest_trans_id, changes - except ResourceNotFound as e: - raise_missing_design_doc_error(e, ddoc_path) - except ServerError as e: - raise_server_error(e, ddoc_path) - - def delete_doc(self, doc): - """ - Mark a document as deleted. - - Will abort if the current revision doesn't match doc.rev. - This will also set doc.content to None. - - :param doc: The document to mark as deleted. - :type doc: CouchDocument. - - :raise DocumentDoesNotExist: Raised if the document does not - exist. - :raise RevisionConflict: Raised if the revisions do not match. - :raise DocumentAlreadyDeleted: Raised if the document is - already deleted. - :raise ConflictedDoc: Raised if the doc has conflicts. - """ - old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) - if old_doc is None: - raise DocumentDoesNotExist - if old_doc.rev != doc.rev: - raise RevisionConflict() - if old_doc.is_tombstone(): - raise DocumentAlreadyDeleted - if old_doc.has_conflicts: - raise ConflictedDoc() - new_rev = self._allocate_doc_rev(doc.rev) - doc.rev = new_rev - doc.make_tombstone() - self._put_doc(old_doc, doc) - return new_rev - - def _build_conflicts(self, doc_id, attached_conflicts): - """ - Build the conflicted documents list from the conflicts attachment - fetched from a couch document. - - :param attached_conflicts: The document's conflicts as fetched from a - couch document attachment. - :type attached_conflicts: dict - """ - conflicts = [] - for doc_rev, content in attached_conflicts: - doc = self._factory(doc_id, doc_rev) - if content is None: - doc.make_tombstone() - else: - doc.content = content - conflicts.append(doc) - return conflicts - - def get_doc_conflicts(self, doc_id, couch_rev=None): - """ - Get the conflicted versions of a document. - - If the C{couch_rev} parameter is not None, conflicts for a specific - document's couch revision are returned. - - :param couch_rev: The couch document revision. - :type couch_rev: str - - :return: A list of conflicted versions of the document. - :rtype: list - """ - # request conflicts attachment from server - params = {} - conflicts = [] - if couch_rev is not None: - params['rev'] = couch_rev # restric document's couch revision - else: - # TODO: move into resource logic! - first_entry = self._get_doc(doc_id, check_for_conflicts=True) - conflicts.append(first_entry) - resource = self._database.resource(doc_id, 'u1db_conflicts') - try: - response = resource.get_json(**params) - return conflicts + self._build_conflicts( - doc_id, json.loads(response[2].read())) - except ResourceNotFound: - return [] - - def _get_replica_gen_and_trans_id(self, other_replica_uid): - """ - Return the last known generation and transaction id for the other db - replica. 
- - When you do a synchronization with another replica, the Database keeps - track of what generation the other database replica was at, and what - the associated transaction id was. This is used to determine what data - needs to be sent, and if two databases are claiming to be the same - replica. - - :param other_replica_uid: The identifier for the other replica. - :type other_replica_uid: str - - :return: A tuple containing the generation and transaction id we - encountered during synchronization. If we've never - synchronized with the replica, this is (0, ''). - :rtype: (int, str) - """ - # query a couch view - result = self._database.view('syncs/log') - if len(result[other_replica_uid].rows) == 0: - return (0, '') - return ( - result[other_replica_uid].rows[0]['value']['known_generation'], - result[other_replica_uid].rows[0]['value']['known_transaction_id'] - ) - - def _set_replica_gen_and_trans_id(self, other_replica_uid, - other_generation, other_transaction_id, - number_of_docs=None, doc_idx=None, - sync_id=None): - """ - Set the last-known generation and transaction id for the other - database replica. - - We have just performed some synchronization, and we want to track what - generation the other replica was at. See also - _get_replica_gen_and_trans_id. - - :param other_replica_uid: The U1DB identifier for the other replica. - :type other_replica_uid: str - :param other_generation: The generation number for the other replica. - :type other_generation: int - :param other_transaction_id: The transaction id associated with the - generation. - :type other_transaction_id: str - :param number_of_docs: The total amount of documents sent on this sync - session. - :type number_of_docs: int - :param doc_idx: The index of the current document being sent. - :type doc_idx: int - :param sync_id: The id of the current sync session. - :type sync_id: str - """ - if other_replica_uid is not None and other_generation is not None: - self._do_set_replica_gen_and_trans_id( - other_replica_uid, other_generation, other_transaction_id, - number_of_docs=number_of_docs, doc_idx=doc_idx, - sync_id=sync_id) - - def _do_set_replica_gen_and_trans_id( - self, other_replica_uid, other_generation, other_transaction_id, - number_of_docs=None, doc_idx=None, sync_id=None): - """ - Set the last-known generation and transaction id for the other - database replica. - - We have just performed some synchronization, and we want to track what - generation the other replica was at. See also - _get_replica_gen_and_trans_id. - - :param other_replica_uid: The U1DB identifier for the other replica. - :type other_replica_uid: str - :param other_generation: The generation number for the other replica. - :type other_generation: int - :param other_transaction_id: The transaction id associated with the - generation. - :type other_transaction_id: str - :param number_of_docs: The total amount of documents sent on this sync - session. - :type number_of_docs: int - :param doc_idx: The index of the current document being sent. - :type doc_idx: int - :param sync_id: The id of the current sync session. - :type sync_id: str - - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. 
- :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. - """ - # query a couch update function - ddoc_path = ['_design', 'syncs', '_update', 'put', 'u1db_sync_log'] - res = self._database.resource(*ddoc_path) - try: - with CouchDatabase.update_handler_lock[self._get_replica_uid()]: - body = { - 'other_replica_uid': other_replica_uid, - 'other_generation': other_generation, - 'other_transaction_id': other_transaction_id, - } - if number_of_docs is not None: - body['number_of_docs'] = number_of_docs - if doc_idx is not None: - body['doc_idx'] = doc_idx - if sync_id is not None: - body['sync_id'] = sync_id - res.put_json( - body=body, - headers={'content-type': 'application/json'}) - except ResourceNotFound as e: - raise_missing_design_doc_error(e, ddoc_path) - - def _force_doc_sync_conflict(self, doc): - """ - Add a conflict and force a document put. - - :param doc: The document to be put. - :type doc: CouchDocument - """ - my_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) - doc.prune_conflicts( - vectorclock.VectorClockRev(doc.rev), self._replica_uid) - doc.add_conflict(my_doc) - self._put_doc(my_doc, doc) - - def resolve_doc(self, doc, conflicted_doc_revs): - """ - Mark a document as no longer conflicted. - - We take the list of revisions that the client knows about that it is - superseding. This may be a different list from the actual current - conflicts, in which case only those are removed as conflicted. This - may fail if the conflict list is significantly different from the - supplied information. (sync could have happened in the background from - the time you GET_DOC_CONFLICTS until the point where you RESOLVE) - - :param doc: A Document with the new content to be inserted. - :type doc: CouchDocument - :param conflicted_doc_revs: A list of revisions that the new content - supersedes. - :type conflicted_doc_revs: [str] - - :raise MissingDesignDocError: Raised when tried to access a missing - design document. - :raise MissingDesignDocListFunctionError: Raised when trying to access - a missing list function on a - design document. - :raise MissingDesignDocNamedViewError: Raised when trying to access a - missing named view on a design - document. - :raise MissingDesignDocDeletedError: Raised when trying to access a - deleted design document. - :raise MissingDesignDocUnknownError: Raised when failed to access a - design document for an yet - unknown reason. - """ - cur_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) - new_rev = self._ensure_maximal_rev(cur_doc.rev, - conflicted_doc_revs) - superseded_revs = set(conflicted_doc_revs) - doc.rev = new_rev - # this backend stores conflicts as properties of the documents, so we - # have to copy these conflicts over to the document being updated. - if cur_doc.rev in superseded_revs: - # the newer doc version will supersede the one in the database, so - # we copy conflicts before updating the backend. - doc.set_conflicts(cur_doc.get_conflicts()) # copy conflicts over. - doc.delete_conflicts(superseded_revs) - self._put_doc(cur_doc, doc) - else: - # the newer doc version does not supersede the one in the - # database, so we will add a conflict to the database and copy - # those over to the document the user has in her hands. 
- cur_doc.add_conflict(doc) - cur_doc.delete_conflicts(superseded_revs) - self._put_doc(cur_doc, cur_doc) # just update conflicts - # backend has been updated with current conflicts, now copy them - # to the current document. - doc.set_conflicts(cur_doc.get_conflicts()) - - def _put_doc_if_newer(self, doc, save_conflict, replica_uid, replica_gen, - replica_trans_id='', number_of_docs=None, - doc_idx=None, sync_id=None): - """ - Insert/update document into the database with a given revision. - - This api is used during synchronization operations. - - If a document would conflict and save_conflict is set to True, the - content will be selected as the 'current' content for doc.doc_id, - even though doc.rev doesn't supersede the currently stored revision. - The currently stored document will be added to the list of conflict - alternatives for the given doc_id. - - This forces the new content to be 'current' so that we get convergence - after synchronizing, even if people don't resolve conflicts. Users can - then notice that their content is out of date, update it, and - synchronize again. (The alternative is that users could synchronize and - think the data has propagated, but their local copy looks fine, and the - remote copy is never updated again.) - - :param doc: A document object - :type doc: CouchDocument - :param save_conflict: If this document is a conflict, do you want to - save it as a conflict, or just ignore it. - :type save_conflict: bool - :param replica_uid: A unique replica identifier. - :type replica_uid: str - :param replica_gen: The generation of the replica corresponding to the - this document. The replica arguments are optional, - but are used during synchronization. - :type replica_gen: int - :param replica_trans_id: The transaction_id associated with the - generation. - :type replica_trans_id: str - :param number_of_docs: The total amount of documents sent on this sync - session. - :type number_of_docs: int - :param doc_idx: The index of the current document being sent. - :type doc_idx: int - :param sync_id: The id of the current sync session. - :type sync_id: str - - :return: (state, at_gen) - If we don't have doc_id already, or if - doc_rev supersedes the existing document revision, then the - content will be inserted, and state is 'inserted'. If - doc_rev is less than or equal to the existing revision, then - the put is ignored and state is respecitvely 'superseded' or - 'converged'. If doc_rev is not strictly superseded or - supersedes, then state is 'conflicted'. The document will not - be inserted if save_conflict is False. For 'inserted' or - 'converged', at_gen is the insertion/current generation. - :rtype: (str, int) - """ - if not isinstance(doc, CouchDocument): - doc = self._factory(doc.doc_id, doc.rev, doc.get_json()) - self._save_source_info(replica_uid, replica_gen, - replica_trans_id, number_of_docs, - doc_idx, sync_id) - my_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) - if my_doc is not None: - my_doc.set_conflicts( - self.get_doc_conflicts(my_doc.doc_id, my_doc.couch_rev)) - state, save_doc = _process_incoming_doc( - my_doc, doc, save_conflict, self.replica_uid) - if save_doc: - self._put_doc(my_doc, save_doc) - doc.update(save_doc) - return state, self._get_generation() - - def _save_source_info(self, replica_uid, replica_gen, replica_trans_id, - number_of_docs, doc_idx, sync_id): - """ - Validate and save source information. 
- """ - self._validate_source(replica_uid, replica_gen, replica_trans_id) - self._set_replica_gen_and_trans_id( - replica_uid, replica_gen, replica_trans_id, - number_of_docs=number_of_docs, doc_idx=doc_idx, - sync_id=sync_id) - - def get_docs(self, doc_ids, check_for_conflicts=True, - include_deleted=False): - """ - Get the JSON content for many documents. - - :param doc_ids: A list of document identifiers. - :type doc_ids: list - :param check_for_conflicts: If set to False, then the conflict check - will be skipped, and 'None' will be - returned instead of True/False. - :type check_for_conflictsa: bool - :param include_deleted: If set to True, deleted documents will be - returned with empty content. Otherwise deleted - documents will not be included in the results. - :return: iterable giving the Document object for each document id - in matching doc_ids order. - :rtype: iterable - """ - # Workaround for: - # - # http://bugs.python.org/issue7980 - # https://leap.se/code/issues/5449 - # - # python-couchdb uses time.strptime, which is not thread safe. In - # order to avoid the problem described on the issues above, we preload - # strptime here by evaluating the conversion of an arbitrary date. - # This will not be needed when/if we switch from python-couchdb to - # paisley. - time.strptime('Mar 8 1917', '%b %d %Y') - # spawn threads to retrieve docs - threads = [] - for doc_id in doc_ids: - self._sem_pool.acquire() - t = self._GetDocThread(self, doc_id, check_for_conflicts, - self._sem_pool.release) - t.start() - threads.append(t) - # join threads and yield docs - for t in threads: - t.join() - if t._doc.is_tombstone() and not include_deleted: - continue - yield t._doc - - def _prune_conflicts(self, doc, doc_vcr): - """ - Overrides original method, but it is implemented elsewhere for - simplicity. - """ - doc.prune_conflicts(doc_vcr, self._replica_uid) - - def _new_resource(self, *path): - """ - Return a new resource for accessing a couch database. - - :return: A resource for accessing a couch database. - :rtype: couchdb.http.Resource - """ - # Workaround for: https://leap.se/code/issues/5448 - url = couch_urljoin(self._database.resource.url, *path) - resource = Resource(url, Session(timeout=COUCH_TIMEOUT)) - resource.credentials = self._database.resource.credentials - resource.headers = self._database.resource.headers.copy() - return resource - - -class CouchSyncTarget(CommonSyncTarget): - - """ - Functionality for using a CouchDatabase as a synchronization target. - """ - - def get_sync_info(self, source_replica_uid): - source_gen, source_trans_id = self._db._get_replica_gen_and_trans_id( - source_replica_uid) - my_gen, my_trans_id = self._db._get_generation_info() - return ( - self._db._replica_uid, my_gen, my_trans_id, source_gen, - source_trans_id) - - def record_sync_info(self, source_replica_uid, source_replica_generation, - source_replica_transaction_id): - if self._trace_hook: - self._trace_hook('record_sync_info') - self._db._set_replica_gen_and_trans_id( - source_replica_uid, source_replica_generation, - source_replica_transaction_id) - - -class CouchServerState(ServerState): - - """ - Inteface of the WSGI server with the CouchDB backend. - """ - - def __init__(self, couch_url): - """ - Initialize the couch server state. - - :param couch_url: The URL for the couch database. - :type couch_url: str - """ - self.couch_url = couch_url - - def open_database(self, dbname): - """ - Open a couch database. - - :param dbname: The name of the database to open. 
- :type dbname: str - - :return: The CouchDatabase object. - :rtype: CouchDatabase - """ - return CouchDatabase( - self.couch_url, - dbname, - ensure_ddocs=False) - - def ensure_database(self, dbname): - """ - Ensure couch database exists. - - Usually, this method is used by the server to ensure the existence of - a database. In our setup, the Soledad user that accesses the underlying - couch server should never have permission to create (or delete) - databases. But, in case it ever does, by raising an exception here we - have one more guarantee that no modified client will be able to - enforce creation of a database when syncing. - - :param dbname: The name of the database to ensure. - :type dbname: str - - :raise Unauthorized: Always, because Soledad server is not allowed to - create databases. - """ - raise Unauthorized() - - def delete_database(self, dbname): - """ - Delete couch database. - - :param dbname: The name of the database to delete. - :type dbname: str - - :raise Unauthorized: Always, because Soledad server is not allowed to - delete databases. - """ - raise Unauthorized() - - -def _process_incoming_doc(my_doc, other_doc, save_conflict, replica_uid): - """ - Check document, save and return state. - """ - # at this point, `doc` has arrived from the other syncing party, and - # we will decide what to do with it. - # First, we prepare the arriving doc to update couch database. - new_doc = CouchDocument( - other_doc.doc_id, other_doc.rev, other_doc.get_json()) - if my_doc is None: - return 'inserted', new_doc - new_doc.couch_rev = my_doc.couch_rev - new_doc.set_conflicts(my_doc.get_conflicts()) - # fetch conflicts because we will eventually manipulate them - # from now on, it works just like u1db sqlite backend - doc_vcr = vectorclock.VectorClockRev(new_doc.rev) - cur_vcr = vectorclock.VectorClockRev(my_doc.rev) - if doc_vcr.is_newer(cur_vcr): - rev = new_doc.rev - new_doc.prune_conflicts(doc_vcr, replica_uid) - if new_doc.rev != rev: - # conflicts have been autoresolved - return 'superseded', new_doc - else: - return'inserted', new_doc - elif new_doc.rev == my_doc.rev: - # magical convergence - return 'converged', None - elif cur_vcr.is_newer(doc_vcr): - # Don't add this to seen_ids, because we have something newer, - # so we should send it back, and we should not generate a - # conflict - other_doc.update(new_doc) - return 'superseded', None - elif my_doc.same_content_as(new_doc): - # the documents have been edited to the same thing at both ends - doc_vcr.maximize(cur_vcr) - doc_vcr.increment(replica_uid) - new_doc.rev = doc_vcr.as_str() - return 'superseded', new_doc - else: - if save_conflict: - new_doc.prune_conflicts( - vectorclock.VectorClockRev(new_doc.rev), replica_uid) - new_doc.add_conflict(my_doc) - return 'conflicted', new_doc - other_doc.update(new_doc) - return 'conflicted', None diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py new file mode 100644 index 00000000..18ed8a19 --- /dev/null +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -0,0 +1,798 @@ +# -*- coding: utf-8 -*- +# __init__.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +"""A U1DB backend that uses CouchDB as its persistence layer.""" + + +import json +import re +import uuid +import binascii +import time + + +from StringIO import StringIO +from urlparse import urljoin +from contextlib import contextmanager +from multiprocessing.pool import ThreadPool + + +from couchdb.client import Server, Database +from couchdb.http import ( + ResourceConflict, + ResourceNotFound, + ServerError, + Session, + urljoin as couch_urljoin, + Resource, +) +from u1db.errors import ( + DatabaseDoesNotExist, + InvalidGeneration, + RevisionConflict, +) +from u1db.remote import http_app + + +from leap.soledad.common import ddocs +from .errors import raise_server_error +from .errors import raise_missing_design_doc_error +from .support import MultipartWriter +from leap.soledad.common.errors import InvalidURLError +from leap.soledad.common.document import ServerDocument +from leap.soledad.common.backend import SoledadBackend + + +COUCH_TIMEOUT = 120 # timeout for transfers between Soledad server and Couch + + +def list_users_dbs(couch_url): + """ + Retrieve a list of all databases on CouchDB whose names start with + 'user-'. Those databases belong to users, so the list will contain all + the database names in the form of 'user-{uuid4}'. + + :param couch_url: The couch url with needed credentials + :type couch_url: str + + :return: The list of all database names from users. + :rtype: [str] + """ + with couch_server(couch_url) as server: + users = [dbname for dbname in server if dbname.startswith('user-')] + return users + + +# monkey-patch the u1db http app to use ServerDocument +http_app.Document = ServerDocument + + +@contextmanager +def couch_server(url): + """ + Provide a connection to a couch server and clean up after use. + + For database creation and deletion we use an ephemeral connection to the + couch server. That connection has to be properly closed, so we provide it + as a context manager. + + :param url: The URL of the Couch server. + :type url: str + """ + session = Session(timeout=COUCH_TIMEOUT) + server = Server(url=url, full_commit=False, session=session) + yield server + + +THREAD_POOL = ThreadPool(20) + + +class CouchDatabase(object): + """ + Holds CouchDB-related code. + This class gives methods to encapsulate database operations and hide + CouchDB details from backend code. + """ + + @classmethod + def open_database(cls, url, create, ensure_ddocs=False, replica_uid=None, + database_security=None): + """ + Open a U1DB database using CouchDB as backend. + + :param url: the url of the database replica + :type url: str + :param create: should the replica be created if it does not exist? + :type create: bool + :param replica_uid: an optional unique replica identifier + :type replica_uid: str + :param ensure_ddocs: Ensure that the design docs exist on server. + :type ensure_ddocs: bool + :param database_security: security rules as CouchDB security doc + :type database_security: dict + + :return: the database instance + :rtype: SoledadBackend + + :raise DatabaseDoesNotExist: Raised if database does not exist.
+ """ + # get database from url + m = re.match('(^https?://[^/]+)/(.+)$', url) + if not m: + raise InvalidURLError + url = m.group(1) + dbname = m.group(2) + with couch_server(url) as server: + if dbname not in server: + if create: + server.create(dbname) + else: + raise DatabaseDoesNotExist() + db = cls(url, + dbname, ensure_ddocs=ensure_ddocs, + database_security=database_security) + return SoledadBackend( + db, replica_uid=replica_uid) + + def __init__(self, url, dbname, ensure_ddocs=True, + database_security=None): + """ + :param url: Couch server URL with necessary credentials + :type url: string + :param dbname: Couch database name + :type dbname: string + :param ensure_ddocs: Ensure that the design docs exist on server. + :type ensure_ddocs: bool + :param database_security: security rules as CouchDB security doc + :type database_security: dict + """ + self._session = Session(timeout=COUCH_TIMEOUT) + self._url = url + self._dbname = dbname + self._database = self.get_couch_database(url, dbname) + self.batching = False + self.batch_generation = None + self.batch_docs = {} + if ensure_ddocs: + self.ensure_ddocs_on_db() + self.ensure_security_ddoc(database_security) + + def batch_start(self): + self.batching = True + self.batch_generation = self.get_generation_info() + ids = set(row.id for row in self._database.view('_all_docs')) + self.batched_ids = ids + + def batch_end(self): + self.batching = False + self.batch_generation = None + self.__perform_batch() + + def get_couch_database(self, url, dbname): + """ + Generate a couchdb.Database instance given a url and dbname. + + :param url: CouchDB's server url with credentials + :type url: str + :param dbname: Database name + :type dbname: str + + :return: couch library database instance + :rtype: couchdb.Database + + :raise DatabaseDoesNotExist: Raised if database does not exist. + """ + try: + return Database( + urljoin(url, dbname), + self._session) + except ResourceNotFound: + raise DatabaseDoesNotExist() + + def ensure_ddocs_on_db(self): + """ + Ensure that the design documents used by the backend exist on the + couch database. + """ + for ddoc_name in ['docs', 'syncs', 'transactions']: + try: + self.json_from_resource(['_design'] + + ddoc_name.split('/') + ['_info'], + check_missing_ddoc=False) + except ResourceNotFound: + ddoc = json.loads( + binascii.a2b_base64( + getattr(ddocs, ddoc_name))) + self._database.save(ddoc) + + def ensure_security_ddoc(self, security_config=None): + """ + Make sure that only soledad user is able to access this database as + an unprivileged member, meaning that administration access will + be forbidden even inside an user database. + The goal is to make sure that only the lowest access level is given + to the unprivileged CouchDB user set on the server process. 
+ This is achieved by creating a _security design document, see: + http://docs.couchdb.org/en/latest/api/database/security.html + + :param security_config: security configuration parsed from conf file + :type security_config: dict + """ + security_config = security_config or {} + security = self._database.resource.get_json('_security')[2] + security['members'] = {'names': [], 'roles': []} + security['members']['names'] = security_config.get('members', + ['soledad']) + security['members']['roles'] = security_config.get('members_roles', []) + security['admins'] = {'names': [], 'roles': []} + security['admins']['names'] = security_config.get('admins', []) + security['admins']['roles'] = security_config.get('admins_roles', []) + self._database.resource.put_json('_security', body=security) + + def delete_database(self): + """ + Delete a U1DB CouchDB database. + """ + with couch_server(self._url) as server: + del(server[self._dbname]) + + def set_replica_uid(self, replica_uid): + """ + Force the replica uid to be set. + + :param replica_uid: The new replica uid. + :type replica_uid: str + """ + try: + # set on existent config document + doc = self._database['u1db_config'] + doc['replica_uid'] = replica_uid + except ResourceNotFound: + # or create the config document + doc = { + '_id': 'u1db_config', + 'replica_uid': replica_uid, + } + self._database.save(doc) + + def get_replica_uid(self): + """ + Get the replica uid. + + :return: The replica uid. + :rtype: str + """ + try: + # grab replica_uid from server + doc = self._database['u1db_config'] + replica_uid = doc['replica_uid'] + return replica_uid + except ResourceNotFound: + # create a unique replica_uid + replica_uid = uuid.uuid4().hex + self.set_replica_uid(replica_uid) + return replica_uid + + def close(self): + self._database = None + + def get_all_docs(self, include_deleted=False): + """ + Get the JSON content for all documents in the database. + + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted + documents will not be included in the results. + :type include_deleted: bool + + :return: (generation, [ServerDocument]) + The current generation of the database, followed by a list of all + the documents in the database. + :rtype: (int, [ServerDocument]) + """ + + generation, _ = self.get_generation_info() + results = list(self.get_docs(self._database, + include_deleted=include_deleted)) + return (generation, results) + + def get_docs(self, doc_ids, check_for_conflicts=True, + include_deleted=False): + """ + Get the JSON content for many documents. + + :param doc_ids: A list of document identifiers or None for all. + :type doc_ids: list + :param check_for_conflicts: If set to False, then the conflict check + will be skipped, and 'None' will be + returned instead of True/False. + :type check_for_conflicts: bool + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted + documents will not be included in the results. + :return: iterable giving the Document object for each document id + in matching doc_ids order. + :rtype: iterable + """ + # Workaround for: + # + # http://bugs.python.org/issue7980 + # https://leap.se/code/issues/5449 + # + # python-couchdb uses time.strptime, which is not thread safe. In + # order to avoid the problem described on the issues above, we preload + # strptime here by evaluating the conversion of an arbitrary date. + # This will not be needed when/if we switch from python-couchdb to + # paisley. 
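+ # (The date below is arbitrary; the call is made only to preload + # time.strptime once before any worker threads are spawned.)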
+ time.strptime('Mar 8 1917', '%b %d %Y') + get_one = lambda doc_id: self.get_doc(doc_id, check_for_conflicts) + docs = [THREAD_POOL.apply_async(get_one, [doc_id]) + for doc_id in doc_ids] + for doc in docs: + doc = doc.get() + if not doc or not include_deleted and doc.is_tombstone(): + continue + yield doc + + def get_doc(self, doc_id, check_for_conflicts=False): + """ + Extract the document from storage. + + This can return None if the document doesn't exist. + + :param doc_id: The unique document identifier + :type doc_id: str + :param check_for_conflicts: If set to False, then the conflict check + will be skipped. + :type check_for_conflicts: bool + + :return: The document. + :rtype: ServerDocument + """ + doc_from_batch = self.__check_batch_before_get(doc_id) + if doc_from_batch: + return doc_from_batch + if self.batching and doc_id not in self.batched_ids: + return None + if doc_id not in self._database: + return None + # get document with all attachments (u1db content and eventual + # conflicts) + result = self.json_from_resource([doc_id], attachments=True) + return self.__parse_doc_from_couch(result, doc_id, check_for_conflicts) + + def __check_batch_before_get(self, doc_id): + """ + If doc_id is staged for batching, then we need to commit the batch + before going ahead. This avoids consistency problems, like trying to + get a document that isn't persisted and processing like it is missing. + + :param doc_id: The unique document identifier + :type doc_id: str + """ + if doc_id in self.batch_docs: + couch_doc = self.batch_docs[doc_id] + rev = self.__perform_batch(doc_id) + couch_doc['_rev'] = rev + self.batched_ids.add(doc_id) + return self.__parse_doc_from_couch(couch_doc, doc_id, True) + return None + + def __perform_batch(self, doc_id=None): + status = self._database.update(self.batch_docs.values()) + rev = None + for ok, stored_doc_id, rev_or_error in status: + if not ok: + error = rev_or_error + if type(error) is ResourceConflict: + raise RevisionConflict + raise error + elif doc_id == stored_doc_id: + rev = rev_or_error + self.batch_docs.clear() + return rev + + def __parse_doc_from_couch(self, result, doc_id, + check_for_conflicts=False): + # restrict to u1db documents + if 'u1db_rev' not in result: + return None + doc = ServerDocument(doc_id, result['u1db_rev']) + # set contents or make tombstone + if '_attachments' not in result \ + or 'u1db_content' not in result['_attachments']: + doc.make_tombstone() + else: + doc.content = json.loads( + binascii.a2b_base64( + result['_attachments']['u1db_content']['data'])) + # determine if there are conflicts + if check_for_conflicts \ + and '_attachments' in result \ + and 'u1db_conflicts' in result['_attachments']: + doc.set_conflicts( + self._build_conflicts( + doc.doc_id, + json.loads(binascii.a2b_base64( + result['_attachments']['u1db_conflicts']['data'])))) + # store couch revision + doc.couch_rev = result['_rev'] + # store transactions + doc.transactions = result['u1db_transactions'] + return doc + + def _build_conflicts(self, doc_id, attached_conflicts): + """ + Build the conflicted documents list from the conflicts attachment + fetched from a couch document. + + :param attached_conflicts: The document's conflicts as fetched from a + couch document attachment. 
+ :type attached_conflicts: dict + """ + conflicts = [] + for doc_rev, content in attached_conflicts: + doc = ServerDocument(doc_id, doc_rev) + if content is None: + doc.make_tombstone() + else: + doc.content = content + conflicts.append(doc) + return conflicts + + def get_trans_id_for_gen(self, generation): + """ + Get the transaction id corresponding to a particular generation. + + :param generation: The generation for which to get the transaction id. + :type generation: int + + :return: The transaction id for C{generation}. + :rtype: str + + :raise InvalidGeneration: Raised when the generation does not exist. + """ + if generation == 0: + return '' + # query a couch list function + ddoc_path = [ + '_design', 'transactions', '_list', 'trans_id_for_gen', 'log' + ] + response = self.json_from_resource(ddoc_path, gen=generation) + if response == {}: + raise InvalidGeneration + return response['transaction_id'] + + def get_replica_gen_and_trans_id(self, other_replica_uid): + """ + Return the last known generation and transaction id for the other db + replica. + + When you do a synchronization with another replica, the Database keeps + track of what generation the other database replica was at, and what + the associated transaction id was. This is used to determine what data + needs to be sent, and if two databases are claiming to be the same + replica. + + :param other_replica_uid: The identifier for the other replica. + :type other_replica_uid: str + + :return: A tuple containing the generation and transaction id we + encountered during synchronization. If we've never + synchronized with the replica, this is (0, ''). + :rtype: (int, str) + """ + doc_id = 'u1db_sync_%s' % other_replica_uid + try: + doc = self._database[doc_id] + except ResourceNotFound: + doc = { + '_id': doc_id, + 'generation': 0, + 'transaction_id': '', + } + self._database.save(doc) + result = doc['generation'], doc['transaction_id'] + return result + + def get_doc_conflicts(self, doc_id, couch_rev=None): + """ + Get the conflicted versions of a document. + + If the C{couch_rev} parameter is not None, conflicts for a specific + document's couch revision are returned. + + :param couch_rev: The couch document revision. + :type couch_rev: str + + :return: A list of conflicted versions of the document. + :rtype: list + """ + # request conflicts attachment from server + params = {} + conflicts = [] + if couch_rev is not None: + params['rev'] = couch_rev # restrict to document's couch revision + else: + # TODO: move into resource logic! + first_entry = self.get_doc(doc_id, check_for_conflicts=True) + conflicts.append(first_entry) + + try: + response = self.json_from_resource([doc_id, 'u1db_conflicts'], + check_missing_ddoc=False, + **params) + return conflicts + self._build_conflicts( + doc_id, json.loads(response.read())) + except ResourceNotFound: + return [] + + def set_replica_gen_and_trans_id( + self, other_replica_uid, other_generation, other_transaction_id): + """ + Set the last-known generation and transaction id for the other + database replica. + + We have just performed some synchronization, and we want to track what + generation the other replica was at. See also + get_replica_gen_and_trans_id. + + :param other_replica_uid: The U1DB identifier for the other replica. + :type other_replica_uid: str + :param other_generation: The generation number for the other replica. + :type other_generation: int + :param other_transaction_id: The transaction id associated with the + generation.
+ :type other_transaction_id: str + """ + doc_id = 'u1db_sync_%s' % other_replica_uid + try: + doc = self._database[doc_id] + except ResourceNotFound: + doc = {'_id': doc_id} + doc['generation'] = other_generation + doc['transaction_id'] = other_transaction_id + self._database.save(doc) + + def get_transaction_log(self): + """ + This is only for the test suite, it is not part of the api. + + :return: The complete transaction log. + :rtype: [(str, str)] + """ + # query a couch view + ddoc_path = ['_design', 'transactions', '_view', 'log'] + response = self.json_from_resource(ddoc_path) + return map( + lambda row: (row['id'], row['value']), + response['rows']) + + def whats_changed(self, old_generation=0): + """ + Return a list of documents that have changed since old_generation. + + :param old_generation: The generation of the database in the old + state. + :type old_generation: int + + :return: (generation, trans_id, [(doc_id, generation, trans_id),...]) + The current generation of the database, its associated + transaction id, and a list of changed documents since + old_generation, represented by tuples holding, for each + document, its doc_id and the generation and transaction id + corresponding to the last intervening change, sorted by + generation (old changes first) + :rtype: (int, str, [(str, int, str)]) + """ + # query a couch list function + ddoc_path = [ + '_design', 'transactions', '_list', 'whats_changed', 'log' + ] + response = self.json_from_resource(ddoc_path, old_gen=old_generation) + results = map( + lambda row: + (row['generation'], row['doc_id'], row['transaction_id']), + response['transactions']) + results.reverse() + cur_gen = old_generation + seen = set() + changes = [] + newest_trans_id = '' + for generation, doc_id, trans_id in results: + if doc_id not in seen: + changes.append((doc_id, generation, trans_id)) + seen.add(doc_id) + if changes: + cur_gen = changes[0][1] # max generation + newest_trans_id = changes[0][2] + changes.reverse() + else: + cur_gen, newest_trans_id = self.get_generation_info() + + return cur_gen, newest_trans_id, changes + + def get_generation_info(self): + """ + Return the current generation. + + :return: A tuple containing the current generation and transaction id. + :rtype: (int, str) + """ + if self.batching and self.batch_generation: + return self.batch_generation + # query a couch list function + ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] + info = self.json_from_resource(ddoc_path) + return (info['generation'], info['transaction_id']) + + def json_from_resource(self, ddoc_path, check_missing_ddoc=True, + **kwargs): + """ + Get a resource from its path and get the document's JSON using the + provided parameters, checking for missing design docs by default. + + :param ddoc_path: The path to the resource. + :type ddoc_path: [str] + :param check_missing_ddoc: Raise an informative error identifying any + missing design doc. + :type check_missing_ddoc: bool + + :return: The request's data parsed from JSON to a dict. + :rtype: dict + + :raise MissingDesignDocError: Raised when trying to access a missing + design document. + :raise MissingDesignDocListFunctionError: Raised when trying to access + a missing list function on a + design document. + :raise MissingDesignDocNamedViewError: Raised when trying to access a + missing named view on a design + document. + :raise MissingDesignDocDeletedError: Raised when trying to access a + deleted design document.
+ :raise DesignDocUnknownError: Raised when access to a design document + failed for an as yet unknown + reason. + """ + if ddoc_path is not None: + resource = self._database.resource(*ddoc_path) + else: + resource = self._database.resource() + try: + _, _, data = resource.get_json(**kwargs) + return data + except ResourceNotFound as e: + if check_missing_ddoc: + raise_missing_design_doc_error(e, ddoc_path) + else: + raise e + except ServerError as e: + raise_server_error(e, ddoc_path) + + def save_document(self, old_doc, doc, transaction_id): + """ + Put the document in the Couch backend database. + + Note that C{old_doc} must have been fetched with the parameter + C{check_for_conflicts} equal to True, so we can properly update the + new document using the conflict information from the old one. + + :param old_doc: The old document version. + :type old_doc: ServerDocument + :param doc: The document to be put. + :type doc: ServerDocument + + :raise RevisionConflict: Raised when trying to update a document but + couch revisions mismatch. + :raise MissingDesignDocError: Raised when trying to access a missing + design document. + :raise MissingDesignDocListFunctionError: Raised when trying to access + a missing list function on a + design document. + :raise MissingDesignDocNamedViewError: Raised when trying to access a + missing named view on a design + document. + :raise MissingDesignDocDeletedError: Raised when trying to access a + deleted design document. + :raise DesignDocUnknownError: Raised when access to a design document + failed for an as yet unknown + reason. + """ + attachments = {} # we save content and conflicts as attachments + parts = [] # and we put it using couch's multipart PUT + # save content as attachment + if doc.is_tombstone() is False: + content = doc.get_json() + attachments['u1db_content'] = { + 'follows': True, + 'content_type': 'application/octet-stream', + 'length': len(content), + } + parts.append(content) + # save conflicts as attachment + if doc.has_conflicts is True: + conflicts = json.dumps( + map(lambda cdoc: (cdoc.rev, cdoc.content), + doc.get_conflicts())) + attachments['u1db_conflicts'] = { + 'follows': True, + 'content_type': 'application/octet-stream', + 'length': len(conflicts), + } + parts.append(conflicts) + # store old transactions, if any + transactions = old_doc.transactions[:] if old_doc is not None else [] + # create a new transaction id and timestamp it so the transaction log + # is consistent when querying the database. + transactions.append( + # here we store milliseconds to keep consistent with javascript + # Date.prototype.getTime() which was used before inside a couchdb + # update handler.
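+ # each appended entry is thus a 2-tuple of the form + # (timestamp_in_milliseconds, transaction_id).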
+ (int(time.time() * 1000), + transaction_id)) + # build the couch document + couch_doc = { + '_id': doc.doc_id, + 'u1db_rev': doc.rev, + 'u1db_transactions': transactions, + '_attachments': attachments, + } + # if we are updating a doc we have to add the couch doc revision + if old_doc is not None and hasattr(old_doc, 'couch_rev'): + couch_doc['_rev'] = old_doc.couch_rev + # prepare the multipart PUT + if not self.batching: + buf = StringIO() + envelope = MultipartWriter(buf) + envelope.add('application/json', json.dumps(couch_doc)) + for part in parts: + envelope.add('application/octet-stream', part) + envelope.close() + # try to save and fail if there's a revision conflict + try: + resource = self._new_resource() + resource.put_json( + doc.doc_id, body=str(buf.getvalue()), + headers=envelope.headers) + except ResourceConflict: + raise RevisionConflict() + else: + for name, attachment in attachments.items(): + del attachment['follows'] + del attachment['length'] + index = 0 if name == 'u1db_content' else 1 + attachment['data'] = binascii.b2a_base64(parts[index]).strip() + couch_doc['_attachments'] = attachments + self.batch_docs[doc.doc_id] = couch_doc + last_gen, last_trans_id = self.batch_generation + self.batch_generation = (last_gen + 1, transaction_id) + return transactions[-1][1] + + def _new_resource(self, *path): + """ + Return a new resource for accessing a couch database. + + :return: A resource for accessing a couch database. + :rtype: couchdb.http.Resource + """ + # Workaround for: https://leap.se/code/issues/5448 + url = couch_urljoin(self._database.resource.url, *path) + resource = Resource(url, Session(timeout=COUCH_TIMEOUT)) + resource.credentials = self._database.resource.credentials + resource.headers = self._database.resource.headers.copy() + return resource diff --git a/common/src/leap/soledad/common/couch/errors.py b/common/src/leap/soledad/common/couch/errors.py new file mode 100644 index 00000000..9b287c76 --- /dev/null +++ b/common/src/leap/soledad/common/couch/errors.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +# errors.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +from leap.soledad.common.errors import SoledadError, BackendNotReadyError +from leap.soledad.common.errors import register_exception + +""" +Specific errors that can be raised by CouchDatabase. +""" + + +@register_exception +class MissingDesignDocError(BackendNotReadyError): + + """ + Raised when trying to access a missing couch design document. + """ + + wire_description = "missing design document" + status = 500 + + +@register_exception +class MissingDesignDocNamedViewError(SoledadError): + + """ + Raised when trying to access a missing named view on a couch design + document.
+ """ + + wire_description = "missing design document named function" + status = 500 + + +@register_exception +class MissingDesignDocListFunctionError(SoledadError): + + """ + Raised when trying to access a missing list function on a couch design + document. + """ + + wire_description = "missing design document list function" + status = 500 + + +@register_exception +class MissingDesignDocDeletedError(SoledadError): + + """ + Raised when trying to access a deleted couch design document. + """ + + wire_description = "design document was deleted" + status = 500 + + +@register_exception +class DesignDocUnknownError(SoledadError): + + """ + Raised when trying to access a couch design document and getting an + unknown error. + """ + + wire_description = "missing design document unknown error" + status = 500 + + +def raise_missing_design_doc_error(exc, ddoc_path): + """ + Raise an appropriate exception when catching a ResourceNotFound when + accessing a design document. + + :param exc: The exception cought. + :type exc: ResourceNotFound + :param ddoc_path: A list representing the requested path. + :type ddoc_path: list + + :raise MissingDesignDocError: Raised when tried to access a missing design + document. + :raise MissingDesignDocListFunctionError: Raised when trying to access a + missing list function on a + design document. + :raise MissingDesignDocNamedViewError: Raised when trying to access a + missing named view on a design + document. + :raise MissingDesignDocDeletedError: Raised when trying to access a + deleted design document. + :raise MissingDesignDocUnknownError: Raised when failed to access a design + document for an yet unknown reason. + """ + path = "".join(ddoc_path) + if exc.message[1] == 'missing': + raise MissingDesignDocError(path) + elif exc.message[1] == 'missing function' or \ + exc.message[1].startswith('missing lists function'): + raise MissingDesignDocListFunctionError(path) + elif exc.message[1] == 'missing_named_view': + raise MissingDesignDocNamedViewError(path) + elif exc.message[1] == 'deleted': + raise MissingDesignDocDeletedError(path) + # other errors are unknown for now + raise DesignDocUnknownError("%s: %s" % (path, str(exc.message))) + + +def raise_server_error(exc, ddoc_path): + """ + Raise an appropriate exception when catching a ServerError when + accessing a design document. + + :param exc: The exception cought. + :type exc: ResourceNotFound + :param ddoc_path: A list representing the requested path. + :type ddoc_path: list + + :raise MissingDesignDocListFunctionError: Raised when trying to access a + missing list function on a + design document. + :raise MissingDesignDocUnknownError: Raised when failed to access a design + document for an yet unknown reason. 
+ """ + path = "".join(ddoc_path) + msg = exc.message[1][0] + if msg == 'unnamed_error': + raise MissingDesignDocListFunctionError(path) + elif msg == 'TypeError': + if 'point is undefined' in exc.message[1][1]: + raise MissingDesignDocListFunctionError + # other errors are unknown for now + raise DesignDocUnknownError("%s: %s" % (path, str(exc.message))) diff --git a/common/src/leap/soledad/common/couch/state.py b/common/src/leap/soledad/common/couch/state.py new file mode 100644 index 00000000..4f07c105 --- /dev/null +++ b/common/src/leap/soledad/common/couch/state.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +# state.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Server state using CouchDatabase as backend. +""" +import re +import logging +import time +from urlparse import urljoin +from hashlib import sha512 + +from u1db.remote.server_state import ServerState +from leap.soledad.common.command import exec_validated_cmd +from leap.soledad.common.couch import CouchDatabase +from leap.soledad.common.couch import couch_server +from u1db.errors import Unauthorized + + +logger = logging.getLogger(__name__) + + +def is_db_name_valid(name): + """ + Validate a user database using a regular expression. + + :param name: database name. + :type name: str + + :return: boolean for name vailidity + :rtype: bool + """ + db_name_regex = "^user-[a-f0-9]+$" + return re.match(db_name_regex, name) is not None + + +class CouchServerState(ServerState): + + """ + Inteface of the WSGI server with the CouchDB backend. + """ + + TOKENS_DB_PREFIX = "tokens_" + TOKENS_DB_EXPIRE = 30 * 24 * 3600 # 30 days in seconds + TOKENS_TYPE_KEY = "type" + TOKENS_TYPE_DEF = "Token" + TOKENS_USER_ID_KEY = "user_id" + + def __init__(self, couch_url, create_cmd=None): + """ + Initialize the couch server state. + + :param couch_url: The URL for the couch database. + :type couch_url: str + """ + self.couch_url = couch_url + self.create_cmd = create_cmd + + def open_database(self, dbname): + """ + Open a couch database. + + :param dbname: The name of the database to open. + :type dbname: str + + :return: The SoledadBackend object. + :rtype: SoledadBackend + """ + url = urljoin(self.couch_url, dbname) + db = CouchDatabase.open_database(url, create=False, ensure_ddocs=False) + return db + + def ensure_database(self, dbname): + """ + Ensure couch database exists. + + :param dbname: The name of the database to ensure. + :type dbname: str + + :raise Unauthorized: If disabled or other error was raised. + + :return: The SoledadBackend object and its replica_uid. + :rtype: (SoledadBackend, str) + """ + if not self.create_cmd: + raise Unauthorized() + else: + code, out = exec_validated_cmd(self.create_cmd, dbname, + validator=is_db_name_valid) + if code is not 0: + logger.error(""" + Error while creating database (%s) with (%s) command. 
+ Output: %s
+ Exit code: %d
+ """ % (dbname, self.create_cmd, out, code))
+ raise Unauthorized()
+ db = self.open_database(dbname)
+ return db, db.replica_uid
+
+ def delete_database(self, dbname):
+ """
+ Delete couch database.
+
+ :param dbname: The name of the database to delete.
+ :type dbname: str
+
+ :raise Unauthorized: Always, because Soledad server is not allowed to
+ delete databases.
+ """
+ raise Unauthorized()
+
+ def verify_token(self, uuid, token):
+ """
+ Query couchdb to decide if token is valid for uuid.
+
+ :param uuid: The user uuid.
+ :type uuid: str
+ :param token: The token.
+ :type token: str
+
+ :return: Whether the token is valid for the given uuid.
+ :rtype: bool
+ """
+ with couch_server(self.couch_url) as server:
+ # the tokens db rotates every 30 days, and the current db name is
+ # "tokens_NNN", where NNN is the number of seconds since epoch
+ # divided by the rotation period in seconds. When rotating, old and
+ # new tokens db coexist during a certain window of time and valid
+ # tokens are replicated from the old db to the new one. See:
+ # https://leap.se/code/issues/6785
+ dbname = self._tokens_dbname()
+ db = server[dbname]
+ # lookup key is a hash of the token to prevent timing attacks.
+ token = db.get(sha512(token).hexdigest())
+ if token is None:
+ return False
+ # we compare uuid hashes to avoid possible timing attacks that
+ # might exploit python's builtin comparison operator behaviour,
+ # which fails immediately when non-matching bytes are found.
+ couch_uuid_hash = sha512(token[self.TOKENS_USER_ID_KEY]).digest()
+ req_uuid_hash = sha512(uuid).digest()
+ if token[self.TOKENS_TYPE_KEY] != self.TOKENS_TYPE_DEF \
+ or couch_uuid_hash != req_uuid_hash:
+ return False
+ return True
+
+ def _tokens_dbname(self):
+ dbname = self.TOKENS_DB_PREFIX + \
+ str(int(time.time() / self.TOKENS_DB_EXPIRE))
+ return dbname
diff --git a/common/src/leap/soledad/common/couch/support.py b/common/src/leap/soledad/common/couch/support.py
new file mode 100644
index 00000000..bfc4fef6
--- /dev/null
+++ b/common/src/leap/soledad/common/couch/support.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+# support.py
+# Copyright (C) 2015 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import sys
+
+
+"""
+Monkey patches and temporary code that may be removed with version changes.
+"""
+
+
+# for bigcouch
+# TODO: Remove if bigcouch support is dropped
+class MultipartWriter(object):
+
+ """
+ A multipart writer adapted from python-couchdb's one so we can PUT
+ documents using couch's multipart PUT.
+
+ This stripped down version does not allow for nested structures, and
+ contains only the essential things we need to PUT SoledadDocuments to the
+ couch backend. Also, please note that this is a patch. The couchdb lib has
+ another implementation that works fine with CouchDB 1.6, but removing this
+ now will break compatibility with bigcouch.
+ """ + + CRLF = '\r\n' + + def __init__(self, fileobj, headers=None, boundary=None): + """ + Initialize the multipart writer. + """ + self.fileobj = fileobj + if boundary is None: + boundary = self._make_boundary() + self._boundary = boundary + self._build_headers('related', headers) + + def add(self, mimetype, content, headers={}): + """ + Add a part to the multipart stream. + """ + self.fileobj.write('--') + self.fileobj.write(self._boundary) + self.fileobj.write(self.CRLF) + headers['Content-Type'] = mimetype + self._write_headers(headers) + if content: + # XXX: throw an exception if a boundary appears in the content?? + self.fileobj.write(content) + self.fileobj.write(self.CRLF) + + def close(self): + """ + Close the multipart stream. + """ + self.fileobj.write('--') + self.fileobj.write(self._boundary) + # be careful not to have anything after '--', otherwise old couch + # versions (including bigcouch) will fail. + self.fileobj.write('--') + + def _make_boundary(self): + """ + Create a boundary to discern multi parts. + """ + try: + from uuid import uuid4 + return '==' + uuid4().hex + '==' + except ImportError: + from random import randrange + token = randrange(sys.maxint) + format = '%%0%dd' % len(repr(sys.maxint - 1)) + return '===============' + (format % token) + '==' + + def _write_headers(self, headers): + """ + Write a part header in the buffer stream. + """ + if headers: + for name in sorted(headers.keys()): + value = headers[name] + self.fileobj.write(name) + self.fileobj.write(': ') + self.fileobj.write(value) + self.fileobj.write(self.CRLF) + self.fileobj.write(self.CRLF) + + def _build_headers(self, subtype, headers): + """ + Build the main headers of the multipart stream. + + This is here so we can send headers separete from content using + python-couchdb API. + """ + self.headers = {} + self.headers['Content-Type'] = 'multipart/%s; boundary="%s"' % \ + (subtype, self._boundary) + if headers: + for name in sorted(headers.keys()): + value = headers[name] + self.headers[name] = value diff --git a/common/src/leap/soledad/common/ddocs/syncs/updates/put.js b/common/src/leap/soledad/common/ddocs/syncs/updates/put.js deleted file mode 100644 index b0ae2de6..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/updates/put.js +++ /dev/null @@ -1,151 +0,0 @@ -/** - * The u1db_sync_log document stores both the actual sync log and a list of - * pending updates to the log, in case we receive incoming documents out of - * the correct order (i.e. if there are parallel PUTs during the sync - * process). - * - * The structure of the document is the following: - * - * { - * 'syncs': [ - * ['<replica_uid>', <gen>, '<trans_id>'], - * ... - * ], - * 'pending': { - * 'other_replica_uid': { - * 'sync_id': '<sync_id>', - * 'log': [[<gen>, '<trans_id>'], ...] - * }, - * ... - * } - * } - * - * The update function below does the following: - * - * 0. If we do not receive a sync_id, we just update the 'syncs' list with - * the incoming info about the source replica state. - * - * 1. Otherwise, if the incoming sync_id differs from current stored - * sync_id, then we assume that the previous sync session for that source - * replica was interrupted and discard all pending data. - * - * 2. Then we append incoming info as pending data for that source replica - * and current sync_id, and sort the pending data by generation. - * - * 3. Then we go through pending data and find the most recent generation - * that we can use to update the actual sync log. - * - * 4. 
Finally, we insert the most up to date information into the sync log. - */ -function(doc, req){ - - // create the document if it doesn't exist - if (!doc) { - doc = {} - doc['_id'] = 'u1db_sync_log'; - doc['syncs'] = []; - } - - // get and validate incoming info - var body = JSON.parse(req.body); - var other_replica_uid = body['other_replica_uid']; - var other_generation = parseInt(body['other_generation']); - var other_transaction_id = body['other_transaction_id'] - var sync_id = body['sync_id']; - var number_of_docs = body['number_of_docs']; - var doc_idx = body['doc_idx']; - - // parse integers - if (number_of_docs != null) - number_of_docs = parseInt(number_of_docs); - if (doc_idx != null) - doc_idx = parseInt(doc_idx); - - if (other_replica_uid == null - || other_generation == null - || other_transaction_id == null) - return [null, 'invalid data']; - - // create slot for pending logs - if (doc['pending'] == null) - doc['pending'] = {}; - - // these are the values that will be actually inserted - var current_gen = other_generation; - var current_trans_id = other_transaction_id; - - /*------------- Wait for sequential values before storing -------------*/ - - // we just try to obtain pending log if we received a sync_id - if (sync_id != null) { - - // create slot for current source and sync_id pending log - if (doc['pending'][other_replica_uid] == null - || doc['pending'][other_replica_uid]['sync_id'] != sync_id) { - doc['pending'][other_replica_uid] = { - 'sync_id': sync_id, - 'log': [], - 'last_doc_idx': 0, - } - } - - // append incoming data to pending log - doc['pending'][other_replica_uid]['log'].push([ - other_generation, - other_transaction_id, - doc_idx, - ]) - - // sort pending log according to generation - doc['pending'][other_replica_uid]['log'].sort(function(a, b) { - return a[0] - b[0]; - }); - - // get most up-to-date information from pending log - var last_doc_idx = doc['pending'][other_replica_uid]['last_doc_idx']; - var pending_idx = doc['pending'][other_replica_uid]['log'][0][2]; - - current_gen = null; - current_trans_id = null; - - while (last_doc_idx + 1 == pending_idx) { - pending = doc['pending'][other_replica_uid]['log'].shift() - current_gen = pending[0]; - current_trans_id = pending[1]; - last_doc_idx = pending[2] - if (doc['pending'][other_replica_uid]['log'].length == 0) - break; - pending_idx = doc['pending'][other_replica_uid]['log'][0][2]; - } - - // leave the sync log untouched if we still did not receive enough docs - if (current_gen == null) - return [doc, 'ok']; - - // update last index of received doc - doc['pending'][other_replica_uid]['last_doc_idx'] = last_doc_idx; - - // eventually remove all pending data from that replica - if (last_doc_idx == number_of_docs) - delete doc['pending'][other_replica_uid] - } - - /*--------------- Store source replica info on sync log ---------------*/ - - // remove outdated info - doc['syncs'] = doc['syncs'].filter( - function (entry) { - return entry[0] != other_replica_uid; - } - ); - - // store in log - doc['syncs'].push([ - other_replica_uid, - current_gen, - current_trans_id - ]); - - return [doc, 'ok']; -} - diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/log/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/log/map.js deleted file mode 100644 index a63c7cf4..00000000 --- a/common/src/leap/soledad/common/ddocs/syncs/views/log/map.js +++ /dev/null @@ -1,12 +0,0 @@ -function(doc) { - if (doc._id == 'u1db_sync_log') { - if (doc.syncs) - doc.syncs.forEach(function (entry) { - emit(entry[0], - 
{
- 'known_generation': entry[1],
- 'known_transaction_id': entry[2]
- });
- });
- }
-}
diff --git a/common/src/leap/soledad/common/document.py b/common/src/leap/soledad/common/document.py
index 919ade12..9e0c0976 100644
--- a/common/src/leap/soledad/common/document.py
+++ b/common/src/leap/soledad/common/document.py
@@ -108,3 +108,73 @@ class SoledadDocument(Document):
 _get_rev,
 _set_rev,
 doc="Wrapper to ensure `doc.rev` is always returned as bytes.")
+
+
+class ServerDocument(SoledadDocument):
+ """
+ This is the document used by the server to hold conflicts and
+ transactions on a database.
+
+ The goal is to ensure an atomic and consistent update of the database.
+ """
+
+ def __init__(self, doc_id=None, rev=None, json='{}', has_conflicts=False):
+ """
+ Container for handling a document that is stored on the server.
+
+ :param doc_id: The unique document identifier.
+ :type doc_id: str
+ :param rev: The revision identifier of the document.
+ :type rev: str
+ :param json: The JSON string for this document.
+ :type json: str
+ :param has_conflicts: Boolean indicating if this document has
+ conflicts.
+ :type has_conflicts: bool
+ """
+ SoledadDocument.__init__(self, doc_id, rev, json, has_conflicts)
+ self._conflicts = None
+
+ def get_conflicts(self):
+ """
+ Get the conflicted versions of the document.
+
+ :return: The conflicted versions of the document.
+ :rtype: [ServerDocument]
+ """
+ return self._conflicts or []
+
+ def set_conflicts(self, conflicts):
+ """
+ Set the conflicted versions of the document.
+
+ :param conflicts: The conflicted versions of the document.
+ :type conflicts: list
+ """
+ self._conflicts = conflicts
+ self.has_conflicts = len(self._conflicts) > 0
+
+ def add_conflict(self, doc):
+ """
+ Add a conflict to this document.
+
+ :param doc: The conflicted version to be added.
+ :type doc: Document
+ """
+ if self._conflicts is None:
+ raise Exception("Fetch conflicts first!")
+ self._conflicts.append(doc)
+ self.has_conflicts = len(self._conflicts) > 0
+
+ def delete_conflicts(self, conflict_revs):
+ """
+ Delete conflicted versions of this document.
+
+ :param conflict_revs: The conflicted revisions to be deleted.
+ :type conflict_revs: [str]
+ """
+ if self._conflicts is None:
+ raise Exception("Fetch conflicts first!")
+ self._conflicts = filter(
+ lambda doc: doc.rev not in conflict_revs,
+ self._conflicts)
+ self.has_conflicts = len(self._conflicts) > 0
diff --git a/common/src/leap/soledad/common/errors.py b/common/src/leap/soledad/common/errors.py
index 5798770b..0b6bb4e6 100644
--- a/common/src/leap/soledad/common/errors.py
+++ b/common/src/leap/soledad/common/errors.py
@@ -132,67 +132,23 @@ class CouldNotObtainLockError(SoledadError):
 
 
 #
-# CouchDatabase errors
-#
-
-@register_exception
-class MissingDesignDocError(SoledadError):
-
- """
- Raised when trying to access a missing couch design document.
- """
-
- wire_description = "missing design document"
- status = 500
-
-
-@register_exception
-class MissingDesignDocNamedViewError(SoledadError):
-
- """
- Raised when trying to access a missing named view on a couch design
- document.
- """
-
- wire_description = "missing design document named function"
- status = 500
-
-
-@register_exception
-class MissingDesignDocListFunctionError(SoledadError):
-
- """
- Raised when trying to access a missing list function on a couch design
- document.
- """ - - wire_description = "missing design document list function" - status = 500 +# SoledadBackend errors +# u1db error statuses also have to be updated +http_errors.ERROR_STATUSES = set( + http_errors.wire_description_to_status.values()) -@register_exception -class MissingDesignDocDeletedError(SoledadError): +class InvalidURLError(Exception): """ - Raised when trying to access a deleted couch design document. + Exception raised when Soledad encounters a malformed URL. """ - wire_description = "design document was deleted" - status = 500 - - -@register_exception -class DesignDocUnknownError(SoledadError): +class BackendNotReadyError(SoledadError): """ - Raised when trying to access a couch design document and getting an - unknown error. + Generic exception raised when the backend is not ready to dispatch a client + request. """ - - wire_description = "missing design document unknown error" + wire_description = "backend not ready" status = 500 - - -# u1db error statuses also have to be updated -http_errors.ERROR_STATUSES = set( - http_errors.wire_description_to_status.values()) diff --git a/common/src/leap/soledad/common/tests/fixture_soledad.conf b/common/src/leap/soledad/common/tests/fixture_soledad.conf new file mode 100644 index 00000000..8d8161c3 --- /dev/null +++ b/common/src/leap/soledad/common/tests/fixture_soledad.conf @@ -0,0 +1,11 @@ +[soledad-server] +couch_url = http://soledad:passwd@localhost:5984 +create_cmd = sudo -u soledad-admin /usr/bin/create-user-db +admin_netrc = /etc/couchdb/couchdb-soledad-admin.netrc +batching = 0 + +[database-security] +members = user1, user2 +members_roles = role1, role2 +admins = user3, user4 +admins_roles = role3, role3 diff --git a/common/src/leap/soledad/common/tests/test_command.py b/common/src/leap/soledad/common/tests/test_command.py new file mode 100644 index 00000000..c386bdd2 --- /dev/null +++ b/common/src/leap/soledad/common/tests/test_command.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# test_command.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Tests for command execution using a validator function for arguments. 
+""" +from twisted.trial import unittest +from leap.soledad.common.command import exec_validated_cmd + + +class ExecuteValidatedCommandTest(unittest.TestCase): + + def test_argument_validation(self): + validator = lambda arg: True if arg is 'valid' else False + status, out = exec_validated_cmd("command", "invalid arg", validator) + self.assertEquals(status, 1) + self.assertEquals(out, "invalid argument") + status, out = exec_validated_cmd("echo", "valid", validator) + self.assertEquals(status, 0) + self.assertEquals(out, "valid\n") + + def test_return_status_code_success(self): + status, out = exec_validated_cmd("echo", "arg") + self.assertEquals(status, 0) + self.assertEquals(out, "arg\n") + + def test_handle_command_with_spaces(self): + status, out = exec_validated_cmd("echo I am", "an argument") + self.assertEquals(status, 0, out) + self.assertEquals(out, "I am an argument\n") + + def test_handle_oserror_on_invalid_command(self): + status, out = exec_validated_cmd("inexistent command with", "args") + self.assertEquals(status, 1) + self.assertIn("No such file or directory", out) + + def test_return_status_code_number_on_failure(self): + status, out = exec_validated_cmd("ls", "user-bebacafe") + self.assertNotEquals(status, 0) + self.assertIn('No such file or directory\n', out) diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py index a08ffd16..7ba50e11 100644 --- a/common/src/leap/soledad/common/tests/test_couch.py +++ b/common/src/leap/soledad/common/tests/test_couch.py @@ -28,13 +28,16 @@ from couchdb.client import Server from uuid import uuid4 from testscenarios import TestWithScenarios +from twisted.trial import unittest +from mock import Mock from u1db import errors as u1db_errors from u1db import SyncTarget from u1db import vectorclock from leap.soledad.common import couch -from leap.soledad.common import errors +from leap.soledad.common.document import ServerDocument +from leap.soledad.common.couch import errors from leap.soledad.common.tests import u1db_tests as tests from leap.soledad.common.tests.util import CouchDBTestCase @@ -44,8 +47,6 @@ from leap.soledad.common.tests.util import sync_via_synchronizer from leap.soledad.common.tests.u1db_tests import test_backends from leap.soledad.common.tests.u1db_tests import DatabaseBaseTests -from u1db.backends.inmemory import InMemoryIndex - # ----------------------------------------------------------------------------- # The following tests come from `u1db.tests.test_common_backend`. @@ -131,7 +132,7 @@ def copy_couch_database_for_test(test, db): def make_document_for_test(test, doc_id, rev, content, has_conflicts=False): - return couch.CouchDocument( + return ServerDocument( doc_id, rev, content, has_conflicts=has_conflicts) @@ -148,7 +149,7 @@ class CouchTests( scenarios = COUCH_SCENARIOS -class CouchDatabaseTests( +class SoledadBackendTests( TestWithScenarios, test_backends.LocalDatabaseTests, CouchDBTestCase): @@ -204,7 +205,7 @@ simple_doc = tests.simple_doc nested_doc = tests.nested_doc -class CouchDatabaseSyncTargetTests( +class SoledadBackendSyncTargetTests( TestWithScenarios, DatabaseBaseTests, CouchDBTestCase): @@ -527,90 +528,6 @@ class CouchDatabaseSyncTargetTests( self.st.record_sync_info('replica', 0, 'T-sid') self.assertEqual(expected, called) - -# The following tests need that the database have an index, so we fake one. 
- -class IndexedCouchDatabase(couch.CouchDatabase): - - def __init__(self, url, dbname, replica_uid=None, ensure_ddocs=True): - old_class.__init__(self, url, dbname, replica_uid=replica_uid, - ensure_ddocs=ensure_ddocs) - self._indexes = {} - - def _put_doc(self, old_doc, doc): - for index in self._indexes.itervalues(): - if old_doc is not None and not old_doc.is_tombstone(): - index.remove_json(old_doc.doc_id, old_doc.get_json()) - if not doc.is_tombstone(): - index.add_json(doc.doc_id, doc.get_json()) - old_class._put_doc(self, old_doc, doc) - - def create_index(self, index_name, *index_expressions): - if index_name in self._indexes: - if self._indexes[index_name]._definition == list( - index_expressions): - return - raise u1db_errors.IndexNameTakenError - index = InMemoryIndex(index_name, list(index_expressions)) - _, all_docs = self.get_all_docs() - for doc in all_docs: - index.add_json(doc.doc_id, doc.get_json()) - self._indexes[index_name] = index - - def delete_index(self, index_name): - del self._indexes[index_name] - - def list_indexes(self): - definitions = [] - for idx in self._indexes.itervalues(): - definitions.append((idx._name, idx._definition)) - return definitions - - def get_from_index(self, index_name, *key_values): - try: - index = self._indexes[index_name] - except KeyError: - raise u1db_errors.IndexDoesNotExist - doc_ids = index.lookup(key_values) - result = [] - for doc_id in doc_ids: - result.append(self._get_doc(doc_id, check_for_conflicts=True)) - return result - - def get_range_from_index(self, index_name, start_value=None, - end_value=None): - """Return all documents with key values in the specified range.""" - try: - index = self._indexes[index_name] - except KeyError: - raise u1db_errors.IndexDoesNotExist - if isinstance(start_value, basestring): - start_value = (start_value,) - if isinstance(end_value, basestring): - end_value = (end_value,) - doc_ids = index.lookup_range(start_value, end_value) - result = [] - for doc_id in doc_ids: - result.append(self._get_doc(doc_id, check_for_conflicts=True)) - return result - - def get_index_keys(self, index_name): - try: - index = self._indexes[index_name] - except KeyError: - raise u1db_errors.IndexDoesNotExist - keys = index.keys() - # XXX inefficiency warning - return list(set([tuple(key.split('\x01')) for key in keys])) - - -# monkey patch CouchDatabase (once) to include virtual indexes -if getattr(couch.CouchDatabase, '_old_class', None) is None: - old_class = couch.CouchDatabase - IndexedCouchDatabase._old_class = old_class - couch.CouchDatabase = IndexedCouchDatabase - - sync_scenarios = [] for name, scenario in COUCH_SCENARIOS: scenario = dict(scenario) @@ -619,7 +536,7 @@ for name, scenario in COUCH_SCENARIOS: scenario = dict(scenario) -class CouchDatabaseSyncTests( +class SoledadBackendSyncTests( TestWithScenarios, DatabaseBaseTests, CouchDBTestCase): @@ -920,7 +837,6 @@ class CouchDatabaseSyncTests( self.db1 = self.create_database('test1', 'source') self.db2 = self.create_database('test2', 'target') doc = self.db2.create_doc_from_json(simple_doc) - self.db1.create_index('test-idx', 'key') self.assertEqual(0, self.sync(self.db1, self.db2)) self.assertGetDoc(self.db1, doc.doc_id, doc.rev, simple_doc, False) self.assertEqual(1, self.db1._get_replica_gen_and_trans_id('test2')[0]) @@ -930,7 +846,7 @@ class CouchDatabaseSyncTests( {'receive': {'docs': [], 'last_known_gen': 0}, 'return': {'docs': [(doc.doc_id, doc.rev)], 'last_gen': 1}}) - self.assertEqual([doc], self.db1.get_from_index('test-idx', 'value')) + 
self.assertGetDoc(self.db2, doc.doc_id, doc.rev, simple_doc, False) def test_sync_pulling_doesnt_update_other_if_changed(self): self.db1 = self.create_database('test1', 'source') @@ -1019,7 +935,6 @@ class CouchDatabaseSyncTests( doc1 = self.db1.create_doc_from_json(simple_doc) doc_id = doc1.doc_id doc1_rev = doc1.rev - self.db1.create_index('test-idx', 'key') new_doc = '{"key": "altval"}' doc2 = self.db2.create_doc_from_json(new_doc, doc_id=doc_id) doc2_rev = doc2.rev @@ -1035,18 +950,12 @@ class CouchDatabaseSyncTests( self.assertTransactionLog([doc_id, doc_id], self.db1) self.assertGetDoc(self.db1, doc_id, doc2_rev, new_doc, True) self.assertGetDoc(self.db2, doc_id, doc2_rev, new_doc, False) - from_idx = self.db1.get_from_index('test-idx', 'altval')[0] - self.assertEqual(doc2.doc_id, from_idx.doc_id) - self.assertEqual(doc2.rev, from_idx.rev) - self.assertTrue(from_idx.has_conflicts) - self.assertEqual([], self.db1.get_from_index('test-idx', 'value')) def test_sync_sees_remote_delete_conflicted(self): self.db1 = self.create_database('test1', 'source') self.db2 = self.create_database('test2', 'target') doc1 = self.db1.create_doc_from_json(simple_doc) doc_id = doc1.doc_id - self.db1.create_index('test-idx', 'key') self.sync(self.db1, self.db2) doc2 = self.make_document(doc1.doc_id, doc1.rev, doc1.get_json()) new_doc = '{"key": "altval"}' @@ -1066,7 +975,6 @@ class CouchDatabaseSyncTests( self.assertGetDocIncludeDeleted(self.db1, doc_id, doc2.rev, None, True) self.assertGetDocIncludeDeleted( self.db2, doc_id, doc2.rev, None, False) - self.assertEqual([], self.db1.get_from_index('test-idx', 'value')) def test_sync_local_race_conflicted(self): self.db1 = self.create_database('test1', 'source') @@ -1074,7 +982,6 @@ class CouchDatabaseSyncTests( doc = self.db1.create_doc_from_json(simple_doc) doc_id = doc.doc_id doc1_rev = doc.rev - self.db1.create_index('test-idx', 'key') self.sync(self.db1, self.db2) content1 = '{"key": "localval"}' content2 = '{"key": "altval"}' @@ -1093,21 +1000,13 @@ class CouchDatabaseSyncTests( self.sync(self.db1, self.db2, trace_hook=after_whatschanged) self.assertEqual([True], triggered) self.assertGetDoc(self.db1, doc_id, doc2_rev2, content2, True) - from_idx = self.db1.get_from_index('test-idx', 'altval')[0] - self.assertEqual(doc.doc_id, from_idx.doc_id) - self.assertEqual(doc.rev, from_idx.rev) - self.assertTrue(from_idx.has_conflicts) - self.assertEqual([], self.db1.get_from_index('test-idx', 'value')) - self.assertEqual([], self.db1.get_from_index('test-idx', 'localval')) def test_sync_propagates_deletes(self): self.db1 = self.create_database('test1', 'source') self.db2 = self.create_database('test2', 'both') doc1 = self.db1.create_doc_from_json(simple_doc) doc_id = doc1.doc_id - self.db1.create_index('test-idx', 'key') self.sync(self.db1, self.db2) - self.db2.create_index('test-idx', 'key') self.db3 = self.create_database('test3', 'target') self.sync(self.db1, self.db3) self.db1.delete_doc(doc1) @@ -1123,8 +1022,6 @@ class CouchDatabaseSyncTests( self.db1, doc_id, deleted_rev, None, False) self.assertGetDocIncludeDeleted( self.db2, doc_id, deleted_rev, None, False) - self.assertEqual([], self.db1.get_from_index('test-idx', 'value')) - self.assertEqual([], self.db2.get_from_index('test-idx', 'value')) self.sync(self.db2, self.db3) self.assertLastExchangeLog( self.db3, @@ -1315,7 +1212,7 @@ class CouchDatabaseSyncTests( self.assertEqual(cont2, self.db1.get_doc("2").get_json()) -class CouchDatabaseExceptionsTests(CouchDBTestCase): +class 
SoledadBackendExceptionsTests(CouchDBTestCase): def setUp(self): CouchDBTestCase.setUp(self) @@ -1323,9 +1220,11 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): def create_db(self, ensure=True, dbname=None): if not dbname: dbname = ('test-%s' % uuid4().hex) - self.db = couch.CouchDatabase.open_database( - urljoin('http://127.0.0.1:%d' % self.couch_port, dbname), - create=True, + if dbname not in self.couch_server: + self.couch_server.create(dbname) + self.db = couch.CouchDatabase( + ('http://127.0.0.1:%d' % self.couch_port), + dbname, ensure_ddocs=ensure) def tearDown(self): @@ -1339,30 +1238,22 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): design docs are not present. """ self.create_db(ensure=False) - # _get_generation() + # get_generation_info() self.assertRaises( errors.MissingDesignDocError, - self.db._get_generation) - # _get_generation_info() + self.db.get_generation_info) + # get_trans_id_for_gen() self.assertRaises( errors.MissingDesignDocError, - self.db._get_generation_info) - # _get_trans_id_for_gen() + self.db.get_trans_id_for_gen, 1) + # get_transaction_log() self.assertRaises( errors.MissingDesignDocError, - self.db._get_trans_id_for_gen, 1) - # _get_transaction_log() - self.assertRaises( - errors.MissingDesignDocError, - self.db._get_transaction_log) + self.db.get_transaction_log) # whats_changed() self.assertRaises( errors.MissingDesignDocError, self.db.whats_changed) - # _do_set_replica_gen_and_trans_id() - self.assertRaises( - errors.MissingDesignDocError, - self.db._do_set_replica_gen_and_trans_id, 1, 2, 3) def test_missing_design_doc_functions_raises(self): """ @@ -1374,18 +1265,14 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): transactions = self.db._database['_design/transactions'] transactions['lists'] = {} self.db._database.save(transactions) - # _get_generation() - self.assertRaises( - errors.MissingDesignDocListFunctionError, - self.db._get_generation) - # _get_generation_info() + # get_generation_info() self.assertRaises( errors.MissingDesignDocListFunctionError, - self.db._get_generation_info) - # _get_trans_id_for_gen() + self.db.get_generation_info) + # get_trans_id_for_gen() self.assertRaises( errors.MissingDesignDocListFunctionError, - self.db._get_trans_id_for_gen, 1) + self.db.get_trans_id_for_gen, 1) # whats_changed() self.assertRaises( errors.MissingDesignDocListFunctionError, @@ -1401,18 +1288,14 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): transactions = self.db._database['_design/transactions'] del transactions['lists'] self.db._database.save(transactions) - # _get_generation() - self.assertRaises( - errors.MissingDesignDocListFunctionError, - self.db._get_generation) - # _get_generation_info() + # get_generation_info() self.assertRaises( errors.MissingDesignDocListFunctionError, - self.db._get_generation_info) + self.db.get_generation_info) # _get_trans_id_for_gen() self.assertRaises( errors.MissingDesignDocListFunctionError, - self.db._get_trans_id_for_gen, 1) + self.db.get_trans_id_for_gen, 1) # whats_changed() self.assertRaises( errors.MissingDesignDocListFunctionError, @@ -1436,22 +1319,18 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): transactions = self.db._database['_design/transactions'] del transactions['views'] self.db._database.save(transactions) - # _get_generation() + # get_generation_info() self.assertRaises( errors.MissingDesignDocNamedViewError, - self.db._get_generation) - # _get_generation_info() - self.assertRaises( - errors.MissingDesignDocNamedViewError, - 
self.db._get_generation_info)
+ self.db.get_generation_info)
 # _get_trans_id_for_gen()
 self.assertRaises(
 errors.MissingDesignDocNamedViewError,
- self.db._get_trans_id_for_gen, 1)
+ self.db.get_trans_id_for_gen, 1)
 # _get_transaction_log()
 self.assertRaises(
 errors.MissingDesignDocNamedViewError,
- self.db._get_transaction_log)
+ self.db.get_transaction_log)
 # whats_changed()
 self.assertRaises(
 errors.MissingDesignDocNamedViewError,
@@ -1469,30 +1348,22 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase):
 del self.db._database['_design/syncs']
 # delete _design/transactions
 del self.db._database['_design/transactions']
- # _get_generation()
+ # get_generation_info()
 self.assertRaises(
 errors.MissingDesignDocDeletedError,
- self.db._get_generation)
- # _get_generation_info()
+ self.db.get_generation_info)
+ # get_trans_id_for_gen()
 self.assertRaises(
 errors.MissingDesignDocDeletedError,
- self.db._get_generation_info)
- # _get_trans_id_for_gen()
+ self.db.get_trans_id_for_gen, 1)
+ # get_transaction_log()
 self.assertRaises(
 errors.MissingDesignDocDeletedError,
- self.db._get_trans_id_for_gen, 1)
- # _get_transaction_log()
- self.assertRaises(
- errors.MissingDesignDocDeletedError,
- self.db._get_transaction_log)
+ self.db.get_transaction_log)
 # whats_changed()
 self.assertRaises(
 errors.MissingDesignDocDeletedError,
 self.db.whats_changed)
- # _do_set_replica_gen_and_trans_id()
- self.assertRaises(
- errors.MissingDesignDocDeletedError,
- self.db._do_set_replica_gen_and_trans_id, 1, 2, 3)
 
 def test_ensure_ddoc_independently(self):
 """
@@ -1503,6 +1374,72 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase):
 del self.db._database['_design/transactions']
 self.assertRaises(
 errors.MissingDesignDocDeletedError,
- self.db._get_transaction_log)
+ self.db.get_transaction_log)
 self.create_db(ensure=True, dbname=self.db._dbname)
- self.db._get_transaction_log()
+ self.db.get_transaction_log()
+
+ def test_ensure_security_doc(self):
+ """
+ ensure_security_ddoc creates a _security ddoc to ensure that only
+ soledad will have the lowest privileged access to a user db.
+ """ + self.create_db(ensure=False) + self.assertFalse(self.db._database.resource.get_json('_security')[2]) + self.db.ensure_security_ddoc() + security_ddoc = self.db._database.resource.get_json('_security')[2] + self.assertIn('admins', security_ddoc) + self.assertFalse(security_ddoc['admins']['names']) + self.assertIn('members', security_ddoc) + self.assertIn('soledad', security_ddoc['members']['names']) + + def test_ensure_security_from_configuration(self): + """ + Given a configuration, follow it to create the security document + """ + self.create_db(ensure=False) + configuration = {'members': ['user1', 'user2'], + 'members_roles': ['role1', 'role2'], + 'admins': ['admin'], + 'admins_roles': ['administrators'] + } + self.db.ensure_security_ddoc(configuration) + + security_ddoc = self.db._database.resource.get_json('_security')[2] + self.assertEquals(configuration['admins'], + security_ddoc['admins']['names']) + self.assertEquals(configuration['admins_roles'], + security_ddoc['admins']['roles']) + self.assertEquals(configuration['members'], + security_ddoc['members']['names']) + self.assertEquals(configuration['members_roles'], + security_ddoc['members']['roles']) + + +class DatabaseNameValidationTest(unittest.TestCase): + + def test_database_name_validation(self): + inject = couch.state.is_db_name_valid("user-deadbeef | cat /secret") + self.assertFalse(inject) + self.assertTrue(couch.state.is_db_name_valid("user-cafe1337")) + + +class CommandBasedDBCreationTest(unittest.TestCase): + + def test_ensure_db_using_custom_command(self): + state = couch.state.CouchServerState("url", create_cmd="echo") + mock_db = Mock() + mock_db.replica_uid = 'replica_uid' + state.open_database = Mock(return_value=mock_db) + db, replica_uid = state.ensure_database("user-1337") # works + self.assertEquals(mock_db, db) + self.assertEquals(mock_db.replica_uid, replica_uid) + + def test_raises_unauthorized_on_failure(self): + state = couch.state.CouchServerState("url", create_cmd="inexistent") + self.assertRaises(u1db_errors.Unauthorized, + state.ensure_database, "user-1337") + + def test_raises_unauthorized_by_default(self): + state = couch.state.CouchServerState("url") + self.assertRaises(u1db_errors.Unauthorized, + state.ensure_database, "user-1337") diff --git a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py index 25f709ca..8cd3ae08 100644 --- a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py +++ b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py @@ -26,7 +26,8 @@ from twisted.internet import defer from uuid import uuid4 from leap.soledad.client import Soledad -from leap.soledad.common.couch import CouchDatabase, CouchServerState +from leap.soledad.common.couch.state import CouchServerState +from leap.soledad.common.couch import CouchDatabase from leap.soledad.common.tests.util import ( make_token_soledad_app, @@ -35,17 +36,11 @@ from leap.soledad.common.tests.util import ( ) from leap.soledad.common.tests.test_couch import CouchDBTestCase from leap.soledad.common.tests.u1db_tests import TestCaseWithServer -from leap.soledad.common.tests.test_server import _couch_ensure_database REPEAT_TIMES = 20 -# monkey path CouchServerState so it can ensure databases. 
- -CouchServerState.ensure_database = _couch_ensure_database - - class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer): @staticmethod @@ -163,6 +158,7 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer): 2, len(filter(lambda t: t[0] == doc_id, transaction_log))) + @defer.inlineCallbacks def test_correct_sync_log_after_sequential_syncs(self): """ Assert that the sync_log increases accordingly with sequential syncs. @@ -170,21 +166,21 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer): sol = self._soledad_instance( auth_token='auth-token', server_url=self.getURL()) + source_replica_uid = sol._dbpool.replica_uid - def _create_docs(results): + def _create_docs(): deferreds = [] for i in xrange(0, REPEAT_TIMES): deferreds.append(sol.create_doc({})) - return defer.DeferredList(deferreds) + return defer.gatherResults(deferreds) def _assert_transaction_and_sync_logs(results, sync_idx): # assert sizes of transaction and sync logs self.assertEqual( sync_idx * REPEAT_TIMES, len(self.db._get_transaction_log())) - self.assertEqual( - 1 if sync_idx > 0 else 0, - len(self.db._database.view('syncs/log').rows)) + gen, _ = self.db._get_replica_gen_and_trans_id(source_replica_uid) + self.assertEqual(sync_idx * REPEAT_TIMES, gen) def _assert_sync(results, sync_idx): gen, docs = results @@ -193,40 +189,28 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer): # assert sizes of transaction and sync logs self.assertEqual((sync_idx + 1) * REPEAT_TIMES, len(self.db._get_transaction_log())) - sync_log_rows = self.db._database.view('syncs/log').rows - sync_log = sync_log_rows[0].value - replica_uid = sync_log_rows[0].key - known_gen = sync_log['known_generation'] - known_trans_id = sync_log['known_transaction_id'] - # assert sync_log has exactly 1 row - self.assertEqual(1, len(sync_log_rows)) - # assert it has the correct replica_uid, gen and trans_id - self.assertEqual(sol._dbpool.replica_uid, replica_uid) + target_known_gen, target_known_trans_id = \ + self.db._get_replica_gen_and_trans_id(source_replica_uid) + # assert it has the correct gen and trans_id conn_key = sol._dbpool._u1dbconnections.keys().pop() conn = sol._dbpool._u1dbconnections[conn_key] sol_gen, sol_trans_id = conn._get_generation_info() - self.assertEqual(sol_gen, known_gen) - self.assertEqual(sol_trans_id, known_trans_id) - - # create some documents - d = _create_docs(None) + self.assertEqual(sol_gen, target_known_gen) + self.assertEqual(sol_trans_id, target_known_trans_id) # sync first time and assert success - d.addCallback(_assert_transaction_and_sync_logs, 0) - d.addCallback(lambda _: sol.sync()) - d.addCallback(lambda _: sol.get_all_docs()) - d.addCallback(_assert_sync, 0) + results = yield _create_docs() + _assert_transaction_and_sync_logs(results, 0) + yield sol.sync() + results = yield sol.get_all_docs() + _assert_sync(results, 0) # create more docs, sync second time and assert success - d.addCallback(_create_docs) - d.addCallback(_assert_transaction_and_sync_logs, 1) - d.addCallback(lambda _: sol.sync()) - d.addCallback(lambda _: sol.get_all_docs()) - d.addCallback(_assert_sync, 1) - - d.addCallback(lambda _: sol.close()) - - return d + results = yield _create_docs() + _assert_transaction_and_sync_logs(results, 1) + yield sol.sync() + results = yield sol.get_all_docs() + _assert_sync(results, 1) # # Concurrency tests diff --git a/common/src/leap/soledad/common/tests/test_encdecpool.py b/common/src/leap/soledad/common/tests/test_encdecpool.py index 793bfa1a..694eb7ad 100644 --- 
a/common/src/leap/soledad/common/tests/test_encdecpool.py +++ b/common/src/leap/soledad/common/tests/test_encdecpool.py @@ -18,6 +18,7 @@ Tests for encryption and decryption pool. """ import json +from random import shuffle from twisted.internet.defer import inlineCallbacks @@ -171,20 +172,21 @@ class TestSyncDecrypterPool(BaseSoledadTest): DOC_ID, DOC_REV, encrypted_content, 1, "trans_id", 1) def _assert_doc_was_decrypted_and_inserted(_): + self.assertEqual(1, len(self._inserted_docs)) self.assertEqual(self._inserted_docs, [(doc, 1, u"trans_id")]) self._pool.deferred.addCallback( _assert_doc_was_decrypted_and_inserted) return self._pool.deferred - def test_insert_encrypted_received_doc_many(self): + def test_insert_encrypted_received_doc_many(self, many=100): """ Test that many encrypted documents added to the pool are decrypted and inserted using the callback. """ crypto = self._soledad._crypto - many = 100 self._pool.start(many) + docs = [] # insert many encrypted docs in the pool for i in xrange(many): @@ -198,9 +200,12 @@ class TestSyncDecrypterPool(BaseSoledadTest): doc_id=doc_id, rev=rev, json=json.dumps(content)) encrypted_content = json.loads(crypto.encrypt_doc(doc)) + docs.append((doc_id, rev, encrypted_content, gen, + trans_id, idx)) + shuffle(docs) - self._pool.insert_encrypted_received_doc( - doc_id, rev, encrypted_content, gen, trans_id, idx) + for doc in docs: + self._pool.insert_encrypted_received_doc(*doc) def _assert_docs_were_decrypted_and_inserted(_): self.assertEqual(many, len(self._inserted_docs)) @@ -223,3 +228,16 @@ class TestSyncDecrypterPool(BaseSoledadTest): self._pool.deferred.addCallback( _assert_docs_were_decrypted_and_inserted) return self._pool.deferred + + @inlineCallbacks + def test_pool_reuse(self): + """ + The pool is reused between syncs, this test verifies that + reusing is fine. + """ + for i in xrange(3): + yield self.test_insert_encrypted_received_doc_many(5) + self._inserted_docs = [] + decrypted_docs = yield self._pool._get_docs(encrypted=False) + # check that decrypted docs staging is clean + self.assertEquals([], decrypted_docs) diff --git a/common/src/leap/soledad/common/tests/test_server.py b/common/src/leap/soledad/common/tests/test_server.py index f512d6c1..20fe8579 100644 --- a/common/src/leap/soledad/common/tests/test_server.py +++ b/common/src/leap/soledad/common/tests/test_server.py @@ -22,15 +22,16 @@ import tempfile import mock import time import binascii +from pkg_resources import resource_filename from uuid import uuid4 +from hashlib import sha512 from urlparse import urljoin from twisted.internet import defer +from twisted.trial import unittest -from leap.soledad.common.couch import ( - CouchServerState, - CouchDatabase, -) +from leap.soledad.common.couch.state import CouchServerState +from leap.soledad.common.couch import CouchDatabase from leap.soledad.common.tests.u1db_tests import TestCaseWithServer from leap.soledad.common.tests.test_couch import CouchDBTestCase from leap.soledad.common.tests.util import ( @@ -43,19 +44,39 @@ from leap.soledad.common.tests.util import ( from leap.soledad.common import crypto from leap.soledad.client import Soledad from leap.soledad.server import LockResource +from leap.soledad.server import load_configuration +from leap.soledad.server import CONFIG_DEFAULTS from leap.soledad.server.auth import URLToAuthorization +from leap.soledad.server.auth import SoledadTokenAuthMiddleware -# monkey path CouchServerState so it can ensure databases. 
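
For context on the _authorize helper below: it mirrors the token scheme that CouchServerState.verify_token (state.py above) expects, where token documents live in a rotating tokens_NNN database and are keyed by the sha512 hexdigest of the token. A sketch of that layout, using illustrative values:

    import time
    from hashlib import sha512

    TOKENS_DB_PREFIX = "tokens_"
    TOKENS_DB_EXPIRE = 30 * 24 * 3600  # rotation period: 30 days

    def tokens_dbname():
        # one database per 30-day window, e.g. "tokens_557"
        return TOKENS_DB_PREFIX + str(int(time.time() / TOKENS_DB_EXPIRE))

    # shape of a token document, as verify_token() looks it up
    token_doc = {
        '_id': sha512('valid-token').hexdigest(),  # hashed lookup key
        'user_id': 'valid-uuid',
        'type': 'Token',
    }
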
- -def _couch_ensure_database(self, dbname): - db = CouchDatabase.open_database( - self.couch_url + '/' + dbname, - create=True, - ensure_ddocs=True) - return db, db._replica_uid +class ServerAuthenticationMiddlewareTestCase(CouchDBTestCase): -CouchServerState.ensure_database = _couch_ensure_database + def setUp(self): + super(ServerAuthenticationMiddlewareTestCase, self).setUp() + app = mock.Mock() + self._state = CouchServerState(self.couch_url) + app.state = self._state + self.auth_middleware = SoledadTokenAuthMiddleware(app) + self._authorize('valid-uuid', 'valid-token') + + def _authorize(self, uuid, token): + token_doc = {} + token_doc['_id'] = sha512(token).hexdigest() + token_doc[self._state.TOKENS_USER_ID_KEY] = uuid + token_doc[self._state.TOKENS_TYPE_KEY] = \ + self._state.TOKENS_TYPE_DEF + dbname = self._state._tokens_dbname() + db = self.couch_server.create(dbname) + db.save(token_doc) + self.addCleanup(self.delete_db, db.name) + + def test_authorized_user(self): + is_authorized = self.auth_middleware._verify_authentication_data + self.assertTrue(is_authorized('valid-uuid', 'valid-token')) + self.assertFalse(is_authorized('valid-uuid', 'invalid-token')) + self.assertFalse(is_authorized('invalid-uuid', 'valid-token')) + self.assertFalse(is_authorized('eve', 'invalid-token')) class ServerAuthorizationTestCase(BaseSoledadTest): @@ -599,3 +620,45 @@ class LockResourceTestCase( self.assertIsNotNone(lr._shared_db.get_doc('lock-' + lock_uuid)) responder.send_response_json.assert_called_with( 401, error='unlock unauthorized') + + +class ConfigurationParsingTest(unittest.TestCase): + + def setUp(self): + self.maxDiff = None + + def test_use_defaults_on_failure(self): + config = load_configuration('this file will never exist') + expected = CONFIG_DEFAULTS + self.assertEquals(expected, config) + + def test_security_values_configuration(self): + # given + config_path = resource_filename('leap.soledad.common.tests', + 'fixture_soledad.conf') + # when + config = load_configuration(config_path) + + # then + expected = {'members': ['user1', 'user2'], + 'members_roles': ['role1', 'role2'], + 'admins': ['user3', 'user4'], + 'admins_roles': ['role3', 'role3']} + self.assertDictEqual(expected, config['database-security']) + + def test_server_values_configuration(self): + # given + config_path = resource_filename('leap.soledad.common.tests', + 'fixture_soledad.conf') + # when + config = load_configuration(config_path) + + # then + expected = {'couch_url': + 'http://soledad:passwd@localhost:5984', + 'create_cmd': + 'sudo -u soledad-admin /usr/bin/create-user-db', + 'admin_netrc': + '/etc/couchdb/couchdb-soledad-admin.netrc', + 'batching': False} + self.assertDictEqual(expected, config['soledad-server']) diff --git a/common/src/leap/soledad/common/tests/test_soledad.py b/common/src/leap/soledad/common/tests/test_soledad.py index 85d6734e..36c4003c 100644 --- a/common/src/leap/soledad/common/tests/test_soledad.py +++ b/common/src/leap/soledad/common/tests/test_soledad.py @@ -249,55 +249,51 @@ class SoledadSignalingTestCase(BaseSoledadTest): # get a fresh instance so it emits all bootstrap signals sol = self._soledad_instance( secrets_path='alternative_stage3.json', - local_db_path='alternative_stage3.u1db') + local_db_path='alternative_stage3.u1db', + userid=ADDRESS) # reverse call order so we can verify in the order the signals were # expected soledad.client.secrets.events.emit_async.mock_calls.reverse() soledad.client.secrets.events.emit_async.call_args = \ 
soledad.client.secrets.events.emit_async.call_args_list[0] soledad.client.secrets.events.emit_async.call_args_list.reverse() + + user_data = {'userid': ADDRESS, 'uuid': ADDRESS} + # downloading keys signals soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DOWNLOADING_KEYS, - ADDRESS, + catalog.SOLEDAD_DOWNLOADING_KEYS, user_data ) self._pop_mock_call(soledad.client.secrets.events.emit_async) soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DONE_DOWNLOADING_KEYS, - ADDRESS, + catalog.SOLEDAD_DONE_DOWNLOADING_KEYS, user_data ) # creating keys signals self._pop_mock_call(soledad.client.secrets.events.emit_async) soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_CREATING_KEYS, - ADDRESS, + catalog.SOLEDAD_CREATING_KEYS, user_data ) self._pop_mock_call(soledad.client.secrets.events.emit_async) soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DONE_CREATING_KEYS, - ADDRESS, + catalog.SOLEDAD_DONE_CREATING_KEYS, user_data ) # downloading once more (inside _put_keys_in_shared_db) self._pop_mock_call(soledad.client.secrets.events.emit_async) soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DOWNLOADING_KEYS, - ADDRESS, + catalog.SOLEDAD_DOWNLOADING_KEYS, user_data ) self._pop_mock_call(soledad.client.secrets.events.emit_async) soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DONE_DOWNLOADING_KEYS, - ADDRESS, + catalog.SOLEDAD_DONE_DOWNLOADING_KEYS, user_data ) # uploading keys signals self._pop_mock_call(soledad.client.secrets.events.emit_async) soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_UPLOADING_KEYS, - ADDRESS, + catalog.SOLEDAD_UPLOADING_KEYS, user_data ) self._pop_mock_call(soledad.client.secrets.events.emit_async) soledad.client.secrets.events.emit_async.assert_called_with( - catalog.SOLEDAD_DONE_UPLOADING_KEYS, - ADDRESS, + catalog.SOLEDAD_DONE_UPLOADING_KEYS, user_data ) # assert db was locked and unlocked sol.shared_db.lock.assert_called_with() @@ -332,12 +328,12 @@ class SoledadSignalingTestCase(BaseSoledadTest): # assert download keys signals soledad.client.secrets.events.emit_async.assert_called_with( catalog.SOLEDAD_DOWNLOADING_KEYS, - ADDRESS, + {'userid': ADDRESS, 'uuid': ADDRESS} ) self._pop_mock_call(soledad.client.secrets.events.emit_async) soledad.client.secrets.events.emit_async.assert_called_with( catalog.SOLEDAD_DONE_DOWNLOADING_KEYS, - ADDRESS, + {'userid': ADDRESS, 'uuid': ADDRESS}, ) sol.close() @@ -371,6 +367,6 @@ class SoledadSignalingTestCase(BaseSoledadTest): # assert the signal has been emitted soledad.client.events.emit_async.assert_called_with( catalog.SOLEDAD_DONE_DATA_SYNC, - ADDRESS, + {'userid': ADDRESS, 'uuid': ADDRESS}, ) sol.close() diff --git a/common/src/leap/soledad/common/tests/test_sync_deferred.py b/common/src/leap/soledad/common/tests/test_sync_deferred.py index 90b00670..c62bd156 100644 --- a/common/src/leap/soledad/common/tests/test_sync_deferred.py +++ b/common/src/leap/soledad/common/tests/test_sync_deferred.py @@ -148,6 +148,8 @@ class TestSoledadDbSyncDeferredEncDecr( replica_uid = self._soledad._dbpool.replica_uid sync_db = self._soledad._sync_db sync_enc_pool = self._soledad._sync_enc_pool + dbsyncer = self._soledad._dbsyncer # Soledad.sync uses the dbsyncer + target = soledad_sync_target( self, self.db2._dbname, source_replica_uid=replica_uid, @@ -155,7 +157,7 @@ class TestSoledadDbSyncDeferredEncDecr( sync_enc_pool=sync_enc_pool) 
self.addCleanup(target.close) return sync.SoledadSynchronizer( - self.db1, + dbsyncer, target).sync(defer_decryption=True) def wait_for_sync(self): diff --git a/common/src/leap/soledad/common/tests/test_sync_mutex.py b/common/src/leap/soledad/common/tests/test_sync_mutex.py index 2e2123a7..973a8587 100644 --- a/common/src/leap/soledad/common/tests/test_sync_mutex.py +++ b/common/src/leap/soledad/common/tests/test_sync_mutex.py @@ -33,7 +33,8 @@ from twisted.internet import defer from leap.soledad.client.sync import SoledadSynchronizer -from leap.soledad.common import couch +from leap.soledad.common.couch.state import CouchServerState +from leap.soledad.common.couch import CouchDatabase from leap.soledad.common.tests.u1db_tests import TestCaseWithServer from leap.soledad.common.tests.test_couch import CouchDBTestCase @@ -84,7 +85,7 @@ class TestSyncMutex( sync_target = soledad_sync_target def make_app(self): - self.request_state = couch.CouchServerState(self.couch_url) + self.request_state = CouchServerState(self.couch_url) return self.make_app_with_state(self.request_state) def setUp(self): @@ -102,7 +103,7 @@ class TestSyncMutex( self.startServer() # ensure remote db exists before syncing - db = couch.CouchDatabase.open_database( + db = CouchDatabase.open_database( urljoin(self.couch_url, 'user-' + self.user), create=True, ensure_ddocs=True) diff --git a/common/src/leap/soledad/common/tests/test_sync_target.py b/common/src/leap/soledad/common/tests/test_sync_target.py index c0987e90..f25e84dd 100644 --- a/common/src/leap/soledad/common/tests/test_sync_target.py +++ b/common/src/leap/soledad/common/tests/test_sync_target.py @@ -29,7 +29,6 @@ from uuid import uuid4 from testscenarios import TestWithScenarios from twisted.internet import defer -from urlparse import urljoin from leap.soledad.client import http_target as target from leap.soledad.client import crypto @@ -37,7 +36,6 @@ from leap.soledad.client.sqlcipher import SQLCipherU1DBSync from leap.soledad.client.sqlcipher import SQLCipherOptions from leap.soledad.client.sqlcipher import SQLCipherDatabase -from leap.soledad.common import couch from leap.soledad.common.document import SoledadDocument from leap.soledad.common.tests import u1db_tests as tests @@ -265,9 +263,9 @@ class TestSoledadSyncTarget( replica_trans_id=replica_trans_id, number_of_docs=number_of_docs, doc_idx=doc_idx, sync_id=sync_id) - from leap.soledad.common.tests.test_couch import IndexedCouchDatabase + from leap.soledad.common.backend import SoledadBackend self.patch( - IndexedCouchDatabase, '_put_doc_if_newer', bomb_put_doc_if_newer) + SoledadBackend, '_put_doc_if_newer', bomb_put_doc_if_newer) remote_target = self.getSyncTarget( source_replica_uid='replica') other_changes = [] diff --git a/common/src/leap/soledad/common/tests/util.py b/common/src/leap/soledad/common/tests/util.py index 1c7adb91..d4510686 100644 --- a/common/src/leap/soledad/common/tests/util.py +++ b/common/src/leap/soledad/common/tests/util.py @@ -27,7 +27,6 @@ import shutil import random import string import u1db -import traceback import couchdb from uuid import uuid4 @@ -37,17 +36,17 @@ from StringIO import StringIO from pysqlcipher import dbapi2 from u1db import sync -from u1db.errors import DatabaseDoesNotExist from u1db.remote import http_database from twisted.trial import unittest -from leap.common.files import mkdir_p from leap.common.testing.basetest import BaseLeapTest from leap.soledad.common import soledad_assert from leap.soledad.common.document import SoledadDocument -from 
leap.soledad.common.couch import CouchDatabase, CouchServerState +from leap.soledad.common.couch import CouchDatabase +from leap.soledad.common.couch.state import CouchServerState + from leap.soledad.common.crypto import ENC_SCHEME_KEY from leap.soledad.client import Soledad @@ -246,6 +245,7 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest): # each local db. self.rand_prefix = ''.join( map(lambda x: random.choice(string.ascii_letters), range(6))) + # initialize soledad by hand so we can control keys # XXX check if this soledad is actually used self._soledad = self._soledad_instance( @@ -286,7 +286,8 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest): server_url='https://127.0.0.1/', cert_file=None, shared_db_class=None, - auth_token='auth-token'): + auth_token='auth-token', + userid=ADDRESS): def _put_doc_side_effect(doc): self._doc_put = doc @@ -308,7 +309,8 @@ class BaseSoledadTest(BaseLeapTest, MockedSharedDBTest): cert_file=cert_file, defer_encryption=self.defer_sync_encryption, shared_db=MockSharedDB(), - auth_token=auth_token) + auth_token=auth_token, + userid=userid) self.addCleanup(soledad.close) return soledad |
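
Finally, to tie together the batching and multipart pieces of this patch: a hypothetical usage sketch of how the MultipartWriter from support.py assembles the body for the non-batching PUT shown at the top of this diff, with the JSON document first and one part per attachment:

    import json
    from StringIO import StringIO

    from leap.soledad.common.couch.support import MultipartWriter

    couch_doc = {'_id': 'some-doc-id', 'u1db_rev': 'replica:1',
                 '_attachments': {}}  # metadata; the parts carry the bodies
    parts = ['{"content": null}']  # e.g. the u1db_content attachment body

    buf = StringIO()
    envelope = MultipartWriter(buf)
    # the JSON document goes first, then one part per attachment
    envelope.add('application/json', json.dumps(couch_doc))
    for part in parts:
        envelope.add('application/octet-stream', part)
    envelope.close()
    # buf.getvalue() and envelope.headers can then be handed to
    # couchdb's Resource.put_json(), as the backend code above does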