60 files changed, 5323 insertions, 1339 deletions
@@ -10,5 +10,3 @@ MANIFEST  *.pyc  *.log  *.*~ - - @@ -1,3 +1,69 @@ +0.4.5 Apr 4: +Client: +  o Catch lock timeout exception. Fixes #4435. +  o Add lock for create_doc and update_indexes calls, preventing +    concurrent access to the db. Closes #5139. +  o Backwards compatibility for socket.create_connection interface in +    2.6. Closes #5208. +  o Always return unicode in helper method, even on +    UnicodeError. Related to #4998. +  o Fix a bug in soledad.client.sqlcipher by which we were creating a +    new connection for each sync. +  o Unlock shared_db if anything fails in the bootstrap +    sequence. Fixes #4702. +  o Avoid concurrent syncs for the same account, but allow for +    distinct accounts (see the sketch after this entry). Fixes #4451. +  o Add a get_count_from_index to the sqlcipher u1db backend. Related to: +    #4616. +  o Do not autocreate remote user database when syncing. Tapicero +    should make sure that the db is created when the user is +    created. Closes #5302. +  o Add a read-write lock for all client operations. Addresses #4972. +  o Add synchronous=off and temp_store=mem to soledad client, for +    optimization. + +Common: +  o Add lock timeout HTTP error. Fixes #4435. +  o Remodel couch backend to fix concurrency and scalability. Closes +    #4475, #4682, #4683 and #4680. +  o Remove check for design docs on couch server state initialization. +    Closes #5387. +  o Renew HTTP session after multipart PUTs to avoid request hanging. +    Fixes #5449. +  o Preload time.strptime() to avoid multi-threaded problem on couch +    backend get_docs() method. Fixes #5449. +  o Improve error messages. Closes #5035. +  o Add MissingTokenError and InvalidTokenError as sub exceptions +    from Unauthorized. +  o Allow sync of large files (~100MB). Closes #4836. +  o Add exceptions to deal with missing design documents. Fixes #4994. +  o Parallelize get_docs() on couch backend to accelerate sync. +    Closes #5008. +  o Use less memory when putting docs on couch. Fixes #5011. +  o Prevent CouchServerState from creating or deleting databases. This +    way, Soledad remote clients won't ever be able to do these +    operations when syncing. Part of #5302. +  o Avoid concurrent syncs problem by adding a lock for PUTting to the +    sync log update handler. Fixes #5388. +  o Remove check for couch permissions when CouchServerState is +    instantiated. This is not necessary anymore because platform +    takes care of giving the soledad user enough permissions and +    tapicero takes care of uploading the needed design documents. + +Server: +  o Send proper lock timeout response. Fixes #4435. +  o Fix raising of auth token errors. Fixes #5191. +  o Allow sync of large files (~100MB). Closes #4836. +  o Use a temporary directory for server side locks. Fixes #4918. +  o Catch couchdb.http.ResourceNotFound exceptions when accessing +    design documents on couch backend, and raise appropriate missing +    design documents exceptions. Fixes #4994. +  o Do not try to create the shared database when running the Soledad +    Server application. Fixes #5302. +  o Enable Gzip compression on the soledad wsgi app.
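For reference, the per-replica sync lock mentioned in the changelog entry above (and implemented in the client hunks further down) boils down to the following pattern. This is a minimal sketch, not the actual Soledad code: the sync_once() helper is illustrative, while defaultdict(Lock), _get_replica_uid() and sync(url, creds=..., autocreate=False) are taken from the diff below.

    from collections import defaultdict
    from threading import Lock

    # One lock per replica uid: repeated syncs of the same replica serialize,
    # while replicas of distinct accounts can still sync concurrently.
    syncing_lock = defaultdict(Lock)

    def sync_once(db, url, creds=None):
        # Blocks until no other sync for this replica is in progress.
        with syncing_lock[db._get_replica_uid()]:
            return db.sync(url, creds=creds, autocreate=False)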
+ +-- 2014 -- +  0.4.4 Dec 6:  Client:    o Add MAC verirication to the recovery document and diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 62f93b3d..46e3cd5f 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -34,6 +34,8 @@ import urlparse  import hmac  from hashlib import sha256 +from threading import Lock +from collections import defaultdict  try:      import cchardet as chardet @@ -52,6 +54,7 @@ from leap.soledad.common.errors import (      InvalidTokenError,      NotLockedError,      AlreadyLockedError, +    LockTimedOutError,  )  from leap.soledad.common.crypto import (      MacMethods, @@ -245,6 +248,12 @@ class Soledad(object):      Prefix for default values for path.      """ +    syncing_lock = defaultdict(Lock) +    """ +    A dictionary that hold locks which avoid multiple sync attempts from the +    same database replica. +    """ +      def __init__(self, uuid, passphrase, secrets_path, local_db_path,                   server_url, cert_file, auth_token=None, secret_id=None):          """ @@ -315,6 +324,47 @@ class Soledad(object):      # initialization/destruction methods      # +    def _get_or_gen_crypto_secrets(self): +        """ +        Retrieves or generates the crypto secrets. + +        Might raise BootstrapSequenceError +        """ +        doc = self._get_secrets_from_shared_db() + +        if doc: +            logger.info( +                'Found cryptographic secrets in shared recovery ' +                'database.') +            _, mac = self.import_recovery_document(doc.content) +            if mac is False: +                self.put_secrets_in_shared_db() +            self._store_secrets()  # save new secrets in local file +            if self._secret_id is None: +                self._set_secret_id(self._secrets.items()[0][0]) +        else: +            # STAGE 3 - there are no secrets in server also, so +            # generate a secret and store it in remote db. +            logger.info( +                'No cryptographic secrets found, creating new ' +                ' secrets...') +            self._set_secret_id(self._gen_secret()) +            try: +                self._put_secrets_in_shared_db() +            except Exception as ex: +                # storing generated secret in shared db failed for +                # some reason, so we erase the generated secret and +                # raise. +                try: +                    os.unlink(self._secrets_path) +                except OSError as e: +                    if e.errno != errno.ENOENT:  # no such file or directory +                        logger.exception(e) +                logger.exception(ex) +                raise BootstrapSequenceError( +                    'Could not store generated secret in the shared ' +                    'database, bailing out...') +      def _bootstrap(self):          """          Bootstrap local Soledad instance. @@ -342,6 +392,8 @@ class Soledad(object):          self._init_dirs()          self._crypto = SoledadCrypto(self) +        secrets_problem = None +          # STAGE 1 - verify if secrets exist locally          if not self._has_secret():  # try to load from local storage. 
@@ -359,39 +411,13 @@ class Soledad(object):                  token, timeout = self._shared_db.lock()              except AlreadyLockedError:                  raise BootstrapSequenceError('Database is already locked.') +            except LockTimedOutError: +                raise BootstrapSequenceError('Lock operation timed out.') -            doc = self._get_secrets_from_shared_db() -            if doc: -                logger.info( -                    'Found cryptographic secrets in shared recovery ' -                    'database.') -                _, mac = self.import_recovery_document(doc.content) -                if mac is False: -                    self.put_secrets_in_shared_db() -                self._store_secrets()  # save new secrets in local file -                if self._secret_id is None: -                    self._set_secret_id(self._secrets.items()[0][0]) -            else: -                # STAGE 3 - there are no secrets in server also, so -                # generate a secret and store it in remote db. -                logger.info( -                    'No cryptographic secrets found, creating new ' -                    ' secrets...') -                self._set_secret_id(self._gen_secret()) -                try: -                    self._put_secrets_in_shared_db() -                except Exception: -                    # storing generated secret in shared db failed for -                    # some reason, so we erase the generated secret and -                    # raise. -                    try: -                        os.unlink(self._secrets_path) -                    except OSError as e: -                        if errno == 2:  # no such file or directory -                            pass -                    raise BootstrapSequenceError( -                        'Could not store generated secret in the shared ' -                        'database, bailing out...') +            try: +                self._get_or_gen_crypto_secrets() +            except Exception as e: +                secrets_problem = e              # release the lock on shared db              try: @@ -416,7 +442,10 @@ class Soledad(object):              # --- end of atomic operation in shared db ---          # STAGE 4 - local database initialization -        self._init_db() +        if secrets_problem is None: +            self._init_db() +        else: +            raise secrets_problem      def _init_dirs(self):          """ @@ -749,7 +778,7 @@ class Soledad(object):          ============================== WARNING ==============================          This method converts the document's contents to unicode in-place. This -        meanse that after calling C{put_doc(doc)}, the contents of the +        means that after calling C{put_doc(doc)}, the contents of the          document, i.e. C{doc.content}, might be different from before the          call.          ============================== WARNING ============================== @@ -806,9 +835,9 @@ class Soledad(object):              in matching doc_ids order.          :rtype: generator          """ -        return self._db.get_docs(doc_ids, -                                 check_for_conflicts=check_for_conflicts, -                                 include_deleted=include_deleted) +        return self._db.get_docs( +            doc_ids, check_for_conflicts=check_for_conflicts, +            include_deleted=include_deleted)      def get_all_docs(self, include_deleted=False):          """Get the JSON content for all documents in the database. 
@@ -824,7 +853,7 @@ class Soledad(object):      def _convert_to_unicode(self, content):          """ -        Converts content to utf8 (or all the strings in content) +        Converts content to unicode (or all the strings in content)          NOTE: Even though this method supports any type, it will          currently ignore contents of lists, tuple or any other @@ -839,13 +868,14 @@ class Soledad(object):          if isinstance(content, unicode):              return content          elif isinstance(content, str): +            result = chardet.detect(content) +            default = "utf-8" +            encoding = result["encoding"] or default              try: -                result = chardet.detect(content) -                default = "utf-8" -                encoding = result["encoding"] or default                  content = content.decode(encoding) -            except UnicodeError: -                pass +            except UnicodeError as e: +                logger.error("Unicode error: {0!r}. Using 'replace'".format(e)) +                content = content.decode(encoding, 'replace')              return content          else:              if isinstance(content, dict): @@ -910,7 +940,8 @@ class Soledad(object):              "number(fieldname, width)", "lower(fieldname)"          """          if self._db: -            return self._db.create_index(index_name, *index_expressions) +            return self._db.create_index( +                index_name, *index_expressions)      def delete_index(self, index_name):          """ @@ -955,6 +986,23 @@ class Soledad(object):          if self._db:              return self._db.get_from_index(index_name, *key_values) +    def get_count_from_index(self, index_name, *key_values): +        """ +        Return the count of the documents that match the keys and +        values supplied. + +        :param index_name: The index to query +        :type index_name: str +        :param key_values: values to match. eg, if you have +                           an index with 3 fields then you would have: +                           get_from_index(index_name, val1, val2, val3) +        :type key_values: tuple +        :return: count. +        :rtype: int +        """ +        if self._db: +            return self._db.get_count_from_index(index_name, *key_values) +      def get_range_from_index(self, index_name, start_value, end_value):          """          Return documents that fall within the specified range. @@ -1028,6 +1076,9 @@ class Soledad(object):          """          Synchronize the local encrypted replica with a remote replica. +        This method blocks until a syncing lock is acquired, so there are no +        attempts of concurrent syncs from the same client replica. 
+          :param url: the url of the target replica to sync with          :type url: str @@ -1036,11 +1087,13 @@ class Soledad(object):          :rtype: str          """          if self._db: -            local_gen = self._db.sync( -                urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), -                creds=self._creds, autocreate=True) -            signal(SOLEDAD_DONE_DATA_SYNC, self._uuid) -            return local_gen +            # acquire lock before attempt to sync +            with Soledad.syncing_lock[self._db._get_replica_uid()]: +                local_gen = self._db.sync( +                    urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), +                    creds=self._creds, autocreate=False) +                signal(SOLEDAD_DONE_DATA_SYNC, self._uuid) +                return local_gen      def need_sync(self, url):          """ @@ -1158,7 +1211,7 @@ class Soledad(object):          """          soledad_assert(self.STORAGE_SECRETS_KEY in data)          # check mac of the recovery document -        mac_auth = False +        #mac_auth = False  # XXX ?          mac = None          if MAC_KEY in data:              soledad_assert(data[MAC_KEY] is not None) @@ -1181,7 +1234,7 @@ class Soledad(object):              if mac != data[MAC_KEY]:                  raise WrongMac('Could not authenticate recovery document\'s '                                 'contents.') -            mac_auth = True +            #mac_auth = True  # XXX ?          # include secrets in the secret pool.          secrets = 0          for secret_id, secret_data in data[self.STORAGE_SECRETS_KEY].items(): @@ -1248,7 +1301,7 @@ class Soledad(object):  #-----------------------------------------------------------------------------  # We need a more reasonable timeout (in seconds) -SOLEDAD_TIMEOUT = 10 +SOLEDAD_TIMEOUT = 120  class VerifiedHTTPSConnection(httplib.HTTPSConnection): @@ -1258,9 +1311,17 @@ class VerifiedHTTPSConnection(httplib.HTTPSConnection):      # derived from httplib.py      def connect(self): -        "Connect to a host on a given (SSL) port." -        sock = socket.create_connection((self.host, self.port), -                                        SOLEDAD_TIMEOUT, self.source_address) +        """ +        Connect to a host on a given (SSL) port. +        """ +        try: +            source = self.source_address +            sock = socket.create_connection((self.host, self.port), +                                            SOLEDAD_TIMEOUT, source) +        except AttributeError: +            # source_address was introduced in 2.7 +            sock = socket.create_connection((self.host, self.port), +                                            SOLEDAD_TIMEOUT)          if self._tunnel_host:              self.sock = sock              self._tunnel() diff --git a/client/src/leap/soledad/client/_version.py b/client/src/leap/soledad/client/_version.py index 8db26fe5..65efb714 100644 --- a/client/src/leap/soledad/client/_version.py +++ b/client/src/leap/soledad/client/_version.py @@ -17,6 +17,7 @@ git_full = "$Format:%H$"  import subprocess  import sys +  def run_command(args, cwd=None, verbose=False):      try:          # remember shell=False, so use git.cmd on windows, not just git @@ -36,11 +37,10 @@ def run_command(args, cwd=None, verbose=False):          return None      return stdout - -import sys  import re  import os.path +  def get_expanded_variables(versionfile_source):      # the code embedded in _version.py can just fetch the value of these      # variables. 
When used from setup.py, we don't want to import @@ -48,7 +48,7 @@ def get_expanded_variables(versionfile_source):      # used from _version.py.      variables = {}      try: -        f = open(versionfile_source,"r") +        f = open(versionfile_source, "r")          for line in f.readlines():              if line.strip().startswith("git_refnames ="):                  mo = re.search(r'=\s*"(.*)"', line) @@ -63,12 +63,13 @@ def get_expanded_variables(versionfile_source):          pass      return variables +  def versions_from_expanded_variables(variables, tag_prefix, verbose=False):      refnames = variables["refnames"].strip()      if refnames.startswith("$Format"):          if verbose:              print("variables are unexpanded, not using") -        return {} # unexpanded, so not in an unpacked git-archive tarball +        return {}  # unexpanded, so not in an unpacked git-archive tarball      refs = set([r.strip() for r in refnames.strip("()").split(",")])      # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of      # just "foo-1.0". If we see a "tag: " prefix, prefer those. @@ -93,13 +94,14 @@ def versions_from_expanded_variables(variables, tag_prefix, verbose=False):              r = ref[len(tag_prefix):]              if verbose:                  print("picking %s" % r) -            return { "version": r, -                     "full": variables["full"].strip() } +            return {"version": r, +                    "full": variables["full"].strip()}      # no suitable tags, so we use the full revision id      if verbose:          print("no suitable tags, using full revision id") -    return { "version": variables["full"].strip(), -             "full": variables["full"].strip() } +    return {"version": variables["full"].strip(), +            "full": variables["full"].strip()} +  def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      # this runs 'git' from the root of the source tree. That either means @@ -116,7 +118,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):          here = os.path.abspath(__file__)      except NameError:          # some py2exe/bbfreeze/non-CPython implementations don't do __file__ -        return {} # not always correct +        return {}  # not always correct      # versionfile_source is the relative path from the top of the source tree      # (where the .git directory might live) to this file. Invert this to find @@ -126,7 +128,16 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):          for i in range(len(versionfile_source.split("/"))):              root = os.path.dirname(root)      else: -        root = os.path.dirname(here) +        root = os.path.dirname( +            os.path.join('..', here)) + +    ###################################################### +    # XXX patch for our specific configuration with +    # the three projects leap.soledad.{common, client, server} +    # inside the same repo. 
+    ###################################################### +    root = os.path.dirname(os.path.join('..', root)) +      if not os.path.exists(os.path.join(root, ".git")):          if verbose:              print("no .git in %s" % root) @@ -141,7 +152,8 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):          return {}      if not stdout.startswith(tag_prefix):          if verbose: -            print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix)) +            print("tag '%s' doesn't start with prefix '%s'" % +                  (stdout, tag_prefix))          return {}      tag = stdout[len(tag_prefix):]      stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root) @@ -153,7 +165,8 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      return {"version": tag, "full": full} -def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False): +def versions_from_parentdir(parentdir_prefix, versionfile_source, +                            verbose=False):      if IN_LONG_VERSION_PY:          # We're running from _version.py. If it's from a source tree          # (execute-in-place), we can work upwards to find the root of the @@ -163,7 +176,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)              here = os.path.abspath(__file__)          except NameError:              # py2exe/bbfreeze/non-CPython don't have __file__ -            return {} # without __file__, we have no hope +            return {}  # without __file__, we have no hope          # versionfile_source is the relative path from the top of the source          # tree to _version.py. Invert this to find the root from __file__.          root = here @@ -180,7 +193,8 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)      dirname = os.path.basename(root)      if not dirname.startswith(parentdir_prefix):          if verbose: -            print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" % +            print("guessing rootdir is '%s', but '%s' doesn't start " +                  "with prefix '%s'" %                    (root, dirname, parentdir_prefix))          return None      return {"version": dirname[len(parentdir_prefix):], "full": ""} @@ -189,8 +203,9 @@ tag_prefix = ""  parentdir_prefix = "leap.soledad.client-"  versionfile_source = "src/leap/soledad/client/_version.py" +  def get_versions(default={"version": "unknown", "full": ""}, verbose=False): -    variables = { "refnames": git_refnames, "full": git_full } +    variables = {"refnames": git_refnames, "full": git_full}      ver = versions_from_expanded_variables(variables, tag_prefix, verbose)      if not ver:          ver = versions_from_vcs(tag_prefix, versionfile_source, verbose) @@ -200,4 +215,3 @@ def get_versions(default={"version": "unknown", "full": ""}, verbose=False):      if not ver:          ver = default      return ver - diff --git a/client/src/leap/soledad/client/auth.py b/client/src/leap/soledad/client/auth.py index d85e3ba6..72ab0008 100644 --- a/client/src/leap/soledad/client/auth.py +++ b/client/src/leap/soledad/client/auth.py @@ -68,4 +68,3 @@ class TokenBasedAuth(object):          else:              raise errors.UnknownAuthMethod(                  'Wrong credentials: %s' % self._creds) - diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 894c6f97..3aea340d 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ 
b/client/src/leap/soledad/client/sqlcipher.py @@ -43,16 +43,19 @@ So, as the statements above were introduced for backwards compatibility with  SLCipher 1.1 databases, we do not implement them as all SQLCipher databases  handled by Soledad should be created by SQLCipher >= 2.0.  """ +import httplib  import logging  import os -import time  import string  import threading +import time - -from u1db.backends import sqlite_backend  from pysqlcipher import dbapi2 +from u1db.backends import sqlite_backend +from u1db.sync import Synchronizer  from u1db import errors as u1db_errors + +from leap.soledad.client.target import SoledadSyncTarget  from leap.soledad.common.document import SoledadDocument  logger = logging.getLogger(__name__) @@ -88,10 +91,10 @@ def open(path, password, create=True, document_factory=None, crypto=None,      database does not already exist.      :param path: The filesystem path for the database to open. -    :param type: str +    :type path: str      :param create: True/False, should the database be created if it doesn't          already exist? -    :param type: bool +    :param create: bool      :param document_factory: A function that will be called with the same          parameters as Document.__init__.      :type document_factory: callable @@ -144,25 +147,30 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):      _index_storage_value = 'expand referenced encrypted'      k_lock = threading.Lock() +    create_doc_lock = threading.Lock() +    update_indexes_lock = threading.Lock() +    _syncer = None      def __init__(self, sqlcipher_file, password, document_factory=None,                   crypto=None, raw_key=False, cipher='aes-256-cbc',                   kdf_iter=4000, cipher_page_size=1024):          """ -        Create a new sqlcipher file. +        Connect to an existing SQLCipher database, creating a new sqlcipher +        database file if needed.          :param sqlcipher_file: The path for the SQLCipher file.          :type sqlcipher_file: str          :param password: The password that protects the SQLCipher db.          :type password: str          :param document_factory: A function that will be called with the same -            parameters as Document.__init__. +                                 parameters as Document.__init__.          :type document_factory: callable          :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt -            document contents when syncing. +                       document contents when syncing.          :type crypto: soledad.crypto.SoledadCrypto -        :param raw_key: Whether C{password} is a raw 64-char hex string or a -            passphrase that should be hashed to obtain the encyrption key. +        :param raw_key: Whether password is a raw 64-char hex string or a +                        passphrase that should be hashed to obtain the +                        encyrption key.          :type raw_key: bool          :param cipher: The cipher and mode to use.          
:type cipher: str @@ -186,6 +194,13 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):              self._set_crypto_pragmas(                  self._db_handle, password, raw_key, cipher, kdf_iter,                  cipher_page_size) +            if os.environ.get('LEAP_SQLITE_NOSYNC'): +                self._pragma_synchronous_off(self._db_handle) +            else: +                self._pragma_synchronous_normal(self._db_handle) +            if os.environ.get('LEAP_SQLITE_MEMSTORE'): +                self._pragma_mem_temp_store(self._db_handle) +            self._pragma_write_ahead_logging(self._db_handle)              self._real_replica_uid = None              self._ensure_schema()              self._crypto = crypto @@ -336,13 +351,46 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):          :return: The local generation before the synchronisation was performed.          :rtype: int          """ -        from u1db.sync import Synchronizer -        from leap.soledad.client.target import SoledadSyncTarget -        return Synchronizer( -            self, -            SoledadSyncTarget(url, -                              creds=creds, -                              crypto=self._crypto)).sync(autocreate=autocreate) +        if not self.syncer: +            self._create_syncer(url, creds=creds) + +        try: +            res = self.syncer.sync(autocreate=autocreate) +        except httplib.CannotSendRequest: +            # raised when you reuse an httplib.HTTP object for a new request +            # while you haven't called its getresponse() +            # this catch works for the current connection class used +            # by our HTTPClientBase, since it uses httplib. +            # we will have to replace it if it changes. +            logger.info("Replacing connection and trying again...") +            self._syncer = None +            self._create_syncer(url, creds=creds) +            res = self.syncer.sync(autocreate=autocreate) +        return res + +    @property +    def syncer(self): +        """ +        Accessor for the synchronizer. +        """ +        return self._syncer + +    def _create_syncer(self, url, creds=None): +        """ +        Creates a synchronizer. + +        :param url: The url of the target replica to sync with. +        :type url: str +        :param creds: optional dictionary giving credentials +            to authorize the operation with the server. +        :type creds: dict +        """ +        if self._syncer is None: +            self._syncer = Synchronizer( +                self, +                SoledadSyncTarget(url, +                                  creds=creds, +                                  crypto=self._crypto))      def _extra_schema_init(self, c):          """ @@ -359,6 +407,22 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):              'ALTER TABLE document '              'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') +    def create_doc(self, content, doc_id=None): +        """ +        Create a new document in the local encrypted database.
+ +        :param content: the contents of the new document +        :type content: dict +        :param doc_id: an optional identifier specifying the document id +        :type doc_id: str + +        :return: the new document +        :rtype: SoledadDocument +        """ +        with self.create_doc_lock: +            return sqlite_backend.SQLitePartialExpandDatabase.create_doc( +                self, content, doc_id=doc_id) +      def _put_and_update_indexes(self, old_doc, doc):          """          Update a document and all indexes related to it. @@ -368,12 +432,13 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):          :param doc: The new version of the document.          :type doc: u1db.Document          """ -        sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes( -            self, old_doc, doc) -        c = self._db_handle.cursor() -        c.execute('UPDATE document SET syncable=? ' -                  'WHERE doc_id=?', -                  (doc.syncable, doc.doc_id)) +        with self.update_indexes_lock: +            sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes( +                self, old_doc, doc) +            c = self._db_handle.cursor() +            c.execute('UPDATE document SET syncable=? ' +                      'WHERE doc_id=?', +                      (doc.syncable, doc.doc_id))      def _get_doc(self, doc_id, check_for_conflicts=False):          """ @@ -697,6 +762,115 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):          # XXX change passphrase param!          db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % passphrase) +    @classmethod +    def _pragma_synchronous_off(cls, db_handle): +        """ +        Change the setting of the "synchronous" flag to OFF. +        """ +        logger.debug("SQLCIPHER: SETTING SYNCHRONOUS OFF") +        db_handle.cursor().execute('PRAGMA synchronous=OFF') + +    @classmethod +    def _pragma_synchronous_normal(cls, db_handle): +        """ +        Change the setting of the "synchronous" flag to NORMAL. +        """ +        logger.debug("SQLCIPHER: SETTING SYNCHRONOUS NORMAL") +        db_handle.cursor().execute('PRAGMA synchronous=NORMAL') + +    @classmethod +    def _pragma_mem_temp_store(cls, db_handle): +        """ +        Use a in-memory store for temporary tables. +        """ +        logger.debug("SQLCIPHER: SETTING TEMP_STORE MEMORY") +        db_handle.cursor().execute('PRAGMA temp_store=MEMORY') + +    @classmethod +    def _pragma_write_ahead_logging(cls, db_handle): +        """ +        Enable write-ahead logging, and set the autocheckpoint to 50 pages. + +        Setting the autocheckpoint to a small value, we make the reads not +        suffer too much performance degradation. + +        From the sqlite docs: + +        "There is a tradeoff between average read performance and average write +        performance. To maximize the read performance, one wants to keep the +        WAL as small as possible and hence run checkpoints frequently, perhaps +        as often as every COMMIT. To maximize write performance, one wants to +        amortize the cost of each checkpoint over as many writes as possible, +        meaning that one wants to run checkpoints infrequently and let the WAL +        grow as large as possible before each checkpoint. 
The decision of how +        often to run checkpoints may therefore vary from one application to +        another depending on the relative read and write performance +        requirements of the application. The default strategy is to run a +        checkpoint once the WAL reaches 1000 pages" +        """ +        logger.debug("SQLCIPHER: SETTING WRITE-AHEAD LOGGING") +        db_handle.cursor().execute('PRAGMA journal_mode=WAL') +        # The optimum value can still use a little bit of tuning, but we favor +        # small sizes of the WAL file to get fast reads, since we assume that +        # the writes will be quick enough to not block too much. + +        # TODO +        # As a further improvement, we might want to set autocheckpoint to 0 +        # here and do the checkpoints manually in a separate thread, to avoid +        # any blocks in the main thread (we should run a loopingcall from here) +        db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50') + +    # Extra query methods: extensions to the base sqlite implmentation. + +    def get_count_from_index(self, index_name, *key_values): +        """ +        Returns the count for a given combination of index_name +        and key values. + +        Extension method made from similar methods in u1db version 13.09 + +        :param index_name: The index to query +        :type index_name: str +        :param key_values: values to match. eg, if you have +                           an index with 3 fields then you would have: +                           get_from_index(index_name, val1, val2, val3) +        :type key_values: tuple +        :return: count. +        :rtype: int +        """ +        c = self._db_handle.cursor() +        definition = self._get_index_definition(index_name) + +        if len(key_values) != len(definition): +            raise u1db_errors.InvalidValueForIndex() +        tables = ["document_fields d%d" % i for i in range(len(definition))] +        novalue_where = ["d.doc_id = d%d.doc_id" +                         " AND d%d.field_name = ?" +                         % (i, i) for i in range(len(definition))] +        exact_where = [novalue_where[i] +                       + (" AND d%d.value = ?" % (i,)) +                       for i in range(len(definition))] +        args = [] +        where = [] +        for idx, (field, value) in enumerate(zip(definition, key_values)): +            args.append(field) +            where.append(exact_where[idx]) +            args.append(value) + +        tables = ["document_fields d%d" % i for i in range(len(definition))] +        statement = ( +            "SELECT COUNT(*) FROM document d, %s WHERE %s " % ( +                ', '.join(tables), +                ' AND '.join(where), +            )) +        try: +            c.execute(statement, tuple(args)) +        except dbapi2.OperationalError, e: +            raise dbapi2.OperationalError( +                str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args)) +        res = c.fetchall() +        return res[0][0] +      def __del__(self):          """          Closes db_handle upon object destruction. diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index d8899a97..3b3d6870 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -14,22 +14,26 @@  #  # You should have received a copy of the GNU General Public License  # along with this program. If not, see <http://www.gnu.org/licenses/>. 
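For reference, the get_count_from_index() extension added to the SQLCipher backend above could be exercised along these lines. This is a usage sketch only: the index name, fields and documents are made up for illustration, while create_index(), create_doc() and get_count_from_index() are the methods shown in the hunks above.

    # `db` is assumed to be an opened SQLCipherDatabase (see `open()` above).
    db.create_index('by-type-and-status', 'type', 'status')
    db.create_doc({'type': 'msg', 'status': 'unread'})
    db.create_doc({'type': 'msg', 'status': 'read'})
    # Count matching documents without fetching their contents.
    unread = db.get_count_from_index('by-type-and-status', 'msg', 'unread')
    assert unread == 1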
- -  """  A U1DB backend for encrypting data before sending to server and decrypting  after receiving.  """ - -import simplejson as json +import binascii +import cStringIO +import gzip  import hashlib  import hmac -import binascii +import logging +import urllib +import simplejson as json +from time import sleep -from u1db.remote import utils +from u1db.remote import utils, http_errors  from u1db.errors import BrokenSyncStream +from u1db import errors  from u1db.remote.http_target import HTTPSyncTarget +from u1db.remote.http_client import _encode_query_parameter  from leap.soledad.common import soledad_assert @@ -53,11 +57,13 @@ from leap.soledad.client.crypto import (      UnknownEncryptionMethod,  ) +logger = logging.getLogger(__name__)  #  # Exceptions  # +  class DocumentNotEncrypted(Exception):      """      Raised for failures in document encryption. @@ -222,6 +228,24 @@ def decrypt_doc(crypto, doc):      return plainjson +def _gunzip(data): +    """ +    Uncompress data that is gzipped. + +    :param data: gzipped data +    :type data: basestring +    """ +    buffer = cStringIO.StringIO() +    buffer.write(data) +    buffer.seek(0) +    try: +        data = gzip.GzipFile(mode='r', fileobj=buffer).read() +    except Exception: +        logger.warning("Error while decrypting gzipped data") +    buffer.close() +    return data + +  #  # SoledadSyncTarget  # @@ -353,6 +377,82 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth):              raise BrokenSyncStream          return res +    def _request(self, method, url_parts, params=None, body=None, +                 content_type=None): +        """ +        Overloaded method. See u1db docs. +        Patched for adding gzip encoding. +        """ + +        self._ensure_connection() +        unquoted_url = url_query = self._url.path +        if url_parts: +            if not url_query.endswith('/'): +                url_query += '/' +                unquoted_url = url_query +            url_query += '/'.join(urllib.quote(part, safe='') +                                  for part in url_parts) +            # oauth performs its own quoting +            unquoted_url += '/'.join(url_parts) +        encoded_params = {} +        if params: +            for key, value in params.items(): +                key = unicode(key).encode('utf-8') +                encoded_params[key] = _encode_query_parameter(value) +            url_query += ('?' + urllib.urlencode(encoded_params)) +        if body is not None and not isinstance(body, basestring): +            body = json.dumps(body) +            content_type = 'application/json' +        headers = {} +        if content_type: +            headers['content-type'] = content_type + +        # Patched: We would like to receive gzip pretty please +        # ---------------------------------------------------- +        headers['accept-encoding'] = "gzip" +        # ---------------------------------------------------- + +        headers.update( +            self._sign_request(method, unquoted_url, encoded_params)) + +        for delay in self._delays: +            try: +                self._conn.request(method, url_query, body, headers) +                return self._response() +            except errors.Unavailable, e: +                sleep(delay) +        raise e + +    def _response(self): +        """ +        Overloaded method, see u1db docs. +        We patched it for decrypting gzip content. 
+        """ +        resp = self._conn.getresponse() +        body = resp.read() +        headers = dict(resp.getheaders()) + +        # Patched: We would like to decode gzip +        # ---------------------------------------------------- +        encoding = headers.get('content-encoding', '') +        if "gzip" in encoding: +            body = _gunzip(body) +        # ---------------------------------------------------- + +        if resp.status in (200, 201): +            return body, headers +        elif resp.status in http_errors.ERROR_STATUSES: +            try: +                respdic = json.loads(body) +            except ValueError: +                pass +            else: +                self._error(respdic) +        # special case +        if resp.status == 503: +            raise errors.Unavailable(body, headers) +        raise errors.HTTPError(resp.status, body, headers) +      def sync_exchange(self, docs_by_generations, source_replica_uid,                        last_known_generation, last_known_trans_id,                        return_doc_cb, ensure_callback=None): @@ -364,8 +464,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth):          syncing.          :param docs_by_generations: A list of (doc_id, generation, trans_id) -            of local documents that were changed since the last local -            generation the remote replica knows about. +                                    of local documents that were changed since +                                    the last local generation the remote +                                    replica knows about.          :type docs_by_generations: list of tuples          :param source_replica_uid: The uid of the source replica.          :type source_replica_uid: str @@ -391,6 +492,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth):          self._conn.putheader('content-type', 'application/x-u1db-sync-stream')          for header_name, header_value in self._sign_request('POST', url, {}):              self._conn.putheader(header_name, header_value) +        self._conn.putheader('accept-encoding', 'gzip')          entries = ['[']          size = 1 @@ -428,7 +530,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth):          for entry in entries:              self._conn.send(entry)          entries = None -        data, _ = self._response() +        data, headers = self._response() +          res = self._parse_sync_stream(data, return_doc_cb, ensure_callback)          data = None          return res['new_generation'], res['new_transaction_id'] diff --git a/client/versioneer.py b/client/versioneer.py index b43ab062..18dfd923 100644 --- a/client/versioneer.py +++ b/client/versioneer.py @@ -115,7 +115,7 @@ import sys  def run_command(args, cwd=None, verbose=False):      try: -        # remember shell=False, so use git.cmd on windows, not just git +        # remember shell=False, so use git.exe on windows, not just git          p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)      except EnvironmentError:          e = sys.exc_info()[1] @@ -230,7 +230,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      GIT = "git"      if sys.platform == "win32": -        GIT = "git.cmd" +        GIT = "git.exe"      stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],                           cwd=root)      if stdout is None: @@ -305,7 +305,7 @@ import sys  def run_command(args, cwd=None, verbose=False):      try: -        # remember shell=False, so use git.cmd on 
windows, not just git +        # remember shell=False, so use git.exe on windows, not just git          p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)      except EnvironmentError:          e = sys.exc_info()[1] @@ -430,7 +430,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      GIT = "git"      if sys.platform == "win32": -        GIT = "git.cmd" +        GIT = "git.exe"      stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],                           cwd=root)      if stdout is None: @@ -486,7 +486,7 @@ import sys  def do_vcs_install(versionfile_source, ipy):      GIT = "git"      if sys.platform == "win32": -        GIT = "git.cmd" +        GIT = "git.exe"      run_command([GIT, "add", "versioneer.py"])      run_command([GIT, "add", versionfile_source])      run_command([GIT, "add", ipy]) diff --git a/common/setup.py b/common/setup.py index bcc2b4b3..e142d958 100644 --- a/common/setup.py +++ b/common/setup.py @@ -103,7 +103,163 @@ def get_versions(default={}, verbose=False):              f.write(subst_template) +# +# Couch backend design docs file generation. +# + +from os import listdir +from os.path import realpath, dirname, isdir, join, isfile, basename +import json +import binascii + + +old_cmd_sdist = cmdclass["sdist"] + + +def build_ddocs_py(basedir=None, with_src=True): +    """ +    Build `ddocs.py` file. + +    For ease of development, couch backend design documents are stored as +    `.js` files in subdirectories of `src/leap/soledad/common/ddocs`. This +    function scans that directory for javascript files, builds the design +    documents structure, and encodes those structures in the `ddocs.py` file. + +    This function is used when installing in develop mode, building or +    generating source distributions (see the next classes and the `cmdclass` +    setuptools parameter). + +    This function uses the following conventions to generate design documents: + +      - Design documents are represented by directories in the form +        `<prefix>/<ddoc>`, where prefix is the `src/leap/soledad/common/ddocs` +        directory. +      - Design document directories might contain `views`, `lists` and +        `updates` subdirectories. +      - Views subdirectories must contain a `map.js` file and may contain a +        `reduce.js` file. +      - List and updates subdirectories may contain any number of javascript +        files (i.e. ending in `.js`) whose names will be mapped to the +        corresponding list or update function name.
+    """ +    cur_pwd = dirname(realpath(__file__)) +    common_path = ('src', 'leap', 'soledad', 'common') +    dest_common_path = common_path +    if not with_src: +        dest_common_path = common_path[1:] +    prefix = join(cur_pwd, *common_path) + +    dest_prefix = prefix +    if basedir is not None: +        # we're bulding a sdist +        dest_prefix = join(basedir, *dest_common_path) + +    ddocs_prefix = join(prefix, 'ddocs') +    ddocs = {} + +    # design docs are represented by subdirectories of `ddocs_prefix` +    for ddoc in [f for f in listdir(ddocs_prefix) +                 if isdir(join(ddocs_prefix, f))]: + +        ddocs[ddoc] = {'_id': '_design/%s' % ddoc} + +        for t in ['views', 'lists', 'updates']: +            tdir = join(ddocs_prefix, ddoc, t) +            if isdir(tdir): + +                ddocs[ddoc][t] = {} + +                if t == 'views':  # handle views (with map/reduce functions) +                    for view in [f for f in listdir(tdir) +                                 if isdir(join(tdir, f))]: +                        # look for map.js and reduce.js +                        mapfile = join(tdir, view, 'map.js') +                        reducefile = join(tdir, view, 'reduce.js') +                        mapfun = None +                        reducefun = None +                        try: +                            with open(mapfile) as f: +                                mapfun = f.read() +                        except IOError: +                            pass +                        try: +                            with open(reducefile) as f: +                                reducefun = f.read() +                        except IOError: +                            pass +                        ddocs[ddoc]['views'][view] = {} + +                        if mapfun is not None: +                            ddocs[ddoc]['views'][view]['map'] = mapfun +                        if reducefun is not None: +                            ddocs[ddoc]['views'][view]['reduce'] = reducefun + +                else:  # handle lists, updates, etc +                    for fun in [f for f in listdir(tdir) +                                if isfile(join(tdir, f))]: +                        funfile = join(tdir, fun) +                        funname = basename(funfile).replace('.js', '') +                        try: +                            with open(funfile) as f: +                                ddocs[ddoc][t][funname] = f.read() +                        except IOError: +                            pass +    # write file containing design docs strings +    ddoc_filename = "ddocs.py" +    with open(join(dest_prefix, ddoc_filename), 'w') as f: +        for ddoc in ddocs: +            f.write( +                "%s = '%s'\n" % +                (ddoc, binascii.b2a_base64(json.dumps(ddocs[ddoc]))[:-1])) +    print "Wrote design docs in %s" % (dest_prefix + '/' + ddoc_filename,) + + +from setuptools.command.develop import develop as _cmd_develop + + +class cmd_develop(_cmd_develop): +    def run(self): +        # versioneer: +        versions = versioneer.get_versions(verbose=True) +        self._versioneer_generated_versions = versions +        # unless we update this, the command will keep using the old version +        self.distribution.metadata.version = versions["version"] +        _cmd_develop.run(self) +        build_ddocs_py() + + +# versioneer powered +old_cmd_sdist = cmdclass["sdist"] + + +class cmd_sdist(old_cmd_sdist): +    """ +    Generate 
'src/leap/soledad/common/ddocs.py' which contains couch design +    documents scripts. +    """ +    def run(self): +        old_cmd_sdist.run(self) + +    def make_release_tree(self, base_dir, files): +        old_cmd_sdist.make_release_tree(self, base_dir, files) +        build_ddocs_py(basedir=base_dir) + + +# versioneer powered +old_cmd_build = cmdclass["build"] + + +class cmd_build(old_cmd_build): +    def run(self): +        old_cmd_build.run(self) +        build_ddocs_py(basedir=self.build_lib, with_src=False) + +  cmdclass["freeze_debianver"] = freeze_debianver +cmdclass["build"] = cmd_build +cmdclass["sdist"] = cmd_sdist +cmdclass["develop"] = cmd_develop +  # XXX add ref to docs diff --git a/common/src/leap/soledad/common/.gitignore b/common/src/leap/soledad/common/.gitignore new file mode 100644 index 00000000..3378c78a --- /dev/null +++ b/common/src/leap/soledad/common/.gitignore @@ -0,0 +1 @@ +ddocs.py diff --git a/common/src/leap/soledad/common/README.txt b/common/src/leap/soledad/common/README.txt new file mode 100644 index 00000000..106efb5e --- /dev/null +++ b/common/src/leap/soledad/common/README.txt @@ -0,0 +1,79 @@ +Soledad common package +====================== + +This package contains Soledad bits used by both server and client. + +Couch U1DB Backend +------------------ + +U1DB backends rely on some atomic operations that modify documents contents +and metadata (conflicts, transaction ids and indexes). The only atomic +operation in Couch is a document put, so every u1db atomic operation has to be +mapped to a couch document put. + +The atomic operations in the U1DB SQLite reference backend implementation may +be identified by the use of a context manager to access the underlying +database. A listing of the methods involved in each atomic operation are +depiced below. The top-level elements correpond to the atomic operations that +have to be mapped, and items on deeper levels of the list have to be +implemented in a way that all changes will be pushed with just one operation. + +    * _set_replica_uid +    * put_doc: +        * _get_doc +        * _put_and_update_indexes +            * insert/update the document +            * insert into transaction log +    * delete_doc +        * _get_doc +        * _put_and_update_indexes +    * get_doc_conflicts +        * _get_conflicts +    * _set_replica_gen_and_trans_id +        * _do_set_replica_gen_and_trans_id +    * _put_doc_if_newer +        * _get_doc +        * _validate_source (**) +            * _get_replica_gen_and_trans_id +        * cases: +            * is newer: +                * _prune_conflicts (**) +                    * _has_conflicts +                    * _delete_conflicts +                * _put_and_update_indexes +            * same content as: +                * _put_and_update_indexes +            * conflicted: +                * _force_doc_sync_conflict +                    * _prune_conflicts +                    * _add_conflict +                    * _put_and_update_indexes +        * _do_set_replica_gen_and_trans_id +    * resolve_doc +        * _get_doc +        * cases: +            * doc is superseded +                * _put_and_update_indexes +            * else +                * _add_conflict +        * _delete_conflicts +    * delete_index +    * create_index + +Couch views and update functions are used in order to achieve atomicity on the +Couch backend. Transactions are stored in the `u1db_transactions` field of the +couch document. 
Document's content and conflicted versions are stored as couch +document attachments with names, respectivelly, `u1db_content` and +`u1db_conflicts`. + +A map of methods and couch query URI can be found on the `./ddocs/README.txt` +document. + +Notes: + +  * Currently, the couch backend does not implement indexing, so what is +    depicted as `_put_and_update_indexes` above will be found as `_put_doc` in +    the backend. + +  * Conflict updates are part of document put using couch update functions, +    and as such are part of the same atomic operation as document put. diff --git a/common/src/leap/soledad/common/_version.py b/common/src/leap/soledad/common/_version.py index 1d020a14..7d4262b5 100644 --- a/common/src/leap/soledad/common/_version.py +++ b/common/src/leap/soledad/common/_version.py @@ -17,6 +17,7 @@ git_full = "$Format:%H$"  import subprocess  import sys +  def run_command(args, cwd=None, verbose=False):      try:          # remember shell=False, so use git.cmd on windows, not just git @@ -36,11 +37,10 @@ def run_command(args, cwd=None, verbose=False):          return None      return stdout - -import sys  import re  import os.path +  def get_expanded_variables(versionfile_source):      # the code embedded in _version.py can just fetch the value of these      # variables. When used from setup.py, we don't want to import @@ -48,7 +48,7 @@ def get_expanded_variables(versionfile_source):      # used from _version.py.      variables = {}      try: -        f = open(versionfile_source,"r") +        f = open(versionfile_source, "r")          for line in f.readlines():              if line.strip().startswith("git_refnames ="):                  mo = re.search(r'=\s*"(.*)"', line) @@ -63,12 +63,13 @@ def get_expanded_variables(versionfile_source):          pass      return variables +  def versions_from_expanded_variables(variables, tag_prefix, verbose=False):      refnames = variables["refnames"].strip()      if refnames.startswith("$Format"):          if verbose:              print("variables are unexpanded, not using") -        return {} # unexpanded, so not in an unpacked git-archive tarball +        return {}  # unexpanded, so not in an unpacked git-archive tarball      refs = set([r.strip() for r in refnames.strip("()").split(",")])      # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of      # just "foo-1.0". If we see a "tag: " prefix, prefer those. @@ -93,13 +94,14 @@ def versions_from_expanded_variables(variables, tag_prefix, verbose=False):              r = ref[len(tag_prefix):]              if verbose:                  print("picking %s" % r) -            return { "version": r, -                     "full": variables["full"].strip() } +            return {"version": r, +                    "full": variables["full"].strip()}      # no suitable tags, so we use the full revision id      if verbose:          print("no suitable tags, using full revision id") -    return { "version": variables["full"].strip(), -             "full": variables["full"].strip() } +    return {"version": variables["full"].strip(), +            "full": variables["full"].strip()} +  def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      # this runs 'git' from the root of the source tree. 
That either means @@ -116,7 +118,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):          here = os.path.abspath(__file__)      except NameError:          # some py2exe/bbfreeze/non-CPython implementations don't do __file__ -        return {} # not always correct +        return {}  # not always correct      # versionfile_source is the relative path from the top of the source tree      # (where the .git directory might live) to this file. Invert this to find @@ -126,7 +128,16 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):          for i in range(len(versionfile_source.split("/"))):              root = os.path.dirname(root)      else: -        root = os.path.dirname(here) +        root = os.path.dirname( +            os.path.join('..', here)) + +    ###################################################### +    # XXX patch for our specific configuration with +    # the three projects leap.soledad.{common, client, server} +    # inside the same repo. +    ###################################################### +    root = os.path.dirname(os.path.join('..', root)) +      if not os.path.exists(os.path.join(root, ".git")):          if verbose:              print("no .git in %s" % root) @@ -141,7 +152,8 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):          return {}      if not stdout.startswith(tag_prefix):          if verbose: -            print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix)) +            print("tag '%s' doesn't start with prefix '%s'" % ( +                stdout, tag_prefix))          return {}      tag = stdout[len(tag_prefix):]      stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root) @@ -153,7 +165,8 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      return {"version": tag, "full": full} -def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False): +def versions_from_parentdir(parentdir_prefix, versionfile_source, +                            verbose=False):      if IN_LONG_VERSION_PY:          # We're running from _version.py. If it's from a source tree          # (execute-in-place), we can work upwards to find the root of the @@ -163,7 +176,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)              here = os.path.abspath(__file__)          except NameError:              # py2exe/bbfreeze/non-CPython don't have __file__ -            return {} # without __file__, we have no hope +            return {}  # without __file__, we have no hope          # versionfile_source is the relative path from the top of the source          # tree to _version.py. Invert this to find the root from __file__.          
root = here @@ -180,7 +193,8 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)      dirname = os.path.basename(root)      if not dirname.startswith(parentdir_prefix):          if verbose: -            print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" % +            print("guessing rootdir is '%s', but '%s' doesn't start with " +                  "prefix '%s'" %                    (root, dirname, parentdir_prefix))          return None      return {"version": dirname[len(parentdir_prefix):], "full": ""} @@ -189,8 +203,9 @@ tag_prefix = ""  parentdir_prefix = "leap.soledad.common-"  versionfile_source = "src/leap/soledad/common/_version.py" +  def get_versions(default={"version": "unknown", "full": ""}, verbose=False): -    variables = { "refnames": git_refnames, "full": git_full } +    variables = {"refnames": git_refnames, "full": git_full}      ver = versions_from_expanded_variables(variables, tag_prefix, verbose)      if not ver:          ver = versions_from_vcs(tag_prefix, versionfile_source, verbose) @@ -200,4 +215,3 @@ def get_versions(default={"version": "unknown", "full": ""}, verbose=False):      if not ver:          ver = default      return ver - diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 1396f4d7..8e8613a1 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -18,189 +18,381 @@  """A U1DB backend that uses CouchDB as its persistence layer.""" -import re  import simplejson as json -import socket +import re +import uuid  import logging +import binascii +import socket +import time +import sys +import threading -from u1db import errors -from u1db.sync import Synchronizer -from u1db.backends.inmemory import InMemoryIndex -from u1db.remote.server_state import ServerState -from u1db.errors import DatabaseDoesNotExist -from couchdb.client import Server, Document as CouchDocument -from couchdb.http import ResourceNotFound, Unauthorized +from StringIO import StringIO +from collections import defaultdict -from leap.soledad.common import USER_DB_PREFIX -from leap.soledad.common.objectstore import ( -    ObjectStoreDatabase, -    ObjectStoreSyncTarget, +from couchdb.client import Server +from couchdb.http import ( +    ResourceConflict, +    ResourceNotFound, +    ServerError, +    Session, +) +from u1db import query_parser, vectorclock +from u1db.errors import ( +    DatabaseDoesNotExist, +    InvalidGeneration, +    RevisionConflict, +    InvalidDocId, +    ConflictedDoc, +    DocumentDoesNotExist, +    DocumentAlreadyDeleted, +    Unauthorized,  ) +from u1db.backends import CommonBackend, CommonSyncTarget +from u1db.remote import http_app +from u1db.remote.server_state import ServerState + + +from leap.soledad.common import USER_DB_PREFIX, ddocs, errors +from leap.soledad.common.document import SoledadDocument  logger = logging.getLogger(__name__) +COUCH_TIMEOUT = 120  # timeout for transfers between Soledad server and Couch + +  class InvalidURLError(Exception):      """      Exception raised when Soledad encounters a malformed URL.      """ -def persistent_class(cls): +class CouchDocument(SoledadDocument):      """ -    Decorator that modifies a class to ensure u1db metadata persists on -    underlying storage. +    This is the document used for maintaining the Couch backend. + +    A CouchDocument can fetch and manipulate conflicts and also holds a +    reference to the couch document revision. 
This data is used to ensure an +    atomic and consistent update of the database. +    """ + +    def __init__(self, doc_id=None, rev=None, json='{}', has_conflicts=False, +                 syncable=True): +        """ +        Container for handling a document that is stored in couch backend. + +        :param doc_id: The unique document identifier. +        :type doc_id: str +        :param rev: The revision identifier of the document. +        :type rev: str +        :param json: The JSON string for this document. +        :type json: str +        :param has_conflicts: Boolean indicating if this document has conflicts +        :type has_conflicts: bool +        :param syncable: Should this document be synced with remote replicas? +        :type syncable: bool +        """ +        SoledadDocument.__init__(self, doc_id, rev, json, has_conflicts) +        self._couch_rev = None +        self._conflicts = None +        self._transactions = None + +    def _ensure_fetch_conflicts(self, get_conflicts_fun): +        """ +        Ensure conflict data has been fetched from the server. + +        :param get_conflicts_fun: A function which, given the document id and +                                  the couch revision, return the conflicted +                                  versions of the current document. +        :type get_conflicts_fun: function +        """ +        if self._conflicts is None: +            self._conflicts = get_conflicts_fun(self.doc_id, +                                                couch_rev=self.couch_rev) +        self.has_conflicts = len(self._conflicts) > 0 + +    def get_conflicts(self): +        """ +        Get the conflicted versions of the document. + +        :return: The conflicted versions of the document. +        :rtype: [CouchDocument] +        """ +        return self._conflicts + +    def set_conflicts(self, conflicts): +        """ +        Set the conflicted versions of the document. + +        :param conflicts: The conflicted versions of the document. +        :type conflicts: list +        """ +        self._conflicts = conflicts +        self.has_conflicts = len(self._conflicts) > 0 -    @param cls: The class that will be modified. -    @type cls: type +    def add_conflict(self, doc): +        """ +        Add a conflict to this document. + +        :param doc: The conflicted version to be added. +        :type doc: CouchDocument +        """ +        if self._conflicts is None: +            raise Exception("Run self._ensure_fetch_conflicts first!") +        self._conflicts.append(doc) +        self.has_conflicts = len(self._conflicts) > 0 + +    def delete_conflicts(self, conflict_revs): +        """ +        Delete conflicted versions of this document. + +        :param conflict_revs: The conflicted revisions to be deleted. 
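As a hedged usage sketch of the conflict API defined above (assuming `db` is a
CouchDatabase instance, defined further down in this module): conflicts must be
fetched lazily before they can be manipulated.

    # Illustrative only: add_conflict() and delete_conflicts() require a
    # previous _ensure_fetch_conflicts() call, otherwise they raise.
    doc = db._get_doc('some-doc-id', check_for_conflicts=True)
    doc._ensure_fetch_conflicts(db._get_conflicts)
    doc.add_conflict(
        CouchDocument(doc_id=doc.doc_id, rev='other-replica:1', json='{"a": 1}'))
    assert doc.has_conflicts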
+        :type conflict_revs: [str] +        """ +        if self._conflicts is None: +            raise Exception("Run self._ensure_fetch_conflicts first!") +        conflicts_len = len(self._conflicts) +        self._conflicts = filter( +            lambda doc: doc.rev not in conflict_revs, +            self._conflicts) +        self.has_conflicts = len(self._conflicts) > 0 + +    def _get_couch_rev(self): +        return self._couch_rev + +    def _set_couch_rev(self, rev): +        self._couch_rev = rev + +    couch_rev = property(_get_couch_rev, _set_couch_rev) + +    def _get_transactions(self): +        return self._transactions + +    def _set_transactions(self, rev): +        self._transactions = rev + +    transactions = property(_get_transactions, _set_transactions) + + +# monkey-patch the u1db http app to use CouchDocument +http_app.Document = CouchDocument + + +def raise_missing_design_doc_error(exc, ddoc_path): +    """ +    Raise an appropriate exception when catching a ResourceNotFound when +    accessing a design document. + +    :param exc: The exception cought. +    :type exc: ResourceNotFound +    :param ddoc_path: A list representing the requested path. +    :type ddoc_path: list + +    :raise MissingDesignDocError: Raised when tried to access a missing design +                                  document. +    :raise MissingDesignDocListFunctionError: Raised when trying to access a +                                              missing list function on a +                                              design document. +    :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                           missing named view on a design +                                           document. +    :raise MissingDesignDocDeletedError: Raised when trying to access a +                                         deleted design document. +    :raise MissingDesignDocUnknownError: Raised when failed to access a design +                                         document for an yet unknown reason.      """ +    path = "".join(ddoc_path) +    if exc.message[1] == 'missing': +        raise errors.MissingDesignDocError(path) +    elif exc.message[1] == 'missing function' or \ +            exc.message[1].startswith('missing lists function'): +        raise errors.MissingDesignDocListFunctionError(path) +    elif exc.message[1] == 'missing_named_view': +        raise errors.MissingDesignDocNamedViewError(path) +    elif exc.message[1] == 'deleted': +        raise errors.MissingDesignDocDeletedError(path) +    # other errors are unknown for now +    raise errors.DesignDocUnknownError("%s: %s" % (path, str(exc.message))) + + +def raise_server_error(exc, ddoc_path): +    """ +    Raise an appropriate exception when catching a ServerError when +    accessing a design document. + +    :param exc: The exception cought. +    :type exc: ResourceNotFound +    :param ddoc_path: A list representing the requested path. +    :type ddoc_path: list + +    :raise MissingDesignDocListFunctionError: Raised when trying to access a +                                              missing list function on a +                                              design document. +    :raise MissingDesignDocUnknownError: Raised when failed to access a design +                                         document for an yet unknown reason. 
+    """ +    path = "".join(ddoc_path) +    if exc.message[1][0] == 'unnamed_error': +        raise errors.MissingDesignDocListFunctionError(path) +    # other errors are unknown for now +    raise errors.DesignDocUnknownError(path) + -    def _create_persistent_method(old_method_name, key, load_method_name, -                                  dump_method_name, store): -        """ -        Create a persistent method to replace C{old_method_name}. - -        The new method will load C{key} using C{load_method_name} and stores -        it using C{dump_method_name} depending on the value of C{store}. -        """ -        # get methods -        old_method = getattr(cls, old_method_name) -        load_method = getattr(cls, load_method_name) \ -            if load_method_name is not None \ -            else lambda self, data: setattr(self, key, data) -        dump_method = getattr(cls, dump_method_name) \ -            if dump_method_name is not None \ -            else lambda self: getattr(self, key) - -        def _new_method(self, *args, **kwargs): -            # get u1db data from couch db -            doc = self._get_doc('%s%s' % -                                (self.U1DB_DATA_DOC_ID_PREFIX, key)) -            load_method(self, doc.content['content']) -            # run old method -            retval = old_method(self, *args, **kwargs) -            # store u1db data on couch -            if store: -                doc.content = {'content': dump_method(self)} -                self._put_doc(doc) -            return retval - -        return _new_method - -    # ensure the class has a persistency map -    if not hasattr(cls, 'PERSISTENCY_MAP'): -        logger.error('Class %s has no PERSISTENCY_MAP attribute, skipping ' -                     'persistent methods substitution.' % cls) -        return cls -    # replace old methods with new persistent ones -    for key, ((load_method_name, dump_method_name), -              persistent_methods) in cls.PERSISTENCY_MAP.iteritems(): -        for (method_name, store) in persistent_methods: -            setattr(cls, method_name, -                    _create_persistent_method( -                        method_name, -                        key, -                        load_method_name, -                        dump_method_name, -                        store)) -    return cls - - -@persistent_class -class CouchDatabase(ObjectStoreDatabase): +class MultipartWriter(object):      """ -    A U1DB backend that uses Couch as its persistence layer. +    A multipart writer adapted from python-couchdb's one so we can PUT +    documents using couch's multipart PUT. + +    This stripped down version does not allow for nested structures, and +    contains only the essential things we need to PUT SoledadDocuments to the +    couch backend.      """ -    U1DB_TRANSACTION_LOG_KEY = '_transaction_log' -    U1DB_CONFLICTS_KEY = '_conflicts' -    U1DB_OTHER_GENERATIONS_KEY = '_other_generations' -    U1DB_INDEXES_KEY = '_indexes' -    U1DB_REPLICA_UID_KEY = '_replica_uid' - -    U1DB_DATA_KEYS = [ -        U1DB_TRANSACTION_LOG_KEY, -        U1DB_CONFLICTS_KEY, -        U1DB_OTHER_GENERATIONS_KEY, -        U1DB_INDEXES_KEY, -        U1DB_REPLICA_UID_KEY, -    ] - -    COUCH_ID_KEY = '_id' -    COUCH_REV_KEY = '_rev' -    COUCH_U1DB_ATTACHMENT_KEY = 'u1db_json' -    COUCH_U1DB_REV_KEY = 'u1db_rev' - -    # the following map describes information about methods usage of -    # properties that have to persist on the underlying database. 
The format -    # of the map is assumed to be: -    # -    #     { -    #         'property_name': [ -    #             ('property_load_method_name', 'property_dump_method_name'), -    #             [('method_1_name', bool), -    #              ... -    #              ('method_N_name', bool)]], -    #         ... -    #     } -    # -    # where the booleans indicate if the property should be stored after -    # each method execution (i.e. if the method alters the property). Property -    # load/dump methods will be run after/before properties are read/written -    # to the underlying db. -    PERSISTENCY_MAP = { -        U1DB_TRANSACTION_LOG_KEY: [ -            ('_load_transaction_log_from_json', None), -            [('_get_transaction_log', False), -             ('_get_generation', False), -             ('_get_generation_info', False), -             ('_get_trans_id_for_gen', False), -             ('whats_changed', False), -             ('_put_and_update_indexes', True)]], -        U1DB_CONFLICTS_KEY: [ -            (None, None), -            [('_has_conflicts', False), -             ('get_doc_conflicts', False), -             ('_prune_conflicts', False), -             ('resolve_doc', False), -             ('_replace_conflicts', True), -             ('_force_doc_sync_conflict', True)]], -        U1DB_OTHER_GENERATIONS_KEY: [ -            ('_load_other_generations_from_json', None), -            [('_get_replica_gen_and_trans_id', False), -             ('_do_set_replica_gen_and_trans_id', True)]], -        U1DB_INDEXES_KEY: [ -            ('_load_indexes_from_json', '_dump_indexes_as_json'), -            [('list_indexes', False), -             ('get_from_index', False), -             ('get_range_from_index', False), -             ('get_index_keys', False), -             ('_put_and_update_indexes', True), -             ('create_index', True), -             ('delete_index', True)]], -        U1DB_REPLICA_UID_KEY: [ -            (None, None), -            [('_allocate_doc_rev', False), -             ('_put_doc_if_newer', False), -             ('_ensure_maximal_rev', False), -             ('_prune_conflicts', False), -             ('_set_replica_uid', True)]]} +    CRLF = '\r\n' + +    def __init__(self, fileobj, headers=None, boundary=None): +        """ +        Initialize the multipart writer. +        """ +        self.fileobj = fileobj +        if boundary is None: +            boundary = self._make_boundary() +        self._boundary = boundary +        self._build_headers('related', headers) + +    def add(self, mimetype, content, headers={}): +        """ +        Add a part to the multipart stream. +        """ +        self.fileobj.write('--') +        self.fileobj.write(self._boundary) +        self.fileobj.write(self.CRLF) +        headers['Content-Type'] = mimetype +        self._write_headers(headers) +        if content: +            # XXX: throw an exception if a boundary appears in the content?? +            self.fileobj.write(content) +            self.fileobj.write(self.CRLF) + +    def close(self): +        """ +        Close the multipart stream. +        """ +        self.fileobj.write('--') +        self.fileobj.write(self._boundary) +        # be careful not to have anything after '--', otherwise old couch +        # versions (including bigcouch) will fail. +        self.fileobj.write('--') + +    def _make_boundary(self): +        """ +        Create a boundary to discern multi parts. 
+        """ +        try: +            from uuid import uuid4 +            return '==' + uuid4().hex + '==' +        except ImportError: +            from random import randrange +            token = randrange(sys.maxint) +            format = '%%0%dd' % len(repr(sys.maxint - 1)) +            return '===============' + (format % token) + '==' + +    def _write_headers(self, headers): +        """ +        Write a part header in the buffer stream. +        """ +        if headers: +            for name in sorted(headers.keys()): +                value = headers[name] +                self.fileobj.write(name) +                self.fileobj.write(': ') +                self.fileobj.write(value) +                self.fileobj.write(self.CRLF) +        self.fileobj.write(self.CRLF) + +    def _build_headers(self, subtype, headers): +        """ +        Build the main headers of the multipart stream. + +        This is here so we can send headers separete from content using +        python-couchdb API. +        """ +        self.headers = {} +        self.headers['Content-Type'] = 'multipart/%s; boundary="%s"' % \ +                                       (subtype, self._boundary) +        if headers: +            for name in sorted(headers.keys()): +                value = headers[name] +                self.headers[name] = value + + +class CouchDatabase(CommonBackend): +    """ +    A U1DB implementation that uses CouchDB as its persistence layer. +    """ + +    # We spawn threads to parallelize the CouchDatabase.get_docs() method +    MAX_GET_DOCS_THREADS = 20 + +    update_handler_lock = defaultdict(threading.Lock) + +    class _GetDocThread(threading.Thread): +        """ +        A thread that gets a document from a database. + +        TODO: switch this for a twisted deferred to thread. This depends on +        replacing python-couchdb for paisley in this module. +        """ + +        def __init__(self, db, doc_id, check_for_conflicts, +                     release_fun): +            """ +            :param db: The database from where to get the document. +            :type db: u1db.Database +            :param doc_id: The doc_id of the document to be retrieved. +            :type doc_id: str +            :param check_for_conflicts: Whether the get_doc() method should +                                        check for existing conflicts. +            :type check_for_conflicts: bool +            :param release_fun: A function that releases a semaphore, to be +                                called after the document is fetched. +            :type release_fun: function +            """ +            threading.Thread.__init__(self) +            self._db = db +            self._doc_id = doc_id +            self._check_for_conflicts = check_for_conflicts +            self._release_fun = release_fun +            self._doc = None + +        def run(self): +            """ +            Fetch the document, store it as a property, and call the release +            function. +            """ +            self._doc = self._db._get_doc( +                self._doc_id, self._check_for_conflicts) +            self._release_fun()      @classmethod -    def open_database(cls, url, create): +    def open_database(cls, url, create, ensure_ddocs=False):          """          Open a U1DB database using CouchDB as backend. -        @param url: the url of the database replica -        @type url: str -        @param create: should the replica be created if it does not exist? 
-        @type create: bool +        :param url: the url of the database replica +        :type url: str +        :param create: should the replica be created if it does not exist? +        :type create: bool +        :param ensure_ddocs: Ensure that the design docs exist on server. +        :type ensure_ddocs: bool -        @return: the database instance -        @rtype: CouchDatabase +        :return: the database instance +        :rtype: CouchDatabase          """          # get database from url          m = re.match('(^https?://[^/]+)/(.+)$', url) @@ -214,308 +406,1057 @@ class CouchDatabase(ObjectStoreDatabase):          except ResourceNotFound:              if not create:                  raise DatabaseDoesNotExist() -        return cls(url, dbname) +        return cls(url, dbname, ensure_ddocs=ensure_ddocs)      def __init__(self, url, dbname, replica_uid=None, full_commit=True, -                 session=None): +                 session=None, ensure_ddocs=True):          """          Create a new Couch data container. -        @param url: the url of the couch database -        @type url: str -        @param dbname: the database name -        @type dbname: str -        @param replica_uid: an optional unique replica identifier -        @type replica_uid: str -        @param full_commit: turn on the X-Couch-Full-Commit header -        @type full_commit: bool -        @param session: an http.Session instance or None for a default session -        @type session: http.Session +        :param url: the url of the couch database +        :type url: str +        :param dbname: the database name +        :type dbname: str +        :param replica_uid: an optional unique replica identifier +        :type replica_uid: str +        :param full_commit: turn on the X-Couch-Full-Commit header +        :type full_commit: bool +        :param session: an http.Session instance or None for a default session +        :type session: http.Session +        :param ensure_ddocs: Ensure that the design docs exist on server. +        :type ensure_ddocs: bool          """          # save params          self._url = url          self._full_commit = full_commit +        if session is None: +            session = Session(timeout=COUCH_TIMEOUT)          self._session = session +        self._factory = CouchDocument +        self._real_replica_uid = None          # configure couch          self._server = Server(url=self._url,                                full_commit=self._full_commit,                                session=self._session)          self._dbname = dbname -        # this will ensure that transaction and sync logs exist and are -        # up-to-date.          try:              self._database = self._server[self._dbname]          except ResourceNotFound:              self._server.create(self._dbname)              self._database = self._server[self._dbname] -        ObjectStoreDatabase.__init__(self, replica_uid=replica_uid) +        if replica_uid is not None: +            self._set_replica_uid(replica_uid) +        if ensure_ddocs: +            self.ensure_ddocs_on_db() +        # initialize a thread pool for parallelizing get_docs() +        self._sem_pool = threading.BoundedSemaphore( +            value=self.MAX_GET_DOCS_THREADS) + +    def ensure_ddocs_on_db(self): +        """ +        Ensure that the design documents used by the backend exist on the +        couch database. 
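A minimal usage sketch for the constructor path above; the couch URL and
database name are made up, and ensure_ddocs is passed so the design documents
get uploaded on first use:

    db = CouchDatabase.open_database(
        'http://localhost:5984/user-3f9e01f2',
        create=True,
        ensure_ddocs=True)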
+        """ +        # we check for existence of one of the files, and put all of them if +        # that one does not exist +        try: +            self._database['_design/docs'] +            return +        except ResourceNotFound: +            for ddoc_name in ['docs', 'syncs', 'transactions']: +                ddoc = json.loads( +                    binascii.a2b_base64( +                        getattr(ddocs, ddoc_name))) +                self._database.save(ddoc) + +    def get_sync_target(self): +        """ +        Return a SyncTarget object, for another u1db to synchronize with. + +        :return: The sync target. +        :rtype: CouchSyncTarget +        """ +        return CouchSyncTarget(self) -    #------------------------------------------------------------------------- -    # methods from Database -    #------------------------------------------------------------------------- +    def delete_database(self): +        """ +        Delete a U1DB CouchDB database. +        """ +        del(self._server[self._dbname]) + +    def close(self): +        """ +        Release any resources associated with this database. + +        :return: True if db was succesfully closed. +        :rtype: bool +        """ +        self._url = None +        self._full_commit = None +        self._session = None +        self._server = None +        self._database = None +        return True + +    def _set_replica_uid(self, replica_uid): +        """ +        Force the replica uid to be set. + +        :param replica_uid: The new replica uid. +        :type replica_uid: str +        """ +        try: +            # set on existent config document +            doc = self._database['u1db_config'] +            doc['replica_uid'] = replica_uid +        except ResourceNotFound: +            # or create the config document +            doc = { +                '_id': 'u1db_config', +                'replica_uid': replica_uid, +            } +        self._database.save(doc) +        self._real_replica_uid = replica_uid + +    def _get_replica_uid(self): +        """ +        Get the replica uid. + +        :return: The replica uid. +        :rtype: str +        """ +        if self._real_replica_uid is not None: +            return self._real_replica_uid +        try: +            # grab replica_uid from server +            doc = self._database['u1db_config'] +            self._real_replica_uid = doc['replica_uid'] +            return self._real_replica_uid +        except ResourceNotFound: +            # create a unique replica_uid +            self._real_replica_uid = uuid.uuid4().hex +            self._set_replica_uid(self._real_replica_uid) +            return self._real_replica_uid + +    _replica_uid = property(_get_replica_uid, _set_replica_uid) + +    def _get_generation(self): +        """ +        Return the current generation. + +        :return: The current generation. +        :rtype: int + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. 
+        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. +        """ +        # query a couch list function +        ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] +        res = self._database.resource(*ddoc_path) +        try: +            response = res.get_json() +            return response[2]['generation'] +        except ResourceNotFound as e: +            raise_missing_design_doc_error(e, ddoc_path) +        except ServerError as e: +            raise_server_error(e, ddoc_path) + +    def _get_generation_info(self): +        """ +        Return the current generation. + +        :return: A tuple containing the current generation and transaction id. +        :rtype: (int, str) + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. +        """ +        # query a couch list function +        ddoc_path = ['_design', 'transactions', '_list', 'generation', 'log'] +        res = self._database.resource(*ddoc_path) +        try: +            response = res.get_json() +            return (response[2]['generation'], response[2]['transaction_id']) +        except ResourceNotFound as e: +            raise_missing_design_doc_error(e, ddoc_path) +        except ServerError as e: +            raise_server_error(e, ddoc_path) + +    def _get_trans_id_for_gen(self, generation): +        """ +        Get the transaction id corresponding to a particular generation. + +        :param generation: The generation for which to get the transaction id. +        :type generation: int + +        :return: The transaction id for C{generation}. +        :rtype: str + +        :raise InvalidGeneration: Raised when the generation does not exist. +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. 
+        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. +        """ +        if generation == 0: +            return '' +        # query a couch list function +        ddoc_path = [ +            '_design', 'transactions', '_list', 'trans_id_for_gen', 'log' +        ] +        res = self._database.resource(*ddoc_path) +        try: +            response = res.get_json(gen=generation) +            if response[2] == {}: +                raise InvalidGeneration +            return response[2]['transaction_id'] +        except ResourceNotFound as e: +            raise_missing_design_doc_error(e, ddoc_path) +        except ServerError as e: +            raise_server_error(e, ddoc_path) + +    def _get_transaction_log(self): +        """ +        This is only for the test suite, it is not part of the api. + +        :return: The complete transaction log. +        :rtype: [(str, str)] + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. +        """ +        # query a couch view +        ddoc_path = ['_design', 'transactions', '_view', 'log'] +        res = self._database.resource(*ddoc_path) +        try: +            response = res.get_json() +            return map( +                lambda row: (row['id'], row['value']), +                response[2]['rows']) +        except ResourceNotFound as e: +            raise_missing_design_doc_error(e, ddoc_path)      def _get_doc(self, doc_id, check_for_conflicts=False):          """ -        Get just the document content, without fancy handling. +        Extract the document from storage. + +        This can return None if the document doesn't exist. -        @param doc_id: The unique document identifier -        @type doc_id: str -        @param include_deleted: If set to True, deleted documents will be +        :param doc_id: The unique document identifier +        :type doc_id: str +        :param check_for_conflicts: If set to False, then the conflict check +                                    will be skipped. +        :type check_for_conflicts: bool + +        :return: The document. 
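Usage sketch for the generation helpers above, assuming the 'transactions'
design document is installed (e.g. by ensure_ddocs_on_db()) and no concurrent
writers between the calls:

    gen, trans_id = db._get_generation_info()
    assert gen == db._get_generation()
    assert db._get_trans_id_for_gen(gen) == trans_id
    assert db._get_trans_id_for_gen(0) == ''  # generation 0 has no transaction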
+        :rtype: CouchDocument +        """ +        # get document with all attachments (u1db content and eventual +        # conflicts) +        try: +            result = \ +                self._database.resource(doc_id).get_json( +                    attachments=True)[2] +        except ResourceNotFound: +            return None +        # restrict to u1db documents +        if 'u1db_rev' not in result: +            return None +        doc = self._factory(doc_id, result['u1db_rev']) +        # set contents or make tombstone +        if '_attachments' not in result \ +                or 'u1db_content' not in result['_attachments']: +            doc.make_tombstone() +        else: +            doc.content = json.loads( +                binascii.a2b_base64( +                    result['_attachments']['u1db_content']['data'])) +        # determine if there are conflicts +        if check_for_conflicts \ +                and '_attachments' in result \ +                and 'u1db_conflicts' in result['_attachments']: +            doc.has_conflicts = True +            doc.set_conflicts( +                self._build_conflicts( +                    doc.doc_id, +                    json.loads(binascii.a2b_base64( +                        result['_attachments']['u1db_conflicts']['data'])))) +        # store couch revision +        doc.couch_rev = result['_rev'] +        # store transactions +        doc.transactions = result['u1db_transactions'] +        return doc + +    def get_doc(self, doc_id, include_deleted=False): +        """ +        Get the JSON string for the given document. + +        :param doc_id: The unique document identifier +        :type doc_id: str +        :param include_deleted: If set to True, deleted documents will be              returned with empty content. Otherwise asking for a deleted              document will return None. -        @type include_deleted: bool +        :type include_deleted: bool -        @return: a Document object. -        @type: u1db.Document +        :return: A document object. +        :rtype: CouchDocument.          """ -        cdoc = self._database.get(doc_id) -        if cdoc is None: +        doc = self._get_doc(doc_id, check_for_conflicts=True) +        if doc is None: +            return None +        if doc.is_tombstone() and not include_deleted:              return None -        has_conflicts = False -        if check_for_conflicts: -            has_conflicts = self._has_conflicts(doc_id) -        doc = self._factory( -            doc_id=doc_id, -            rev=cdoc[self.COUCH_U1DB_REV_KEY], -            has_conflicts=has_conflicts) -        contents = self._database.get_attachment( -            cdoc, -            self.COUCH_U1DB_ATTACHMENT_KEY) -        if contents: -            doc.content = json.loads(contents.read()) -        else: -            doc.make_tombstone()          return doc      def get_all_docs(self, include_deleted=False):          """          Get the JSON content for all documents in the database. -        @param include_deleted: If set to True, deleted documents will be -            returned with empty content. Otherwise deleted documents will not -            be included in the results. -        @type include_deleted: bool +        :param include_deleted: If set to True, deleted documents will be +                                returned with empty content. Otherwise deleted +                                documents will not be included in the results. 
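Sketch of the visibility rules implemented above (the doc id is made up):
_get_doc() returns tombstones, while get_doc() hides them unless explicitly
asked.

    doc = db.get_doc('deleted-doc-id')                        # None
    doc = db.get_doc('deleted-doc-id', include_deleted=True)  # tombstone
    if doc is not None:
        assert doc.is_tombstone() and doc.content is None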
+        :type include_deleted: bool -        @return: (generation, [Document]) +        :return: (generation, [CouchDocument])              The current generation of the database, followed by a list of all              the documents in the database. -        @rtype: tuple +        :rtype: (int, [CouchDocument])          """ +          generation = self._get_generation()          results = [] -        for doc_id in self._database: -            if doc_id.startswith(self.U1DB_DATA_DOC_ID_PREFIX): -                continue -            doc = self._get_doc(doc_id, check_for_conflicts=True) -            if doc.content is None and not include_deleted: -                continue -            results.append(doc) +        for row in self._database.view('_all_docs'): +            doc = self.get_doc(row.id, include_deleted=include_deleted) +            if doc is not None: +                results.append(doc)          return (generation, results) -    def _put_doc(self, doc): +    def _put_doc(self, old_doc, doc): +        """ +        Put the document in the Couch backend database. + +        Note that C{old_doc} must have been fetched with the parameter +        C{check_for_conflicts} equal to True, so we can properly update the +        new document using the conflict information from the old one. + +        :param old_doc: The old document version. +        :type old_doc: CouchDocument +        :param doc: The document to be put. +        :type doc: CouchDocument + +        :raise RevisionConflict: Raised when trying to update a document but +                                 couch revisions mismatch. +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. 
+        """ +        attachments = {}  # we save content and conflicts as attachments +        parts = []  # and we put it using couch's multipart PUT +        # save content as attachment +        if doc.is_tombstone() is False: +            content = doc.get_json() +            attachments['u1db_content'] = { +                'follows': True, +                'content_type': 'application/octet-stream', +                'length': len(content), +            } +            parts.append(content) +        # save conflicts as attachment +        if doc.has_conflicts is True: +            conflicts = json.dumps( +                map(lambda cdoc: (cdoc.rev, cdoc.content), +                    doc.get_conflicts())) +            attachments['u1db_conflicts'] = { +                'follows': True, +                'content_type': 'application/octet-stream', +                'length': len(conflicts), +            } +            parts.append(conflicts) +        # store old transactions, if any +        transactions = old_doc.transactions[:] if old_doc is not None else [] +        # create a new transaction id and timestamp it so the transaction log +        # is consistent when querying the database. +        transactions.append( +            # here we store milliseconds to keep consistent with javascript +            # Date.prototype.getTime() which was used before inside a couchdb +            # update handler. +            (int(time.time() * 1000), +            self._allocate_transaction_id())) +        # build the couch document +        couch_doc = { +            '_id': doc.doc_id, +            'u1db_rev': doc.rev, +            'u1db_transactions': transactions, +            '_attachments': attachments, +        } +        # if we are updating a doc we have to add the couch doc revision +        if old_doc is not None: +            couch_doc['_rev'] = old_doc.couch_rev +        # prepare the multipart PUT +        buf = StringIO() +        envelope = MultipartWriter(buf) +        envelope.add('application/json', json.dumps(couch_doc)) +        for part in parts: +            envelope.add('application/octet-stream', part) +        envelope.close() +        # try to save and fail if there's a revision conflict +        try: +            self._database.resource.put_json( +                doc.doc_id, body=buf.getvalue(), headers=envelope.headers) +            self._renew_couch_session() +        except ResourceConflict: +            raise RevisionConflict() + +    def put_doc(self, doc):          """          Update a document. -        This is called everytime we just want to do a raw put on the db (i.e. -        without index updates, document constraint checks, and conflict -        checks). - -        @param doc: The document to update. -        @type doc: u1db.Document - -        @return: The new revision identifier for the document. 
-        @rtype: str -        """ -        # prepare couch's Document -        cdoc = CouchDocument() -        cdoc[self.COUCH_ID_KEY] = doc.doc_id -        # we have to guarantee that couch's _rev is consistent -        old_cdoc = self._database.get(doc.doc_id) -        if old_cdoc is not None: -            cdoc[self.COUCH_REV_KEY] = old_cdoc[self.COUCH_REV_KEY] -        # store u1db's rev -        cdoc[self.COUCH_U1DB_REV_KEY] = doc.rev -        # save doc in db -        self._database.save(cdoc) -        # store u1db's content as json string -        if not doc.is_tombstone(): -            self._database.put_attachment( -                cdoc, doc.get_json(), -                filename=self.COUCH_U1DB_ATTACHMENT_KEY) -        else: -            self._database.delete_attachment( -                cdoc, -                self.COUCH_U1DB_ATTACHMENT_KEY) +        If the document currently has conflicts, put will fail. +        If the database specifies a maximum document size and the document +        exceeds it, put will fail and raise a DocumentTooBig exception. -    def get_sync_target(self): -        """ -        Return a SyncTarget object, for another u1db to synchronize with. +        :param doc: A Document with new content. +        :return: new_doc_rev - The new revision identifier for the document. +            The Document object will also be updated. -        @return: The sync target. -        @rtype: CouchSyncTarget +        :raise InvalidDocId: Raised if the document's id is invalid. +        :raise DocumentTooBig: Raised if the document size is too big. +        :raise ConflictedDoc: Raised if the document has conflicts.          """ -        return CouchSyncTarget(self) - -    def create_index(self, index_name, *index_expressions): +        if doc.doc_id is None: +            raise InvalidDocId() +        self._check_doc_id(doc.doc_id) +        self._check_doc_size(doc) +        old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) +        if old_doc and old_doc.has_conflicts: +            raise ConflictedDoc() +        if old_doc and doc.rev is None and old_doc.is_tombstone(): +            new_rev = self._allocate_doc_rev(old_doc.rev) +        else: +            if old_doc is not None: +                    if old_doc.rev != doc.rev: +                        raise RevisionConflict() +            else: +                if doc.rev is not None: +                    raise RevisionConflict() +            new_rev = self._allocate_doc_rev(doc.rev) +        doc.rev = new_rev +        self._put_doc(old_doc, doc) +        return new_rev + +    def whats_changed(self, old_generation=0):          """ -        Create a named index, which can then be queried for future lookups. - -        @param index_name: A unique name which can be used as a key prefix. -        @param index_expressions: Index expressions defining the index -            information. +        Return a list of documents that have changed since old_generation. + +        :param old_generation: The generation of the database in the old +                               state. 
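Sketch of the revision rules enforced by put_doc() above (doc ids and content
are made up):

    doc = CouchDocument(doc_id='doc-1', json='{"k": 1}')
    rev1 = db.put_doc(doc)    # create: the doc carried no revision yet
    doc.content = {'k': 2}
    rev2 = db.put_doc(doc)    # update: doc.rev matched the stored revision
    stale = CouchDocument(doc_id='doc-1', rev=rev1, json='{"k": 3}')
    db.put_doc(stale)         # raises RevisionConflict: rev1 is not current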
+        :type old_generation: int + +        :return: (generation, trans_id, [(doc_id, generation, trans_id),...]) +                 The current generation of the database, its associated +                 transaction id, and a list of of changed documents since +                 old_generation, represented by tuples with for each document +                 its doc_id and the generation and transaction id corresponding +                 to the last intervening change and sorted by generation (old +                 changes first) +        :rtype: (int, str, [(str, int, str)]) + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason.          """ -        if index_name in self._indexes: -            if self._indexes[index_name]._definition == list( -                    index_expressions): -                return -            raise errors.IndexNameTakenError -        index = InMemoryIndex(index_name, list(index_expressions)) -        for doc_id in self._database: -            if doc_id.startswith(self.U1DB_DATA_DOC_ID_PREFIX): -                continue  # skip special files -            doc = self._get_doc(doc_id) -            if doc.content is not None: -                index.add_json(doc_id, doc.get_json()) -        self._indexes[index_name] = index - -    def close(self): +        # query a couch list function +        ddoc_path = [ +            '_design', 'transactions', '_list', 'whats_changed', 'log' +        ] +        res = self._database.resource(*ddoc_path) +        try: +            response = res.get_json(old_gen=old_generation) +            results = map( +                lambda row: +                    (row['generation'], row['doc_id'], row['transaction_id']), +                response[2]['transactions']) +            results.reverse() +            cur_gen = old_generation +            seen = set() +            changes = [] +            newest_trans_id = '' +            for generation, doc_id, trans_id in results: +                if doc_id not in seen: +                    changes.append((doc_id, generation, trans_id)) +                    seen.add(doc_id) +            if changes: +                cur_gen = changes[0][1]  # max generation +                newest_trans_id = changes[0][2] +                changes.reverse() +            else: +                cur_gen, newest_trans_id = self._get_generation_info() + +            return cur_gen, newest_trans_id, changes +        except ResourceNotFound as e: +            raise_missing_design_doc_error(e, ddoc_path) +        except ServerError as e: +            raise_server_error(e, ddoc_path) + +    def delete_doc(self, doc):          """ -        Release any 
resources associated with this database. +        Mark a document as deleted. + +        Will abort if the current revision doesn't match doc.rev. +        This will also set doc.content to None. + +        :param doc: The document to mark as deleted. +        :type doc: CouchDocument. -        @return: True if db was succesfully closed. -        @rtype: bool +        :raise DocumentDoesNotExist: Raised if the document does not +                                            exist. +        :raise RevisionConflict: Raised if the revisions do not match. +        :raise DocumentAlreadyDeleted: Raised if the document is +                                              already deleted. +        :raise ConflictedDoc: Raised if the doc has conflicts.          """ -        # TODO: fix this method so the connection is properly closed and -        # test_close (+tearDown, which deletes the db) works without problems. -        self._url = None -        self._full_commit = None -        self._session = None -        #self._server = None -        self._database = None -        return True +        old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) +        if old_doc is None: +            raise DocumentDoesNotExist +        if old_doc.rev != doc.rev: +            raise RevisionConflict() +        if old_doc.is_tombstone(): +            raise DocumentAlreadyDeleted +        if old_doc.has_conflicts: +            raise ConflictedDoc() +        new_rev = self._allocate_doc_rev(doc.rev) +        doc.rev = new_rev +        doc.make_tombstone() +        self._put_doc(old_doc, doc) +        return new_rev + +    def _build_conflicts(self, doc_id, attached_conflicts): +        """ +        Build the conflicted documents list from the conflicts attachment +        fetched from a couch document. -    def sync(self, url, creds=None, autocreate=True): +        :param attached_conflicts: The document's conflicts as fetched from a +                                   couch document attachment. +        :type attached_conflicts: dict +        """ +        conflicts = [] +        for doc_rev, content in attached_conflicts: +            doc = self._factory(doc_id, doc_rev) +            if content is None: +                doc.make_tombstone() +            else: +                doc.content = content +            conflicts.append(doc) +        return conflicts + +    def _get_conflicts(self, doc_id, couch_rev=None):          """ -        Synchronize documents with remote replica exposed at url. +        Get the conflicted versions of a document. -        @param url: The url of the target replica to sync with. -        @type url: str -        @param creds: optional dictionary giving credentials. -            to authorize the operation with the server. -        @type creds: dict -        @param autocreate: Ask the target to create the db if non-existent. -        @type autocreate: bool +        If the C{couch_rev} parameter is not None, conflicts for a specific +        document's couch revision are returned. -        @return: The local generation before the synchronisation was performed. -        @rtype: int -        """ -        return Synchronizer(self, CouchSyncTarget(url, creds=creds)).sync( -            autocreate=autocreate) +        :param couch_rev: The couch document revision. 
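A similar hedged sketch for the deletion path implemented by delete_doc()
above:

    doc = db.get_doc('doc-1')
    db.delete_doc(doc)                  # doc must carry the current revision
    assert doc.is_tombstone()
    assert db.get_doc('doc-1') is None  # hidden unless include_deleted=True
    db.delete_doc(doc)                  # raises DocumentAlreadyDeleted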
+        :type couch_rev: str -    #------------------------------------------------------------------------- -    # methods from ObjectStoreDatabase -    #------------------------------------------------------------------------- +        :return: A list of conflicted versions of the document. +        :rtype: list +        """ +        # request conflicts attachment from server +        params = {} +        if couch_rev is not None: +            params['rev'] = couch_rev  # restric document's couch revision +        resource = self._database.resource(doc_id, 'u1db_conflicts') +        try: +            response = resource.get_json(**params) +            return self._build_conflicts( +                doc_id, json.loads(response[2].read())) +        except ResourceNotFound: +            return [] -    def _init_u1db_data(self): +    def get_doc_conflicts(self, doc_id):          """ -        Initialize u1db configuration data on backend storage. +        Get the list of conflicts for the given document. -        A U1DB database needs to keep track of all database transactions, -        document conflicts, the generation of other replicas it has seen, -        indexes created by users and so on. +        The order of the conflicts is such that the first entry is the value +        that would be returned by "get_doc". -        In this implementation, all this information is stored in special -        documents stored in the underlying with doc_id prefix equal to -        U1DB_DATA_DOC_ID_PREFIX. Those documents ids are reserved: put_doc(), -        get_doc() and delete_doc() will not allow documents with a doc_id with -        that prefix to be accessed or modified. +        :return: A list of the document entries that are conflicted. +        :rtype: [CouchDocument]          """ -        for key in self.U1DB_DATA_KEYS: -            doc_id = '%s%s' % (self.U1DB_DATA_DOC_ID_PREFIX, key) -            doc = self._get_doc(doc_id) -            if doc is None: -                doc = self._factory(doc_id) -                doc.content = {'content': getattr(self, key)} -                self._put_doc(doc) - -    #------------------------------------------------------------------------- -    # Couch specific methods -    #------------------------------------------------------------------------- +        conflict_docs = self._get_conflicts(doc_id) +        if len(conflict_docs) == 0: +            return [] +        this_doc = self._get_doc(doc_id, check_for_conflicts=True) +        return [this_doc] + conflict_docs -    INDEX_NAME_KEY = 'name' -    INDEX_DEFINITION_KEY = 'definition' -    INDEX_VALUES_KEY = 'values' +    def _get_replica_gen_and_trans_id(self, other_replica_uid): +        """ +        Return the last known generation and transaction id for the other db +        replica. + +        When you do a synchronization with another replica, the Database keeps +        track of what generation the other database replica was at, and what +        the associated transaction id was.  This is used to determine what data +        needs to be sent, and if two databases are claiming to be the same +        replica. + +        :param other_replica_uid: The identifier for the other replica. +        :type other_replica_uid: str + +        :return: A tuple containing the generation and transaction id we +                 encountered during synchronization. If we've never +                 synchronized with the replica, this is (0, ''). 
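Sketch of the sync bookkeeping described above; the replica uid is made up, and
(0, '') is what the backend answers before the first synchronization with that
replica:

    gen, trans_id = db._get_replica_gen_and_trans_id('other-replica-uid')
    if (gen, trans_id) == (0, ''):
        # never synced with that replica; after a sync, the 'syncs' update
        # handler (used below) records the new values in 'u1db_sync_log'.
        pass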
+        :rtype: (int, str) +        """ +        # query a couch view +        result = self._database.view('syncs/log') +        if len(result[other_replica_uid].rows) == 0: +            return (0, '') +        return ( +            result[other_replica_uid].rows[0]['value']['known_generation'], +            result[other_replica_uid].rows[0]['value']['known_transaction_id'] +        ) + +    def _set_replica_gen_and_trans_id(self, other_replica_uid, +                                      other_generation, other_transaction_id): +        """ +        Set the last-known generation and transaction id for the other +        database replica. + +        We have just performed some synchronization, and we want to track what +        generation the other replica was at. See also +        _get_replica_gen_and_trans_id. + +        :param other_replica_uid: The U1DB identifier for the other replica. +        :type other_replica_uid: str +        :param other_generation: The generation number for the other replica. +        :type other_generation: int +        :param other_transaction_id: The transaction id associated with the +            generation. +        :type other_transaction_id: str +        """ +        self._do_set_replica_gen_and_trans_id( +            other_replica_uid, other_generation, other_transaction_id) -    def delete_database(self): +    def _do_set_replica_gen_and_trans_id( +            self, other_replica_uid, other_generation, other_transaction_id):          """ -        Delete a U1DB CouchDB database. +        Set the last-known generation and transaction id for the other +        database replica. + +        We have just performed some synchronization, and we want to track what +        generation the other replica was at. See also +        _get_replica_gen_and_trans_id. + +        :param other_replica_uid: The U1DB identifier for the other replica. +        :type other_replica_uid: str +        :param other_generation: The generation number for the other replica. +        :type other_generation: int +        :param other_transaction_id: The transaction id associated with the +                                     generation. +        :type other_transaction_id: str + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason.          
""" -        del(self._server[self._dbname]) +        # query a couch update function +        ddoc_path = ['_design', 'syncs', '_update', 'put', 'u1db_sync_log'] +        res = self._database.resource(*ddoc_path) +        try: +            with CouchDatabase.update_handler_lock[self._get_replica_uid()]: +                res.put_json( +                    body={ +                        'other_replica_uid': other_replica_uid, +                        'other_generation': other_generation, +                        'other_transaction_id': other_transaction_id, +                    }, +                    headers={'content-type': 'application/json'}) +        except ResourceNotFound as e: +            raise_missing_design_doc_error(e, ddoc_path) + +    def _add_conflict(self, doc, my_doc_rev, my_content): +        """ +        Add a conflict to the document. + +        Note that this method does not actually update the backend; rather, it +        updates the CouchDocument object which will provide the conflict data +        when the atomic document update is made. + +        :param doc: The document to have conflicts added to. +        :type doc: CouchDocument +        :param my_doc_rev: The revision of the conflicted document. +        :type my_doc_rev: str +        :param my_content: The content of the conflicted document as a JSON +                           serialized string. +        :type my_content: str +        """ +        doc._ensure_fetch_conflicts(self._get_conflicts) +        doc.add_conflict( +            self._factory(doc_id=doc.doc_id, rev=my_doc_rev, +                          json=my_content)) -    def _dump_indexes_as_json(self): +    def _delete_conflicts(self, doc, conflict_revs):          """ -        Dump index definitions as JSON. +        Delete the conflicted revisions from the list of conflicts of C{doc}. + +        Note that this method does not actually update the backend; rather, it +        updates the CouchDocument object which will provide the conflict data +        when the atomic document update is made. + +        :param doc: The document to have conflicts deleted. +        :type doc: CouchDocument +        :param conflict_revs: A list of the revisions to be deleted. +        :param conflict_revs: [str]          """ -        indexes = {} -        for name, idx in self._indexes.iteritems(): -            indexes[name] = {} -            for attr in [self.INDEX_NAME_KEY, self.INDEX_DEFINITION_KEY, -                         self.INDEX_VALUES_KEY]: -                indexes[name][attr] = getattr(idx, '_' + attr) -        return indexes +        doc._ensure_fetch_conflicts(self._get_conflicts) +        doc.delete_conflicts(conflict_revs) -    def _load_indexes_from_json(self, indexes): +    def _prune_conflicts(self, doc, doc_vcr):          """ -        Load index definitions from stored JSON. +        Prune conflicts that are older then the current document's revision, or +        whose content match to the current document's content. + +        :param doc: The document to have conflicts pruned. +        :type doc: CouchDocument +        :param doc_vcr: A vector clock representing the current document's +                        revision. 
+        :type doc_vcr: u1db.vectorclock.VectorClock +        """ +        if doc.has_conflicts is True: +            autoresolved = False +            c_revs_to_prune = [] +            for c_doc in doc.get_conflicts(): +                c_vcr = vectorclock.VectorClockRev(c_doc.rev) +                if doc_vcr.is_newer(c_vcr): +                    c_revs_to_prune.append(c_doc.rev) +                elif doc.same_content_as(c_doc): +                    c_revs_to_prune.append(c_doc.rev) +                    doc_vcr.maximize(c_vcr) +                    autoresolved = True +            if autoresolved: +                doc_vcr.increment(self._replica_uid) +                doc.rev = doc_vcr.as_str() +            self._delete_conflicts(doc, c_revs_to_prune) + +    def _force_doc_sync_conflict(self, doc): +        """ +        Add a conflict and force a document put. -        @param indexes: A JSON representation of indexes as -            [('index-name', ['field', 'field2', ...]), ...]. -        @type indexes: str +        :param doc: The document to be put. +        :type doc: CouchDocument          """ -        self._indexes = {} -        for name, idx_dict in indexes.iteritems(): -            idx = InMemoryIndex(name, idx_dict[self.INDEX_DEFINITION_KEY]) -            idx._values = idx_dict[self.INDEX_VALUES_KEY] -            self._indexes[name] = idx +        my_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) +        self._prune_conflicts(doc, vectorclock.VectorClockRev(doc.rev)) +        self._add_conflict(doc, my_doc.rev, my_doc.get_json()) +        doc.has_conflicts = True +        self._put_doc(my_doc, doc) -    def _load_transaction_log_from_json(self, transaction_log): +    def resolve_doc(self, doc, conflicted_doc_revs):          """ -        Load transaction log from stored JSON. +        Mark a document as no longer conflicted. + +        We take the list of revisions that the client knows about that it is +        superseding. This may be a different list from the actual current +        conflicts, in which case only those are removed as conflicted.  This +        may fail if the conflict list is significantly different from the +        supplied information. (sync could have happened in the background from +        the time you GET_DOC_CONFLICTS until the point where you RESOLVE) + +        :param doc: A Document with the new content to be inserted. +        :type doc: CouchDocument +        :param conflicted_doc_revs: A list of revisions that the new content +                                    supersedes. +        :type conflicted_doc_revs: [str] + +        :raise MissingDesignDocError: Raised when tried to access a missing +                                      design document. +        :raise MissingDesignDocListFunctionError: Raised when trying to access +                                                  a missing list function on a +                                                  design document. +        :raise MissingDesignDocNamedViewError: Raised when trying to access a +                                               missing named view on a design +                                               document. +        :raise MissingDesignDocDeletedError: Raised when trying to access a +                                             deleted design document. +        :raise MissingDesignDocUnknownError: Raised when failed to access a +                                             design document for an yet +                                             unknown reason. 
+        """ +        cur_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) +        new_rev = self._ensure_maximal_rev(cur_doc.rev, +                                           conflicted_doc_revs) +        superseded_revs = set(conflicted_doc_revs) +        doc.rev = new_rev +        # this backend stores conflicts as properties of the documents, so we +        # have to copy these conflicts over to the document being updated. +        if cur_doc.rev in superseded_revs: +            # the newer doc version will supersede the one in the database, so +            # we copy conflicts before updating the backend. +            doc.set_conflicts(cur_doc.get_conflicts())  # copy conflicts over. +            self._delete_conflicts(doc, superseded_revs) +            self._put_doc(cur_doc, doc) +        else: +            # the newer doc version does not supersede the one in the +            # database, so we will add a conflict to the database and copy +            # those over to the document the user has in her hands. +            self._add_conflict(cur_doc, new_rev, doc.get_json()) +            self._delete_conflicts(cur_doc, superseded_revs) +            self._put_doc(cur_doc, cur_doc)  # just update conflicts +            # backend has been updated with current conflicts, now copy them +            # to the current document. +            doc.set_conflicts(cur_doc.get_conflicts()) + +    def _put_doc_if_newer(self, doc, save_conflict, replica_uid, replica_gen, +                          replica_trans_id=''): +        """ +        Insert/update document into the database with a given revision. + +        This api is used during synchronization operations. + +        If a document would conflict and save_conflict is set to True, the +        content will be selected as the 'current' content for doc.doc_id, +        even though doc.rev doesn't supersede the currently stored revision. +        The currently stored document will be added to the list of conflict +        alternatives for the given doc_id. + +        This forces the new content to be 'current' so that we get convergence +        after synchronizing, even if people don't resolve conflicts. Users can +        then notice that their content is out of date, update it, and +        synchronize again. (The alternative is that users could synchronize and +        think the data has propagated, but their local copy looks fine, and the +        remote copy is never updated again.) + +        :param doc: A document object +        :type doc: CouchDocument +        :param save_conflict: If this document is a conflict, do you want to +                              save it as a conflict, or just ignore it. +        :type save_conflict: bool +        :param replica_uid: A unique replica identifier. +        :type replica_uid: str +        :param replica_gen: The generation of the replica corresponding to the +                            this document. The replica arguments are optional, +                            but are used during synchronization. +        :type replica_gen: int +        :param replica_trans_id: The transaction_id associated with the +                                 generation. +        :type replica_trans_id: str + +        :return: (state, at_gen) -  If we don't have doc_id already, or if +                 doc_rev supersedes the existing document revision, then the +                 content will be inserted, and state is 'inserted'.  
If
+                 doc_rev is less than or equal to the existing revision, then
+                 the put is ignored and state is respectively 'superseded' or
+                 'converged'.  If doc_rev is not strictly superseded or
+                 supersedes, then state is 'conflicted'. The document will not
+                 be inserted if save_conflict is False.  For 'inserted' or
+                 'converged', at_gen is the insertion/current generation.
+        :rtype: (str, int)
+        """
+        cur_doc = self._get_doc(doc.doc_id, check_for_conflicts=True)
+        # at this point, `doc` has arrived from the other syncing party, and
+        # we will decide what to do with it.
+        # First, we prepare the arriving doc to update couch database.
+        old_doc = doc
+        doc = self._factory(doc.doc_id, doc.rev, doc.get_json())
+        if cur_doc is not None:
+            doc.couch_rev = cur_doc.couch_rev
+        # fetch conflicts because we will eventually manipulate them
+        doc._ensure_fetch_conflicts(self._get_conflicts)
+        # from now on, it works just like u1db sqlite backend
+        doc_vcr = vectorclock.VectorClockRev(doc.rev)
+        if cur_doc is None:
+            cur_vcr = vectorclock.VectorClockRev(None)
+        else:
+            cur_vcr = vectorclock.VectorClockRev(cur_doc.rev)
+        self._validate_source(replica_uid, replica_gen, replica_trans_id)
+        if doc_vcr.is_newer(cur_vcr):
+            rev = doc.rev
+            self._prune_conflicts(doc, doc_vcr)
+            if doc.rev != rev:
+                # conflicts have been autoresolved
+                state = 'superseded'
+            else:
+                state = 'inserted'
+            self._put_doc(cur_doc, doc)
+        elif doc.rev == cur_doc.rev:
+            # magical convergence
+            state = 'converged'
+        elif cur_vcr.is_newer(doc_vcr):
+            # Don't add this to seen_ids, because we have something newer,
+            # so we should send it back, and we should not generate a
+            # conflict
+            state = 'superseded'
+        elif cur_doc.same_content_as(doc):
+            # the documents have been edited to the same thing at both ends
+            doc_vcr.maximize(cur_vcr)
+            doc_vcr.increment(self._replica_uid)
+            doc.rev = doc_vcr.as_str()
+            self._put_doc(cur_doc, doc)
+            state = 'superseded'
+        else:
+            state = 'conflicted'
+            if save_conflict:
+                self._force_doc_sync_conflict(doc)
+        if replica_uid is not None and replica_gen is not None:
+            self._set_replica_gen_and_trans_id(
+                replica_uid, replica_gen, replica_trans_id)
+        # update info
+        old_doc.rev = doc.rev
+        if doc.is_tombstone():
+            old_doc.make_tombstone()
+        else:
+            old_doc.content = doc.content
+        old_doc.has_conflicts = doc.has_conflicts
+        return state, self._get_generation()
-        @param transaction_log: A JSON representation of transaction_log as
-            [('generation', 'transaction_id'), ...].
-        @type transaction_log: list
+    def get_docs(self, doc_ids, check_for_conflicts=True,
+                 include_deleted=False):
         """
-        self._transaction_log = []
-        for gen, trans_id in transaction_log:
-            self._transaction_log.append((gen, trans_id))
+        Get the JSON content for many documents.
+
+        :param doc_ids: A list of document identifiers.
+        :type doc_ids: list
+        :param check_for_conflicts: If set to False, then the conflict check
+                                    will be skipped, and 'None' will be
+                                    returned instead of True/False.
+        :type check_for_conflicts: bool
+        :param include_deleted: If set to True, deleted documents will be
+                                returned with empty content. Otherwise deleted
+                                documents will not be included in the results.
+        :type include_deleted: bool
+        :return: iterable giving the Document object for each document id
+                 in matching doc_ids order.
+        :rtype: iterable
+        """
+        # Workaround for:
+        #
+        #   http://bugs.python.org/issue7980
+        #   https://leap.se/code/issues/5449
+        #
+        # python-couchdb uses time.strptime, which is not thread safe. In
+        # order to avoid the problem described on the issues above, we preload
+        # strptime here by evaluating the conversion of an arbitrary date.
+        # This will not be needed when/if we switch from python-couchdb to
+        # paisley.
+        time.strptime('Mar 4 1917', '%b %d %Y')
+        # spawn threads to retrieve docs
+        threads = []
+        for doc_id in doc_ids:
+            self._sem_pool.acquire()
+            t = self._GetDocThread(self, doc_id, check_for_conflicts,
+                                   self._sem_pool.release)
+            t.start()
+            threads.append(t)
+        # join threads and yield docs
+        for t in threads:
+            t.join()
+            if t._doc.is_tombstone() and not include_deleted:
+                continue
+            yield t._doc
-    def _load_other_generations_from_json(self, other_generations):
+    def _renew_couch_session(self):
         """
-        Load other generations from stored JSON.
+        Create a new couch connection session.
-        @param other_generations: A JSON representation of other_generations
-            as {'replica_uid': ('generation', 'transaction_id'), ...}.
-        @type other_generations: dict
+        This is a workaround for #5448. Will not be needed once bigcouch is
+        merged with couchdb.
         """
-        self._other_generations = {}
-        for replica_uid, [gen, trans_id] in other_generations.iteritems():
-            self._other_generations[replica_uid] = (gen, trans_id)
+        self._database.resource.session = Session(timeout=COUCH_TIMEOUT)
-class CouchSyncTarget(ObjectStoreSyncTarget):
+class CouchSyncTarget(CommonSyncTarget):
     """
     Functionality for using a CouchDatabase as a synchronization target.
     """
-    pass
+    def get_sync_info(self, source_replica_uid):
+        source_gen, source_trans_id = self._db._get_replica_gen_and_trans_id(
+            source_replica_uid)
+        my_gen, my_trans_id = self._db._get_generation_info()
+        return (
+            self._db._replica_uid, my_gen, my_trans_id, source_gen,
+            source_trans_id)
-class NotEnoughCouchPermissions(Exception):
-    """
-    Raised when failing to assert for enough permissions on underlying Couch
-    Database.
-    """ -    pass +    def record_sync_info(self, source_replica_uid, source_replica_generation, +                         source_replica_transaction_id): +        if self._trace_hook: +            self._trace_hook('record_sync_info') +        self._db._set_replica_gen_and_trans_id( +            source_replica_uid, source_replica_generation, +            source_replica_transaction_id)  class CouchServerState(ServerState): @@ -527,121 +1468,66 @@ class CouchServerState(ServerState):          """          Initialize the couch server state. -        @param couch_url: The URL for the couch database. -        @type couch_url: str -        @param shared_db_name: The name of the shared database. -        @type shared_db_name: str -        @param tokens_db_name: The name of the tokens database. -        @type tokens_db_name: str +        :param couch_url: The URL for the couch database. +        :type couch_url: str +        :param shared_db_name: The name of the shared database. +        :type shared_db_name: str +        :param tokens_db_name: The name of the tokens database. +        :type tokens_db_name: str          """          self._couch_url = couch_url          self._shared_db_name = shared_db_name          self._tokens_db_name = tokens_db_name -        try: -            self._check_couch_permissions() -        except NotEnoughCouchPermissions: -            logger.error("Not enough permissions on underlying couch " -                         "database (%s)." % self._couch_url) -        except (socket.error, socket.gaierror, socket.herror, -                socket.timeout), e: -            logger.error("Socket problem while trying to reach underlying " -                         "couch database: (%s, %s)." % -                         (self._couch_url, e)) - -    def _check_couch_permissions(self): -        """ -        Assert that Soledad Server has enough permissions on the underlying -        couch database. - -        Soledad Server has to be able to do the following in the couch server: - -            * Create, read and write from/to 'shared' db. -            * Create, read and write from/to 'user-<anything>' dbs. -            * Read from 'tokens' db. - -        This function tries to perform the actions above using the "low level" -        couch library to ensure that Soledad Server can do everything it needs -        on the underlying couch database. - -        @param couch_url: The URL of the couch database. -        @type couch_url: str - -        @raise NotEnoughCouchPermissions: Raised in case there are not enough -            permissions to read/write/create the needed couch databases. 
-        @rtype: bool -        """ - -        def _open_couch_db(dbname): -            server = Server(url=self._couch_url) -            try: -                server[dbname] -            except ResourceNotFound: -                server.create(dbname) -            return server[dbname] - -        def _create_delete_test_doc(db): -            doc_id, _ = db.save({'test': 'document'}) -            doc = db[doc_id] -            db.delete(doc) - -        try: -            # test read/write auth for shared db -            _create_delete_test_doc( -                _open_couch_db(self._shared_db_name)) -            # test read/write auth for user-<something> db -            _create_delete_test_doc( -                _open_couch_db('%stest-db' % USER_DB_PREFIX)) -            # test read auth for tokens db -            tokensdb = _open_couch_db(self._tokens_db_name) -            tokensdb.info() -        except Unauthorized: -            raise NotEnoughCouchPermissions(self._couch_url)      def open_database(self, dbname):          """          Open a couch database. -        @param dbname: The name of the database to open. -        @type dbname: str +        :param dbname: The name of the database to open. +        :type dbname: str -        @return: The CouchDatabase object. -        @rtype: CouchDatabase +        :return: The CouchDatabase object. +        :rtype: CouchDatabase          """ -        # TODO: open couch          return CouchDatabase.open_database(              self._couch_url + '/' + dbname, -            create=False) +            create=False, +            ensure_ddocs=False)      def ensure_database(self, dbname):          """          Ensure couch database exists. -        @param dbname: The name of the database to ensure. -        @type dbname: str +        Usually, this method is used by the server to ensure the existence of +        a database. In our setup, the Soledad user that accesses the underlying +        couch server should never have permission to create (or delete) +        databases. But, in case it ever does, by raising an exception here we +        have one more guarantee that no modified client will be able to +        enforce creation of a database when syncing. + +        :param dbname: The name of the database to ensure. +        :type dbname: str -        @return: The CouchDatabase object and the replica uid. -        @rtype: (CouchDatabase, str) +        :return: The CouchDatabase object and the replica uid. +        :rtype: (CouchDatabase, str)          """ -        db = CouchDatabase.open_database( -            self._couch_url + '/' + dbname, -            create=True) -        return db, db._replica_uid +        raise Unauthorized()      def delete_database(self, dbname):          """          Delete couch database. -        @param dbname: The name of the database to delete. -        @type dbname: str +        :param dbname: The name of the database to delete. 
+        :type dbname: str          """ -        CouchDatabase.delete_database(self._couch_url + '/' + dbname) +        raise Unauthorized()      def _set_couch_url(self, url):          """          Set the couchdb URL -        @param url: CouchDB URL -        @type url: str +        :param url: CouchDB URL +        :type url: str          """          self._couch_url = url @@ -649,7 +1535,7 @@ class CouchServerState(ServerState):          """          Return CouchDB URL -        @rtype: str +        :rtype: str          """          return self._couch_url diff --git a/common/src/leap/soledad/common/ddocs/README.txt b/common/src/leap/soledad/common/ddocs/README.txt new file mode 100644 index 00000000..5569d929 --- /dev/null +++ b/common/src/leap/soledad/common/ddocs/README.txt @@ -0,0 +1,34 @@ +This directory holds a folder structure containing javascript files that +represent the design documents needed by the CouchDB U1DB backend. These files +are compiled into the `../ddocs.py` file by setuptools when creating the +source distribution. + +The following table depicts the U1DB CouchDB backend method and the URI that +is queried to obtain/update data from/to the server. + +   +----------------------------------+------------------------------------------------------------------+ +   | u1db backend method              | URI                                                              | +   |----------------------------------+------------------------------------------------------------------| +   | _get_generation                  | _design/transactions/_list/generation/log                        | +   | _get_generation_info             | _design/transactions/_list/generation/log                        | +   | _get_trans_id_for_gen            | _design/transactions/_list/trans_id_for_gen/log                  | +   | _get_transaction_log             | _design/transactions/_view/log                                   | +   | _get_doc (*)                     | _design/docs/_view/get?key=<doc_id>                              | +   | _has_conflicts                   | _design/docs/_view/get?key=<doc_id>                              | +   | get_all_docs                     | _design/docs/_view/get                                           | +   | _put_doc                         | _design/docs/_update/put/<doc_id>                                | +   | _whats_changed                   | _design/transactions/_list/whats_changed/log?old_gen=<gen>       | +   | _get_conflicts (*)               | _design/docs/_view/conflicts?key=<doc_id>                        | +   | _get_replica_gen_and_trans_id    | _design/syncs/_view/log?other_replica_uid=<uid>                  | +   | _do_set_replica_gen_and_trans_id | _design/syncs/_update/put/u1db_sync_log                          | +   | _add_conflict                    | _design/docs/_update/add_conflict/<doc_id>                       | +   | _delete_conflicts                | _design/docs/_update/delete_conflicts/<doc_id>?doc_rev=<doc_rev> | +   | list_indexes                     | not implemented                                                  | +   | _get_index_definition            | not implemented                                                  | +   | delete_index                     | not implemented                                                  | +   | _get_indexed_fields              | not implemented                                                  | +   | _put_and_update_indexes          | not implemented                                        
          | +   +----------------------------------+------------------------------------------------------------------+ + +(*) These methods also request CouchDB document attachments that store U1DB +    document contents. diff --git a/common/src/leap/soledad/common/ddocs/docs/views/get/map.js b/common/src/leap/soledad/common/ddocs/docs/views/get/map.js new file mode 100644 index 00000000..ae08d9e9 --- /dev/null +++ b/common/src/leap/soledad/common/ddocs/docs/views/get/map.js @@ -0,0 +1,20 @@ +function(doc) { +    if (doc.u1db_rev) { +        var is_tombstone = true; +        var has_conflicts = false; +        if (doc._attachments) { +            if (doc._attachments.u1db_content) +                is_tombstone = false; +            if (doc._attachments.u1db_conflicts) +                has_conflicts = true; +        } +        emit(doc._id, +            { +                "couch_rev": doc._rev, +                "u1db_rev": doc.u1db_rev, +                "is_tombstone": is_tombstone, +                "has_conflicts": has_conflicts, +            } +        ); +    } +} diff --git a/common/src/leap/soledad/common/ddocs/syncs/updates/put.js b/common/src/leap/soledad/common/ddocs/syncs/updates/put.js new file mode 100644 index 00000000..722f695a --- /dev/null +++ b/common/src/leap/soledad/common/ddocs/syncs/updates/put.js @@ -0,0 +1,22 @@ +function(doc, req){ +    if (!doc) { +        doc = {} +        doc['_id'] = 'u1db_sync_log'; +        doc['syncs'] = []; +    } +    body = JSON.parse(req.body); +    // remove outdated info +    doc['syncs'] = doc['syncs'].filter( +        function (entry) { +            return entry[0] != body['other_replica_uid']; +        } +    ); +    // store u1db rev +    doc['syncs'].push([ +        body['other_replica_uid'], +        body['other_generation'], +        body['other_transaction_id'] +    ]); +    return [doc, 'ok']; +} + diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/log/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/log/map.js new file mode 100644 index 00000000..a63c7cf4 --- /dev/null +++ b/common/src/leap/soledad/common/ddocs/syncs/views/log/map.js @@ -0,0 +1,12 @@ +function(doc) { +    if (doc._id == 'u1db_sync_log') { +        if (doc.syncs) +            doc.syncs.forEach(function (entry) { +                emit(entry[0], +                    { +                        'known_generation': entry[1], +                        'known_transaction_id': entry[2] +                    }); +            }); +    } +} diff --git a/common/src/leap/soledad/common/ddocs/transactions/lists/generation.js b/common/src/leap/soledad/common/ddocs/transactions/lists/generation.js new file mode 100644 index 00000000..dbdfff0d --- /dev/null +++ b/common/src/leap/soledad/common/ddocs/transactions/lists/generation.js @@ -0,0 +1,20 @@ +function(head, req) { +    var row; +    var rows=[]; +    // fetch all rows +    while(row = getRow()) { +        rows.push(row); +    } +    if (rows.length > 0) +        send(JSON.stringify({ +            "generation": rows.length, +            "doc_id": rows[rows.length-1]['id'], +            "transaction_id": rows[rows.length-1]['value'] +        })); +    else +        send(JSON.stringify({ +            "generation": 0, +            "doc_id": "", +            "transaction_id": "", +        })); +} diff --git a/common/src/leap/soledad/common/ddocs/transactions/lists/trans_id_for_gen.js b/common/src/leap/soledad/common/ddocs/transactions/lists/trans_id_for_gen.js new file mode 100644 index 00000000..2ec91794 --- 
/dev/null +++ b/common/src/leap/soledad/common/ddocs/transactions/lists/trans_id_for_gen.js @@ -0,0 +1,19 @@ +function(head, req) { +    var row; +    var rows=[]; +    var i = 1; +    var gen = 1; +    if (req.query.gen) +        gen = parseInt(req.query['gen']); +    // fetch all rows +    while(row = getRow()) +        rows.push(row); +    if (gen <= rows.length) +        send(JSON.stringify({ +            "generation": gen, +            "doc_id": rows[gen-1]['id'], +            "transaction_id": rows[gen-1]['value'], +        })); +    else +        send('{}'); +} diff --git a/common/src/leap/soledad/common/ddocs/transactions/lists/whats_changed.js b/common/src/leap/soledad/common/ddocs/transactions/lists/whats_changed.js new file mode 100644 index 00000000..b35cdf51 --- /dev/null +++ b/common/src/leap/soledad/common/ddocs/transactions/lists/whats_changed.js @@ -0,0 +1,22 @@ +function(head, req) { +    var row; +    var gen = 1; +    var old_gen = 0; +    if (req.query.old_gen) +        old_gen = parseInt(req.query['old_gen']); +    send('{"transactions":[\n'); +    // fetch all rows +    while(row = getRow()) { +        if (gen > old_gen) { +            if (gen > old_gen+1) +                send(',\n'); +            send(JSON.stringify({ +                "generation": gen, +                "doc_id": row["id"], +                "transaction_id": row["value"] +            })); +        } +        gen++; +    } +    send('\n]}'); +} diff --git a/common/src/leap/soledad/common/ddocs/transactions/views/log/map.js b/common/src/leap/soledad/common/ddocs/transactions/views/log/map.js new file mode 100644 index 00000000..94ef63ca --- /dev/null +++ b/common/src/leap/soledad/common/ddocs/transactions/views/log/map.js @@ -0,0 +1,7 @@ +function(doc) { +    if (doc.u1db_transactions) +        doc.u1db_transactions.forEach(function(t) { +            emit(t[0],  // use timestamp as key so the results are ordered +                 t[1]); // value is the transaction_id +        }); +} diff --git a/common/src/leap/soledad/common/document.py b/common/src/leap/soledad/common/document.py index cc24b53a..919ade12 100644 --- a/common/src/leap/soledad/common/document.py +++ b/common/src/leap/soledad/common/document.py @@ -29,6 +29,7 @@ from u1db import Document  #  class SoledadDocument(Document): +      """      Encryptable and syncable document. @@ -107,5 +108,3 @@ class SoledadDocument(Document):          _get_rev,          _set_rev,          doc="Wrapper to ensure `doc.rev` is always returned as bytes.") - - diff --git a/common/src/leap/soledad/common/errors.py b/common/src/leap/soledad/common/errors.py index 7c2d7296..3a7eadd2 100644 --- a/common/src/leap/soledad/common/errors.py +++ b/common/src/leap/soledad/common/errors.py @@ -25,11 +25,49 @@ from u1db import errors  from u1db.remote import http_errors +def register_exception(cls): +    """ +    A small decorator that registers exceptions in u1db maps. +    """ +    # update u1db "wire description to status" and "wire description to +    # exception" maps. +    http_errors.wire_description_to_status.update({ +        cls.wire_description: cls.status}) +    errors.wire_description_to_exc.update({ +        cls.wire_description: cls}) +    # do not modify the exception +    return cls + + +class SoledadError(errors.U1DBError): +    """ +    Base Soledad HTTP errors. +    """ +    pass + +  # -# LockResource: a lock based on a document in the shared database. 
+# Authorization errors
 #
-class InvalidTokenError(errors.U1DBError):
+@register_exception
+class InvalidAuthTokenError(errors.Unauthorized):
+    """
+    Exception raised when failing to get authorization for some action because
+    the provided token either does not exist in the tokens database, has a
+    distinct structure from the expected one, or is associated with a user
+    with a uuid distinct from the one provided by the client.
+    """
+
+    wire_description = "invalid auth token"
+    status = 401
+
+#
+# LockResource errors
+#
+
+@register_exception
+class InvalidTokenError(SoledadError):
     """
     Exception raised when trying to unlock shared database with invalid token.
     """
@@ -38,7 +76,8 @@ class InvalidTokenError(errors.U1DBError):
     status = 401
-class NotLockedError(errors.U1DBError):
+@register_exception
+class NotLockedError(SoledadError):
     """
     Exception raised when trying to unlock shared database when it is not
     locked.
@@ -48,7 +87,8 @@ class NotLockedError(errors.U1DBError):
     status = 404
-class AlreadyLockedError(errors.U1DBError):
+@register_exception
+class AlreadyLockedError(SoledadError):
     """
     Exception raised when trying to lock shared database but it is already
     locked.
@@ -57,13 +97,83 @@ class AlreadyLockedError(errors.U1DBError):
     wire_description = "lock is locked"
     status = 403
-# update u1db "wire description to status" and "wire description to exception"
-# maps.
-for e in [InvalidTokenError, NotLockedError, AlreadyLockedError]:
-    http_errors.wire_description_to_status.update({
-        e.wire_description: e.status})
-    errors.wire_description_to_exc.update({
-        e.wire_description: e})
+
+@register_exception
+class LockTimedOutError(SoledadError):
+    """
+    Exception raised when timing out while trying to lock the shared database.
+    """
+
+    wire_description = "lock timed out"
+    status = 408
+
+
+@register_exception
+class CouldNotObtainLockError(SoledadError):
+    """
+    Exception raised when failing to obtain a lock on the shared database.
+    """
+
+    wire_description = "error obtaining lock"
+    status = 500
+
+
+#
+# CouchDatabase errors
+#
+
+@register_exception
+class MissingDesignDocError(SoledadError):
+    """
+    Raised when trying to access a missing couch design document.
+    """
+
+    wire_description = "missing design document"
+    status = 500
+
+
+@register_exception
+class MissingDesignDocNamedViewError(SoledadError):
+    """
+    Raised when trying to access a missing named view on a couch design
+    document.
+    """
+
+    wire_description = "missing design document named function"
+    status = 500
+
+
+@register_exception
+class MissingDesignDocListFunctionError(SoledadError):
+    """
+    Raised when trying to access a missing list function on a couch design
+    document.
+    """
+
+    wire_description = "missing design document list function"
+    status = 500
+
+
+@register_exception
+class MissingDesignDocDeletedError(SoledadError):
+    """
+    Raised when trying to access a deleted couch design document.
+    """
+
+    wire_description = "design document was deleted"
+    status = 500
+
+
+@register_exception
+class DesignDocUnknownError(SoledadError):
+    """
+    Raised when trying to access a couch design document and getting an
+    unknown error.
+    """ + +    wire_description = "missing design document unknown error" +    status = 500 +  # u1db error statuses also have to be updated  http_errors.ERROR_STATUSES = set( diff --git a/common/src/leap/soledad/common/objectstore.py b/common/src/leap/soledad/common/objectstore.py deleted file mode 100644 index 7aff3e32..00000000 --- a/common/src/leap/soledad/common/objectstore.py +++ /dev/null @@ -1,282 +0,0 @@ -# -*- coding: utf-8 -*- -# objectstore.py -# Copyright (C) 2013 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - - -""" -Abstract U1DB backend to handle storage using object stores (like CouchDB, for -example). - -This backend uses special documents to store all U1DB data (replica uid, -indexes, transaction logs and info about other dbs). The id of these documents -are reserved and have prefix equal to ObjectStore.U1DB_DATA_DOC_ID_PREFIX. - -Right now, this is only used by CouchDatabase backend, but can also be -extended to implement OpenStack or Amazon S3 storage, for example. - -See U1DB documentation for more information on how to use databases. -""" - - -from base64 import b64encode, b64decode - - -import uuid -import simplejson as json - - -from u1db import errors -from u1db.backends.inmemory import ( -    InMemoryDatabase, -    InMemorySyncTarget, -) - - -class ObjectStoreDatabase(InMemoryDatabase): -    """ -    A backend for storing u1db data in an object store. -    """ - -    U1DB_DATA_DOC_ID_PREFIX = 'u1db/' - -    @classmethod -    def open_database(cls, url, create, document_factory=None): -        """ -        Open a U1DB database using an object store as backend. - -        @param url: the url of the database replica -        @type url: str -        @param create: should the replica be created if it does not exist? -        @type create: bool -        @param document_factory: A function that will be called with the same -            parameters as Document.__init__. -        @type document_factory: callable - -        @return: the database instance -        @rtype: CouchDatabase -        """ -        raise NotImplementedError(cls.open_database) - -    def __init__(self, replica_uid=None, document_factory=None): -        """ -        Initialize the object store database. - -        @param replica_uid: an optional unique replica identifier -        @type replica_uid: str -        @param document_factory: A function that will be called with the same -            parameters as Document.__init__. -        @type document_factory: callable -        """ -        InMemoryDatabase.__init__( -            self, -            replica_uid, -            document_factory=document_factory) -        if self._replica_uid is None: -            self._replica_uid = uuid.uuid4().hex -        self._init_u1db_data() - -    def _init_u1db_data(self): -        """ -        Initialize u1db configuration data on backend storage. 
- -        A U1DB database needs to keep track of all database transactions, -        document conflicts, the generation of other replicas it has seen, -        indexes created by users and so on. - -        In this implementation, all this information is stored in special -        documents stored in the couch db with id prefix equal to -        U1DB_DATA_DOC_ID_PREFIX.  Those documents ids are reserved: -        put_doc(), get_doc() and delete_doc() will not allow documents with -        a doc_id with that prefix to be accessed or modified. -        """ -        raise NotImplementedError(self._init_u1db_data) - -    #------------------------------------------------------------------------- -    # methods from Database -    #------------------------------------------------------------------------- - -    def put_doc(self, doc): -        """ -        Update a document. - -        If the document currently has conflicts, put will fail. -        If the database specifies a maximum document size and the document -        exceeds it, put will fail and raise a DocumentTooBig exception. - -        This method prevents from updating the document with doc_id equals to -        self.U1DB_DATA_DOC_ID, which contains U1DB data. - -        @param doc: A Document with new content. -        @type doc: Document - -        @return: new_doc_rev - The new revision identifier for the document. -            The Document object will also be updated. -        @rtype: str -        """ -        if doc.doc_id is not None and \ -                doc.doc_id.startswith(self.U1DB_DATA_DOC_ID_PREFIX): -            raise errors.InvalidDocId() -        return InMemoryDatabase.put_doc(self, doc) - -    def _put_doc(self, doc): -        """ -        Update a document. - -        This is called everytime we just want to do a raw put on the db (i.e. -        without index updates, document constraint checks, and conflict -        checks). - -        @param doc: The document to update. -        @type doc: u1db.Document - -        @return: The new revision identifier for the document. -        @rtype: str -        """ -        raise NotImplementedError(self._put_doc) - -    def get_doc(self, doc_id, include_deleted=False): -        """ -        Get the JSON string for the given document. - -        This method prevents from getting the document with doc_id equals to -        self.U1DB_DATA_DOC_ID, which contains U1DB data. - -        @param doc_id: The unique document identifier -        @type doc_id: str -        @param include_deleted: If set to True, deleted documents will be -            returned with empty content. Otherwise asking for a deleted -            document will return None. -        @type include_deleted: bool - -        @return: a Document object. -        @rtype: Document -        """ -        if doc_id.startswith(self.U1DB_DATA_DOC_ID_PREFIX): -            raise errors.InvalidDocId() -        return InMemoryDatabase.get_doc(self, doc_id, include_deleted) - -    def _get_doc(self, doc_id): -        """ -        Get just the document content, without fancy handling. - -        @param doc_id: The unique document identifier -        @type doc_id: str -        @param include_deleted: If set to True, deleted documents will be -            returned with empty content. Otherwise asking for a deleted -            document will return None. -        @type include_deleted: bool - -        @return: a Document object. 
-        @type: u1db.Document -        """ -        raise NotImplementedError(self._get_doc) - -    def get_all_docs(self, include_deleted=False): -        """ -        Get the JSON content for all documents in the database. - -        @param include_deleted: If set to True, deleted documents will be -            returned with empty content. Otherwise deleted documents will not -            be included in the results. -        @type include_deleted: bool - -        @return: (generation, [Document]) -            The current generation of the database, followed by a list of all -            the documents in the database. -        @rtype: tuple -        """ -        generation = self._get_generation() -        results = [] -        for doc_id in self._database: -            if doc_id.startswith(self.U1DB_DATA_DOC_ID_PREFIX): -                continue -            doc = self._get_doc(doc_id, check_for_conflicts=True) -            if doc.content is None and not include_deleted: -                continue -            results.append(doc) -        return (generation, results) - -    def delete_doc(self, doc): -        """ -        Mark a document as deleted. - -        This method prevents from deleting the document with doc_id equals to -        self.U1DB_DATA_DOC_ID, which contains U1DB data. - -        @param doc: The document to mark as deleted. -        @type doc: u1db.Document - -        @return: The new revision id of the document. -        @type: str -        """ -        if doc.doc_id.startswith(self.U1DB_DATA_DOC_ID_PREFIX): -            raise errors.InvalidDocId() -        old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) -        if old_doc is None: -            raise errors.DocumentDoesNotExist -        if old_doc.rev != doc.rev: -            raise errors.RevisionConflict() -        if old_doc.is_tombstone(): -            raise errors.DocumentAlreadyDeleted -        if old_doc.has_conflicts: -            raise errors.ConflictedDoc() -        new_rev = self._allocate_doc_rev(doc.rev) -        doc.rev = new_rev -        doc.make_tombstone() -        self._put_and_update_indexes(old_doc, doc) -        return new_rev - -    # index-related methods - -    def create_index(self, index_name, *index_expressions): -        """ -        Create a named index, which can then be queried for future lookups. - -        See U1DB documentation for more information. - -        @param index_name: A unique name which can be used as a key prefix. -        @param index_expressions: Index expressions defining the index -            information. -        """ -        raise NotImplementedError(self.create_index) - -    #------------------------------------------------------------------------- -    # implemented methods from CommonBackend -    #------------------------------------------------------------------------- - -    def _put_and_update_indexes(self, old_doc, doc): -        """ -        Update a document and all indexes related to it. - -        @param old_doc: The old version of the document. -        @type old_doc: u1db.Document -        @param doc: The new version of the document. 
-        @type doc: u1db.Document -        """ -        for index in self._indexes.itervalues(): -            if old_doc is not None and not old_doc.is_tombstone(): -                index.remove_json(old_doc.doc_id, old_doc.get_json()) -            if not doc.is_tombstone(): -                index.add_json(doc.doc_id, doc.get_json()) -        trans_id = self._allocate_transaction_id() -        self._put_doc(doc) -        self._transaction_log.append((doc.doc_id, trans_id)) - - -class ObjectStoreSyncTarget(InMemorySyncTarget): -    """ -    Functionality for using an ObjectStore as a synchronization target. -    """ diff --git a/common/src/leap/soledad/common/tests/couchdb.ini.template b/common/src/leap/soledad/common/tests/couchdb.ini.template index 7d0316f0..1fc2205b 100644 --- a/common/src/leap/soledad/common/tests/couchdb.ini.template +++ b/common/src/leap/soledad/common/tests/couchdb.ini.template @@ -6,7 +6,7 @@  database_dir = %(tempdir)s/lib  view_index_dir = %(tempdir)s/lib  max_document_size = 4294967296 ; 4 GB -os_process_timeout = 5000 ; 5 seconds. for view and external servers. +os_process_timeout = 120000 ; 120 seconds. for view and external servers.  max_dbs_open = 100  delayed_commits = true ; set this to false to ensure an fsync before 201 Created is returned  uri_file = %(tempdir)s/lib/couch.uri @@ -74,6 +74,8 @@ use_users_db = false  [query_servers]  ; javascript = %(tempdir)s/server/main.js +javascript = /usr/bin/couchjs /usr/share/couchdb/server/main.js +coffeescript = /usr/bin/couchjs /usr/share/couchdb/server/main-coffee.js  ; Changing reduce_limit to false will disable reduce_limit. @@ -219,4 +221,4 @@ min_file_size = 131072  ;[admins]  ;testuser = -hashed-f50a252c12615697c5ed24ec5cd56b05d66fe91e,b05471ba260132953930cf9f97f327f5 -; pass for above user is 'testpass'
\ No newline at end of file +; pass for above user is 'testpass' diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py index 42edf9fe..86bb4b93 100644 --- a/common/src/leap/soledad/common/tests/test_couch.py +++ b/common/src/leap/soledad/common/tests/test_couch.py @@ -24,14 +24,17 @@ import re  import copy  import shutil  from base64 import b64decode +from mock import Mock + +from couchdb.client import Server +from u1db import errors as u1db_errors  from leap.common.files import mkdir_p -from leap.soledad.common.document import SoledadDocument  from leap.soledad.common.tests import u1db_tests as tests  from leap.soledad.common.tests.u1db_tests import test_backends  from leap.soledad.common.tests.u1db_tests import test_sync -from leap.soledad.common import couch +from leap.soledad.common import couch, errors  import simplejson as json @@ -78,9 +81,10 @@ class CouchDBWrapper(object):          mkdir_p(os.path.join(self.tempdir, 'lib'))          mkdir_p(os.path.join(self.tempdir, 'log'))          args = ['couchdb', '-n', '-a', confPath] -        #null = open('/dev/null', 'w') +        null = open('/dev/null', 'w') +          self.process = subprocess.Popen( -            args, env=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, +            args, env=None, stdout=null.fileno(), stderr=null.fileno(),              close_fds=True)          # find port          logPath = os.path.join(self.tempdir, 'log', 'couch.log') @@ -123,21 +127,21 @@ class CouchDBTestCase(unittest.TestCase):      TestCase base class for tests against a real CouchDB server.      """ -    def setUp(self): +    @classmethod +    def setUpClass(cls):          """          Make sure we have a CouchDB instance for a test.          """ -        self.wrapper = CouchDBWrapper() -        self.wrapper.start() +        cls.wrapper = CouchDBWrapper() +        cls.wrapper.start()          #self.db = self.wrapper.db -        unittest.TestCase.setUp(self) -    def tearDown(self): +    @classmethod +    def tearDownClass(cls):          """          Stop CouchDB instance for test.          
""" -        self.wrapper.stop() -        unittest.TestCase.tearDown(self) +        cls.wrapper.stop()  #----------------------------------------------------------------------------- @@ -148,7 +152,7 @@ class TestCouchBackendImpl(CouchDBTestCase):      def test__allocate_doc_id(self):          db = couch.CouchDatabase('http://localhost:' + str(self.wrapper.port), -                                 'u1db_tests') +                                 'u1db_tests', ensure_ddocs=True)          doc_id1 = db._allocate_doc_id()          self.assertTrue(doc_id1.startswith('D-'))          self.assertEqual(34, len(doc_id1)) @@ -163,32 +167,51 @@ class TestCouchBackendImpl(CouchDBTestCase):  def make_couch_database_for_test(test, replica_uid):      port = str(test.wrapper.port)      return couch.CouchDatabase('http://localhost:' + port, replica_uid, -                               replica_uid=replica_uid or 'test') +                               replica_uid=replica_uid or 'test', +                               ensure_ddocs=True)  def copy_couch_database_for_test(test, db):      port = str(test.wrapper.port) -    new_db = couch.CouchDatabase('http://localhost:' + port, -                                 db._replica_uid + '_copy', +    couch_url = 'http://localhost:' + port +    new_dbname = db._replica_uid + '_copy' +    new_db = couch.CouchDatabase(couch_url, +                                 new_dbname,                                   replica_uid=db._replica_uid or 'test') -    gen, docs = db.get_all_docs(include_deleted=True) -    for doc in docs: -        new_db._put_doc(doc) -    new_db._transaction_log = copy.deepcopy(db._transaction_log) -    new_db._conflicts = copy.deepcopy(db._conflicts) -    new_db._other_generations = copy.deepcopy(db._other_generations) -    new_db._indexes = copy.deepcopy(db._indexes) -    # save u1db data on couch -    for key in new_db.U1DB_DATA_KEYS: -        doc_id = '%s%s' % (new_db.U1DB_DATA_DOC_ID_PREFIX, key) -        doc = new_db._get_doc(doc_id) -        doc.content = {'content': getattr(new_db, key)} -        new_db._put_doc(doc) +    # copy all docs +    old_couch_db = Server(couch_url)[db._replica_uid] +    new_couch_db = Server(couch_url)[new_dbname] +    for doc_id in old_couch_db: +        doc = old_couch_db.get(doc_id) +        # copy design docs +        if ('u1db_rev' not in doc): +            new_couch_db.save(doc) +        # copy u1db docs +        else: +            new_doc = { +                '_id': doc['_id'], +                'u1db_transactions': doc['u1db_transactions'], +                'u1db_rev': doc['u1db_rev'] +            } +            attachments = [] +            if ('u1db_conflicts' in doc): +                new_doc['u1db_conflicts'] = doc['u1db_conflicts'] +                for c_rev in doc['u1db_conflicts']: +                    attachments.append('u1db_conflict_%s' % c_rev) +            new_couch_db.save(new_doc) +            # save conflict data +            attachments.append('u1db_content') +            for att_name in attachments: +                att = old_couch_db.get_attachment(doc_id, att_name) +                if (att is not None): +                    new_couch_db.put_attachment(new_doc, att, +                                                filename=att_name)      return new_db  def make_document_for_test(test, doc_id, rev, content, has_conflicts=False): -    return SoledadDocument(doc_id, rev, content, has_conflicts=has_conflicts) +    return couch.CouchDocument( +        doc_id, rev, content, has_conflicts=has_conflicts)  
COUCH_SCENARIOS = [ @@ -202,8 +225,22 @@ class CouchTests(test_backends.AllDatabaseTests, CouchDBTestCase):      scenarios = COUCH_SCENARIOS +    def setUp(self): +        test_backends.AllDatabaseTests.setUp(self) +        # save db info because of test_close +        self._server = self.db._server +        self._dbname = self.db._dbname +      def tearDown(self): -        self.db.delete_database() +        # if current test is `test_close` we have to use saved objects to +        # delete the database because the close() method will have removed the +        # references needed to do it using the CouchDatabase. +        if self.id() == \ +                'leap.soledad.common.tests.test_couch.CouchTests.' \ +                'test_close(couch)': +            del(self._server[self._dbname]) +        else: +            self.db.delete_database()          test_backends.AllDatabaseTests.tearDown(self) @@ -246,17 +283,16 @@ class CouchWithConflictsTests(          test_backends.LocalDatabaseWithConflictsTests.tearDown(self) -# Notice: the CouchDB backend is currently used for storing encrypted data in -# the server, so indexing makes no sense. Thus, we ignore index testing for -# now. - -class CouchIndexTests(test_backends.DatabaseIndexTests, CouchDBTestCase): - -    scenarios = COUCH_SCENARIOS +# Notice: the CouchDB backend does not have indexing capabilities, so we do +# not test indexing now. -    def tearDown(self): -        self.db.delete_database() -        test_backends.DatabaseIndexTests.tearDown(self) +#class CouchIndexTests(test_backends.DatabaseIndexTests, CouchDBTestCase): +# +#    scenarios = COUCH_SCENARIOS +# +#    def tearDown(self): +#        self.db.delete_database() +#        test_backends.DatabaseIndexTests.tearDown(self)  #----------------------------------------------------------------------------- @@ -311,6 +347,89 @@ class CouchDatabaseSyncTargetTests(test_sync.DatabaseSyncTargetTests,                   [(doc.doc_id, doc.rev), (doc2.doc_id, doc2.rev)]}) +# The following tests need that the database have an index, so we fake one. 
+old_class = couch.CouchDatabase + +from u1db.backends.inmemory import InMemoryIndex + + +class IndexedCouchDatabase(couch.CouchDatabase): + +    def __init__(self, url, dbname, replica_uid=None, full_commit=True, +                     session=None, ensure_ddocs=True): +        old_class.__init__(self, url, dbname, replica_uid, full_commit, +                           session, ensure_ddocs=ensure_ddocs) +        self._indexes = {} + +    def _put_doc(self, old_doc, doc): +        for index in self._indexes.itervalues(): +            if old_doc is not None and not old_doc.is_tombstone(): +                index.remove_json(old_doc.doc_id, old_doc.get_json()) +            if not doc.is_tombstone(): +                index.add_json(doc.doc_id, doc.get_json()) +        old_class._put_doc(self, old_doc, doc) + +    def create_index(self, index_name, *index_expressions): +        if index_name in self._indexes: +            if self._indexes[index_name]._definition == list( +                    index_expressions): +                return +            raise u1db_errors.IndexNameTakenError +        index = InMemoryIndex(index_name, list(index_expressions)) +        _, all_docs = self.get_all_docs() +        for doc in all_docs: +            index.add_json(doc.doc_id, doc.get_json()) +        self._indexes[index_name] = index + +    def delete_index(self, index_name): +        del self._indexes[index_name] + +    def list_indexes(self): +        definitions = [] +        for idx in self._indexes.itervalues(): +            definitions.append((idx._name, idx._definition)) +        return definitions + +    def get_from_index(self, index_name, *key_values): +        try: +            index = self._indexes[index_name] +        except KeyError: +            raise u1db_errors.IndexDoesNotExist +        doc_ids = index.lookup(key_values) +        result = [] +        for doc_id in doc_ids: +            result.append(self._get_doc(doc_id, check_for_conflicts=True)) +        return result + +    def get_range_from_index(self, index_name, start_value=None, +                             end_value=None): +        """Return all documents with key values in the specified range.""" +        try: +            index = self._indexes[index_name] +        except KeyError: +            raise u1db_errors.IndexDoesNotExist +        if isinstance(start_value, basestring): +            start_value = (start_value,) +        if isinstance(end_value, basestring): +            end_value = (end_value,) +        doc_ids = index.lookup_range(start_value, end_value) +        result = [] +        for doc_id in doc_ids: +            result.append(self._get_doc(doc_id, check_for_conflicts=True)) +        return result + +    def get_index_keys(self, index_name): +        try: +            index = self._indexes[index_name] +        except KeyError: +            raise u1db_errors.IndexDoesNotExist +        keys = index.keys() +        # XXX inefficiency warning +        return list(set([tuple(key.split('\x01')) for key in keys])) + + +couch.CouchDatabase = IndexedCouchDatabase +  sync_scenarios = []  for name, scenario in COUCH_SCENARIOS:      scenario = dict(scenario) @@ -344,98 +463,184 @@ class CouchDatabaseSyncTests(test_sync.DatabaseSyncTests, CouchDBTestCase):          test_sync.DatabaseSyncTests.tearDown(self) -#----------------------------------------------------------------------------- -# The following tests test extra functionality introduced by our backends 
-#----------------------------------------------------------------------------- - -class CouchDatabaseStorageTests(CouchDBTestCase): - -    def _listify(self, l): -        if type(l) is dict: -            return { -                self._listify(a): self._listify(b) for a, b in l.iteritems()} -        if hasattr(l, '__iter__'): -            return [self._listify(i) for i in l] -        return l - -    def _fetch_u1db_data(self, db, key): -        doc = db._get_doc("%s%s" % (db.U1DB_DATA_DOC_ID_PREFIX, key)) -        return doc.content['content'] - -    def test_transaction_log_storage_after_put(self): -        db = couch.CouchDatabase('http://localhost:' + str(self.wrapper.port), -                                 'u1db_tests') -        db.create_doc({'simple': 'doc'}) -        content = self._fetch_u1db_data(db, db.U1DB_TRANSACTION_LOG_KEY) -        self.assertEqual( -            self._listify(db._transaction_log), -            self._listify(content)) - -    def test_conflict_log_storage_after_put_if_newer(self): -        db = couch.CouchDatabase('http://localhost:' + str(self.wrapper.port), -                                 'u1db_tests') -        doc = db.create_doc({'simple': 'doc'}) -        doc.set_json(nested_doc) -        doc.rev = db._replica_uid + ':2' -        db._force_doc_sync_conflict(doc) -        content = self._fetch_u1db_data(db, db.U1DB_CONFLICTS_KEY) -        self.assertEqual( -            self._listify(db._conflicts), -            self._listify(content)) - -    def test_other_gens_storage_after_set(self): -        db = couch.CouchDatabase('http://localhost:' + str(self.wrapper.port), -                                 'u1db_tests') -        doc = db.create_doc({'simple': 'doc'}) -        db._set_replica_gen_and_trans_id('a', 'b', 'c') -        content = self._fetch_u1db_data(db, db.U1DB_OTHER_GENERATIONS_KEY) -        self.assertEqual( -            self._listify(db._other_generations), -            self._listify(content)) +class CouchDatabaseExceptionsTests(CouchDBTestCase): -    def test_index_storage_after_create(self): -        db = couch.CouchDatabase('http://localhost:' + str(self.wrapper.port), -                                 'u1db_tests') -        doc = db.create_doc({'name': 'john'}) -        db.create_index('myindex', 'name') -        content = self._fetch_u1db_data(db, db.U1DB_INDEXES_KEY) -        myind = db._indexes['myindex'] -        index = { -            'myindex': { -                'definition': myind._definition, -                'name': myind._name, -                'values': myind._values, -            } -        } -        self.assertEqual( -            self._listify(index), -            self._listify(content)) +    def setUp(self): +        CouchDBTestCase.setUp(self) +        self.db = couch.CouchDatabase( +            'http://127.0.0.1:%d' % self.wrapper.port, 'test', +            ensure_ddocs=False)  # note that we don't enforce ddocs here -    def test_index_storage_after_delete(self): -        db = couch.CouchDatabase('http://localhost:' + str(self.wrapper.port), -                                 'u1db_tests') -        doc = db.create_doc({'name': 'john'}) -        db.create_index('myindex', 'name') -        db.create_index('myindex2', 'name') -        db.delete_index('myindex') -        content = self._fetch_u1db_data(db, db.U1DB_INDEXES_KEY) -        myind = db._indexes['myindex2'] -        index = { -            'myindex2': { -                'definition': myind._definition, -                'name': myind._name, -                'values': 
myind._values, -            } -        } -        self.assertEqual( -            self._listify(index), -            self._listify(content)) +    def tearDown(self): +        self.db.delete_database() -    def test_replica_uid_storage_after_db_creation(self): -        db = couch.CouchDatabase('http://localhost:' + str(self.wrapper.port), -                                 'u1db_tests') -        content = self._fetch_u1db_data(db, db.U1DB_REPLICA_UID_KEY) -        self.assertEqual(db._replica_uid, content) +    def test_missing_design_doc_raises(self): +        """ +        Test that all methods that access design documents will raise if the +        design docs are not present. +        """ +        # _get_generation() +        self.assertRaises( +            errors.MissingDesignDocError, +            self.db._get_generation) +        # _get_generation_info() +        self.assertRaises( +            errors.MissingDesignDocError, +            self.db._get_generation_info) +        # _get_trans_id_for_gen() +        self.assertRaises( +            errors.MissingDesignDocError, +            self.db._get_trans_id_for_gen, 1) +        # _get_transaction_log() +        self.assertRaises( +            errors.MissingDesignDocError, +            self.db._get_transaction_log) +        # whats_changed() +        self.assertRaises( +            errors.MissingDesignDocError, +            self.db.whats_changed) +        # _do_set_replica_gen_and_trans_id() +        self.assertRaises( +            errors.MissingDesignDocError, +            self.db._do_set_replica_gen_and_trans_id, 1, 2, 3) + +    def test_missing_design_doc_functions_raises(self): +        """ +        Test that all methods that access design documents list functions +        will raise if the functions are not present. +        """ +        self.db = couch.CouchDatabase( +            'http://127.0.0.1:%d' % self.wrapper.port, 'test', +            ensure_ddocs=True) +        # erase views from _design/transactions +        transactions = self.db._database['_design/transactions'] +        transactions['lists'] = {} +        self.db._database.save(transactions) +        # _get_generation() +        self.assertRaises( +            errors.MissingDesignDocListFunctionError, +            self.db._get_generation) +        # _get_generation_info() +        self.assertRaises( +            errors.MissingDesignDocListFunctionError, +            self.db._get_generation_info) +        # _get_trans_id_for_gen() +        self.assertRaises( +            errors.MissingDesignDocListFunctionError, +            self.db._get_trans_id_for_gen, 1) +        # whats_changed() +        self.assertRaises( +            errors.MissingDesignDocListFunctionError, +            self.db.whats_changed) + +    def test_absent_design_doc_functions_raises(self): +        """ +        Test that all methods that access design documents list functions +        will raise if the functions are not present. 
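+        In this variant the 'lists' entry is removed from the design document
+        altogether (the previous test only empties it), so the same
+        MissingDesignDocListFunctionError is expected.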
+        """ +        self.db = couch.CouchDatabase( +            'http://127.0.0.1:%d' % self.wrapper.port, 'test', +            ensure_ddocs=True) +        # erase views from _design/transactions +        transactions = self.db._database['_design/transactions'] +        del transactions['lists'] +        self.db._database.save(transactions) +        # _get_generation() +        self.assertRaises( +            errors.MissingDesignDocListFunctionError, +            self.db._get_generation) +        # _get_generation_info() +        self.assertRaises( +            errors.MissingDesignDocListFunctionError, +            self.db._get_generation_info) +        # _get_trans_id_for_gen() +        self.assertRaises( +            errors.MissingDesignDocListFunctionError, +            self.db._get_trans_id_for_gen, 1) +        # whats_changed() +        self.assertRaises( +            errors.MissingDesignDocListFunctionError, +            self.db.whats_changed) + +    def test_missing_design_doc_named_views_raises(self): +        """ +        Test that all methods that access design documents' named views  will +        raise if the views are not present. +        """ +        self.db = couch.CouchDatabase( +            'http://127.0.0.1:%d' % self.wrapper.port, 'test', +            ensure_ddocs=True) +        # erase views from _design/docs +        docs = self.db._database['_design/docs'] +        del docs['views'] +        self.db._database.save(docs) +        # erase views from _design/syncs +        syncs = self.db._database['_design/syncs'] +        del syncs['views'] +        self.db._database.save(syncs) +        # erase views from _design/transactions +        transactions = self.db._database['_design/transactions'] +        del transactions['views'] +        self.db._database.save(transactions) +        # _get_generation() +        self.assertRaises( +            errors.MissingDesignDocNamedViewError, +            self.db._get_generation) +        # _get_generation_info() +        self.assertRaises( +            errors.MissingDesignDocNamedViewError, +            self.db._get_generation_info) +        # _get_trans_id_for_gen() +        self.assertRaises( +            errors.MissingDesignDocNamedViewError, +            self.db._get_trans_id_for_gen, 1) +        # _get_transaction_log() +        self.assertRaises( +            errors.MissingDesignDocNamedViewError, +            self.db._get_transaction_log) +        # whats_changed() +        self.assertRaises( +            errors.MissingDesignDocNamedViewError, +            self.db.whats_changed) + +    def test_deleted_design_doc_raises(self): +        """ +        Test that all methods that access design documents will raise if the +        design docs are not present. 
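+        Unlike the first test, here the design documents are actually deleted
+        from the database, so MissingDesignDocDeletedError is expected instead
+        of MissingDesignDocError.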
+        """ +        self.db = couch.CouchDatabase( +            'http://127.0.0.1:%d' % self.wrapper.port, 'test', +            ensure_ddocs=True) +        # delete _design/docs +        del self.db._database['_design/docs'] +        # delete _design/syncs +        del self.db._database['_design/syncs'] +        # delete _design/transactions +        del self.db._database['_design/transactions'] +        # _get_generation() +        self.assertRaises( +            errors.MissingDesignDocDeletedError, +            self.db._get_generation) +        # _get_generation_info() +        self.assertRaises( +            errors.MissingDesignDocDeletedError, +            self.db._get_generation_info) +        # _get_trans_id_for_gen() +        self.assertRaises( +            errors.MissingDesignDocDeletedError, +            self.db._get_trans_id_for_gen, 1) +        # _get_transaction_log() +        self.assertRaises( +            errors.MissingDesignDocDeletedError, +            self.db._get_transaction_log) +        # whats_changed() +        self.assertRaises( +            errors.MissingDesignDocDeletedError, +            self.db.whats_changed) +        # _do_set_replica_gen_and_trans_id() +        self.assertRaises( +            errors.MissingDesignDocDeletedError, +            self.db._do_set_replica_gen_and_trans_id, 1, 2, 3)  load_tests = tests.load_with_scenarios diff --git a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py new file mode 100644 index 00000000..3c457cc5 --- /dev/null +++ b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py @@ -0,0 +1,389 @@ +# -*- coding: utf-8 -*- +# test_soledad.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +""" +""" + +import os +import mock +import tempfile +import threading + +from leap.soledad.client import Soledad +from leap.soledad.common.couch import CouchDatabase, CouchServerState +from leap.soledad.common.tests.test_couch import CouchDBTestCase +from leap.soledad.common.tests.u1db_tests import TestCaseWithServer +from leap.soledad.common.tests.test_target import ( +    make_token_soledad_app, +    make_leap_document_for_test, +    token_leap_sync_target, +) +from leap.soledad.common.tests.test_server import _couch_ensure_database + + +REPEAT_TIMES = 20 + + +# monkey path CouchServerState so it can ensure databases. 
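+# _couch_ensure_database, imported from test_server above, opens the
+# requested couch database with create=True and returns the database object
+# together with its replica uid, so per-user databases get created on demand
+# during these tests.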
+ +CouchServerState.ensure_database = _couch_ensure_database + + +class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer): + +    @staticmethod +    def make_app_after_state(state): +        return make_token_soledad_app(state) + +    make_document_for_test = make_leap_document_for_test + +    sync_target = token_leap_sync_target + +    def _soledad_instance(self, user='user-uuid', passphrase=u'123', +                          prefix='', +                          secrets_path=Soledad.STORAGE_SECRETS_FILE_NAME, +                          local_db_path='soledad.u1db', server_url='', +                          cert_file=None, auth_token=None, secret_id=None): +        """ +        Instantiate Soledad. +        """ + +        # this callback ensures we save a document which is sent to the shared +        # db. +        def _put_doc_side_effect(doc): +            self._doc_put = doc + +        # we need a mocked shared db or else Soledad will try to access the +        # network to find if there are uploaded secrets. +        class MockSharedDB(object): + +            get_doc = mock.Mock(return_value=None) +            put_doc = mock.Mock(side_effect=_put_doc_side_effect) +            lock = mock.Mock(return_value=('atoken', 300)) +            unlock = mock.Mock() + +            def __call__(self): +                return self + +        Soledad._shared_db = MockSharedDB() +        return Soledad( +            user, +            passphrase, +            secrets_path=os.path.join(self.tempdir, prefix, secrets_path), +            local_db_path=os.path.join( +                self.tempdir, prefix, local_db_path), +            server_url=server_url, +            cert_file=cert_file, +            auth_token=auth_token, +            secret_id=secret_id) + +    def make_app(self): +        self.request_state = CouchServerState(self._couch_url, 'shared', +                                              'tokens') +        return self.make_app_after_state(self.request_state) + +    def setUp(self): +        TestCaseWithServer.setUp(self) +        CouchDBTestCase.setUp(self) +        self._couch_url = 'http://localhost:' + str(self.wrapper.port) +        self.db = CouchDatabase( +            self._couch_url, 'user-user-uuid', replica_uid='replica') +        self.tempdir = tempfile.mkdtemp(prefix="leap_tests-") + +    def tearDown(self): +        self.db.delete_database() +        CouchDBTestCase.tearDown(self) +        TestCaseWithServer.tearDown(self) + +    # +    # Sequential tests +    # + +    def test_correct_transaction_log_after_sequential_puts(self): +        """ +        Assert that the transaction_log increases accordingly with sequential +        puts. 
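+        Each put adds exactly one entry, so after the initial create plus
+        REPEAT_TIMES puts of the same document the log must contain
+        REPEAT_TIMES + 1 entries, all referring to that document.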
+        """ +        doc = self.db.create_doc({'ops': 0}) +        ops = 1 +        docs = [doc.doc_id] +        for i in range(0, REPEAT_TIMES): +            self.assertEqual( +                i+1, len(self.db._get_transaction_log())) +            doc.content['ops'] += 1 +            self.db.put_doc(doc) +            docs.append(doc.doc_id) + +        # assert length of transaction_log +        transaction_log = self.db._get_transaction_log() +        self.assertEqual( +            REPEAT_TIMES+1, len(transaction_log)) + +        # assert that all entries in the log belong to the same doc +        self.assertEqual(REPEAT_TIMES+1, len(docs)) +        for doc_id in docs: +            self.assertEqual( +                REPEAT_TIMES+1, +                len(filter(lambda t: t[0] == doc_id, transaction_log))) + +    def test_correct_transaction_log_after_sequential_deletes(self): +        """ +        Assert that the transaction_log increases accordingly with sequential +        puts and deletes. +        """ +        docs = [] +        for i in range(0, REPEAT_TIMES): +            doc = self.db.create_doc({'ops': 0}) +            self.assertEqual( +                2*i+1, len(self.db._get_transaction_log())) +            docs.append(doc.doc_id) +            self.db.delete_doc(doc) +            self.assertEqual( +                2*i+2, len(self.db._get_transaction_log())) + +        # assert length of transaction_log +        transaction_log = self.db._get_transaction_log() +        self.assertEqual( +            2*REPEAT_TIMES, len(transaction_log)) + +        # assert that each doc appears twice in the transaction_log +        self.assertEqual(REPEAT_TIMES, len(docs)) +        for doc_id in docs: +            self.assertEqual( +                2, +                len(filter(lambda t: t[0] == doc_id, transaction_log))) + +    def test_correct_sync_log_after_sequential_syncs(self): +        """ +        Assert that the sync_log increases accordingly with sequential syncs. 
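+        The couch backend keeps a single 'syncs/log' row per source replica,
+        so the number of rows stays at one while the known generation and
+        transaction id stored in it advance with every sync.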
+        """ +        self.startServer() +        sol = self._soledad_instance( +            auth_token='auth-token', +            server_url=self.getURL()) + +        def _create_docs_and_sync(sol, syncs): +            # create a lot of documents +            for i in range(0, REPEAT_TIMES): +                sol.create_doc({}) +            # assert sizes of transaction and sync logs +            self.assertEqual( +                syncs*REPEAT_TIMES, +                len(self.db._get_transaction_log())) +            self.assertEqual( +                1 if syncs > 0 else 0, +                len(self.db._database.view('syncs/log').rows)) +            # sync to the remote db +            sol.sync() +            gen, docs = self.db.get_all_docs() +            self.assertEqual((syncs+1)*REPEAT_TIMES, gen) +            self.assertEqual((syncs+1)*REPEAT_TIMES, len(docs)) +            # assert sizes of transaction and sync logs +            self.assertEqual((syncs+1)*REPEAT_TIMES, +                             len(self.db._get_transaction_log())) +            sync_log_rows = self.db._database.view('syncs/log').rows +            sync_log = sync_log_rows[0].value +            replica_uid = sync_log_rows[0].key +            known_gen = sync_log['known_generation'] +            known_trans_id = sync_log['known_transaction_id'] +            # assert sync_log has exactly 1 row +            self.assertEqual(1, len(sync_log_rows)) +            # assert it has the correct replica_uid, gen and trans_id +            self.assertEqual(sol._db._replica_uid, replica_uid) +            sol_gen, sol_trans_id = sol._db._get_generation_info() +            self.assertEqual(sol_gen, known_gen) +            self.assertEqual(sol_trans_id, known_trans_id) + +        _create_docs_and_sync(sol, 0) +        _create_docs_and_sync(sol, 1) + +    # +    # Concurrency tests +    # +     +    class _WorkerThread(threading.Thread): +         +        def __init__(self, params, run_method): +            threading.Thread.__init__(self) +            self._params = params +            self._run_method = run_method + +        def run(self): +            self._run_method(self) + +    def test_correct_transaction_log_after_concurrent_puts(self): +        """ +        Assert that the transaction_log increases accordingly with concurrent +        puts. 
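+        Each worker thread creates exactly one document, so the log is
+        expected to end up with REPEAT_TIMES entries no matter how the
+        writes interleave.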
+        """ +        pool = threading.BoundedSemaphore(value=1) +        threads = [] +        docs = [] + +        def _run_method(self): +            doc = self._params['db'].create_doc({}) +            pool.acquire() +            self._params['docs'].append(doc.doc_id) +            pool.release() + + +        for i in range(0, REPEAT_TIMES): +            thread = self._WorkerThread( +                {'docs': docs, 'db': self.db}, +                _run_method) +            thread.start() +            threads.append(thread) + +        for thread in threads: +            thread.join() +         +        # assert length of transaction_log +        transaction_log = self.db._get_transaction_log() +        self.assertEqual( +            REPEAT_TIMES, len(transaction_log)) + +        # assert all documents are in the log +        self.assertEqual(REPEAT_TIMES, len(docs)) +        for doc_id in docs: +            self.assertEqual( +                1, +                len(filter(lambda t: t[0] == doc_id, transaction_log))) + +    def test_correct_transaction_log_after_concurrent_deletes(self): +        """ +        Assert that the transaction_log increases accordingly with concurrent +        puts and deletes. +        """ +        threads = [] +        docs = [] +        pool = threading.BoundedSemaphore(value=1) + +        # create/delete method that will be run concurrently +        def _run_method(self): +            doc = self._params['db'].create_doc({}) +            pool.acquire() +            docs.append(doc.doc_id) +            pool.release() +            self._params['db'].delete_doc(doc) + +        # launch concurrent threads +        for i in range(0, REPEAT_TIMES): +            thread = self._WorkerThread({'db': self.db}, _run_method) +            thread.start() +            threads.append(thread) + +        # wait for threads to finish +        for thread in threads: +            thread.join() + +        # assert transaction log +        transaction_log = self.db._get_transaction_log() +        self.assertEqual( +            2*REPEAT_TIMES, len(transaction_log)) +        # assert that each doc appears twice in the transaction_log +        self.assertEqual(REPEAT_TIMES, len(docs)) +        for doc_id in docs: +            self.assertEqual( +                2, +                len(filter(lambda t: t[0] == doc_id, transaction_log))) + +    def test_correct_sync_log_after_concurrent_puts_and_sync(self): +        """ +        Assert that the sync_log is correct after concurrent syncs. +        """ +        threads = [] +        docs = [] +        pool = threading.BoundedSemaphore(value=1) +        self.startServer() +        sol = self._soledad_instance( +            auth_token='auth-token', +            server_url=self.getURL()) + +        def _run_method(self): +            # create a lot of documents +            doc = self._params['sol'].create_doc({}) +            pool.acquire() +            docs.append(doc.doc_id) +            pool.release() + +        # launch threads to create documents in parallel +        for i in range(0, REPEAT_TIMES): +            thread = self._WorkerThread( +                {'sol': sol, 'syncs': i}, +                _run_method) +            thread.start() +            threads.append(thread) + +        # wait for threads to finish +        for thread in threads: +            thread.join() +         +        # do the sync! 
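+        # a single sync after all worker threads have finished is enough
+        # here: only document creation happens concurrently in this test,
+        # while concurrent syncs themselves are exercised by
+        # test_concurrent_syncs_do_not_fail below.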
+        sol.sync() + +        transaction_log = self.db._get_transaction_log() +        self.assertEqual(REPEAT_TIMES, len(transaction_log)) +        # assert all documents are in the remote log +        self.assertEqual(REPEAT_TIMES, len(docs)) +        for doc_id in docs: +            self.assertEqual( +                1, +                len(filter(lambda t: t[0] == doc_id, transaction_log))) + +    def test_concurrent_syncs_do_not_fail(self): +        """ +        Assert that concurrent attempts to sync end up being executed +        sequentially and do not fail. +        """ +        threads = [] +        docs = [] +        pool = threading.BoundedSemaphore(value=1) +        self.startServer() +        sol = self._soledad_instance( +            auth_token='auth-token', +            server_url=self.getURL()) + +        def _run_method(self): +            # create a lot of documents +            doc = self._params['sol'].create_doc({}) +            # do the sync! +            sol.sync() +            pool.acquire() +            docs.append(doc.doc_id) +            pool.release() + +        # launch threads to create documents in parallel +        for i in range(0, REPEAT_TIMES): +            thread = self._WorkerThread( +                {'sol': sol, 'syncs': i}, +                _run_method) +            thread.start() +            threads.append(thread) + +        # wait for threads to finish +        for thread in threads: +            thread.join() + +        transaction_log = self.db._get_transaction_log() +        self.assertEqual(REPEAT_TIMES, len(transaction_log)) +        # assert all documents are in the remote log +        self.assertEqual(REPEAT_TIMES, len(docs)) +        for doc_id in docs: +            self.assertEqual( +                1, +                len(filter(lambda t: t[0] == doc_id, transaction_log))) diff --git a/common/src/leap/soledad/common/tests/test_server.py b/common/src/leap/soledad/common/tests/test_server.py index 83df192b..f8d2a64f 100644 --- a/common/src/leap/soledad/common/tests/test_server.py +++ b/common/src/leap/soledad/common/tests/test_server.py @@ -25,6 +25,7 @@ import tempfile  import simplejson as json  import mock  import time +import binascii  from leap.common.testing.basetest import BaseLeapTest @@ -50,6 +51,17 @@ from leap.soledad.server import SoledadApp, LockResource  from leap.soledad.server.auth import URLToAuthorization +# monkey path CouchServerState so it can ensure databases. + +def _couch_ensure_database(self, dbname): +    db = CouchDatabase.open_database( +        self._couch_url + '/' + dbname, +        create=True) +    return db, db._replica_uid + +CouchServerState.ensure_database = _couch_ensure_database + +  class ServerAuthorizationTestCase(BaseLeapTest):      """      Tests related to Soledad server authorization. @@ -339,15 +351,16 @@ class EncryptedSyncTestCase(          _, doclist = sol1.get_all_docs()          self.assertEqual([], doclist)          doc1 = sol1.create_doc(json.loads(simple_doc)) -        # sync with server -        sol1._server_url = self.getURL() -        sol1.sync() -        # assert doc was sent to couch db +        # ensure remote db exists before syncing          db = CouchDatabase(              self._couch_url,              # the name of the user database is "user-<uuid>".              
'user-user-uuid',          ) +        # sync with server +        sol1._server_url = self.getURL() +        sol1.sync() +        # assert doc was sent to couch db          _, doclist = db.get_all_docs()          self.assertEqual(1, len(doclist))          couchdoc = doclist[0] @@ -376,6 +389,7 @@ class EncryptedSyncTestCase(          doc2 = doclist[0]          # assert incoming doc is equal to the first sent doc          self.assertEqual(doc1, doc2) +        db.delete_database()      def test_encrypted_sym_sync_with_unicode_passphrase(self):          """ @@ -393,15 +407,16 @@ class EncryptedSyncTestCase(          _, doclist = sol1.get_all_docs()          self.assertEqual([], doclist)          doc1 = sol1.create_doc(json.loads(simple_doc)) -        # sync with server -        sol1._server_url = self.getURL() -        sol1.sync() -        # assert doc was sent to couch db +        # ensure remote db exists before syncing          db = CouchDatabase(              self._couch_url,              # the name of the user database is "user-<uuid>".              'user-user-uuid',          ) +        # sync with server +        sol1._server_url = self.getURL() +        sol1.sync() +        # assert doc was sent to couch db          _, doclist = db.get_all_docs()          self.assertEqual(1, len(doclist))          couchdoc = doclist[0] @@ -434,7 +449,94 @@ class EncryptedSyncTestCase(          doc2 = doclist[0]          # assert incoming doc is equal to the first sent doc          self.assertEqual(doc1, doc2) +        db.delete_database() +    def test_sync_very_large_files(self): +        """ +        Test if Soledad can sync very large files. +        """ +        # define the size of the "very large file" +        length = 100*(10**6)  # 100 MB +        self.startServer() +        # instantiate soledad and create a document +        sol1 = self._soledad_instance( +            # token is verified in test_target.make_token_soledad_app +            auth_token='auth-token' +        ) +        _, doclist = sol1.get_all_docs() +        self.assertEqual([], doclist) +        content = binascii.hexlify(os.urandom(length/2))  # len() == length +        doc1 = sol1.create_doc({'data': content}) +        # ensure remote db exists before syncing +        db = CouchDatabase( +            self._couch_url, +            # the name of the user database is "user-<uuid>". +            'user-user-uuid', +        ) +        # sync with server +        sol1._server_url = self.getURL() +        sol1.sync() +        # instantiate soledad with empty db, but with same secrets path +        sol2 = self._soledad_instance(prefix='x', auth_token='auth-token') +        _, doclist = sol2.get_all_docs() +        self.assertEqual([], doclist) +        sol2._secrets_path = sol1.secrets_path +        sol2._load_secrets() +        sol2._set_secret_id(sol1._secret_id) +        # sync the new instance +        sol2._server_url = self.getURL() +        sol2.sync() +        _, doclist = sol2.get_all_docs() +        self.assertEqual(1, len(doclist)) +        doc2 = doclist[0] +        # assert incoming doc is equal to the first sent doc +        self.assertEqual(doc1, doc2) +        # delete remote database +        db.delete_database() + + +    def test_sync_many_small_files(self): +        """ +        Test if Soledad can sync many smallfiles. 
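+        A hundred small documents are created locally and synced to the
+        server; a second client sharing the same secrets then syncs them all
+        back and the contents are compared.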
+        """ +        number_of_docs = 100 +        self.startServer() +        # instantiate soledad and create a document +        sol1 = self._soledad_instance( +            # token is verified in test_target.make_token_soledad_app +            auth_token='auth-token' +        ) +        _, doclist = sol1.get_all_docs() +        self.assertEqual([], doclist) +        # create many small files +        for i in range(0, number_of_docs): +            sol1.create_doc(json.loads(simple_doc)) +        # ensure remote db exists before syncing +        db = CouchDatabase( +            self._couch_url, +            # the name of the user database is "user-<uuid>". +            'user-user-uuid', +        ) +        # sync with server +        sol1._server_url = self.getURL() +        sol1.sync() +        # instantiate soledad with empty db, but with same secrets path +        sol2 = self._soledad_instance(prefix='x', auth_token='auth-token') +        _, doclist = sol2.get_all_docs() +        self.assertEqual([], doclist) +        sol2._secrets_path = sol1.secrets_path +        sol2._load_secrets() +        sol2._set_secret_id(sol1._secret_id) +        # sync the new instance +        sol2._server_url = self.getURL() +        sol2.sync() +        _, doclist = sol2.get_all_docs() +        self.assertEqual(number_of_docs, len(doclist)) +        # assert incoming docs are equal to sent docs +        for doc in doclist: +            self.assertEqual(sol1.get_doc(doc.doc_id), doc) +        # delete remote database +        db.delete_database()  class LockResourceTestCase(          CouchDBTestCase, TestCaseWithServer): @@ -455,12 +557,21 @@ class LockResourceTestCase(          CouchDBTestCase.setUp(self)          self.tempdir = tempfile.mkdtemp(prefix="leap_tests-")          self._couch_url = 'http://localhost:' + str(self.wrapper.port) +        # create the databases +        CouchDatabase(self._couch_url, 'shared') +        CouchDatabase(self._couch_url, 'tokens')          self._state = CouchServerState(              self._couch_url, 'shared', 'tokens')      def tearDown(self):          CouchDBTestCase.tearDown(self)          TestCaseWithServer.tearDown(self) +        # delete remote database +        db = CouchDatabase( +            self._couch_url, +            'shared', +        ) +        db.delete_database()      def test__try_obtain_filesystem_lock(self):          responder = mock.Mock() diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_http_database.py b/common/src/leap/soledad/common/tests/u1db_tests/test_http_database.py index 3f3c7bba..9251000e 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/test_http_database.py +++ b/common/src/leap/soledad/common/tests/u1db_tests/test_http_database.py @@ -133,12 +133,11 @@ class TestHTTPDatabaseSimpleOperations(tests.TestCase):               None), self.got)      def test_get_doc_deleted_include_deleted(self): -        self.response_val = errors.HTTPError(404, -                                             json.dumps( -                                             {"error": errors.DOCUMENT_DELETED} -                                             ), -                                             {'x-u1db-rev': 'doc-rev-gone', -                                              'x-u1db-has-conflicts': 'false'}) +        self.response_val = errors.HTTPError( +            404, +            json.dumps({"error": errors.DOCUMENT_DELETED}), +            {'x-u1db-rev': 'doc-rev-gone', +             'x-u1db-has-conflicts': 'false'})          doc 
= self.db.get_doc('deleted', include_deleted=True)          self.assertEqual('deleted', doc.doc_id)          self.assertEqual('doc-rev-gone', doc.rev) diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_open.py b/common/src/leap/soledad/common/tests/u1db_tests/test_open.py index 13425b4f..63406245 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/test_open.py +++ b/common/src/leap/soledad/common/tests/u1db_tests/test_open.py @@ -24,7 +24,8 @@ from u1db import (  )  from leap.soledad.common.tests import u1db_tests as tests  from u1db.backends import sqlite_backend -from leap.soledad.common.tests.u1db_tests.test_backends import TestAlternativeDocument +from leap.soledad.common.tests.u1db_tests.test_backends \ +    import TestAlternativeDocument  class TestU1DBOpen(tests.TestCase): diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_sqlite_backend.py b/common/src/leap/soledad/common/tests/u1db_tests/test_sqlite_backend.py index a53ea6cc..8292dd07 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/test_sqlite_backend.py +++ b/common/src/leap/soledad/common/tests/u1db_tests/test_sqlite_backend.py @@ -30,7 +30,8 @@ from u1db import (  from leap.soledad.common.tests import u1db_tests as tests  from u1db.backends import sqlite_backend -from leap.soledad.common.tests.u1db_tests.test_backends import TestAlternativeDocument +from leap.soledad.common.tests.u1db_tests.test_backends \ +    import TestAlternativeDocument  simple_doc = '{"key": "value"}' diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_sync.py b/common/src/leap/soledad/common/tests/u1db_tests/test_sync.py index 5346d540..1f78f912 100644 --- a/common/src/leap/soledad/common/tests/u1db_tests/test_sync.py +++ b/common/src/leap/soledad/common/tests/u1db_tests/test_sync.py @@ -760,7 +760,7 @@ class DatabaseSyncTests(tests.DatabaseBaseTests,                                         {'docs': [], 'last_known_gen': 0},                                      'return':                                         {'docs': [(doc.doc_id, doc.rev)], -                                    'last_gen': 1}}) +                                        'last_gen': 1}})          self.assertEqual([doc], self.db1.get_from_index('test-idx', 'value'))      def test_sync_pulling_doesnt_update_other_if_changed(self): @@ -785,7 +785,7 @@ class DatabaseSyncTests(tests.DatabaseBaseTests,                                         {'docs': [], 'last_known_gen': 0},                                      'return':                                         {'docs': [(doc.doc_id, doc.rev)], -                                    'last_gen': 1}}) +                                        'last_gen': 1}})          self.assertEqual(1, self.db1._get_replica_gen_and_trans_id('test2')[0])          # c2 should not have gotten a '_record_sync_info' call, because the          # local database had been updated more than just by the messages @@ -819,8 +819,8 @@ class DatabaseSyncTests(tests.DatabaseBaseTests,          self.assertLastExchangeLog(self.db2,                                     {'receive':                                         {'docs': [(doc.doc_id, doc.rev)], -                                    'source_uid': 'test1', -                                    'source_gen': 1, 'last_known_gen': 0}, +                                        'source_uid': 'test1', +                                        'source_gen': 1, 'last_known_gen': 0},                                      'return': {'docs': [], 'last_gen': 1}})      def 
test_sync_ignores_superseded(self): @@ -839,11 +839,11 @@ class DatabaseSyncTests(tests.DatabaseBaseTests,          self.assertLastExchangeLog(self.db1,                                     {'receive':                                         {'docs': [(doc.doc_id, doc_rev1)], -                                    'source_uid': 'test2', -                                    'source_gen': 1, 'last_known_gen': 0}, +                                        'source_uid': 'test2', +                                        'source_gen': 1, 'last_known_gen': 0},                                      'return':                                         {'docs': [(doc.doc_id, doc_rev2)], -                                    'last_gen': 2}}) +                                        'last_gen': 2}})          self.assertGetDoc(self.db1, doc.doc_id, doc_rev2, new_content, False)      def test_sync_sees_remote_conflicted(self): @@ -861,11 +861,11 @@ class DatabaseSyncTests(tests.DatabaseBaseTests,          self.assertLastExchangeLog(self.db2,                                     {'receive':                                         {'docs': [(doc_id, doc1_rev)], -                                    'source_uid': 'test1', -                                    'source_gen': 1, 'last_known_gen': 0}, +                                        'source_uid': 'test1', +                                        'source_gen': 1, 'last_known_gen': 0},                                      'return':                                         {'docs': [(doc_id, doc2_rev)], -                                    'last_gen': 1}}) +                                        'last_gen': 1}})          self.assertTransactionLog([doc_id, doc_id], self.db1)          self.assertGetDoc(self.db1, doc_id, doc2_rev, new_doc, True)          self.assertGetDoc(self.db2, doc_id, doc2_rev, new_doc, False) @@ -892,10 +892,10 @@ class DatabaseSyncTests(tests.DatabaseBaseTests,          self.assertLastExchangeLog(self.db2,                                     {'receive':                                         {'docs': [(doc_id, doc1.rev)], -                                    'source_uid': 'test1', -                                    'source_gen': 2, 'last_known_gen': 1}, +                                        'source_uid': 'test1', +                                        'source_gen': 2, 'last_known_gen': 1},                                      'return': {'docs': [(doc_id, doc2.rev)], -                                    'last_gen': 2}}) +                                               'last_gen': 2}})          self.assertTransactionLog([doc_id, doc_id, doc_id], self.db1)          self.assertGetDocIncludeDeleted(self.db1, doc_id, doc2.rev, None, True)          self.assertGetDocIncludeDeleted( @@ -950,8 +950,8 @@ class DatabaseSyncTests(tests.DatabaseBaseTests,          self.assertLastExchangeLog(self.db2,                                     {'receive':                                         {'docs': [(doc_id, deleted_rev)], -                                    'source_uid': 'test1', -                                    'source_gen': 2, 'last_known_gen': 1}, +                                        'source_uid': 'test1', +                                        'source_gen': 2, 'last_known_gen': 1},                                      'return': {'docs': [], 'last_gen': 2}})          self.assertGetDocIncludeDeleted(              self.db1, doc_id, deleted_rev, None, False) @@ -1121,6 +1121,7 @@ class DatabaseSyncTests(tests.DatabaseBaseTests,  class 
TestDbSync(tests.TestCaseWithServer): +      """Test db.sync remote sync shortcut"""      scenarios = [ @@ -1189,6 +1190,7 @@ class TestDbSync(tests.TestCaseWithServer):  class TestRemoteSyncIntegration(tests.TestCaseWithServer): +      """Integration tests for the most common sync scenario local -> remote"""      make_app_with_state = staticmethod(make_http_app) @@ -1204,7 +1206,7 @@ class TestRemoteSyncIntegration(tests.TestCaseWithServer):          doc12 = self.db1.create_doc_from_json('{"a": 2}')          doc21 = self.db2.create_doc_from_json('{"b": 1}')          doc22 = self.db2.create_doc_from_json('{"b": 2}') -        #sanity +        # sanity          self.assertEqual(2, len(self.db1._get_transaction_log()))          self.assertEqual(2, len(self.db2._get_transaction_log()))          progress1 = [] diff --git a/common/versioneer.py b/common/versioneer.py index b43ab062..18dfd923 100644 --- a/common/versioneer.py +++ b/common/versioneer.py @@ -115,7 +115,7 @@ import sys  def run_command(args, cwd=None, verbose=False):      try: -        # remember shell=False, so use git.cmd on windows, not just git +        # remember shell=False, so use git.exe on windows, not just git          p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)      except EnvironmentError:          e = sys.exc_info()[1] @@ -230,7 +230,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      GIT = "git"      if sys.platform == "win32": -        GIT = "git.cmd" +        GIT = "git.exe"      stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],                           cwd=root)      if stdout is None: @@ -305,7 +305,7 @@ import sys  def run_command(args, cwd=None, verbose=False):      try: -        # remember shell=False, so use git.cmd on windows, not just git +        # remember shell=False, so use git.exe on windows, not just git          p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)      except EnvironmentError:          e = sys.exc_info()[1] @@ -430,7 +430,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      GIT = "git"      if sys.platform == "win32": -        GIT = "git.cmd" +        GIT = "git.exe"      stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],                           cwd=root)      if stdout is None: @@ -486,7 +486,7 @@ import sys  def do_vcs_install(versionfile_source, ipy):      GIT = "git"      if sys.platform == "win32": -        GIT = "git.cmd" +        GIT = "git.exe"      run_command([GIT, "add", "versioneer.py"])      run_command([GIT, "add", versionfile_source])      run_command([GIT, "add", ipy]) diff --git a/docs/sphinx/client.rst b/docs/sphinx/client.rst new file mode 100644 index 00000000..0c608c31 --- /dev/null +++ b/docs/sphinx/client.rst @@ -0,0 +1,44 @@ +Soledad Client documentation +============================ + +.. automodule:: leap.soledad.client +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.client.auth +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.client.crypto +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.client.shared_db +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.client.soledad_db +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. 
automodule:: leap.soledad.client.sqlcipher +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.client.target +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: diff --git a/docs/sphinx/common.rst b/docs/sphinx/common.rst new file mode 100644 index 00000000..8755b3bd --- /dev/null +++ b/docs/sphinx/common.rst @@ -0,0 +1,38 @@ +Soledad Common documentation +============================ + +.. automodule:: leap.soledad.common +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.common.couch +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.common.crypto +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.common.ddocs +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.common.document +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.common.errors +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py new file mode 100644 index 00000000..552e5f56 --- /dev/null +++ b/docs/sphinx/conf.py @@ -0,0 +1,266 @@ +# -*- coding: utf-8 -*- +# +# Soledad documentation build configuration file, created by +# sphinx-quickstart on Mon Feb 17 18:20:43 2014. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath('../../common/src')) +sys.path.insert(0, os.path.abspath('../../client/src')) +sys.path.insert(0, os.path.abspath('../../server/src')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ +    'sphinx.ext.autodoc', +    'sphinx.ext.todo', +    'sphinx.ext.coverage', +    'sphinx.ext.pngmath', +    'sphinx.ext.viewcode', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Soledad' +copyright = u'2014, LEAP Encryption Access Project' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.4' +# The full version, including alpha/beta/rc tags. +release = '0.4.0' + +# The language for content autogenerated by Sphinx. 
Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages.  See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further.  For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents.  If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar.  Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. 
+#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it.  The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Soledaddoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +#  author, documentclass [howto, manual, or own class]). +latex_documents = [ +  ('index', 'Soledad.tex', u'Soledad Documentation', +   u'LEAP Encryption Access Project', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ +    ('index', 'soledad', u'Soledad Documentation', +     [u'LEAP Encryption Access Project'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +#  dir menu entry, description, category) +texinfo_documents = [ +  ('index', 'Soledad', u'Soledad Documentation', +   u'LEAP Encryption Access Project', 'Soledad', 'One line description of project.', +   'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/docs/sphinx/index.rst b/docs/sphinx/index.rst new file mode 100644 index 00000000..6298d034 --- /dev/null +++ b/docs/sphinx/index.rst @@ -0,0 +1,31 @@ +.. Soledad documentation master file, created by +   sphinx-quickstart on Mon Feb 17 17:54:47 2014. +   You can adapt this file completely to your liking, but it should at least +   contain the root `toctree` directive. 
+ +Soledad documentation +===================== + +Contents: + +.. toctree:: +   :maxdepth: 2 + +   common +   client +   server + +.. automodule:: leap.soledad.common +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/sphinx/server.rst b/docs/sphinx/server.rst new file mode 100644 index 00000000..f093adf4 --- /dev/null +++ b/docs/sphinx/server.rst @@ -0,0 +1,27 @@ +Soledad Server documentation +============================ + +.. automodule:: leap.soledad.server +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.server.auth +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.server.gzip_middleware +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + +.. automodule:: leap.soledad.server.lock_resource +    :members: +    :undoc-members: +    :private-members: +    :show-inheritance: + diff --git a/scripts/README.rst b/scripts/README.rst index fdd1d642..37cf2c0e 100644 --- a/scripts/README.rst +++ b/scripts/README.rst @@ -2,16 +2,3 @@ Soledad Scripts  ===============  The scripts in this directory are meant to be used for development purposes. - -Currently, the scripts are: - -  * server-side-db.py: Gives access to server-side soledad user database, -    based on the configuration in /etc/leap/soledad-server.conf. One should -    use it as: - -      python -i server-side-db.py <uuid> - -  * client-side-db.py: Gives access to client-side soledad user database, -    based on data stored in ~/.config/leap/soledad. One should use it as: - -      python -i client-side-db.py <uuid> <passphrase> diff --git a/scripts/backends_cpu_usage/log_cpu_usage.py b/scripts/backends_cpu_usage/log_cpu_usage.py new file mode 100755 index 00000000..2674e1ff --- /dev/null +++ b/scripts/backends_cpu_usage/log_cpu_usage.py @@ -0,0 +1,46 @@ +#!/usr/bin/python + + +# Get the CPU usage and print to file. + + +import psutil +import time +import argparse +import os +import threading + + +class LogCpuUsage(threading.Thread): + +    def __init__(self, fname): +        threading.Thread.__init__(self) +        self._stopped = True +        self._fname = fname  + +    def run(self): +        self._stopped = False +        with open(self._fname, 'w') as f: +            start = time.time() +            while self._stopped is False: +                now = time.time() +                f.write("%f %f\n" % ((now - start), psutil.cpu_percent())) +                time.sleep(0.01) + +    def stop(self): +        self._stopped = True + + +if __name__ == '__main__': +    parser = argparse.ArgumentParser() +    parser.add_argument('file', help='where to save output') +    args = parser.parse_args() + +    if os.path.isfile(args.file): +        replace = raw_input('File %s exists, replace it (y/N)? ' % args.file) +        if replace.lower() != 'y': +            print 'Bailing out.' +            exit(1) +     +    log_cpu = LogCpuUsage(args.file) +    log_cpu.run() diff --git a/scripts/backends_cpu_usage/movingaverage.py b/scripts/backends_cpu_usage/movingaverage.py new file mode 100644 index 00000000..bac1b3e1 --- /dev/null +++ b/scripts/backends_cpu_usage/movingaverage.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# +#  Sean Reifschneider, tummy.com, ltd.  
<jafo@tummy.com> +#  Released into the Public Domain, 2011-02-06 + +import itertools +from itertools import islice +from collections import deque + + +######################################################### +def movingaverage(data, subset_size, data_is_list = None, +		avoid_fp_drift = True): +	'''Return the moving averages of the data, with a window size of +	`subset_size`.  `subset_size` must be an integer greater than 0 and +	less than the length of the input data, or a ValueError will be raised. + +	`data_is_list` can be used to tune the algorithm for list or iteratable +	as an input.  The default value, `None` will auto-detect this. +	The algorithm used if `data` is a list is almost twice as fast as if +	it is an iteratable. + +	`avoid_fp_drift`, if True (the default) sums every sub-set rather than +	keeping a "rolling sum" (which may be subject to floating-point drift). +	While more correct, it is also dramatically slower for subset sizes +	much larger than 20. + +	NOTE: You really should consider setting `avoid_fp_drift = False` unless +	you are dealing with very small numbers (say, far smaller than 0.00001) +	or require extreme accuracy at the cost of execution time.  For +	`subset_size` < 20, the performance difference is very small. +	''' +	if subset_size < 1: +		raise ValueError('subset_size must be 1 or larger') + +	if data_is_list is None: +		data_is_list = hasattr(data, '__getslice__') + +	divisor = float(subset_size) +	if data_is_list: +		#  This only works if we can re-access old elements, but is much faster. +		#  In other words, it can't be just an iterable, it needs to be a list. + +		if subset_size > len(data): +			raise ValueError('subset_size must be smaller than data set size') + +		if avoid_fp_drift: +			for x in range(subset_size, len(data) + 1): +				yield sum(data[x - subset_size:x]) / divisor +		else: +			cur = sum(data[0:subset_size]) +			yield cur / divisor +			for x in range(subset_size, len(data)): +				cur += data[x] - data[x - subset_size] +				yield cur / divisor +	else: +		#  Based on the recipe at: +		#     http://docs.python.org/library/collections.html#deque-recipes +		it = iter(data) +		d = deque(islice(it, subset_size)) + +		if subset_size > len(d): +			raise ValueError('subset_size must be smaller than data set size') + +		if avoid_fp_drift: +			yield sum(d) / divisor +			for elem in it: +				d.popleft() +				d.append(elem) +				yield sum(d) / divisor +		else: +			s = sum(d) +			yield s / divisor +			for elem in it: +				s += elem - d.popleft() +				d.append(elem) +				yield s / divisor + + +########################## +if __name__ == '__main__': +	import unittest + +	class TestMovingAverage(unittest.TestCase): +		#################### +		def test_List(self): +			try: +				list(movingaverage([1,2,3], 0)) +				self.fail('Did not raise ValueError on subset_size=0') +			except ValueError: +				pass + +			try: +				list(movingaverage([1,2,3,4,5,6], 7)) +				self.fail('Did not raise ValueError on subset_size > len(data)') +			except ValueError: +				pass + +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), +					[1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), +					[1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) +			
self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) + +			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], +					3, False)), [40.0,42.0,45.0,43.0]) +			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], +					3, True)), [40.0,42.0,45.0,43.0]) + + +		###################### +		def test_XRange(self): +			try: +				list(movingaverage(xrange(1, 4), 0)) +				self.fail('Did not raise ValueError on subset_size=0') +			except ValueError: +				pass + +			try: +				list(movingaverage(xrange(1, 7), 7)) +				self.fail('Did not raise ValueError on subset_size > len(data)') +			except ValueError: +				pass + +			self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 2)), +					[1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), +					2)), [1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) + + +		########################### +		def test_ListRolling(self): +			try: +				list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) +				self.fail('Did not raise ValueError on subset_size=0') +			except ValueError: +				pass + +			try: +				list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) +				self.fail('Did not raise ValueError on subset_size > len(data)') +			except ValueError: +				pass + +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, +					avoid_fp_drift = False)), [1,2,3,4,5,6]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, +					avoid_fp_drift = False)), +					[1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2, +					avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, +					avoid_fp_drift = False)), [2,3,4,5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, +					avoid_fp_drift = False)), [2.5,3.5,4.5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, +					avoid_fp_drift = False)), [3,4]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, +					avoid_fp_drift = False)), [3.5]) + +			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], +					3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) +			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], +					3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + + +		############################# +		def test_XRangeRolling(self): +			try: +				list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) +				self.fail('Did not raise ValueError on subset_size=0') +			except ValueError: +				pass + +			try: +				list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) +				self.fail('Did not raise ValueError on subset_size > len(data)') +			except ValueError: +				pass + +			self.assertEqual(list(movingaverage(xrange(1, 7), 1, +					avoid_fp_drift = False)), [1,2,3,4,5,6]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 2, +					avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), +					2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 3, +					avoid_fp_drift = False)), [2,3,4,5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 4, +					avoid_fp_drift = 
False)), [2.5,3.5,4.5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 5, +					avoid_fp_drift = False)), [3,4]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 6, +					avoid_fp_drift = False)), [3.5]) + + +	###################################################################### +	suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) +	unittest.TextTestRunner(verbosity = 2).run(suite) + diff --git a/scripts/backends_cpu_usage/plot.py b/scripts/backends_cpu_usage/plot.py new file mode 100755 index 00000000..4e5083ad --- /dev/null +++ b/scripts/backends_cpu_usage/plot.py @@ -0,0 +1,81 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt +from movingaverage import movingaverage + + +def smooth(l): +    return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False) + + +files = [ +    ('sqlite', 'b'), +    ('sqlcipher', 'r'), +    ('u1dblite', 'g'), +    ('u1dbcipher', 'm'), +] + + +# config the plot +plt.xlabel('time (s)') +plt.ylabel('cpu usage (%)') +plt.title('u1db backends CPU usage') + + +for fi in files: + +    backend = fi[0] +    color = fi[1] +    filename = '%s.txt' % backend  + +    x = [] +    y = [] + +    xmax = None +    xmin = None +    ymax = None +    ymin = None + +    # read data from file +    with open(filename, 'r') as f: +        line = f.readline() +        while line is not None: +            time, cpu = tuple(line.strip().split(' ')) +            cpu = float(cpu) +            x.append(float(time)) +            y.append(cpu) +            if ymax == None or cpu > ymax: +                ymax = cpu +                xmax = time +            if ymin == None or cpu < ymin: +                ymin = cpu +                xmin = time +            line = f.readline() +            if line == '': +                break + +    kwargs = { +        'linewidth': 1.0, +        'linestyle': '-', +    #    'marker': '.', +        'color': color, +    } +    plt.plot( +        [n for n in smooth(x)], +        [n for n in smooth(y)], +        label=backend, **kwargs) + +    #plt.axes().get_xaxis().set_ticks(x) +    #plt.axes().get_xaxis().set_ticklabels(x) + +    # annotate max and min values +    #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) +    #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.ylim(0, 100) +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/backends_cpu_usage/test_u1db_sync.py b/scripts/backends_cpu_usage/test_u1db_sync.py new file mode 100755 index 00000000..26ef8f9f --- /dev/null +++ b/scripts/backends_cpu_usage/test_u1db_sync.py @@ -0,0 +1,113 @@ +#!/usr/bin/python + + +import u1db +import tempfile +import logging +import shutil +import os +import argparse +import time +import binascii +import random + + +from leap.soledad.client.sqlcipher import open as sqlcipher_open +from log_cpu_usage import LogCpuUsage +from u1dblite import open as u1dblite_open +from u1dbcipher import open as u1dbcipher_open + + +DOCS_TO_SYNC = 1000 +SMALLEST_DOC_SIZE = 1 * 1024  # 1 KB +BIGGEST_DOC_SIZE = 100 * 1024  # 100 KB + + +def get_data(size): +    return binascii.hexlify(os.urandom(size/2)) + + +def run_test(testname, open_fun, tempdir, docs,  *args): +    logger.info('Starting test \"%s\".' 
% testname) + +    # instantiate dbs +    db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args) +    db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args) + +    # get sync target and synchsonizer +    target = db2.get_sync_target() +    synchronizer = u1db.sync.Synchronizer(db1, target) + + +    # generate lots of small documents +    logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC) +    for content in docs: +        db1.create_doc(content) +    logger.info('%d documents created in source db.' % DOCS_TO_SYNC) + +    # run the test +    filename = testname + '.txt' +    logger.info('Logging CPU usage to %s.' % filename) +    log_cpu = LogCpuUsage(filename) +    tstart = time.time() + +    # start logging cpu +    log_cpu.start() +    logger.info('Sleeping for 5 seconds...') +    time.sleep(5) + +    # sync +    logger.info('Starting sync...') +    sstart = time.time() +    synchronizer.sync() +    send = time.time() +    logger.info('Sync finished.') + +    # stop logging cpu +    logger.info('Sleeping for 5 seconds...') +    time.sleep(5) +    tend = time.time() +    log_cpu.stop() + +    # report +    logger.info('Total sync time: %f seconds' % (send - sstart)) +    logger.info('Total test time: %f seconds' % (tend - tstart)) +    logger.info('Finished test \"%s\".' % testname) + +    # close dbs +    db1.close() +    db2.close() + + +if __name__ == '__main__': +     +    # configure logger +    logger = logging.getLogger(__name__) +    LOG_FORMAT = '%(asctime)s %(message)s' +    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +    # get a temporary dir +    tempdir = tempfile.mkdtemp() +    logger.info('Using temporary directory %s' % tempdir) + + +    # create a lot of documents with random sizes +    docs = [] +    for i in xrange(DOCS_TO_SYNC): +        docs.append({ +            'index': i, +            #'data': get_data( +            #    random.randrange( +            #        SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)) +        }) + +    # run tests +    run_test('sqlite', u1db.open, tempdir, docs, True) +    run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True) +    run_test('u1dblite', u1dblite_open, tempdir, docs) +    run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True) + +    # remove temporary dir +    logger.info('Removing temporary directory %s' % tempdir) +    shutil.rmtree(tempdir) diff --git a/scripts/build_debian_package.sh b/scripts/build_debian_package.sh new file mode 100755 index 00000000..cc62c3ac --- /dev/null +++ b/scripts/build_debian_package.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +# This script generates Soledad Debian packages. +# +# When invoking this script, you should pass a git repository URL and the name +# of the branch that contains the code you wish to build the packages from. +# +# The script will clone the given branch from the given repo, as well as the +# main Soledad repo in github which contains the most up-to-date debian +# branch. It will then merge the desired branch into the debian branch and +# build the packages. 
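+#
+# Usage sketch (the repository URL and branch name below are placeholders,
+# not real values):
+#
+#     ./build_debian_package.sh https://example.org/git/soledad.git my-feature-branch
+#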
+ +if [ $# -ne 2 ]; then +  echo "Usage: ${0} <url> <branch>" +  exit 1 +fi + +SOLEDAD_MAIN_REPO=git://github.com/leapcode/soledad.git + +url=$1 +branch=$2 +workdir=`mktemp -d` + +git clone -b ${branch} ${url} ${workdir}/soledad +export GIT_DIR=${workdir}/soledad/.git +export GIT_WORK_TREE=${workdir}/soledad +git remote add leapcode ${SOLEDAD_MAIN_REPO} +git fetch leapcode +git checkout debian +git merge --no-edit ${branch} +(cd ${workdir}/soledad && debuild -uc -us) +echo "Packages generated in ${workdir}" diff --git a/scripts/client-side-db.py b/scripts/client-side-db.py deleted file mode 100644 index 0c3df7a4..00000000 --- a/scripts/client-side-db.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/python - -# This script gives client-side access to one Soledad user database by using -# the data stored in ~/.config/leap/soledad/ - -import sys -import os - -from leap.common.config import get_path_prefix -from leap.soledad.client import Soledad - -if len(sys.argv) != 3: -    print 'Usage: %s <uuid> <passphrase>' % sys.argv[0] -    exit(1) - -uuid = sys.argv[1] -passphrase = unicode(sys.argv[2]) - -secrets_path = os.path.join(get_path_prefix(), 'leap', 'soledad', -                            '%s.secret' % uuid) -local_db_path = os.path.join(get_path_prefix(), 'leap', 'soledad', -                             '%s.db' % uuid) -server_url = 'http://dummy-url' -cert_file = 'cert' - -sol = Soledad(uuid, passphrase, secrets_path, local_db_path, server_url, -             cert_file) -db = sol._db - -# get replica info -replica_uid = db._replica_uid -gen, docs = db.get_all_docs() -print "replica_uid: %s" % replica_uid -print "generation:  %d" % gen -gen, trans_id = db._get_generation_info() -print "transaction_id: %s" % trans_id diff --git a/scripts/db_access/client_side_db.py b/scripts/db_access/client_side_db.py new file mode 100644 index 00000000..2bf4ab5e --- /dev/null +++ b/scripts/db_access/client_side_db.py @@ -0,0 +1,154 @@ +#!/usr/bin/python + +# This script gives client-side access to one Soledad user database. 
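+#
+# Usage sketch (the handle and base directory below are only examples; the
+# script prompts for the password and leaves the resulting Soledad instance
+# bound to the name s, so running it with python -i gives an interactive
+# session):
+#
+#     python -i client_side_db.py user@example-provider.org -b /tmp/soledad-client
+#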
+ + +import sys +import os +import argparse +import re +import tempfile +import getpass +import requests +import json +import srp._pysrp as srp +import binascii +import logging + +from leap.common.config import get_path_prefix +from leap.soledad.client import Soledad + + +# create a logger +logger = logging.getLogger(__name__) +LOG_FORMAT = '%(asctime)s %(message)s' +logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +safe_unhexlify = lambda x: binascii.unhexlify(x) if ( +    len(x) % 2 == 0) else binascii.unhexlify('0' + x) + + +def fail(reason): +    logger.error('Fail: ' + reason) +    exit(2) + + +def get_api_info(provider): +    info = requests.get( +        'https://'+provider+'/provider.json', verify=False).json() +    return info['api_uri'], info['api_version'] + + +def login(username, passphrase, provider, api_uri, api_version): +    usr = srp.User(username, passphrase, srp.SHA256, srp.NG_1024) +    auth = None +    try: +        auth = authenticate(api_uri, api_version, usr).json() +    except requests.exceptions.ConnectionError: +        fail('Could not connect to server.') +    if 'errors' in auth: +        fail(str(auth['errors'])) +    return api_uri, api_version, auth + + +def authenticate(api_uri, api_version, usr): +    api_url = "%s/%s" % (api_uri, api_version) +    session = requests.session() +    uname, A = usr.start_authentication() +    params = {'login': uname, 'A': binascii.hexlify(A)} +    init = session.post( +        api_url + '/sessions', data=params, verify=False).json() +    if 'errors' in init: +        fail('test user not found') +    M = usr.process_challenge( +        safe_unhexlify(init['salt']), safe_unhexlify(init['B'])) +    return session.put(api_url + '/sessions/' + uname, verify=False, +                       data={'client_auth': binascii.hexlify(M)}) + + +def get_soledad_info(username, provider, passphrase, basedir): +    api_uri, api_version = get_api_info(provider) +    auth = login(username, passphrase, provider, api_uri, api_version) +    # get soledad server url +    service_url = '%s/%s/config/soledad-service.json' % \ +                  (api_uri, api_version) +    soledad_hosts = requests.get(service_url, verify=False).json()['hosts'] +    hostnames = soledad_hosts.keys() +    # allow for choosing the host +    host = hostnames[0] +    if len(hostnames) > 1: +        i = 1 +        print "There are many available hosts:" +        for h in hostnames: +            print "  (%d) %s.%s" % (i, h, provider) +            i += 1 +        choice = raw_input("Choose a host to use (default: 1): ") +        if choice != '': +            host = hostnames[int(choice) - 1] +    server_url = 'https://%s:%d/user-%s' % \ +              (soledad_hosts[host]['hostname'], soledad_hosts[host]['port'], +               auth[2]['id']) +    # get provider ca certificate +    ca_cert = requests.get('https://%s/ca.crt' % provider, verify=False).text +    cert_file = os.path.join(basedir, 'ca.crt') +    with open(cert_file, 'w') as f: +      f.write(ca_cert) +    return auth[2]['id'], server_url, cert_file, auth[2]['token'] + + +def get_soledad_instance(username, provider, passphrase, basedir): +    # setup soledad info +    uuid, server_url, cert_file, token = \ +        get_soledad_info(username, provider, passphrase, basedir) +    logger.info('UUID is %s' % uuid) +    logger.info('Server URL is %s' % server_url) +    secrets_path = os.path.join( +        basedir, '%s.secret' % uuid) +    local_db_path = os.path.join( +        basedir, '%s.db' % uuid) +   
 # instantiate soledad
+    return Soledad(
+        uuid,
+        unicode(passphrase),
+        secrets_path=secrets_path,
+        local_db_path=local_db_path,
+        server_url=server_url,
+        cert_file=cert_file,
+        auth_token=token)
+
+
+# main program
+
+if __name__ == '__main__':
+
+    class ValidateUserHandle(argparse.Action):
+        def __call__(self, parser, namespace, values, option_string=None):
+            m = re.compile('^([^@]+)@([^@]+\.[^@]+)$')
+            res = m.match(values)
+            if res == None:
+                parser.error('User handle should have the form user@provider.')
+            setattr(namespace, 'username', res.groups()[0])
+            setattr(namespace, 'provider', res.groups()[1])
+
+    # parse command line
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'user@provider', action=ValidateUserHandle, help='the user handle')
+    parser.add_argument(
+        '-b', dest='basedir', required=False, default=None, help='the base directory')
+    args = parser.parse_args()
+
+    # get the password
+    passphrase = getpass.getpass(
+        'Password for %s@%s: ' % (args.username, args.provider))
+
+    # get the basedir
+    basedir = args.basedir
+    if basedir is None:
+        basedir = tempfile.mkdtemp()
+    logger.info('Using %s as base directory.' % basedir)
+
+    # get the soledad instance
+    s = get_soledad_instance(
+        args.username, args.provider, passphrase, basedir)
diff --git a/scripts/db_access/reset_db.py b/scripts/db_access/reset_db.py
new file mode 100644
index 00000000..80871856
--- /dev/null
+++ b/scripts/db_access/reset_db.py
@@ -0,0 +1,79 @@
+#!/usr/bin/python
+
+# This script can be run on the server side to completely reset a user database.
+#
+# WARNING: running this script over a database will delete all documents but
+# the one with id u1db_config (which contains db metadata) and design docs
+# needed for couch backend.
+
+
+import sys
+from ConfigParser import ConfigParser
+import threading
+import logging
+from couchdb import Database as CouchDatabase
+
+
+if len(sys.argv) != 2:
+    print 'Usage: %s <uuid>' % sys.argv[0]
+    exit(1)
+
+uuid = sys.argv[1]
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+# get couch url
+cp = ConfigParser()
+cp.read('/etc/leap/soledad-server.conf')
+url = cp.get('soledad-server', 'couch_url')
+
+
+# confirm
+yes = raw_input("Are you sure you want to reset the database for user %s "
+                "(type YES)? " % uuid)
+if yes != 'YES':
+    print 'Bailing out...'
+    exit(2)
+
+
+db = CouchDatabase('%s/user-%s' % (url, uuid))
+
+
+class _DeleterThread(threading.Thread):
+
+    def __init__(self, db, doc_id, release_fun):
+        threading.Thread.__init__(self)
+        self._db = db
+        self._doc_id = doc_id
+        self._release_fun = release_fun
+
+    def run(self):
+        logger.info('[%s] deleting doc...' % self._doc_id)
+        del self._db[self._doc_id]
+        logger.info('[%s] done.'
% self._doc_id) +        self._release_fun() + + +semaphore_pool = threading.BoundedSemaphore(value=20) + + +threads = [] +for doc_id in db: +    if doc_id != 'u1db_config' and not doc_id.startswith('_design'): +        semaphore_pool.acquire() +        logger.info('[main] launching thread for doc: %s' % doc_id) +        t = _DeleterThread(db, doc_id, semaphore_pool.release) +        t.start() +        threads.append(t) + + +logger.info('[main] waiting for threads.') +map(lambda thread: thread.join(), threads) + + +logger.info('[main] done.') diff --git a/scripts/server-side-db.py b/scripts/db_access/server_side_db.py index 01a9aaac..18641a0f 100644 --- a/scripts/server-side-db.py +++ b/scripts/db_access/server_side_db.py @@ -2,6 +2,10 @@  # This script gives server-side access to one Soledad user database by using  # the configuration stored in /etc/leap/soledad-server.conf. +# +# Use it like this: +#  +#     python -i server-side-db.py <uuid>  import sys  from ConfigParser import ConfigParser diff --git a/scripts/doc_put_memory_usage/find_max_upload_size.py b/scripts/doc_put_memory_usage/find_max_upload_size.py new file mode 100755 index 00000000..02c68015 --- /dev/null +++ b/scripts/doc_put_memory_usage/find_max_upload_size.py @@ -0,0 +1,169 @@ +#!/usr/bin/python + +# This script finds the maximum upload size for a document in the current +# server. It pulls couch URL from Soledad config file and attempts multiple +# PUTs until it finds the maximum size supported by the server. +# +# As the Soledad couch user is not an admin, you have to pass a database into +# which the test will be run. The database should already exist and be +# initialized with soledad design documents. +# +# Use it like this: +# +#     ./find_max_upload_size.py <dbname> +#     ./find_max_upload_size.py -h + +import os +import configparser +import logging +import argparse +import random +import string +import binascii +import json +import time +import uuid + + +from couchdb.client import Database +from socket import error as socket_error +from leap.soledad.common.couch import CouchDatabase + + +SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' +PREFIX = '/tmp/soledad_test' +LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' +RETRIES = 3  # number of times to retry uploading a document of a certain +             # size after a failure + + +# configure logger +logger = logging.getLogger(__name__) + + +def config_log(level): +    logging.basicConfig(format=LOG_FORMAT, level=level) + + +def log_to_file(filename): +    handler = logging.FileHandler(filename, mode='a') +    handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) +    logger.addHandler(handler) + + +# create test dir +if not os.path.exists(PREFIX): +    os.mkdir(PREFIX) + + +def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): +    config = configparser.ConfigParser() +    config.read(config_file) +    return config['soledad-server']['couch_url'] + + +# generate or load an uploadable doc with the given size in mb +def get_content(size): +    fname = os.path.join(PREFIX, 'content-%d.json' % size) +    if os.path.exists(fname): +        logger.debug('Loading content with %d MB...' % size) +        with open(fname, 'r') as f: +            return f.read() +    else: +        length = int(size * 1024 ** 2) +        logger.debug('Generating body with %d MB...' 
% size) +        content = binascii.hexlify(os.urandom(length))[:length] +        with open(fname, 'w') as f: +            f.write(content) +        return content + + +def delete_doc(db): +    doc = db.get('largedoc') +    db.delete(doc) + + +def upload(db, size, couch_db): +    # try many times to be sure that size is infeasible +    for i in range(RETRIES): +        # wait until server is up to upload +        while True: +            try: +                'largedoc' in couch_db +                break +            except socket_error: +                logger.debug('Waiting for server to come up...') +                time.sleep(1) +        # attempt to upload +        try: +            logger.debug( +                'Trying to upload %d MB document (attempt %d/%d)...' % +                (size, (i+1), RETRIES)) +            content = get_content(size) +            logger.debug('Starting upload of %d bytes.' % len(content)) +            doc = db.create_doc({'data': content}, doc_id='largedoc') +            delete_doc(couch_db) +            logger.debug('Success uploading %d MB doc.' % size) +            return True +        except Exception as e: +            logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) +    return False + + +def find_max_upload_size(db_uri): +    db = CouchDatabase.open_database(db_uri, False) +    couch_db = Database(db_uri) +    logger.debug('Database URI: %s' % db_uri) +    # delete eventual leftover from last run +    if 'largedoc' in couch_db: +        delete_doc(couch_db) +    # phase 1: increase upload size exponentially +    logger.info('Starting phase 1: increasing size exponentially.') +    size = 1 +    #import ipdb; ipdb.set_trace() +    while True: +        if upload(db, size, couch_db): +            size *= 2 +        else: +            break + +    # phase 2: binary search for maximum value +    unable = size +    able = size / 2 +    logger.info('Starting phase 2: binary search for maximum value.') +    while unable - able > 1: +        size = able + ((unable - able) / 2) +        if upload(db, size, couch_db): +            able = size +        else: +            unable = size +    return able + + +if __name__ == '__main__': +    # parse command line +    parser = argparse.ArgumentParser() +    parser.add_argument( +        '-d', action='store_true', dest='debug', +        help='print debugging information') +    parser.add_argument( +        '-l', dest='logfile', +        help='log output to file') +    parser.add_argument( +        'db_uri', help='the couch database URI to test') +    args = parser.parse_args() + +    # log to file +    if args.logfile is not None: +        log_to_file(args.logfile) + +    # set loglevel +    if args.debug is True: +        config_log(logging.DEBUG) +    else: +        config_log(logging.INFO) + +    # run test and report +    logger.info('Will test using db at %s.' % args.db_uri) +    maxsize = find_max_upload_size(args.db_uri) +    logger.info('Max upload size is %d MB.' 
% maxsize) diff --git a/scripts/doc_put_memory_usage/get-mem.py b/scripts/doc_put_memory_usage/get-mem.py new file mode 100755 index 00000000..d64875fc --- /dev/null +++ b/scripts/doc_put_memory_usage/get-mem.py @@ -0,0 +1,16 @@ +#!/usr/bin/python + + +import psutil +import time + + +delta = 50 * 60 +start = time.time() + +while True: +    now = time.time() +    print "%s %s" % (now - start, psutil.phymem_usage().used) +    time.sleep(0.1) +    if now > start + delta: +        break diff --git a/scripts/doc_put_memory_usage/plot-mem.py b/scripts/doc_put_memory_usage/plot-mem.py new file mode 100755 index 00000000..e24679a2 --- /dev/null +++ b/scripts/doc_put_memory_usage/plot-mem.py @@ -0,0 +1,73 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt + + +files = [ +    ('local', 'couchdb-json', 'b'), +    ('local', 'bigcouch-json', 'r'), +    ('local', 'couchdb-multipart', 'g'), +    ('local', 'bigcouch-multipart', 'm'), +] + + +# config the plot +plt.xlabel('time') +plt.ylabel('memory usage') +plt.title('bigcouch versus couch memory usage') + + +for fi in files: + +    machine = fi[0] +    database = fi[1] +    color = fi[2] +    filename = '%s-%s.txt' % (machine, database) + +    x = [] +    y = [] + +    xmax = None +    xmin = None +    ymax = None +    ymin = None + +    # read data from file +    with open(filename, 'r') as f: +        line = f.readline() +        while line is not None: +            time, mem = tuple(line.strip().split(' ')) +            mem = float(mem) / (10**9) +            x.append(float(time)) +            y.append(mem) +            if ymax == None or mem > ymax: +                ymax = mem +                xmax = time +            if ymin == None or mem < ymin: +                ymin = mem +                xmin = time +            line = f.readline() +            if line == '': +                break + +    kwargs = { +        'linewidth': 1.0, +        'linestyle': '-', +    #    'marker': '.', +        'color': color, +    } +    plt.plot(x, y, label=database, **kwargs) + +    #plt.axes().get_xaxis().set_ticks(x) +    #plt.axes().get_xaxis().set_ticklabels(x) + +    # annotate max and min values +    #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) +    #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/profiling/sync/sync-many.py b/scripts/profiling/sync/sync-many.py new file mode 100644 index 00000000..83793b0a --- /dev/null +++ b/scripts/profiling/sync/sync-many.py @@ -0,0 +1,125 @@ +#!/usr/bin/python + +# The purpose of this script is to stress a soledad server by: +# +#   - Instantiating multiple clients. +#   - Creating many documents in each client. +#   - Syncing all at the same time with th server multiple times, until +#     they've all reached an agreement on the state of the databases and +#     there's nothing else to be synced. 
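+#
+# Usage sketch (handle and directory are only examples; the script prompts
+# for the password and imports get_soledad_instance from client_side_db.py,
+# so that module must be importable, e.g. via PYTHONPATH):
+#
+#     python sync-many.py user@example-provider.org -b /tmp/sync-many
+#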
+ + +import threading +import tempfile +import argparse +import logging +import re +import getpass +import time +import shutil + + +from client_side_db import get_soledad_instance + + +from leap.soledad.client import BootstrapSequenceError + + +NUMBER_OF_REPLICAS = 1 +DOCUMENTS_PER_REPLICA = 10 + + +# create a logger +logger = logging.getLogger(__name__) +LOG_FORMAT = '%(asctime)s %(message)s' +logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +class WorkerThread(threading.Thread): + +    def __init__(self, thread_id, soledad, all_set): +        threading.Thread.__init__(self) +        self._id = thread_id +        self._soledad = soledad +        self._all_set = all_set +        self._done_creating = threading.Event() + +    def run(self): +        # create many documents +        logger.info('[replica %d] creating documents...' % self._id) +        for i in xrange(DOCUMENTS_PER_REPLICA): +            self._soledad.create_doc({'a_doc': i}) +        # wait for others +        self._done_creating.set() +        logger.info('[replica %d] done creating documents.' % self._id) +        self._all_set.wait() +        # sync +        successes = 0 +        while True: +            logger.info('[replica %d] syncing.' % self._id) +            if self._id == 1: +                time.sleep(5) +            old_gen = self._soledad.sync() +            logger.info('[replica %d] synced.' % self._id) +            new_gen = self._soledad._db._get_generation() +            logger.info('[replica %d] old gen %d - new gen %d.' % +                (self._id, old_gen, new_gen)) +            if old_gen == new_gen: +                successes += 1 +                logger.info('[replica %d] sync not needed.' % self._id) +                if successes == 3: +                    break + + +def stress_test(username, provider, passphrase, basedir): +    threads = [] +    all_set = threading.Event() +    for i in xrange(NUMBER_OF_REPLICAS): +        logging.info('[main] starting thread %d.' % i) +        s = get_soledad_instance( +            username, +            provider, +            passphrase, +            tempfile.mkdtemp(dir=basedir)) +        t = WorkerThread(i, s, all_set) +        t.start() +        threads.append(t) +    map(lambda t: t._done_creating.wait(), threads) +    all_set.set() +    map(lambda t: t.join(), threads) +    logger.info('Removing dir %s' % basedir) +    shutil.rmtree(basedir) + + +# main program + +if __name__ == '__main__': + +    class ValidateUserHandle(argparse.Action): +        def __call__(self, parser, namespace, values, option_string=None): +            m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') +            res = m.match(values) +            if res == None: +                parser.error('User handle should have the form user@provider.') +            setattr(namespace, 'username', res.groups()[0]) +            setattr(namespace, 'provider', res.groups()[1]) + +    # parse command line +    parser = argparse.ArgumentParser() +    parser.add_argument( +        'user@provider', action=ValidateUserHandle, help='the user handle') +    parser.add_argument( +        '-b', dest='basedir', required=False, default=None, help='the user handle') +    args = parser.parse_args() + +    # get the password +    passphrase = getpass.getpass( +        'Password for %s@%s: ' % (args.username, args.provider)) + +    # get the basedir +    basedir = args.basedir +    if basedir is None: +        basedir = tempfile.mkdtemp() +    logger.info('[main] using %s as base directory.' 
% basedir)
+
+    stress_test(args.username, args.provider, passphrase, basedir)
diff --git a/scripts/update_design_docs.py b/scripts/update_design_docs.py
new file mode 100644
index 00000000..e7b5a29c
--- /dev/null
+++ b/scripts/update_design_docs.py
@@ -0,0 +1,147 @@
+#!/usr/bin/python
+
+# This script updates Soledad's design documents in the session database and
+# all user databases with contents from the installed leap.soledad.common
+# package.
+
+import json
+import logging
+import argparse
+import re
+import threading
+import binascii
+
+
+from getpass import getpass
+from ConfigParser import ConfigParser
+from couchdb.client import Server
+from couchdb.http import Resource, Session
+from datetime import datetime
+from urlparse import urlparse
+
+
+from leap.soledad.common import ddocs
+
+
+# parse command line for the log file name
+logger_fname = "/tmp/update-design-docs_%s.log" % \
+               str(datetime.now()).replace(' ', '_')
+parser = argparse.ArgumentParser()
+parser.add_argument('--log', action='store', default=logger_fname, type=str,
+                    required=False, help='the name of the log file')
+args = parser.parse_args()
+
+
+# configure the logger
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+print "Logging to %s." % args.log
+logging.basicConfig(
+    filename=args.log,
+    format="%(asctime)-15s %(message)s")
+
+
+# configure threads
+max_threads = 20
+semaphore_pool = threading.BoundedSemaphore(value=max_threads)
+threads = []
+
+# get couch url
+cp = ConfigParser()
+cp.read('/etc/leap/soledad-server.conf')
+url = urlparse(cp.get('soledad-server', 'couch_url'))
+
+# get admin password
+netloc = re.sub('^.*@', '', url.netloc)
+url = url._replace(netloc=netloc)
+password = getpass("Admin password for %s: " % url.geturl())
+url = url._replace(netloc='admin:%s@%s' % (password, netloc))
+
+resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10))
+server = Server(url=resource)
+
+hidden_url = re.sub(
+    'http://(.*):.*@',
+    'http://\\1:xxxxx@',
+    url.geturl())
+
+print """
+==========
+ATTENTION!
+==========
+
+This script will modify Soledad's shared and user databases in:
+
+  %s
+
+This script does not make a backup of the couch db data, so make sure you
+have a copy or you may lose data.
+""" % hidden_url
+confirm = raw_input("Proceed (type uppercase YES)? ")
+
+if confirm != "YES":
+    exit(1)
+
+# convert design doc content
+
+design_docs = {
+    '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)),
+    '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)),
+    '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)),
+}
+
+#
+# Thread
+#
+
+class DBWorkerThread(threading.Thread):
+
+    def __init__(self, server, dbname, db_idx, db_len, release_fun):
+        threading.Thread.__init__(self)
+        self._dbname = dbname
+        self._cdb = server[self._dbname]
+        self._db_idx = db_idx
+        self._db_len = db_len
+        self._release_fun = release_fun
+
+    def run(self):
+
+        logger.info("(%d/%d) Updating db %s."
% (self._db_idx, self._db_len, +                    self._dbname)) + +        for doc_id in design_docs: +            doc = self._cdb[doc_id] +            for key in ['lists', 'views', 'updates']: +                if key in design_docs[doc_id]: +                    doc[key] = design_docs[doc_id][key] +            self._cdb.save(doc) + +        # release the semaphore +        self._release_fun() + + +db_idx = 0 +db_len = len(server) +for dbname in server: + +    db_idx += 1 + +    if not (dbname.startswith('user-') or dbname == 'shared') \ +            or dbname == 'user-test-db': +        logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname)) +        continue + + +    # get access to couch db +    cdb = Server(url.geturl())[dbname] + +    #--------------------------------------------------------------------- +    # Start DB worker thread +    #--------------------------------------------------------------------- +    semaphore_pool.acquire() +    thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release) +    thread.daemon = True +    thread.start() +    threads.append(thread) + +map(lambda thread: thread.join(), threads) diff --git a/server/changes/VERSION_COMPAT b/server/changes/VERSION_COMPAT new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/server/changes/VERSION_COMPAT diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index c80b4c68..c170f230 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -87,9 +87,6 @@ and lock documents on the shared database is handled by  """  import configparser -import time -import hashlib -import os  from u1db.remote import http_app @@ -98,10 +95,6 @@ from u1db.remote import http_app  from OpenSSL import tsafe  old_tsafe = tsafe -from twisted.web.wsgi import WSGIResource -from twisted.internet import reactor -from twisted.internet.error import TimeoutError -from twisted.python.lockfile import FilesystemLock  from twisted import version  if version.base() == "12.0.0":      # Put OpenSSL's tsafe back into place. This can probably be removed if we @@ -110,22 +103,21 @@ if version.base() == "12.0.0":      sys.modules['OpenSSL.tsafe'] = old_tsafe  from leap.soledad.server.auth import SoledadTokenAuthMiddleware -from leap.soledad.common import ( -    SHARED_DB_NAME, -    SHARED_DB_LOCK_DOC_ID_PREFIX, -) +from leap.soledad.server.gzip_middleware import GzipMiddleware +from leap.soledad.server.lock_resource import LockResource + +from leap.soledad.common import SHARED_DB_NAME  from leap.soledad.common.couch import CouchServerState -from leap.soledad.common.errors import ( -    InvalidTokenError, -    NotLockedError, -    AlreadyLockedError, -)  #-----------------------------------------------------------------------------  # Soledad WSGI application  #----------------------------------------------------------------------------- +MAX_REQUEST_SIZE = 200  # in Mb +MAX_ENTRY_SIZE = 200  # in Mb + +  class SoledadApp(http_app.HTTPApp):      """      Soledad WSGI application @@ -136,6 +128,9 @@ class SoledadApp(http_app.HTTPApp):      The name of the shared database that holds user's encrypted secrets.      """ +    max_request_size = MAX_REQUEST_SIZE * 1024 * 1024 +    max_entry_size = MAX_ENTRY_SIZE * 1024 * 1024 +      def __call__(self, environ, start_response):          """          Handle a WSGI call to the Soledad application. 
@@ -149,195 +144,12 @@ class SoledadApp(http_app.HTTPApp):          @return: HTTP application results.          @rtype: list          """ -        # ensure the shared database exists -        self.state.ensure_database(self.SHARED_DB_NAME)          return http_app.HTTPApp.__call__(self, environ, start_response) -# -# LockResource: a lock based on a document in the shared database. -# - -@http_app.url_to_resource.register -class LockResource(object): -    """ -    Handle requests for locking documents. - -    This class uses Twisted's Filesystem lock to manage a lock in the shared -    database. -    """ - -    url_pattern = '/%s/lock/{uuid}' % SoledadApp.SHARED_DB_NAME -    """ -    """ - -    TIMEOUT = 300  # XXX is 5 minutes reasonable? -    """ -    The timeout after which the lock expires. -    """ - -    # used for lock doc storage -    TIMESTAMP_KEY = '_timestamp' -    LOCK_TOKEN_KEY = '_token' - -    FILESYSTEM_LOCK_TRIES = 5 -    FILESYSTEM_LOCK_SLEEP_SECONDS = 1 - - -    def __init__(self, uuid, state, responder): -        """ -        Initialize the lock resource. Parameters to this constructor are -        automatically passed by u1db. - -        :param uuid: The user unique id. -        :type uuid: str -        :param state: The backend database state. -        :type state: u1db.remote.ServerState -        :param responder: The infrastructure to send responses to client. -        :type responder: u1db.remote.HTTPResponder -        """ -        self._shared_db = state.open_database(SoledadApp.SHARED_DB_NAME) -        self._lock_doc_id = '%s%s' % (SHARED_DB_LOCK_DOC_ID_PREFIX, uuid) -        self._lock = FilesystemLock( -            hashlib.sha512(self._lock_doc_id).hexdigest()) -        self._state = state -        self._responder = responder - -    @http_app.http_method(content=str) -    def put(self, content=None): -        """ -        Handle a PUT request to the lock document. - -        A lock is a document in the shared db with doc_id equal to -        'lock-<uuid>' and the timestamp of its creation as content. This -        method obtains a threaded-lock and creates a lock document if it does -        not exist or if it has expired. - -        It returns '201 Created' and a pair containing a token to unlock and -        the lock timeout, or '403 AlreadyLockedError' and the remaining amount -        of seconds the lock will still be valid. - -        :param content: The content of the PUT request. It is only here -                        because PUT requests with empty content are considered -                        invalid requests by u1db. 
-        :type content: str -        """ -        # obtain filesystem lock -        if not self._try_obtain_filesystem_lock(): -            self._responder.send_response_json(408)  # error: request timeout -            return - -        created_lock = False -        now = time.time() -        token = hashlib.sha256(os.urandom(10)).hexdigest()  # for releasing -        lock_doc = self._shared_db.get_doc(self._lock_doc_id) -        remaining = self._remaining(lock_doc, now) - -        # if there's no lock, create one -        if lock_doc is None: -            lock_doc = self._shared_db.create_doc( -                { -                    self.TIMESTAMP_KEY: now, -                    self.LOCK_TOKEN_KEY: token, -                }, -                doc_id=self._lock_doc_id) -            created_lock = True -        else: -            if remaining == 0: -                # lock expired, create new one -                lock_doc.content = { -                    self.TIMESTAMP_KEY: now, -                    self.LOCK_TOKEN_KEY: token, -                } -                self._shared_db.put_doc(lock_doc) -                created_lock = True - -        self._try_release_filesystem_lock() - -        # send response to client -        if created_lock is True: -            self._responder.send_response_json( -                201, timeout=self.TIMEOUT, token=token)  # success: created -        else: -            wire_descr = AlreadyLockedError.wire_description -            self._responder.send_response_json( -                AlreadyLockedError.status,  # error: forbidden -                error=AlreadyLockedError.wire_description, remaining=remaining) - -    @http_app.http_method(token=str) -    def delete(self, token=None): -        """ -        Delete the lock if the C{token} is valid. - -        Delete the lock document in case C{token} is equal to the token stored -        in the lock document. - -        :param token: The token returned when locking. -        :type token: str - -        :raise NotLockedError: Raised in case the lock is not locked. -        :raise InvalidTokenError: Raised in case the token is invalid for -                                  unlocking. -        """ -        lock_doc = self._shared_db.get_doc(self._lock_doc_id) -        if lock_doc is None or self._remaining(lock_doc, time.time()) == 0: -            self._responder.send_response_json( -                NotLockedError.status,  # error: not found -                error=NotLockedError.wire_description) -        elif token != lock_doc.content[self.LOCK_TOKEN_KEY]: -            self._responder.send_response_json( -                InvalidTokenError.status,  # error: unauthorized -                error=InvalidTokenError.wire_description) -        else: -            self._shared_db.delete_doc(lock_doc) -            self._responder.send_response_json(200)  # success: should use 204 -                                                     # but u1db does not -                                                     # support it. - -    def _remaining(self, lock_doc, now): -        """ -        Return the number of seconds the lock contained in C{lock_doc} is -        still valid, when compared to C{now}. - -        :param lock_doc: The document containing the lock. -        :type lock_doc: u1db.Document -        :param now: The time to which to compare the lock timestamp. -        :type now: float - -        :return: The amount of seconds the lock is still valid. 
-        :rtype: float -        """ -        if lock_doc is not None: -            lock_timestamp = lock_doc.content[self.TIMESTAMP_KEY] -            remaining = lock_timestamp + self.TIMEOUT - now -            return remaining if remaining > 0 else 0.0 -        return 0.0 - -    def _try_obtain_filesystem_lock(self): -        """ -        Try to obtain the file system lock. - -        @return: Whether the lock was succesfully obtained. -        @rtype: bool -        """ -        tries = self.FILESYSTEM_LOCK_TRIES -        while tries > 0: -            try: -                return self._lock.lock() -            except Exception as e: -                tries -= 1 -                time.sleep(self.FILESYSTEM_LOCK_SLEEP_SECONDS) -        return False - -    def _try_release_filesystem_lock(self): -        """ -        Release the filesystem lock. -        """ -        try: -            self._lock.unlock() -            return True -        except Exception: -            return False +http_app.url_to_resource.register(LockResource) +http_app.SyncResource.max_request_size = MAX_REQUEST_SIZE * 1024 * 1024 +http_app.SyncResource.max_entry_size = MAX_ENTRY_SIZE * 1024 * 1024  #----------------------------------------------------------------------------- @@ -379,8 +191,9 @@ def application(environ, start_response):          SoledadApp.SHARED_DB_NAME,          SoledadTokenAuthMiddleware.TOKENS_DB)      # WSGI application that may be used by `twistd -web` -    application = SoledadTokenAuthMiddleware(SoledadApp(state)) -    resource = WSGIResource(reactor, reactor.getThreadPool(), application) +    application = GzipMiddleware( +        SoledadTokenAuthMiddleware(SoledadApp(state))) +      return application(environ, start_response) diff --git a/server/src/leap/soledad/server/_version.py b/server/src/leap/soledad/server/_version.py index 85f0e54c..ec611c39 100644 --- a/server/src/leap/soledad/server/_version.py +++ b/server/src/leap/soledad/server/_version.py @@ -17,6 +17,7 @@ git_full = "$Format:%H$"  import subprocess  import sys +  def run_command(args, cwd=None, verbose=False):      try:          # remember shell=False, so use git.cmd on windows, not just git @@ -36,11 +37,10 @@ def run_command(args, cwd=None, verbose=False):          return None      return stdout - -import sys  import re  import os.path +  def get_expanded_variables(versionfile_source):      # the code embedded in _version.py can just fetch the value of these      # variables. When used from setup.py, we don't want to import @@ -48,7 +48,7 @@ def get_expanded_variables(versionfile_source):      # used from _version.py.      variables = {}      try: -        f = open(versionfile_source,"r") +        f = open(versionfile_source, "r")          for line in f.readlines():              if line.strip().startswith("git_refnames ="):                  mo = re.search(r'=\s*"(.*)"', line) @@ -63,12 +63,13 @@ def get_expanded_variables(versionfile_source):          pass      return variables +  def versions_from_expanded_variables(variables, tag_prefix, verbose=False):      refnames = variables["refnames"].strip()      if refnames.startswith("$Format"):          if verbose:              print("variables are unexpanded, not using") -        return {} # unexpanded, so not in an unpacked git-archive tarball +        return {}  # unexpanded, so not in an unpacked git-archive tarball      refs = set([r.strip() for r in refnames.strip("()").split(",")])      # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of      # just "foo-1.0". 
If we see a "tag: " prefix, prefer those. @@ -93,13 +94,14 @@ def versions_from_expanded_variables(variables, tag_prefix, verbose=False):              r = ref[len(tag_prefix):]              if verbose:                  print("picking %s" % r) -            return { "version": r, -                     "full": variables["full"].strip() } +            return {"version": r, +                    "full": variables["full"].strip()}      # no suitable tags, so we use the full revision id      if verbose:          print("no suitable tags, using full revision id") -    return { "version": variables["full"].strip(), -             "full": variables["full"].strip() } +    return {"version": variables["full"].strip(), +            "full": variables["full"].strip()} +  def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      # this runs 'git' from the root of the source tree. That either means @@ -116,7 +118,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):          here = os.path.abspath(__file__)      except NameError:          # some py2exe/bbfreeze/non-CPython implementations don't do __file__ -        return {} # not always correct +        return {}  # not always correct      # versionfile_source is the relative path from the top of the source tree      # (where the .git directory might live) to this file. Invert this to find @@ -126,7 +128,16 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):          for i in range(len(versionfile_source.split("/"))):              root = os.path.dirname(root)      else: -        root = os.path.dirname(here) +        root = os.path.dirname( +            os.path.join('..', here)) + +    ###################################################### +    # XXX patch for our specific configuration with +    # the three projects leap.soledad.{common, client, server} +    # inside the same repo. +    ###################################################### +    root = os.path.dirname(os.path.join('..', root)) +      if not os.path.exists(os.path.join(root, ".git")):          if verbose:              print("no .git in %s" % root) @@ -141,7 +152,8 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):          return {}      if not stdout.startswith(tag_prefix):          if verbose: -            print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix)) +            print("tag '%s' doesn't start with prefix '%s'" % +                  (stdout, tag_prefix))          return {}      tag = stdout[len(tag_prefix):]      stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root) @@ -153,7 +165,8 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      return {"version": tag, "full": full} -def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False): +def versions_from_parentdir(parentdir_prefix, versionfile_source, +                            verbose=False):      if IN_LONG_VERSION_PY:          # We're running from _version.py. 
If it's from a source tree          # (execute-in-place), we can work upwards to find the root of the @@ -163,7 +176,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)              here = os.path.abspath(__file__)          except NameError:              # py2exe/bbfreeze/non-CPython don't have __file__ -            return {} # without __file__, we have no hope +            return {}  # without __file__, we have no hope          # versionfile_source is the relative path from the top of the source          # tree to _version.py. Invert this to find the root from __file__.          root = here @@ -180,7 +193,8 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)      dirname = os.path.basename(root)      if not dirname.startswith(parentdir_prefix):          if verbose: -            print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" % +            print("guessing rootdir is '%s', but '%s' doesn't start " +                  "with prefix '%s'" %                    (root, dirname, parentdir_prefix))          return None      return {"version": dirname[len(parentdir_prefix):], "full": ""} @@ -189,8 +203,9 @@ tag_prefix = ""  parentdir_prefix = "leap.soledad.server-"  versionfile_source = "src/leap/soledad/server/_version.py" +  def get_versions(default={"version": "unknown", "full": ""}, verbose=False): -    variables = { "refnames": git_refnames, "full": git_full } +    variables = {"refnames": git_refnames, "full": git_full}      ver = versions_from_expanded_variables(variables, tag_prefix, verbose)      if not ver:          ver = versions_from_vcs(tag_prefix, versionfile_source, verbose) @@ -200,4 +215,3 @@ def get_versions(default={"version": "unknown", "full": ""}, verbose=False):      if not ver:          ver = default      return ver - diff --git a/server/src/leap/soledad/server/auth.py b/server/src/leap/soledad/server/auth.py index 0ae49576..e9d2b032 100644 --- a/server/src/leap/soledad/server/auth.py +++ b/server/src/leap/soledad/server/auth.py @@ -25,7 +25,7 @@ import httplib  import simplejson as json -from u1db import DBNAME_CONSTRAINTS +from u1db import DBNAME_CONSTRAINTS, errors as u1db_errors  from abc import ABCMeta, abstractmethod  from routes.mapper import Mapper  from couchdb.client import Server @@ -37,16 +37,7 @@ from leap.soledad.common import (      SHARED_DB_LOCK_DOC_ID_PREFIX,      USER_DB_PREFIX,  ) - - -#----------------------------------------------------------------------------- -# Authentication -#----------------------------------------------------------------------------- - -class Unauthorized(Exception): -    """ -    User authentication failed. 
-    """ +from leap.soledad.common.errors import InvalidAuthTokenError  class URLToAuthorization(object): @@ -279,10 +270,16 @@ class SoledadAuthMiddleware(object):              return self._unauthorized_error("Wrong authentication scheme")          # verify if user is athenticated -        if not self._verify_authentication_data(uuid, auth_data): -            return self._unauthorized_error( +        try: +            if not self._verify_authentication_data(uuid, auth_data): +                return self._unauthorized_error( +                    start_response, +                    self._get_auth_error_string()) +        except u1db_errors.Unauthorized as e: +            return self._error(                  start_response, -                self._get_auth_error_string()) +                401, +                e.wire_description)          # verify if user is authorized to perform action          if not self._verify_authorization(environ, uuid): @@ -319,6 +316,9 @@ class SoledadAuthMiddleware(object):          @return: Whether the token is valid for authenticating the request.          @rtype: bool + +        @raise Unauthorized: Raised when C{auth_data} is not enough to +                             authenticate C{uuid}.          """          return None @@ -386,11 +386,20 @@ class SoledadTokenAuthMiddleware(SoledadAuthMiddleware):          @return: Whether the token is valid for authenticating the request.          @rtype: bool + +        @raise Unauthorized: Raised when C{auth_data} is not enough to +                             authenticate C{uuid}.          """          token = auth_data  # we expect a cleartext token at this point -        return self._verify_token_in_couchdb(uuid, token) +        try: +            return self._verify_token_in_couch(uuid, token) +        except InvalidAuthTokenError: +            raise +        except Exception as e: +            log.err(e) +            return False -    def _verify_token_in_couchdb(self, uuid, token): +    def _verify_token_in_couch(self, uuid, token):          """          Query couchdb to decide if C{token} is valid for C{uuid}. @@ -398,19 +407,19 @@ class SoledadTokenAuthMiddleware(SoledadAuthMiddleware):          @type uuid: str          @param token: The token.          @type token: str + +        @raise InvalidAuthTokenError: Raised when token received from user is +                                      either missing in the tokens db or is +                                      invalid.          
"""          server = Server(url=self._app.state.couch_url) -        try: -            dbname = self.TOKENS_DB -            db = server[dbname] -            token = db.get(token) -            if token is None: -                return False -            return token[self.TOKENS_TYPE_KEY] == self.TOKENS_TYPE_DEF and \ -                token[self.TOKENS_USER_ID_KEY] == uuid -        except Exception as e: -            log.err(e) -            return False +        dbname = self.TOKENS_DB +        db = server[dbname] +        token = db.get(token) +        if token is None or \ +                token[self.TOKENS_TYPE_KEY] != self.TOKENS_TYPE_DEF or \ +                token[self.TOKENS_USER_ID_KEY] != uuid: +            raise InvalidAuthTokenError()          return True      def _get_auth_error_string(self): diff --git a/server/src/leap/soledad/server/gzip_middleware.py b/server/src/leap/soledad/server/gzip_middleware.py new file mode 100644 index 00000000..986c5738 --- /dev/null +++ b/server/src/leap/soledad/server/gzip_middleware.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# gzip_middleware.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. +""" +Gzip middleware for WSGI apps. +""" +import StringIO +from gzip import GzipFile + + +class GzipMiddleware(object): +    """ +    GzipMiddleware class for WSGI. 
+    """ +    def __init__(self, app, compresslevel=9): +        self.app = app +        self.compresslevel = compresslevel + +    def __call__(self, environ, start_response): +        if 'gzip' not in environ.get('HTTP_ACCEPT_ENCODING', ''): +            return self.app(environ, start_response) + +        buffer = StringIO.StringIO() +        output = GzipFile( +            mode='wb', +            compresslevel=self.compresslevel, +            fileobj=buffer +        ) + +        start_response_args = [] + +        def dummy_start_response(status, headers, exc_info=None): +            start_response_args.append(status) +            start_response_args.append(headers) +            start_response_args.append(exc_info) +            return output.write + +        app_iter = self.app(environ, dummy_start_response) +        for line in app_iter: +            output.write(line) +        if hasattr(app_iter, 'close'): +            app_iter.close() +        output.close() +        buffer.seek(0) +        result = buffer.getvalue() +        headers = [] +        for name, value in start_response_args[1]: +            if name.lower() != 'content-length': +                headers.append((name, value)) +        headers.append(('Content-Length', str(len(result)))) +        headers.append(('Content-Encoding', 'gzip')) +        start_response(start_response_args[0], headers, start_response_args[2]) +        buffer.close() +        return [result] diff --git a/server/src/leap/soledad/server/lock_resource.py b/server/src/leap/soledad/server/lock_resource.py new file mode 100644 index 00000000..a7870f77 --- /dev/null +++ b/server/src/leap/soledad/server/lock_resource.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- +# lock_resource.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +""" +LockResource: a lock based on a document in the shared database. +""" + + +import hashlib +import time +import os +import tempfile +import errno + + +from u1db.remote import http_app +from twisted.python.lockfile import FilesystemLock + + +from leap.soledad.common import ( +    SHARED_DB_NAME, +    SHARED_DB_LOCK_DOC_ID_PREFIX, +) +from leap.soledad.common.errors import ( +    InvalidTokenError, +    NotLockedError, +    AlreadyLockedError, +    LockTimedOutError, +    CouldNotObtainLockError, +) + + +class LockResource(object): +    """ +    Handle requests for locking documents. + +    This class uses Twisted's Filesystem lock to manage a lock in the shared +    database. +    """ + +    url_pattern = '/%s/lock/{uuid}' % SHARED_DB_NAME +    """ +    """ + +    TIMEOUT = 300  # XXX is 5 minutes reasonable? +    """ +    The timeout after which the lock expires. 
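
GzipMiddleware above only compresses when the client advertises gzip support, buffers the wrapped application's output, and rewrites Content-Length while adding Content-Encoding: gzip. A small Python 2 usage sketch; the WSGI app and environ below are made up for illustration, only GzipMiddleware itself comes from the new module:

    import StringIO
    from gzip import GzipFile

    from leap.soledad.server.gzip_middleware import GzipMiddleware


    def hello_app(environ, start_response):
        # Trivial WSGI app used only for this demonstration.
        start_response('200 OK', [('Content-Type', 'text/plain')])
        return ['hello ' * 100]

    collected = {}

    def fake_start_response(status, headers, exc_info=None):
        collected['status'] = status
        collected['headers'] = headers

    wrapped = GzipMiddleware(hello_app, compresslevel=9)
    environ = {'HTTP_ACCEPT_ENCODING': 'gzip'}  # client accepts gzip
    body = ''.join(wrapped(environ, fake_start_response))

    print(collected['headers'])  # includes ('Content-Encoding', 'gzip')
    print(GzipFile(fileobj=StringIO.StringIO(body)).read()[:11])  # hello hello
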
+    """ + +    # used for lock doc storage +    TIMESTAMP_KEY = '_timestamp' +    LOCK_TOKEN_KEY = '_token' + +    FILESYSTEM_LOCK_TRIES = 5 +    FILESYSTEM_LOCK_SLEEP_SECONDS = 1 + +    def __init__(self, uuid, state, responder): +        """ +        Initialize the lock resource. Parameters to this constructor are +        automatically passed by u1db. + +        :param uuid: The user unique id. +        :type uuid: str +        :param state: The backend database state. +        :type state: u1db.remote.ServerState +        :param responder: The infrastructure to send responses to client. +        :type responder: u1db.remote.HTTPResponder +        """ +        self._shared_db = state.open_database(SHARED_DB_NAME) +        self._lock_doc_id = '%s%s' % (SHARED_DB_LOCK_DOC_ID_PREFIX, uuid) +        self._lock = FilesystemLock( +            os.path.join( +                tempfile.gettempdir(), +                hashlib.sha512(self._lock_doc_id).hexdigest())) +        self._state = state +        self._responder = responder + +    @http_app.http_method(content=str) +    def put(self, content=None): +        """ +        Handle a PUT request to the lock document. + +        A lock is a document in the shared db with doc_id equal to +        'lock-<uuid>' and the timestamp of its creation as content. This +        method obtains a threaded-lock and creates a lock document if it does +        not exist or if it has expired. + +        It returns '201 Created' and a pair containing a token to unlock and +        the lock timeout, or '403 AlreadyLockedError' and the remaining amount +        of seconds the lock will still be valid. + +        :param content: The content of the PUT request. It is only here +                        because PUT requests with empty content are considered +                        invalid requests by u1db. 
+        :type content: str +        """ +        # obtain filesystem lock +        if not self._try_obtain_filesystem_lock(): +            self._responder.send_response_json( +                LockTimedOutError.status,  # error: request timeout +                error=LockTimedOutError.wire_description) +            return + +        created_lock = False +        now = time.time() +        token = hashlib.sha256(os.urandom(10)).hexdigest()  # for releasing +        lock_doc = self._shared_db.get_doc(self._lock_doc_id) +        remaining = self._remaining(lock_doc, now) + +        # if there's no lock, create one +        if lock_doc is None: +            lock_doc = self._shared_db.create_doc( +                { +                    self.TIMESTAMP_KEY: now, +                    self.LOCK_TOKEN_KEY: token, +                }, +                doc_id=self._lock_doc_id) +            created_lock = True +        else: +            if remaining == 0: +                # lock expired, create new one +                lock_doc.content = { +                    self.TIMESTAMP_KEY: now, +                    self.LOCK_TOKEN_KEY: token, +                } +                self._shared_db.put_doc(lock_doc) +                created_lock = True + +        self._try_release_filesystem_lock() + +        # send response to client +        if created_lock is True: +            self._responder.send_response_json( +                201, timeout=self.TIMEOUT, token=token)  # success: created +        else: +            self._responder.send_response_json( +                AlreadyLockedError.status,  # error: forbidden +                error=AlreadyLockedError.wire_description, remaining=remaining) + +    @http_app.http_method(token=str) +    def delete(self, token=None): +        """ +        Delete the lock if the C{token} is valid. + +        Delete the lock document in case C{token} is equal to the token stored +        in the lock document. + +        :param token: The token returned when locking. +        :type token: str + +        :raise NotLockedError: Raised in case the lock is not locked. +        :raise InvalidTokenError: Raised in case the token is invalid for +                                  unlocking. +        """ +        lock_doc = self._shared_db.get_doc(self._lock_doc_id) +        if lock_doc is None or self._remaining(lock_doc, time.time()) == 0: +            self._responder.send_response_json( +                NotLockedError.status,  # error: not found +                error=NotLockedError.wire_description) +        elif token != lock_doc.content[self.LOCK_TOKEN_KEY]: +            self._responder.send_response_json( +                InvalidTokenError.status,  # error: unauthorized +                error=InvalidTokenError.wire_description) +        else: +            self._shared_db.delete_doc(lock_doc) +            self._responder.send_response_json(200)  # success: should use 204 +                                                     # but u1db does not +                                                     # support it. + +    def _remaining(self, lock_doc, now): +        """ +        Return the number of seconds the lock contained in C{lock_doc} is +        still valid, when compared to C{now}. + +        :param lock_doc: The document containing the lock. +        :type lock_doc: u1db.Document +        :param now: The time to which to compare the lock timestamp. +        :type now: float + +        :return: The amount of seconds the lock is still valid. 
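
From a client's point of view, the put() and delete() handlers above implement a simple HTTP locking protocol: PUT creates (or re-creates, once expired) the lock document and returns 201 with an unlock token and the timeout, a second PUT while the lock is held returns the error status plus the remaining validity in seconds, and DELETE with the right token releases it. A hedged sketch of that exchange; the host and port are made up, '/shared/...' assumes SHARED_DB_NAME is "shared", passing the token as a query parameter is an assumption about how u1db decodes DELETE arguments, and the authentication headers required by the middleware are omitted:

    import httplib
    import json

    uuid = 'some-user-uuid'
    conn = httplib.HTTPConnection('soledad.example.org', 2424)

    # Acquire: the body only needs to be non-empty because u1db rejects
    # PUT requests with empty content.
    conn.request('PUT', '/shared/lock/%s' % uuid, body='{}')
    resp = conn.getresponse()
    data = json.loads(resp.read())

    if resp.status != 201:
        # Someone else holds the lock; the response says for how long.
        print('already locked, %s seconds remaining' % data.get('remaining'))
    else:
        token, timeout = data['token'], data['timeout']
        # ... perform the work that required the lock ...
        conn.request('DELETE', '/shared/lock/%s?token=%s' % (uuid, token))
        print(conn.getresponse().status)  # 200 when the token matches
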
+        :rtype: float +        """ +        if lock_doc is not None: +            lock_timestamp = lock_doc.content[self.TIMESTAMP_KEY] +            remaining = lock_timestamp + self.TIMEOUT - now +            return remaining if remaining > 0 else 0.0 +        return 0.0 + +    def _try_obtain_filesystem_lock(self): +        """ +        Try to obtain the file system lock. + +        @return: Whether the lock was succesfully obtained. +        @rtype: bool +        """ +        tries = self.FILESYSTEM_LOCK_TRIES +        while tries > 0: +            try: +                return self._lock.lock() +            except OSError as e: +                tries -= 1 +                if tries == 0: +                    raise CouldNotObtainLockError(e.message) +                time.sleep(self.FILESYSTEM_LOCK_SLEEP_SECONDS) +        return False + +    def _try_release_filesystem_lock(self): +        """ +        Release the filesystem lock. +        """ +        try: +            self._lock.unlock() +            return True +        except OSError as e: +            if e.errno == errno.ENOENT: +                return True +            return False diff --git a/server/versioneer.py b/server/versioneer.py index b43ab062..18dfd923 100644 --- a/server/versioneer.py +++ b/server/versioneer.py @@ -115,7 +115,7 @@ import sys  def run_command(args, cwd=None, verbose=False):      try: -        # remember shell=False, so use git.cmd on windows, not just git +        # remember shell=False, so use git.exe on windows, not just git          p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)      except EnvironmentError:          e = sys.exc_info()[1] @@ -230,7 +230,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      GIT = "git"      if sys.platform == "win32": -        GIT = "git.cmd" +        GIT = "git.exe"      stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],                           cwd=root)      if stdout is None: @@ -305,7 +305,7 @@ import sys  def run_command(args, cwd=None, verbose=False):      try: -        # remember shell=False, so use git.cmd on windows, not just git +        # remember shell=False, so use git.exe on windows, not just git          p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)      except EnvironmentError:          e = sys.exc_info()[1] @@ -430,7 +430,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):      GIT = "git"      if sys.platform == "win32": -        GIT = "git.cmd" +        GIT = "git.exe"      stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],                           cwd=root)      if stdout is None: @@ -486,7 +486,7 @@ import sys  def do_vcs_install(versionfile_source, ipy):      GIT = "git"      if sys.platform == "win32": -        GIT = "git.cmd" +        GIT = "git.exe"      run_command([GIT, "add", "versioneer.py"])      run_command([GIT, "add", versionfile_source])      run_command([GIT, "add", ipy]) | 
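
The filesystem lock used above (twisted's FilesystemLock) is non-blocking: lock() either succeeds or fails straight away, and can raise OSError in racy situations, which is why LockResource retries a few times and maps a final failure to CouldNotObtainLockError. A standalone sketch of the same guard pattern, with an arbitrary lock file name and retry numbers:

    import os
    import tempfile
    import time

    from twisted.python.lockfile import FilesystemLock

    lock = FilesystemLock(os.path.join(tempfile.gettempdir(), 'example.lock'))

    def obtain(lock, tries=5, sleep=1):
        # lock() returns True or False; OSError is retried here, mirroring
        # _try_obtain_filesystem_lock() (which instead raises
        # CouldNotObtainLockError once the retries are exhausted).
        while tries > 0:
            try:
                return lock.lock()
            except OSError:
                tries -= 1
                time.sleep(sleep)
        return False

    if obtain(lock):
        try:
            pass  # critical section: inspect/update the couch lock document
        finally:
            lock.unlock()
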