diff options
Diffstat (limited to 'client/src/leap')
-rw-r--r-- | client/src/leap/soledad/client/__init__.py | 103 | ||||
-rw-r--r-- | client/src/leap/soledad/client/_version.py | 222 | ||||
-rw-r--r-- | client/src/leap/soledad/client/auth.py | 1 | ||||
-rw-r--r-- | client/src/leap/soledad/client/sqlcipher.py | 108 | ||||
-rw-r--r-- | client/src/leap/soledad/client/target.py | 121 |
5 files changed, 492 insertions, 63 deletions
diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 62f93b3d..a0b3f45a 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -315,6 +315,47 @@ class Soledad(object): # initialization/destruction methods # + def _get_or_gen_crypto_secrets(self): + """ + Retrieves or generates the crypto secrets. + + Might raise BootstrapSequenceError + """ + doc = self._get_secrets_from_shared_db() + + if doc: + logger.info( + 'Found cryptographic secrets in shared recovery ' + 'database.') + _, mac = self.import_recovery_document(doc.content) + if mac is False: + self.put_secrets_in_shared_db() + self._store_secrets() # save new secrets in local file + if self._secret_id is None: + self._set_secret_id(self._secrets.items()[0][0]) + else: + # STAGE 3 - there are no secrets in server also, so + # generate a secret and store it in remote db. + logger.info( + 'No cryptographic secrets found, creating new ' + ' secrets...') + self._set_secret_id(self._gen_secret()) + try: + self._put_secrets_in_shared_db() + except Exception as ex: + # storing generated secret in shared db failed for + # some reason, so we erase the generated secret and + # raise. + try: + os.unlink(self._secrets_path) + except OSError as e: + if e.errno != errno.ENOENT: # no such file or directory + logger.exception(e) + logger.exception(ex) + raise BootstrapSequenceError( + 'Could not store generated secret in the shared ' + 'database, bailing out...') + def _bootstrap(self): """ Bootstrap local Soledad instance. @@ -342,6 +383,8 @@ class Soledad(object): self._init_dirs() self._crypto = SoledadCrypto(self) + secrets_problem = None + # STAGE 1 - verify if secrets exist locally if not self._has_secret(): # try to load from local storage. @@ -360,38 +403,10 @@ class Soledad(object): except AlreadyLockedError: raise BootstrapSequenceError('Database is already locked.') - doc = self._get_secrets_from_shared_db() - if doc: - logger.info( - 'Found cryptographic secrets in shared recovery ' - 'database.') - _, mac = self.import_recovery_document(doc.content) - if mac is False: - self.put_secrets_in_shared_db() - self._store_secrets() # save new secrets in local file - if self._secret_id is None: - self._set_secret_id(self._secrets.items()[0][0]) - else: - # STAGE 3 - there are no secrets in server also, so - # generate a secret and store it in remote db. - logger.info( - 'No cryptographic secrets found, creating new ' - ' secrets...') - self._set_secret_id(self._gen_secret()) - try: - self._put_secrets_in_shared_db() - except Exception: - # storing generated secret in shared db failed for - # some reason, so we erase the generated secret and - # raise. - try: - os.unlink(self._secrets_path) - except OSError as e: - if errno == 2: # no such file or directory - pass - raise BootstrapSequenceError( - 'Could not store generated secret in the shared ' - 'database, bailing out...') + try: + self._get_or_gen_crypto_secrets() + except Exception as e: + secrets_problem = e # release the lock on shared db try: @@ -416,7 +431,10 @@ class Soledad(object): # --- end of atomic operation in shared db --- # STAGE 4 - local database initialization - self._init_db() + if secrets_problem is None: + self._init_db() + else: + raise secrets_problem def _init_dirs(self): """ @@ -955,6 +973,23 @@ class Soledad(object): if self._db: return self._db.get_from_index(index_name, *key_values) + def get_count_from_index(self, index_name, *key_values): + """ + Return the count of the documents that match the keys and + values supplied. + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: count. + :rtype: int + """ + if self._db: + return self._db.get_count_from_index(index_name, *key_values) + def get_range_from_index(self, index_name, start_value, end_value): """ Return documents that fall within the specified range. @@ -1248,7 +1283,7 @@ class Soledad(object): #----------------------------------------------------------------------------- # We need a more reasonable timeout (in seconds) -SOLEDAD_TIMEOUT = 10 +SOLEDAD_TIMEOUT = 120 class VerifiedHTTPSConnection(httplib.HTTPSConnection): diff --git a/client/src/leap/soledad/client/_version.py b/client/src/leap/soledad/client/_version.py index 7f1169a5..65efb714 100644 --- a/client/src/leap/soledad/client/_version.py +++ b/client/src/leap/soledad/client/_version.py @@ -1,13 +1,217 @@ -# This file was generated by the `freeze_debianver` command in setup.py -# Using 'versioneer.py' (0.7+) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. +IN_LONG_VERSION_PY = True +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (build by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. -version_version = '0.4.4' -version_full = '544994901c871dedf5243672d3ce1f24a78c0ae9' +# This file is released into the public domain. Generated by +# versioneer-0.7+ (https://github.com/warner/python-versioneer) +# these strings will be replaced by git during git-archive +git_refnames = "$Format:%d$" +git_full = "$Format:%H$" -def get_versions(default={}, verbose=False): - return {'version': version_version, 'full': version_full} + +import subprocess +import sys + + +def run_command(args, cwd=None, verbose=False): + try: + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd) + except EnvironmentError: + e = sys.exc_info()[1] + if verbose: + print("unable to run %s" % args[0]) + print(e) + return None + stdout = p.communicate()[0].strip() + if sys.version >= '3': + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % args[0]) + return None + return stdout + +import re +import os.path + + +def get_expanded_variables(versionfile_source): + # the code embedded in _version.py can just fetch the value of these + # variables. When used from setup.py, we don't want to import + # _version.py, so we do it with a regexp instead. This function is not + # used from _version.py. + variables = {} + try: + f = open(versionfile_source, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + variables["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + variables["full"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return variables + + +def versions_from_expanded_variables(variables, tag_prefix, verbose=False): + refnames = variables["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("variables are unexpanded, not using") + return {} # unexpanded, so not in an unpacked git-archive tarball + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs-tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %s" % r) + return {"version": r, + "full": variables["full"].strip()} + # no suitable tags, so we use the full revision id + if verbose: + print("no suitable tags, using full revision id") + return {"version": variables["full"].strip(), + "full": variables["full"].strip()} + + +def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): + # this runs 'git' from the root of the source tree. That either means + # someone ran a setup.py command (and this code is in versioneer.py, so + # IN_LONG_VERSION_PY=False, thus the containing directory is the root of + # the source tree), or someone ran a project-specific entry point (and + # this code is in _version.py, so IN_LONG_VERSION_PY=True, thus the + # containing directory is somewhere deeper in the source tree). This only + # gets called if the git-archive 'subst' variables were *not* expanded, + # and _version.py hasn't already been rewritten with a short version + # string, meaning we're inside a checked out source tree. + + try: + here = os.path.abspath(__file__) + except NameError: + # some py2exe/bbfreeze/non-CPython implementations don't do __file__ + return {} # not always correct + + # versionfile_source is the relative path from the top of the source tree + # (where the .git directory might live) to this file. Invert this to find + # the root from __file__. + root = here + if IN_LONG_VERSION_PY: + for i in range(len(versionfile_source.split("/"))): + root = os.path.dirname(root) + else: + root = os.path.dirname( + os.path.join('..', here)) + + ###################################################### + # XXX patch for our specific configuration with + # the three projects leap.soledad.{common, client, server} + # inside the same repo. + ###################################################### + root = os.path.dirname(os.path.join('..', root)) + + if not os.path.exists(os.path.join(root, ".git")): + if verbose: + print("no .git in %s" % root) + return {} + + GIT = "git" + if sys.platform == "win32": + GIT = "git.cmd" + stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"], + cwd=root) + if stdout is None: + return {} + if not stdout.startswith(tag_prefix): + if verbose: + print("tag '%s' doesn't start with prefix '%s'" % + (stdout, tag_prefix)) + return {} + tag = stdout[len(tag_prefix):] + stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root) + if stdout is None: + return {} + full = stdout.strip() + if tag.endswith("-dirty"): + full += "-dirty" + return {"version": tag, "full": full} + + +def versions_from_parentdir(parentdir_prefix, versionfile_source, + verbose=False): + if IN_LONG_VERSION_PY: + # We're running from _version.py. If it's from a source tree + # (execute-in-place), we can work upwards to find the root of the + # tree, and then check the parent directory for a version string. If + # it's in an installed application, there's no hope. + try: + here = os.path.abspath(__file__) + except NameError: + # py2exe/bbfreeze/non-CPython don't have __file__ + return {} # without __file__, we have no hope + # versionfile_source is the relative path from the top of the source + # tree to _version.py. Invert this to find the root from __file__. + root = here + for i in range(len(versionfile_source.split("/"))): + root = os.path.dirname(root) + else: + # we're running from versioneer.py, which means we're running from + # the setup.py in a source tree. sys.argv[0] is setup.py in the root. + here = os.path.abspath(sys.argv[0]) + root = os.path.dirname(here) + + # Source tarballs conventionally unpack into a directory that includes + # both the project name and a version string. + dirname = os.path.basename(root) + if not dirname.startswith(parentdir_prefix): + if verbose: + print("guessing rootdir is '%s', but '%s' doesn't start " + "with prefix '%s'" % + (root, dirname, parentdir_prefix)) + return None + return {"version": dirname[len(parentdir_prefix):], "full": ""} + +tag_prefix = "" +parentdir_prefix = "leap.soledad.client-" +versionfile_source = "src/leap/soledad/client/_version.py" + + +def get_versions(default={"version": "unknown", "full": ""}, verbose=False): + variables = {"refnames": git_refnames, "full": git_full} + ver = versions_from_expanded_variables(variables, tag_prefix, verbose) + if not ver: + ver = versions_from_vcs(tag_prefix, versionfile_source, verbose) + if not ver: + ver = versions_from_parentdir(parentdir_prefix, versionfile_source, + verbose) + if not ver: + ver = default + return ver diff --git a/client/src/leap/soledad/client/auth.py b/client/src/leap/soledad/client/auth.py index d85e3ba6..72ab0008 100644 --- a/client/src/leap/soledad/client/auth.py +++ b/client/src/leap/soledad/client/auth.py @@ -68,4 +68,3 @@ class TokenBasedAuth(object): else: raise errors.UnknownAuthMethod( 'Wrong credentials: %s' % self._creds) - diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 894c6f97..43c871c3 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -43,16 +43,19 @@ So, as the statements above were introduced for backwards compatibility with SLCipher 1.1 databases, we do not implement them as all SQLCipher databases handled by Soledad should be created by SQLCipher >= 2.0. """ +import httplib import logging import os -import time import string import threading +import time - -from u1db.backends import sqlite_backend from pysqlcipher import dbapi2 +from u1db.backends import sqlite_backend +from u1db.sync import Synchronizer from u1db import errors as u1db_errors + +from leap.soledad.client.target import SoledadSyncTarget from leap.soledad.common.document import SoledadDocument logger = logging.getLogger(__name__) @@ -144,6 +147,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): _index_storage_value = 'expand referenced encrypted' k_lock = threading.Lock() + _syncer = None def __init__(self, sqlcipher_file, password, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', @@ -336,13 +340,46 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :return: The local generation before the synchronisation was performed. :rtype: int """ - from u1db.sync import Synchronizer - from leap.soledad.client.target import SoledadSyncTarget - return Synchronizer( - self, - SoledadSyncTarget(url, - creds=creds, - crypto=self._crypto)).sync(autocreate=autocreate) + if not self.syncer: + self._create_syncer(url, creds=creds) + + try: + res = self.syncer.sync(autocreate=autocreate) + except httplib.CannotSendRequest: + # raised when you reuse httplib.HTTP object for new request + # while you havn't called its getresponse() + # this catch works for the current connclass used + # by our HTTPClientBase, since it uses httplib. + # we will have to replace it if it changes. + logger.info("Replacing connection and trying again...") + self._syncer = None + self._create_syncer(url, creds=creds) + res = self.syncer.sync(autocreate=autocreate) + return res + + @property + def syncer(self): + """ + Accesor for synchronizer. + """ + return self._syncer + + def _create_syncer(self, url, creds=None): + """ + Creates a synchronizer + + :param url: The url of the target replica to sync with. + :type url: str + :param creds: optional dictionary giving credentials. + to authorize the operation with the server. + :type creds: dict + """ + if self._syncer is None: + self._syncer = Synchronizer( + self, + SoledadSyncTarget(url, + creds=creds, + crypto=self._crypto)) def _extra_schema_init(self, c): """ @@ -697,6 +734,57 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): # XXX change passphrase param! db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % passphrase) + # Extra query methods: extensions to the base sqlite implmentation. + + def get_count_from_index(self, index_name, *key_values): + """ + Returns the count for a given combination of index_name + and key values. + + Extension method made from similar methods in u1db version 13.09 + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: count. + :rtype: int + """ + c = self._db_handle.cursor() + definition = self._get_index_definition(index_name) + + if len(key_values) != len(definition): + raise u1db_errors.InvalidValueForIndex() + tables = ["document_fields d%d" % i for i in range(len(definition))] + novalue_where = ["d.doc_id = d%d.doc_id" + " AND d%d.field_name = ?" + % (i, i) for i in range(len(definition))] + exact_where = [novalue_where[i] + + (" AND d%d.value = ?" % (i,)) + for i in range(len(definition))] + args = [] + where = [] + for idx, (field, value) in enumerate(zip(definition, key_values)): + args.append(field) + where.append(exact_where[idx]) + args.append(value) + + tables = ["document_fields d%d" % i for i in range(len(definition))] + statement = ( + "SELECT COUNT(*) FROM document d, %s WHERE %s " % ( + ', '.join(tables), + ' AND '.join(where), + )) + try: + c.execute(statement, tuple(args)) + except dbapi2.OperationalError, e: + raise dbapi2.OperationalError( + str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args)) + res = c.fetchall() + return res[0][0] + def __del__(self): """ Closes db_handle upon object destruction. diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index d8899a97..3b3d6870 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -14,22 +14,26 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. - - """ A U1DB backend for encrypting data before sending to server and decrypting after receiving. """ - -import simplejson as json +import binascii +import cStringIO +import gzip import hashlib import hmac -import binascii +import logging +import urllib +import simplejson as json +from time import sleep -from u1db.remote import utils +from u1db.remote import utils, http_errors from u1db.errors import BrokenSyncStream +from u1db import errors from u1db.remote.http_target import HTTPSyncTarget +from u1db.remote.http_client import _encode_query_parameter from leap.soledad.common import soledad_assert @@ -53,11 +57,13 @@ from leap.soledad.client.crypto import ( UnknownEncryptionMethod, ) +logger = logging.getLogger(__name__) # # Exceptions # + class DocumentNotEncrypted(Exception): """ Raised for failures in document encryption. @@ -222,6 +228,24 @@ def decrypt_doc(crypto, doc): return plainjson +def _gunzip(data): + """ + Uncompress data that is gzipped. + + :param data: gzipped data + :type data: basestring + """ + buffer = cStringIO.StringIO() + buffer.write(data) + buffer.seek(0) + try: + data = gzip.GzipFile(mode='r', fileobj=buffer).read() + except Exception: + logger.warning("Error while decrypting gzipped data") + buffer.close() + return data + + # # SoledadSyncTarget # @@ -353,6 +377,82 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): raise BrokenSyncStream return res + def _request(self, method, url_parts, params=None, body=None, + content_type=None): + """ + Overloaded method. See u1db docs. + Patched for adding gzip encoding. + """ + + self._ensure_connection() + unquoted_url = url_query = self._url.path + if url_parts: + if not url_query.endswith('/'): + url_query += '/' + unquoted_url = url_query + url_query += '/'.join(urllib.quote(part, safe='') + for part in url_parts) + # oauth performs its own quoting + unquoted_url += '/'.join(url_parts) + encoded_params = {} + if params: + for key, value in params.items(): + key = unicode(key).encode('utf-8') + encoded_params[key] = _encode_query_parameter(value) + url_query += ('?' + urllib.urlencode(encoded_params)) + if body is not None and not isinstance(body, basestring): + body = json.dumps(body) + content_type = 'application/json' + headers = {} + if content_type: + headers['content-type'] = content_type + + # Patched: We would like to receive gzip pretty please + # ---------------------------------------------------- + headers['accept-encoding'] = "gzip" + # ---------------------------------------------------- + + headers.update( + self._sign_request(method, unquoted_url, encoded_params)) + + for delay in self._delays: + try: + self._conn.request(method, url_query, body, headers) + return self._response() + except errors.Unavailable, e: + sleep(delay) + raise e + + def _response(self): + """ + Overloaded method, see u1db docs. + We patched it for decrypting gzip content. + """ + resp = self._conn.getresponse() + body = resp.read() + headers = dict(resp.getheaders()) + + # Patched: We would like to decode gzip + # ---------------------------------------------------- + encoding = headers.get('content-encoding', '') + if "gzip" in encoding: + body = _gunzip(body) + # ---------------------------------------------------- + + if resp.status in (200, 201): + return body, headers + elif resp.status in http_errors.ERROR_STATUSES: + try: + respdic = json.loads(body) + except ValueError: + pass + else: + self._error(respdic) + # special case + if resp.status == 503: + raise errors.Unavailable(body, headers) + raise errors.HTTPError(resp.status, body, headers) + def sync_exchange(self, docs_by_generations, source_replica_uid, last_known_generation, last_known_trans_id, return_doc_cb, ensure_callback=None): @@ -364,8 +464,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): syncing. :param docs_by_generations: A list of (doc_id, generation, trans_id) - of local documents that were changed since the last local - generation the remote replica knows about. + of local documents that were changed since + the last local generation the remote + replica knows about. :type docs_by_generations: list of tuples :param source_replica_uid: The uid of the source replica. :type source_replica_uid: str @@ -391,6 +492,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._conn.putheader('content-type', 'application/x-u1db-sync-stream') for header_name, header_value in self._sign_request('POST', url, {}): self._conn.putheader(header_name, header_value) + self._conn.putheader('accept-encoding', 'gzip') entries = ['['] size = 1 @@ -428,7 +530,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): for entry in entries: self._conn.send(entry) entries = None - data, _ = self._response() + data, headers = self._response() + res = self._parse_sync_stream(data, return_doc_cb, ensure_callback) data = None return res['new_generation'], res['new_transaction_id'] |