diff options
author | Tomás Touceda <chiiph@leap.se> | 2014-04-04 16:34:57 -0300 |
---|---|---|
committer | Tomás Touceda <chiiph@leap.se> | 2014-04-04 16:34:57 -0300 |
commit | ce22976cc0e203e53799e771aa5e3717d498cc5c (patch) | |
tree | 8c69733f1f8a83ac83e40bf7e522a5fe8eae9b50 /client/src | |
parent | deb78a5f3502ece98ec3e0b70f93025c4a1b3da5 (diff) | |
parent | a3fed4d42ab4a7be7bc7ebe86b35805ac73d62de (diff) |
Diffstat (limited to 'client/src')
-rw-r--r-- | client/src/leap/soledad/client/__init__.py | 171 | ||||
-rw-r--r-- | client/src/leap/soledad/client/_version.py | 46 | ||||
-rw-r--r-- | client/src/leap/soledad/client/auth.py | 1 | ||||
-rw-r--r-- | client/src/leap/soledad/client/sqlcipher.py | 220 | ||||
-rw-r--r-- | client/src/leap/soledad/client/target.py | 121 |
5 files changed, 455 insertions, 104 deletions
diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 62f93b3d..46e3cd5f 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -34,6 +34,8 @@ import urlparse import hmac from hashlib import sha256 +from threading import Lock +from collections import defaultdict try: import cchardet as chardet @@ -52,6 +54,7 @@ from leap.soledad.common.errors import ( InvalidTokenError, NotLockedError, AlreadyLockedError, + LockTimedOutError, ) from leap.soledad.common.crypto import ( MacMethods, @@ -245,6 +248,12 @@ class Soledad(object): Prefix for default values for path. """ + syncing_lock = defaultdict(Lock) + """ + A dictionary that hold locks which avoid multiple sync attempts from the + same database replica. + """ + def __init__(self, uuid, passphrase, secrets_path, local_db_path, server_url, cert_file, auth_token=None, secret_id=None): """ @@ -315,6 +324,47 @@ class Soledad(object): # initialization/destruction methods # + def _get_or_gen_crypto_secrets(self): + """ + Retrieves or generates the crypto secrets. + + Might raise BootstrapSequenceError + """ + doc = self._get_secrets_from_shared_db() + + if doc: + logger.info( + 'Found cryptographic secrets in shared recovery ' + 'database.') + _, mac = self.import_recovery_document(doc.content) + if mac is False: + self.put_secrets_in_shared_db() + self._store_secrets() # save new secrets in local file + if self._secret_id is None: + self._set_secret_id(self._secrets.items()[0][0]) + else: + # STAGE 3 - there are no secrets in server also, so + # generate a secret and store it in remote db. + logger.info( + 'No cryptographic secrets found, creating new ' + ' secrets...') + self._set_secret_id(self._gen_secret()) + try: + self._put_secrets_in_shared_db() + except Exception as ex: + # storing generated secret in shared db failed for + # some reason, so we erase the generated secret and + # raise. + try: + os.unlink(self._secrets_path) + except OSError as e: + if e.errno != errno.ENOENT: # no such file or directory + logger.exception(e) + logger.exception(ex) + raise BootstrapSequenceError( + 'Could not store generated secret in the shared ' + 'database, bailing out...') + def _bootstrap(self): """ Bootstrap local Soledad instance. @@ -342,6 +392,8 @@ class Soledad(object): self._init_dirs() self._crypto = SoledadCrypto(self) + secrets_problem = None + # STAGE 1 - verify if secrets exist locally if not self._has_secret(): # try to load from local storage. @@ -359,39 +411,13 @@ class Soledad(object): token, timeout = self._shared_db.lock() except AlreadyLockedError: raise BootstrapSequenceError('Database is already locked.') + except LockTimedOutError: + raise BootstrapSequenceError('Lock operation timed out.') - doc = self._get_secrets_from_shared_db() - if doc: - logger.info( - 'Found cryptographic secrets in shared recovery ' - 'database.') - _, mac = self.import_recovery_document(doc.content) - if mac is False: - self.put_secrets_in_shared_db() - self._store_secrets() # save new secrets in local file - if self._secret_id is None: - self._set_secret_id(self._secrets.items()[0][0]) - else: - # STAGE 3 - there are no secrets in server also, so - # generate a secret and store it in remote db. - logger.info( - 'No cryptographic secrets found, creating new ' - ' secrets...') - self._set_secret_id(self._gen_secret()) - try: - self._put_secrets_in_shared_db() - except Exception: - # storing generated secret in shared db failed for - # some reason, so we erase the generated secret and - # raise. - try: - os.unlink(self._secrets_path) - except OSError as e: - if errno == 2: # no such file or directory - pass - raise BootstrapSequenceError( - 'Could not store generated secret in the shared ' - 'database, bailing out...') + try: + self._get_or_gen_crypto_secrets() + except Exception as e: + secrets_problem = e # release the lock on shared db try: @@ -416,7 +442,10 @@ class Soledad(object): # --- end of atomic operation in shared db --- # STAGE 4 - local database initialization - self._init_db() + if secrets_problem is None: + self._init_db() + else: + raise secrets_problem def _init_dirs(self): """ @@ -749,7 +778,7 @@ class Soledad(object): ============================== WARNING ============================== This method converts the document's contents to unicode in-place. This - meanse that after calling C{put_doc(doc)}, the contents of the + means that after calling C{put_doc(doc)}, the contents of the document, i.e. C{doc.content}, might be different from before the call. ============================== WARNING ============================== @@ -806,9 +835,9 @@ class Soledad(object): in matching doc_ids order. :rtype: generator """ - return self._db.get_docs(doc_ids, - check_for_conflicts=check_for_conflicts, - include_deleted=include_deleted) + return self._db.get_docs( + doc_ids, check_for_conflicts=check_for_conflicts, + include_deleted=include_deleted) def get_all_docs(self, include_deleted=False): """Get the JSON content for all documents in the database. @@ -824,7 +853,7 @@ class Soledad(object): def _convert_to_unicode(self, content): """ - Converts content to utf8 (or all the strings in content) + Converts content to unicode (or all the strings in content) NOTE: Even though this method supports any type, it will currently ignore contents of lists, tuple or any other @@ -839,13 +868,14 @@ class Soledad(object): if isinstance(content, unicode): return content elif isinstance(content, str): + result = chardet.detect(content) + default = "utf-8" + encoding = result["encoding"] or default try: - result = chardet.detect(content) - default = "utf-8" - encoding = result["encoding"] or default content = content.decode(encoding) - except UnicodeError: - pass + except UnicodeError as e: + logger.error("Unicode error: {0!r}. Using 'replace'".format(e)) + content = content.decode(encoding, 'replace') return content else: if isinstance(content, dict): @@ -910,7 +940,8 @@ class Soledad(object): "number(fieldname, width)", "lower(fieldname)" """ if self._db: - return self._db.create_index(index_name, *index_expressions) + return self._db.create_index( + index_name, *index_expressions) def delete_index(self, index_name): """ @@ -955,6 +986,23 @@ class Soledad(object): if self._db: return self._db.get_from_index(index_name, *key_values) + def get_count_from_index(self, index_name, *key_values): + """ + Return the count of the documents that match the keys and + values supplied. + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: count. + :rtype: int + """ + if self._db: + return self._db.get_count_from_index(index_name, *key_values) + def get_range_from_index(self, index_name, start_value, end_value): """ Return documents that fall within the specified range. @@ -1028,6 +1076,9 @@ class Soledad(object): """ Synchronize the local encrypted replica with a remote replica. + This method blocks until a syncing lock is acquired, so there are no + attempts of concurrent syncs from the same client replica. + :param url: the url of the target replica to sync with :type url: str @@ -1036,11 +1087,13 @@ class Soledad(object): :rtype: str """ if self._db: - local_gen = self._db.sync( - urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), - creds=self._creds, autocreate=True) - signal(SOLEDAD_DONE_DATA_SYNC, self._uuid) - return local_gen + # acquire lock before attempt to sync + with Soledad.syncing_lock[self._db._get_replica_uid()]: + local_gen = self._db.sync( + urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), + creds=self._creds, autocreate=False) + signal(SOLEDAD_DONE_DATA_SYNC, self._uuid) + return local_gen def need_sync(self, url): """ @@ -1158,7 +1211,7 @@ class Soledad(object): """ soledad_assert(self.STORAGE_SECRETS_KEY in data) # check mac of the recovery document - mac_auth = False + #mac_auth = False # XXX ? mac = None if MAC_KEY in data: soledad_assert(data[MAC_KEY] is not None) @@ -1181,7 +1234,7 @@ class Soledad(object): if mac != data[MAC_KEY]: raise WrongMac('Could not authenticate recovery document\'s ' 'contents.') - mac_auth = True + #mac_auth = True # XXX ? # include secrets in the secret pool. secrets = 0 for secret_id, secret_data in data[self.STORAGE_SECRETS_KEY].items(): @@ -1248,7 +1301,7 @@ class Soledad(object): #----------------------------------------------------------------------------- # We need a more reasonable timeout (in seconds) -SOLEDAD_TIMEOUT = 10 +SOLEDAD_TIMEOUT = 120 class VerifiedHTTPSConnection(httplib.HTTPSConnection): @@ -1258,9 +1311,17 @@ class VerifiedHTTPSConnection(httplib.HTTPSConnection): # derived from httplib.py def connect(self): - "Connect to a host on a given (SSL) port." - sock = socket.create_connection((self.host, self.port), - SOLEDAD_TIMEOUT, self.source_address) + """ + Connect to a host on a given (SSL) port. + """ + try: + source = self.source_address + sock = socket.create_connection((self.host, self.port), + SOLEDAD_TIMEOUT, source) + except AttributeError: + # source_address was introduced in 2.7 + sock = socket.create_connection((self.host, self.port), + SOLEDAD_TIMEOUT) if self._tunnel_host: self.sock = sock self._tunnel() diff --git a/client/src/leap/soledad/client/_version.py b/client/src/leap/soledad/client/_version.py index 8db26fe5..65efb714 100644 --- a/client/src/leap/soledad/client/_version.py +++ b/client/src/leap/soledad/client/_version.py @@ -17,6 +17,7 @@ git_full = "$Format:%H$" import subprocess import sys + def run_command(args, cwd=None, verbose=False): try: # remember shell=False, so use git.cmd on windows, not just git @@ -36,11 +37,10 @@ def run_command(args, cwd=None, verbose=False): return None return stdout - -import sys import re import os.path + def get_expanded_variables(versionfile_source): # the code embedded in _version.py can just fetch the value of these # variables. When used from setup.py, we don't want to import @@ -48,7 +48,7 @@ def get_expanded_variables(versionfile_source): # used from _version.py. variables = {} try: - f = open(versionfile_source,"r") + f = open(versionfile_source, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -63,12 +63,13 @@ def get_expanded_variables(versionfile_source): pass return variables + def versions_from_expanded_variables(variables, tag_prefix, verbose=False): refnames = variables["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("variables are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball + return {} # unexpanded, so not in an unpacked git-archive tarball refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. @@ -93,13 +94,14 @@ def versions_from_expanded_variables(variables, tag_prefix, verbose=False): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) - return { "version": r, - "full": variables["full"].strip() } + return {"version": r, + "full": variables["full"].strip()} # no suitable tags, so we use the full revision id if verbose: print("no suitable tags, using full revision id") - return { "version": variables["full"].strip(), - "full": variables["full"].strip() } + return {"version": variables["full"].strip(), + "full": variables["full"].strip()} + def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): # this runs 'git' from the root of the source tree. That either means @@ -116,7 +118,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): here = os.path.abspath(__file__) except NameError: # some py2exe/bbfreeze/non-CPython implementations don't do __file__ - return {} # not always correct + return {} # not always correct # versionfile_source is the relative path from the top of the source tree # (where the .git directory might live) to this file. Invert this to find @@ -126,7 +128,16 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): for i in range(len(versionfile_source.split("/"))): root = os.path.dirname(root) else: - root = os.path.dirname(here) + root = os.path.dirname( + os.path.join('..', here)) + + ###################################################### + # XXX patch for our specific configuration with + # the three projects leap.soledad.{common, client, server} + # inside the same repo. + ###################################################### + root = os.path.dirname(os.path.join('..', root)) + if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %s" % root) @@ -141,7 +152,8 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): return {} if not stdout.startswith(tag_prefix): if verbose: - print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix)) + print("tag '%s' doesn't start with prefix '%s'" % + (stdout, tag_prefix)) return {} tag = stdout[len(tag_prefix):] stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root) @@ -153,7 +165,8 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): return {"version": tag, "full": full} -def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False): +def versions_from_parentdir(parentdir_prefix, versionfile_source, + verbose=False): if IN_LONG_VERSION_PY: # We're running from _version.py. If it's from a source tree # (execute-in-place), we can work upwards to find the root of the @@ -163,7 +176,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False) here = os.path.abspath(__file__) except NameError: # py2exe/bbfreeze/non-CPython don't have __file__ - return {} # without __file__, we have no hope + return {} # without __file__, we have no hope # versionfile_source is the relative path from the top of the source # tree to _version.py. Invert this to find the root from __file__. root = here @@ -180,7 +193,8 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False) dirname = os.path.basename(root) if not dirname.startswith(parentdir_prefix): if verbose: - print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" % + print("guessing rootdir is '%s', but '%s' doesn't start " + "with prefix '%s'" % (root, dirname, parentdir_prefix)) return None return {"version": dirname[len(parentdir_prefix):], "full": ""} @@ -189,8 +203,9 @@ tag_prefix = "" parentdir_prefix = "leap.soledad.client-" versionfile_source = "src/leap/soledad/client/_version.py" + def get_versions(default={"version": "unknown", "full": ""}, verbose=False): - variables = { "refnames": git_refnames, "full": git_full } + variables = {"refnames": git_refnames, "full": git_full} ver = versions_from_expanded_variables(variables, tag_prefix, verbose) if not ver: ver = versions_from_vcs(tag_prefix, versionfile_source, verbose) @@ -200,4 +215,3 @@ def get_versions(default={"version": "unknown", "full": ""}, verbose=False): if not ver: ver = default return ver - diff --git a/client/src/leap/soledad/client/auth.py b/client/src/leap/soledad/client/auth.py index d85e3ba6..72ab0008 100644 --- a/client/src/leap/soledad/client/auth.py +++ b/client/src/leap/soledad/client/auth.py @@ -68,4 +68,3 @@ class TokenBasedAuth(object): else: raise errors.UnknownAuthMethod( 'Wrong credentials: %s' % self._creds) - diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 894c6f97..3aea340d 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -43,16 +43,19 @@ So, as the statements above were introduced for backwards compatibility with SLCipher 1.1 databases, we do not implement them as all SQLCipher databases handled by Soledad should be created by SQLCipher >= 2.0. """ +import httplib import logging import os -import time import string import threading +import time - -from u1db.backends import sqlite_backend from pysqlcipher import dbapi2 +from u1db.backends import sqlite_backend +from u1db.sync import Synchronizer from u1db import errors as u1db_errors + +from leap.soledad.client.target import SoledadSyncTarget from leap.soledad.common.document import SoledadDocument logger = logging.getLogger(__name__) @@ -88,10 +91,10 @@ def open(path, password, create=True, document_factory=None, crypto=None, database does not already exist. :param path: The filesystem path for the database to open. - :param type: str + :type path: str :param create: True/False, should the database be created if it doesn't already exist? - :param type: bool + :param create: bool :param document_factory: A function that will be called with the same parameters as Document.__init__. :type document_factory: callable @@ -144,25 +147,30 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): _index_storage_value = 'expand referenced encrypted' k_lock = threading.Lock() + create_doc_lock = threading.Lock() + update_indexes_lock = threading.Lock() + _syncer = None def __init__(self, sqlcipher_file, password, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024): """ - Create a new sqlcipher file. + Connect to an existing SQLCipher database, creating a new sqlcipher + database file if needed. :param sqlcipher_file: The path for the SQLCipher file. :type sqlcipher_file: str :param password: The password that protects the SQLCipher db. :type password: str :param document_factory: A function that will be called with the same - parameters as Document.__init__. + parameters as Document.__init__. :type document_factory: callable :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. + document contents when syncing. :type crypto: soledad.crypto.SoledadCrypto - :param raw_key: Whether C{password} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. + :param raw_key: Whether password is a raw 64-char hex string or a + passphrase that should be hashed to obtain the + encyrption key. :type raw_key: bool :param cipher: The cipher and mode to use. :type cipher: str @@ -186,6 +194,13 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self._set_crypto_pragmas( self._db_handle, password, raw_key, cipher, kdf_iter, cipher_page_size) + if os.environ.get('LEAP_SQLITE_NOSYNC'): + self._pragma_synchronous_off(self._db_handle) + else: + self._pragma_synchronous_normal(self._db_handle) + if os.environ.get('LEAP_SQLITE_MEMSTORE'): + self._pragma_mem_temp_store(self._db_handle) + self._pragma_write_ahead_logging(self._db_handle) self._real_replica_uid = None self._ensure_schema() self._crypto = crypto @@ -336,13 +351,46 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :return: The local generation before the synchronisation was performed. :rtype: int """ - from u1db.sync import Synchronizer - from leap.soledad.client.target import SoledadSyncTarget - return Synchronizer( - self, - SoledadSyncTarget(url, - creds=creds, - crypto=self._crypto)).sync(autocreate=autocreate) + if not self.syncer: + self._create_syncer(url, creds=creds) + + try: + res = self.syncer.sync(autocreate=autocreate) + except httplib.CannotSendRequest: + # raised when you reuse httplib.HTTP object for new request + # while you havn't called its getresponse() + # this catch works for the current connclass used + # by our HTTPClientBase, since it uses httplib. + # we will have to replace it if it changes. + logger.info("Replacing connection and trying again...") + self._syncer = None + self._create_syncer(url, creds=creds) + res = self.syncer.sync(autocreate=autocreate) + return res + + @property + def syncer(self): + """ + Accesor for synchronizer. + """ + return self._syncer + + def _create_syncer(self, url, creds=None): + """ + Creates a synchronizer + + :param url: The url of the target replica to sync with. + :type url: str + :param creds: optional dictionary giving credentials. + to authorize the operation with the server. + :type creds: dict + """ + if self._syncer is None: + self._syncer = Synchronizer( + self, + SoledadSyncTarget(url, + creds=creds, + crypto=self._crypto)) def _extra_schema_init(self, c): """ @@ -359,6 +407,22 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): 'ALTER TABLE document ' 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') + def create_doc(self, content, doc_id=None): + """ + Create a new document in the local encrypted database. + + :param content: the contents of the new document + :type content: dict + :param doc_id: an optional identifier specifying the document id + :type doc_id: str + + :return: the new document + :rtype: SoledadDocument + """ + with self.create_doc_lock: + return sqlite_backend.SQLitePartialExpandDatabase.create_doc( + self, content, doc_id=doc_id) + def _put_and_update_indexes(self, old_doc, doc): """ Update a document and all indexes related to it. @@ -368,12 +432,13 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :param doc: The new version of the document. :type doc: u1db.Document """ - sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes( - self, old_doc, doc) - c = self._db_handle.cursor() - c.execute('UPDATE document SET syncable=? ' - 'WHERE doc_id=?', - (doc.syncable, doc.doc_id)) + with self.update_indexes_lock: + sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes( + self, old_doc, doc) + c = self._db_handle.cursor() + c.execute('UPDATE document SET syncable=? ' + 'WHERE doc_id=?', + (doc.syncable, doc.doc_id)) def _get_doc(self, doc_id, check_for_conflicts=False): """ @@ -697,6 +762,115 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): # XXX change passphrase param! db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % passphrase) + @classmethod + def _pragma_synchronous_off(cls, db_handle): + """ + Change the setting of the "synchronous" flag to OFF. + """ + logger.debug("SQLCIPHER: SETTING SYNCHRONOUS OFF") + db_handle.cursor().execute('PRAGMA synchronous=OFF') + + @classmethod + def _pragma_synchronous_normal(cls, db_handle): + """ + Change the setting of the "synchronous" flag to NORMAL. + """ + logger.debug("SQLCIPHER: SETTING SYNCHRONOUS NORMAL") + db_handle.cursor().execute('PRAGMA synchronous=NORMAL') + + @classmethod + def _pragma_mem_temp_store(cls, db_handle): + """ + Use a in-memory store for temporary tables. + """ + logger.debug("SQLCIPHER: SETTING TEMP_STORE MEMORY") + db_handle.cursor().execute('PRAGMA temp_store=MEMORY') + + @classmethod + def _pragma_write_ahead_logging(cls, db_handle): + """ + Enable write-ahead logging, and set the autocheckpoint to 50 pages. + + Setting the autocheckpoint to a small value, we make the reads not + suffer too much performance degradation. + + From the sqlite docs: + + "There is a tradeoff between average read performance and average write + performance. To maximize the read performance, one wants to keep the + WAL as small as possible and hence run checkpoints frequently, perhaps + as often as every COMMIT. To maximize write performance, one wants to + amortize the cost of each checkpoint over as many writes as possible, + meaning that one wants to run checkpoints infrequently and let the WAL + grow as large as possible before each checkpoint. The decision of how + often to run checkpoints may therefore vary from one application to + another depending on the relative read and write performance + requirements of the application. The default strategy is to run a + checkpoint once the WAL reaches 1000 pages" + """ + logger.debug("SQLCIPHER: SETTING WRITE-AHEAD LOGGING") + db_handle.cursor().execute('PRAGMA journal_mode=WAL') + # The optimum value can still use a little bit of tuning, but we favor + # small sizes of the WAL file to get fast reads, since we assume that + # the writes will be quick enough to not block too much. + + # TODO + # As a further improvement, we might want to set autocheckpoint to 0 + # here and do the checkpoints manually in a separate thread, to avoid + # any blocks in the main thread (we should run a loopingcall from here) + db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50') + + # Extra query methods: extensions to the base sqlite implmentation. + + def get_count_from_index(self, index_name, *key_values): + """ + Returns the count for a given combination of index_name + and key values. + + Extension method made from similar methods in u1db version 13.09 + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: count. + :rtype: int + """ + c = self._db_handle.cursor() + definition = self._get_index_definition(index_name) + + if len(key_values) != len(definition): + raise u1db_errors.InvalidValueForIndex() + tables = ["document_fields d%d" % i for i in range(len(definition))] + novalue_where = ["d.doc_id = d%d.doc_id" + " AND d%d.field_name = ?" + % (i, i) for i in range(len(definition))] + exact_where = [novalue_where[i] + + (" AND d%d.value = ?" % (i,)) + for i in range(len(definition))] + args = [] + where = [] + for idx, (field, value) in enumerate(zip(definition, key_values)): + args.append(field) + where.append(exact_where[idx]) + args.append(value) + + tables = ["document_fields d%d" % i for i in range(len(definition))] + statement = ( + "SELECT COUNT(*) FROM document d, %s WHERE %s " % ( + ', '.join(tables), + ' AND '.join(where), + )) + try: + c.execute(statement, tuple(args)) + except dbapi2.OperationalError, e: + raise dbapi2.OperationalError( + str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args)) + res = c.fetchall() + return res[0][0] + def __del__(self): """ Closes db_handle upon object destruction. diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index d8899a97..3b3d6870 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -14,22 +14,26 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. - - """ A U1DB backend for encrypting data before sending to server and decrypting after receiving. """ - -import simplejson as json +import binascii +import cStringIO +import gzip import hashlib import hmac -import binascii +import logging +import urllib +import simplejson as json +from time import sleep -from u1db.remote import utils +from u1db.remote import utils, http_errors from u1db.errors import BrokenSyncStream +from u1db import errors from u1db.remote.http_target import HTTPSyncTarget +from u1db.remote.http_client import _encode_query_parameter from leap.soledad.common import soledad_assert @@ -53,11 +57,13 @@ from leap.soledad.client.crypto import ( UnknownEncryptionMethod, ) +logger = logging.getLogger(__name__) # # Exceptions # + class DocumentNotEncrypted(Exception): """ Raised for failures in document encryption. @@ -222,6 +228,24 @@ def decrypt_doc(crypto, doc): return plainjson +def _gunzip(data): + """ + Uncompress data that is gzipped. + + :param data: gzipped data + :type data: basestring + """ + buffer = cStringIO.StringIO() + buffer.write(data) + buffer.seek(0) + try: + data = gzip.GzipFile(mode='r', fileobj=buffer).read() + except Exception: + logger.warning("Error while decrypting gzipped data") + buffer.close() + return data + + # # SoledadSyncTarget # @@ -353,6 +377,82 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): raise BrokenSyncStream return res + def _request(self, method, url_parts, params=None, body=None, + content_type=None): + """ + Overloaded method. See u1db docs. + Patched for adding gzip encoding. + """ + + self._ensure_connection() + unquoted_url = url_query = self._url.path + if url_parts: + if not url_query.endswith('/'): + url_query += '/' + unquoted_url = url_query + url_query += '/'.join(urllib.quote(part, safe='') + for part in url_parts) + # oauth performs its own quoting + unquoted_url += '/'.join(url_parts) + encoded_params = {} + if params: + for key, value in params.items(): + key = unicode(key).encode('utf-8') + encoded_params[key] = _encode_query_parameter(value) + url_query += ('?' + urllib.urlencode(encoded_params)) + if body is not None and not isinstance(body, basestring): + body = json.dumps(body) + content_type = 'application/json' + headers = {} + if content_type: + headers['content-type'] = content_type + + # Patched: We would like to receive gzip pretty please + # ---------------------------------------------------- + headers['accept-encoding'] = "gzip" + # ---------------------------------------------------- + + headers.update( + self._sign_request(method, unquoted_url, encoded_params)) + + for delay in self._delays: + try: + self._conn.request(method, url_query, body, headers) + return self._response() + except errors.Unavailable, e: + sleep(delay) + raise e + + def _response(self): + """ + Overloaded method, see u1db docs. + We patched it for decrypting gzip content. + """ + resp = self._conn.getresponse() + body = resp.read() + headers = dict(resp.getheaders()) + + # Patched: We would like to decode gzip + # ---------------------------------------------------- + encoding = headers.get('content-encoding', '') + if "gzip" in encoding: + body = _gunzip(body) + # ---------------------------------------------------- + + if resp.status in (200, 201): + return body, headers + elif resp.status in http_errors.ERROR_STATUSES: + try: + respdic = json.loads(body) + except ValueError: + pass + else: + self._error(respdic) + # special case + if resp.status == 503: + raise errors.Unavailable(body, headers) + raise errors.HTTPError(resp.status, body, headers) + def sync_exchange(self, docs_by_generations, source_replica_uid, last_known_generation, last_known_trans_id, return_doc_cb, ensure_callback=None): @@ -364,8 +464,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): syncing. :param docs_by_generations: A list of (doc_id, generation, trans_id) - of local documents that were changed since the last local - generation the remote replica knows about. + of local documents that were changed since + the last local generation the remote + replica knows about. :type docs_by_generations: list of tuples :param source_replica_uid: The uid of the source replica. :type source_replica_uid: str @@ -391,6 +492,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._conn.putheader('content-type', 'application/x-u1db-sync-stream') for header_name, header_value in self._sign_request('POST', url, {}): self._conn.putheader(header_name, header_value) + self._conn.putheader('accept-encoding', 'gzip') entries = ['['] size = 1 @@ -428,7 +530,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): for entry in entries: self._conn.send(entry) entries = None - data, _ = self._response() + data, headers = self._response() + res = self._parse_sync_stream(data, return_doc_cb, ensure_callback) data = None return res['new_generation'], res['new_transaction_id'] |