summaryrefslogtreecommitdiff
path: root/client/src
diff options
context:
space:
mode:
Diffstat (limited to 'client/src')
-rw-r--r--client/src/leap/soledad/client/__init__.py171
-rw-r--r--client/src/leap/soledad/client/_version.py46
-rw-r--r--client/src/leap/soledad/client/auth.py1
-rw-r--r--client/src/leap/soledad/client/sqlcipher.py220
-rw-r--r--client/src/leap/soledad/client/target.py121
5 files changed, 455 insertions, 104 deletions
diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py
index 62f93b3d..46e3cd5f 100644
--- a/client/src/leap/soledad/client/__init__.py
+++ b/client/src/leap/soledad/client/__init__.py
@@ -34,6 +34,8 @@ import urlparse
import hmac
from hashlib import sha256
+from threading import Lock
+from collections import defaultdict
try:
import cchardet as chardet
@@ -52,6 +54,7 @@ from leap.soledad.common.errors import (
InvalidTokenError,
NotLockedError,
AlreadyLockedError,
+ LockTimedOutError,
)
from leap.soledad.common.crypto import (
MacMethods,
@@ -245,6 +248,12 @@ class Soledad(object):
Prefix for default values for path.
"""
+ syncing_lock = defaultdict(Lock)
+ """
+ A dictionary that holds locks which avoid multiple sync attempts from the
+ same database replica.
+ """
+
def __init__(self, uuid, passphrase, secrets_path, local_db_path,
server_url, cert_file, auth_token=None, secret_id=None):
"""
@@ -315,6 +324,47 @@ class Soledad(object):
# initialization/destruction methods
#
+ def _get_or_gen_crypto_secrets(self):
+ """
+ Retrieves or generates the crypto secrets.
+
+ Might raise BootstrapSequenceError
+ """
+ doc = self._get_secrets_from_shared_db()
+
+ if doc:
+ logger.info(
+ 'Found cryptographic secrets in shared recovery '
+ 'database.')
+ _, mac = self.import_recovery_document(doc.content)
+ if mac is False:
+ self.put_secrets_in_shared_db()
+ self._store_secrets() # save new secrets in local file
+ if self._secret_id is None:
+ self._set_secret_id(self._secrets.items()[0][0])
+ else:
+ # STAGE 3 - there are no secrets in server also, so
+ # generate a secret and store it in remote db.
+ logger.info(
+ 'No cryptographic secrets found, creating new '
+ ' secrets...')
+ self._set_secret_id(self._gen_secret())
+ try:
+ self._put_secrets_in_shared_db()
+ except Exception as ex:
+ # storing generated secret in shared db failed for
+ # some reason, so we erase the generated secret and
+ # raise.
+ try:
+ os.unlink(self._secrets_path)
+ except OSError as e:
+ if e.errno != errno.ENOENT: # no such file or directory
+ logger.exception(e)
+ logger.exception(ex)
+ raise BootstrapSequenceError(
+ 'Could not store generated secret in the shared '
+ 'database, bailing out...')
+
def _bootstrap(self):
"""
Bootstrap local Soledad instance.
@@ -342,6 +392,8 @@ class Soledad(object):
self._init_dirs()
self._crypto = SoledadCrypto(self)
+ secrets_problem = None
+
# STAGE 1 - verify if secrets exist locally
if not self._has_secret(): # try to load from local storage.
@@ -359,39 +411,13 @@ class Soledad(object):
token, timeout = self._shared_db.lock()
except AlreadyLockedError:
raise BootstrapSequenceError('Database is already locked.')
+ except LockTimedOutError:
+ raise BootstrapSequenceError('Lock operation timed out.')
- doc = self._get_secrets_from_shared_db()
- if doc:
- logger.info(
- 'Found cryptographic secrets in shared recovery '
- 'database.')
- _, mac = self.import_recovery_document(doc.content)
- if mac is False:
- self.put_secrets_in_shared_db()
- self._store_secrets() # save new secrets in local file
- if self._secret_id is None:
- self._set_secret_id(self._secrets.items()[0][0])
- else:
- # STAGE 3 - there are no secrets in server also, so
- # generate a secret and store it in remote db.
- logger.info(
- 'No cryptographic secrets found, creating new '
- ' secrets...')
- self._set_secret_id(self._gen_secret())
- try:
- self._put_secrets_in_shared_db()
- except Exception:
- # storing generated secret in shared db failed for
- # some reason, so we erase the generated secret and
- # raise.
- try:
- os.unlink(self._secrets_path)
- except OSError as e:
- if errno == 2: # no such file or directory
- pass
- raise BootstrapSequenceError(
- 'Could not store generated secret in the shared '
- 'database, bailing out...')
+ try:
+ self._get_or_gen_crypto_secrets()
+ except Exception as e:
+ secrets_problem = e
# release the lock on shared db
try:
@@ -416,7 +442,10 @@ class Soledad(object):
# --- end of atomic operation in shared db ---
# STAGE 4 - local database initialization
- self._init_db()
+ if secrets_problem is None:
+ self._init_db()
+ else:
+ raise secrets_problem
def _init_dirs(self):
"""
@@ -749,7 +778,7 @@ class Soledad(object):
============================== WARNING ==============================
This method converts the document's contents to unicode in-place. This
- meanse that after calling C{put_doc(doc)}, the contents of the
+ means that after calling C{put_doc(doc)}, the contents of the
document, i.e. C{doc.content}, might be different from before the
call.
============================== WARNING ==============================
@@ -806,9 +835,9 @@ class Soledad(object):
in matching doc_ids order.
:rtype: generator
"""
- return self._db.get_docs(doc_ids,
- check_for_conflicts=check_for_conflicts,
- include_deleted=include_deleted)
+ return self._db.get_docs(
+ doc_ids, check_for_conflicts=check_for_conflicts,
+ include_deleted=include_deleted)
def get_all_docs(self, include_deleted=False):
"""Get the JSON content for all documents in the database.
@@ -824,7 +853,7 @@ class Soledad(object):
def _convert_to_unicode(self, content):
"""
- Converts content to utf8 (or all the strings in content)
+ Converts content to unicode (or all the strings in content)
NOTE: Even though this method supports any type, it will
currently ignore contents of lists, tuple or any other
@@ -839,13 +868,14 @@ class Soledad(object):
if isinstance(content, unicode):
return content
elif isinstance(content, str):
+ result = chardet.detect(content)
+ default = "utf-8"
+ encoding = result["encoding"] or default
try:
- result = chardet.detect(content)
- default = "utf-8"
- encoding = result["encoding"] or default
content = content.decode(encoding)
- except UnicodeError:
- pass
+ except UnicodeError as e:
+ logger.error("Unicode error: {0!r}. Using 'replace'".format(e))
+ content = content.decode(encoding, 'replace')
return content
else:
if isinstance(content, dict):
@@ -910,7 +940,8 @@ class Soledad(object):
"number(fieldname, width)", "lower(fieldname)"
"""
if self._db:
- return self._db.create_index(index_name, *index_expressions)
+ return self._db.create_index(
+ index_name, *index_expressions)
def delete_index(self, index_name):
"""
@@ -955,6 +986,23 @@ class Soledad(object):
if self._db:
return self._db.get_from_index(index_name, *key_values)
+ def get_count_from_index(self, index_name, *key_values):
+ """
+ Return the count of the documents that match the keys and
+ values supplied.
+
+ :param index_name: The index to query
+ :type index_name: str
+ :param key_values: values to match. eg, if you have
+ an index with 3 fields then you would have:
+ get_count_from_index(index_name, val1, val2, val3)
+ :type key_values: tuple
+ :return: count.
+ :rtype: int
+ """
+ if self._db:
+ return self._db.get_count_from_index(index_name, *key_values)
+
def get_range_from_index(self, index_name, start_value, end_value):
"""
Return documents that fall within the specified range.
@@ -1028,6 +1076,9 @@ class Soledad(object):
"""
Synchronize the local encrypted replica with a remote replica.
+ This method blocks until a syncing lock is acquired, so there are no
+ attempts of concurrent syncs from the same client replica.
+
:param url: the url of the target replica to sync with
:type url: str
@@ -1036,11 +1087,13 @@ class Soledad(object):
:rtype: str
"""
if self._db:
- local_gen = self._db.sync(
- urlparse.urljoin(self.server_url, 'user-%s' % self._uuid),
- creds=self._creds, autocreate=True)
- signal(SOLEDAD_DONE_DATA_SYNC, self._uuid)
- return local_gen
+ # acquire lock before attempt to sync
+ with Soledad.syncing_lock[self._db._get_replica_uid()]:
+ local_gen = self._db.sync(
+ urlparse.urljoin(self.server_url, 'user-%s' % self._uuid),
+ creds=self._creds, autocreate=False)
+ signal(SOLEDAD_DONE_DATA_SYNC, self._uuid)
+ return local_gen
def need_sync(self, url):
"""
@@ -1158,7 +1211,7 @@ class Soledad(object):
"""
soledad_assert(self.STORAGE_SECRETS_KEY in data)
# check mac of the recovery document
- mac_auth = False
+ #mac_auth = False # XXX ?
mac = None
if MAC_KEY in data:
soledad_assert(data[MAC_KEY] is not None)
@@ -1181,7 +1234,7 @@ class Soledad(object):
if mac != data[MAC_KEY]:
raise WrongMac('Could not authenticate recovery document\'s '
'contents.')
- mac_auth = True
+ #mac_auth = True # XXX ?
# include secrets in the secret pool.
secrets = 0
for secret_id, secret_data in data[self.STORAGE_SECRETS_KEY].items():
@@ -1248,7 +1301,7 @@ class Soledad(object):
#-----------------------------------------------------------------------------
# We need a more reasonable timeout (in seconds)
-SOLEDAD_TIMEOUT = 10
+SOLEDAD_TIMEOUT = 120
class VerifiedHTTPSConnection(httplib.HTTPSConnection):
@@ -1258,9 +1311,17 @@ class VerifiedHTTPSConnection(httplib.HTTPSConnection):
# derived from httplib.py
def connect(self):
- "Connect to a host on a given (SSL) port."
- sock = socket.create_connection((self.host, self.port),
- SOLEDAD_TIMEOUT, self.source_address)
+ """
+ Connect to a host on a given (SSL) port.
+ """
+ try:
+ source = self.source_address
+ sock = socket.create_connection((self.host, self.port),
+ SOLEDAD_TIMEOUT, source)
+ except AttributeError:
+ # source_address was introduced in 2.7
+ sock = socket.create_connection((self.host, self.port),
+ SOLEDAD_TIMEOUT)
if self._tunnel_host:
self.sock = sock
self._tunnel()
diff --git a/client/src/leap/soledad/client/_version.py b/client/src/leap/soledad/client/_version.py
index 8db26fe5..65efb714 100644
--- a/client/src/leap/soledad/client/_version.py
+++ b/client/src/leap/soledad/client/_version.py
@@ -17,6 +17,7 @@ git_full = "$Format:%H$"
import subprocess
import sys
+
def run_command(args, cwd=None, verbose=False):
try:
# remember shell=False, so use git.cmd on windows, not just git
@@ -36,11 +37,10 @@ def run_command(args, cwd=None, verbose=False):
return None
return stdout
-
-import sys
import re
import os.path
+
def get_expanded_variables(versionfile_source):
# the code embedded in _version.py can just fetch the value of these
# variables. When used from setup.py, we don't want to import
@@ -48,7 +48,7 @@ def get_expanded_variables(versionfile_source):
# used from _version.py.
variables = {}
try:
- f = open(versionfile_source,"r")
+ f = open(versionfile_source, "r")
for line in f.readlines():
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
@@ -63,12 +63,13 @@ def get_expanded_variables(versionfile_source):
pass
return variables
+
def versions_from_expanded_variables(variables, tag_prefix, verbose=False):
refnames = variables["refnames"].strip()
if refnames.startswith("$Format"):
if verbose:
print("variables are unexpanded, not using")
- return {} # unexpanded, so not in an unpacked git-archive tarball
+ return {} # unexpanded, so not in an unpacked git-archive tarball
refs = set([r.strip() for r in refnames.strip("()").split(",")])
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
@@ -93,13 +94,14 @@ def versions_from_expanded_variables(variables, tag_prefix, verbose=False):
r = ref[len(tag_prefix):]
if verbose:
print("picking %s" % r)
- return { "version": r,
- "full": variables["full"].strip() }
+ return {"version": r,
+ "full": variables["full"].strip()}
# no suitable tags, so we use the full revision id
if verbose:
print("no suitable tags, using full revision id")
- return { "version": variables["full"].strip(),
- "full": variables["full"].strip() }
+ return {"version": variables["full"].strip(),
+ "full": variables["full"].strip()}
+
def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
# this runs 'git' from the root of the source tree. That either means
@@ -116,7 +118,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
here = os.path.abspath(__file__)
except NameError:
# some py2exe/bbfreeze/non-CPython implementations don't do __file__
- return {} # not always correct
+ return {} # not always correct
# versionfile_source is the relative path from the top of the source tree
# (where the .git directory might live) to this file. Invert this to find
@@ -126,7 +128,16 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
for i in range(len(versionfile_source.split("/"))):
root = os.path.dirname(root)
else:
- root = os.path.dirname(here)
+ root = os.path.dirname(
+ os.path.join('..', here))
+
+ ######################################################
+ # XXX patch for our specific configuration with
+ # the three projects leap.soledad.{common, client, server}
+ # inside the same repo.
+ ######################################################
+ root = os.path.dirname(os.path.join('..', root))
+
if not os.path.exists(os.path.join(root, ".git")):
if verbose:
print("no .git in %s" % root)
@@ -141,7 +152,8 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
return {}
if not stdout.startswith(tag_prefix):
if verbose:
- print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix))
+ print("tag '%s' doesn't start with prefix '%s'" %
+ (stdout, tag_prefix))
return {}
tag = stdout[len(tag_prefix):]
stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root)
@@ -153,7 +165,8 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
return {"version": tag, "full": full}
-def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False):
+def versions_from_parentdir(parentdir_prefix, versionfile_source,
+ verbose=False):
if IN_LONG_VERSION_PY:
# We're running from _version.py. If it's from a source tree
# (execute-in-place), we can work upwards to find the root of the
@@ -163,7 +176,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)
here = os.path.abspath(__file__)
except NameError:
# py2exe/bbfreeze/non-CPython don't have __file__
- return {} # without __file__, we have no hope
+ return {} # without __file__, we have no hope
# versionfile_source is the relative path from the top of the source
# tree to _version.py. Invert this to find the root from __file__.
root = here
@@ -180,7 +193,8 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)
dirname = os.path.basename(root)
if not dirname.startswith(parentdir_prefix):
if verbose:
- print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" %
+ print("guessing rootdir is '%s', but '%s' doesn't start "
+ "with prefix '%s'" %
(root, dirname, parentdir_prefix))
return None
return {"version": dirname[len(parentdir_prefix):], "full": ""}
@@ -189,8 +203,9 @@ tag_prefix = ""
parentdir_prefix = "leap.soledad.client-"
versionfile_source = "src/leap/soledad/client/_version.py"
+
def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
- variables = { "refnames": git_refnames, "full": git_full }
+ variables = {"refnames": git_refnames, "full": git_full}
ver = versions_from_expanded_variables(variables, tag_prefix, verbose)
if not ver:
ver = versions_from_vcs(tag_prefix, versionfile_source, verbose)
@@ -200,4 +215,3 @@ def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
if not ver:
ver = default
return ver
-
diff --git a/client/src/leap/soledad/client/auth.py b/client/src/leap/soledad/client/auth.py
index d85e3ba6..72ab0008 100644
--- a/client/src/leap/soledad/client/auth.py
+++ b/client/src/leap/soledad/client/auth.py
@@ -68,4 +68,3 @@ class TokenBasedAuth(object):
else:
raise errors.UnknownAuthMethod(
'Wrong credentials: %s' % self._creds)
-
diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py
index 894c6f97..3aea340d 100644
--- a/client/src/leap/soledad/client/sqlcipher.py
+++ b/client/src/leap/soledad/client/sqlcipher.py
@@ -43,16 +43,19 @@ So, as the statements above were introduced for backwards compatibility with
SLCipher 1.1 databases, we do not implement them as all SQLCipher databases
handled by Soledad should be created by SQLCipher >= 2.0.
"""
+import httplib
import logging
import os
-import time
import string
import threading
+import time
-
-from u1db.backends import sqlite_backend
from pysqlcipher import dbapi2
+from u1db.backends import sqlite_backend
+from u1db.sync import Synchronizer
from u1db import errors as u1db_errors
+
+from leap.soledad.client.target import SoledadSyncTarget
from leap.soledad.common.document import SoledadDocument
logger = logging.getLogger(__name__)
@@ -88,10 +91,10 @@ def open(path, password, create=True, document_factory=None, crypto=None,
database does not already exist.
:param path: The filesystem path for the database to open.
- :param type: str
+ :type path: str
:param create: True/False, should the database be created if it doesn't
already exist?
- :param type: bool
+ :type create: bool
:param document_factory: A function that will be called with the same
parameters as Document.__init__.
:type document_factory: callable
@@ -144,25 +147,30 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
_index_storage_value = 'expand referenced encrypted'
k_lock = threading.Lock()
+ create_doc_lock = threading.Lock()
+ update_indexes_lock = threading.Lock()
+ _syncer = None
def __init__(self, sqlcipher_file, password, document_factory=None,
crypto=None, raw_key=False, cipher='aes-256-cbc',
kdf_iter=4000, cipher_page_size=1024):
"""
- Create a new sqlcipher file.
+ Connect to an existing SQLCipher database, creating a new sqlcipher
+ database file if needed.
:param sqlcipher_file: The path for the SQLCipher file.
:type sqlcipher_file: str
:param password: The password that protects the SQLCipher db.
:type password: str
:param document_factory: A function that will be called with the same
- parameters as Document.__init__.
+ parameters as Document.__init__.
:type document_factory: callable
:param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
- document contents when syncing.
+ document contents when syncing.
:type crypto: soledad.crypto.SoledadCrypto
- :param raw_key: Whether C{password} is a raw 64-char hex string or a
- passphrase that should be hashed to obtain the encyrption key.
+ :param raw_key: Whether password is a raw 64-char hex string or a
+ passphrase that should be hashed to obtain the
+ encryption key.
:type raw_key: bool
:param cipher: The cipher and mode to use.
:type cipher: str
@@ -186,6 +194,13 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
self._set_crypto_pragmas(
self._db_handle, password, raw_key, cipher, kdf_iter,
cipher_page_size)
+ if os.environ.get('LEAP_SQLITE_NOSYNC'):
+ self._pragma_synchronous_off(self._db_handle)
+ else:
+ self._pragma_synchronous_normal(self._db_handle)
+ if os.environ.get('LEAP_SQLITE_MEMSTORE'):
+ self._pragma_mem_temp_store(self._db_handle)
+ self._pragma_write_ahead_logging(self._db_handle)
self._real_replica_uid = None
self._ensure_schema()
self._crypto = crypto
@@ -336,13 +351,46 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
:return: The local generation before the synchronisation was performed.
:rtype: int
"""
- from u1db.sync import Synchronizer
- from leap.soledad.client.target import SoledadSyncTarget
- return Synchronizer(
- self,
- SoledadSyncTarget(url,
- creds=creds,
- crypto=self._crypto)).sync(autocreate=autocreate)
+ if not self.syncer:
+ self._create_syncer(url, creds=creds)
+
+ try:
+ res = self.syncer.sync(autocreate=autocreate)
+ except httplib.CannotSendRequest:
+ # raised when you reuse an httplib.HTTP object for a new request
+ # while you haven't called its getresponse()
+ # this catch works for the current connclass used
+ # by our HTTPClientBase, since it uses httplib.
+ # we will have to replace it if it changes.
+ logger.info("Replacing connection and trying again...")
+ self._syncer = None
+ self._create_syncer(url, creds=creds)
+ res = self.syncer.sync(autocreate=autocreate)
+ return res
+
+ @property
+ def syncer(self):
+ """
+ Accessor for synchronizer.
+ """
+ return self._syncer
+
+ def _create_syncer(self, url, creds=None):
+ """
+ Creates a synchronizer
+
+ :param url: The url of the target replica to sync with.
+ :type url: str
+ :param creds: optional dictionary giving credentials
+ to authorize the operation with the server.
+ :type creds: dict
+ """
+ if self._syncer is None:
+ self._syncer = Synchronizer(
+ self,
+ SoledadSyncTarget(url,
+ creds=creds,
+ crypto=self._crypto))
def _extra_schema_init(self, c):
"""
@@ -359,6 +407,22 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
'ALTER TABLE document '
'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE')
+ def create_doc(self, content, doc_id=None):
+ """
+ Create a new document in the local encrypted database.
+
+ :param content: the contents of the new document
+ :type content: dict
+ :param doc_id: an optional identifier specifying the document id
+ :type doc_id: str
+
+ :return: the new document
+ :rtype: SoledadDocument
+ """
+ with self.create_doc_lock:
+ return sqlite_backend.SQLitePartialExpandDatabase.create_doc(
+ self, content, doc_id=doc_id)
+
def _put_and_update_indexes(self, old_doc, doc):
"""
Update a document and all indexes related to it.
@@ -368,12 +432,13 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
:param doc: The new version of the document.
:type doc: u1db.Document
"""
- sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes(
- self, old_doc, doc)
- c = self._db_handle.cursor()
- c.execute('UPDATE document SET syncable=? '
- 'WHERE doc_id=?',
- (doc.syncable, doc.doc_id))
+ with self.update_indexes_lock:
+ sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes(
+ self, old_doc, doc)
+ c = self._db_handle.cursor()
+ c.execute('UPDATE document SET syncable=? '
+ 'WHERE doc_id=?',
+ (doc.syncable, doc.doc_id))
def _get_doc(self, doc_id, check_for_conflicts=False):
"""
@@ -697,6 +762,115 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
# XXX change passphrase param!
db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % passphrase)
+ @classmethod
+ def _pragma_synchronous_off(cls, db_handle):
+ """
+ Change the setting of the "synchronous" flag to OFF.
+ """
+ logger.debug("SQLCIPHER: SETTING SYNCHRONOUS OFF")
+ db_handle.cursor().execute('PRAGMA synchronous=OFF')
+
+ @classmethod
+ def _pragma_synchronous_normal(cls, db_handle):
+ """
+ Change the setting of the "synchronous" flag to NORMAL.
+ """
+ logger.debug("SQLCIPHER: SETTING SYNCHRONOUS NORMAL")
+ db_handle.cursor().execute('PRAGMA synchronous=NORMAL')
+
+ @classmethod
+ def _pragma_mem_temp_store(cls, db_handle):
+ """
+ Use an in-memory store for temporary tables.
+ """
+ logger.debug("SQLCIPHER: SETTING TEMP_STORE MEMORY")
+ db_handle.cursor().execute('PRAGMA temp_store=MEMORY')
+
+ @classmethod
+ def _pragma_write_ahead_logging(cls, db_handle):
+ """
+ Enable write-ahead logging, and set the autocheckpoint to 50 pages.
+
+ Setting the autocheckpoint to a small value, we make the reads not
+ suffer too much performance degradation.
+
+ From the sqlite docs:
+
+ "There is a tradeoff between average read performance and average write
+ performance. To maximize the read performance, one wants to keep the
+ WAL as small as possible and hence run checkpoints frequently, perhaps
+ as often as every COMMIT. To maximize write performance, one wants to
+ amortize the cost of each checkpoint over as many writes as possible,
+ meaning that one wants to run checkpoints infrequently and let the WAL
+ grow as large as possible before each checkpoint. The decision of how
+ often to run checkpoints may therefore vary from one application to
+ another depending on the relative read and write performance
+ requirements of the application. The default strategy is to run a
+ checkpoint once the WAL reaches 1000 pages"
+ """
+ logger.debug("SQLCIPHER: SETTING WRITE-AHEAD LOGGING")
+ db_handle.cursor().execute('PRAGMA journal_mode=WAL')
+ # The optimum value can still use a little bit of tuning, but we favor
+ # small sizes of the WAL file to get fast reads, since we assume that
+ # the writes will be quick enough to not block too much.
+
+ # TODO
+ # As a further improvement, we might want to set autocheckpoint to 0
+ # here and do the checkpoints manually in a separate thread, to avoid
+ # any blocks in the main thread (we should run a loopingcall from here)
+ db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50')
+
+ # Extra query methods: extensions to the base sqlite implementation.
+
+ def get_count_from_index(self, index_name, *key_values):
+ """
+ Returns the count for a given combination of index_name
+ and key values.
+
+ Extension method made from similar methods in u1db version 13.09
+
+ :param index_name: The index to query
+ :type index_name: str
+ :param key_values: values to match. eg, if you have
+ an index with 3 fields then you would have:
+ get_count_from_index(index_name, val1, val2, val3)
+ :type key_values: tuple
+ :return: count.
+ :rtype: int
+ """
+ c = self._db_handle.cursor()
+ definition = self._get_index_definition(index_name)
+
+ if len(key_values) != len(definition):
+ raise u1db_errors.InvalidValueForIndex()
+ tables = ["document_fields d%d" % i for i in range(len(definition))]
+ novalue_where = ["d.doc_id = d%d.doc_id"
+ " AND d%d.field_name = ?"
+ % (i, i) for i in range(len(definition))]
+ exact_where = [novalue_where[i]
+ + (" AND d%d.value = ?" % (i,))
+ for i in range(len(definition))]
+ args = []
+ where = []
+ for idx, (field, value) in enumerate(zip(definition, key_values)):
+ args.append(field)
+ where.append(exact_where[idx])
+ args.append(value)
+
+ tables = ["document_fields d%d" % i for i in range(len(definition))]
+ statement = (
+ "SELECT COUNT(*) FROM document d, %s WHERE %s " % (
+ ', '.join(tables),
+ ' AND '.join(where),
+ ))
+ try:
+ c.execute(statement, tuple(args))
+ except dbapi2.OperationalError, e:
+ raise dbapi2.OperationalError(
+ str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args))
+ res = c.fetchall()
+ return res[0][0]
+
def __del__(self):
"""
Closes db_handle upon object destruction.
diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py
index d8899a97..3b3d6870 100644
--- a/client/src/leap/soledad/client/target.py
+++ b/client/src/leap/soledad/client/target.py
@@ -14,22 +14,26 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
"""
A U1DB backend for encrypting data before sending to server and decrypting
after receiving.
"""
-
-import simplejson as json
+import binascii
+import cStringIO
+import gzip
import hashlib
import hmac
-import binascii
+import logging
+import urllib
+import simplejson as json
+from time import sleep
-from u1db.remote import utils
+from u1db.remote import utils, http_errors
from u1db.errors import BrokenSyncStream
+from u1db import errors
from u1db.remote.http_target import HTTPSyncTarget
+from u1db.remote.http_client import _encode_query_parameter
from leap.soledad.common import soledad_assert
@@ -53,11 +57,13 @@ from leap.soledad.client.crypto import (
UnknownEncryptionMethod,
)
+logger = logging.getLogger(__name__)
#
# Exceptions
#
+
class DocumentNotEncrypted(Exception):
"""
Raised for failures in document encryption.
@@ -222,6 +228,24 @@ def decrypt_doc(crypto, doc):
return plainjson
+def _gunzip(data):
+ """
+ Uncompress data that is gzipped.
+
+ :param data: gzipped data
+ :type data: basestring
+ """
+ buffer = cStringIO.StringIO()
+ buffer.write(data)
+ buffer.seek(0)
+ try:
+ data = gzip.GzipFile(mode='r', fileobj=buffer).read()
+ except Exception:
+ logger.warning("Error while decrypting gzipped data")
+ buffer.close()
+ return data
+
+
#
# SoledadSyncTarget
#
@@ -353,6 +377,82 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth):
raise BrokenSyncStream
return res
+ def _request(self, method, url_parts, params=None, body=None,
+ content_type=None):
+ """
+ Overloaded method. See u1db docs.
+ Patched for adding gzip encoding.
+ """
+
+ self._ensure_connection()
+ unquoted_url = url_query = self._url.path
+ if url_parts:
+ if not url_query.endswith('/'):
+ url_query += '/'
+ unquoted_url = url_query
+ url_query += '/'.join(urllib.quote(part, safe='')
+ for part in url_parts)
+ # oauth performs its own quoting
+ unquoted_url += '/'.join(url_parts)
+ encoded_params = {}
+ if params:
+ for key, value in params.items():
+ key = unicode(key).encode('utf-8')
+ encoded_params[key] = _encode_query_parameter(value)
+ url_query += ('?' + urllib.urlencode(encoded_params))
+ if body is not None and not isinstance(body, basestring):
+ body = json.dumps(body)
+ content_type = 'application/json'
+ headers = {}
+ if content_type:
+ headers['content-type'] = content_type
+
+ # Patched: We would like to receive gzip pretty please
+ # ----------------------------------------------------
+ headers['accept-encoding'] = "gzip"
+ # ----------------------------------------------------
+
+ headers.update(
+ self._sign_request(method, unquoted_url, encoded_params))
+
+ for delay in self._delays:
+ try:
+ self._conn.request(method, url_query, body, headers)
+ return self._response()
+ except errors.Unavailable, e:
+ sleep(delay)
+ raise e
+
+ def _response(self):
+ """
+ Overloaded method, see u1db docs.
+ We patched it for decrypting gzip content.
+ """
+ resp = self._conn.getresponse()
+ body = resp.read()
+ headers = dict(resp.getheaders())
+
+ # Patched: We would like to decode gzip
+ # ----------------------------------------------------
+ encoding = headers.get('content-encoding', '')
+ if "gzip" in encoding:
+ body = _gunzip(body)
+ # ----------------------------------------------------
+
+ if resp.status in (200, 201):
+ return body, headers
+ elif resp.status in http_errors.ERROR_STATUSES:
+ try:
+ respdic = json.loads(body)
+ except ValueError:
+ pass
+ else:
+ self._error(respdic)
+ # special case
+ if resp.status == 503:
+ raise errors.Unavailable(body, headers)
+ raise errors.HTTPError(resp.status, body, headers)
+
def sync_exchange(self, docs_by_generations, source_replica_uid,
last_known_generation, last_known_trans_id,
return_doc_cb, ensure_callback=None):
@@ -364,8 +464,9 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth):
syncing.
:param docs_by_generations: A list of (doc_id, generation, trans_id)
- of local documents that were changed since the last local
- generation the remote replica knows about.
+ of local documents that were changed since
+ the last local generation the remote
+ replica knows about.
:type docs_by_generations: list of tuples
:param source_replica_uid: The uid of the source replica.
:type source_replica_uid: str
@@ -391,6 +492,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth):
self._conn.putheader('content-type', 'application/x-u1db-sync-stream')
for header_name, header_value in self._sign_request('POST', url, {}):
self._conn.putheader(header_name, header_value)
+ self._conn.putheader('accept-encoding', 'gzip')
entries = ['[']
size = 1
@@ -428,7 +530,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth):
for entry in entries:
self._conn.send(entry)
entries = None
- data, _ = self._response()
+ data, headers = self._response()
+
res = self._parse_sync_stream(data, return_doc_cb, ensure_callback)
data = None
return res['new_generation'], res['new_transaction_id']