diff options
author | Kali Kaneko <kali@leap.se> | 2015-08-28 15:41:49 -0400 |
---|---|---|
committer | Kali Kaneko <kali@leap.se> | 2015-08-28 15:41:49 -0400 |
commit | 19d96f97d752ef481eec5f44b88aee0a6bbb8dcb (patch) | |
tree | 29ff012d268389043b10fb806a4384c67d92c982 /client | |
parent | da1a936af43962f4531eda51fa5834391f6745a1 (diff) | |
parent | 20966f78951d734f100ed6a6a6feedd15dbe79e7 (diff) |
Merge tag '0.7.2' into debian/experimental
Tag leap.soledad version 0.7.2
Diffstat (limited to 'client')
38 files changed, 6025 insertions, 4654 deletions
diff --git a/client/AUTHORS b/client/AUTHORS new file mode 100644 index 00000000..934cf129 --- /dev/null +++ b/client/AUTHORS @@ -0,0 +1,10 @@ +drebs <drebs@leap.se> +Tomás Touceda <chiiph@leap.se> +Kali Kaneko <kali@leap.se> +Ivan Alejandro <ivanalejandro0@gmail.com> +Micah Anderson <micah@riseup.net> +Victor Shyba <victor.shyba@gmail.com> +Bruno Wagner <bwgpro@gmail.com> +Ruben Pollan <meskio@sindominio.net> +Duda Dornelles <ddornell@thoughtworks.com> +antialias <antialias@leap.se> diff --git a/client/changes/VERSION_COMPAT b/client/changes/VERSION_COMPAT index e69de29b..9323d445 100644 --- a/client/changes/VERSION_COMPAT +++ b/client/changes/VERSION_COMPAT @@ -0,0 +1,11 @@ +################################################# +# This file keeps track of the recent changes +# introduced in internal leap dependencies. +# Add your changes here so we can properly update +# requirements.pip during the release process. +# (leave header when resetting) +################################################# +# +# BEGIN DEPENDENCY LIST ------------------------- +# leap.foo.bar>=x.y.z +# diff --git a/client/pkg/generate_wheels.sh b/client/pkg/generate_wheels.sh new file mode 100755 index 00000000..e29c327e --- /dev/null +++ b/client/pkg/generate_wheels.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# Generate wheels for dependencies +# Use at your own risk. + +if [ "$WHEELHOUSE" = "" ]; then + WHEELHOUSE=$HOME/wheelhouse +fi + +pip wheel --wheel-dir $WHEELHOUSE pip +pip wheel --wheel-dir $WHEELHOUSE --allow-external u1db --allow-unverified u1db --allow-external dirspec --allow-unverified dirspec -r pkg/requirements.pip +if [ -f pkg/requirements-testing.pip ]; then + pip wheel --wheel-dir $WHEELHOUSE -r pkg/requirements-testing.pip +fi diff --git a/client/pkg/pip_install_requirements.sh b/client/pkg/pip_install_requirements.sh new file mode 100755 index 00000000..d0479365 --- /dev/null +++ b/client/pkg/pip_install_requirements.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Update pip and install LEAP base/testing requirements. +# For convenience, $insecure_packages are allowed with insecure flags enabled. +# Use at your own risk. +# See $usage for help + +insecure_packages="u1db dirspec" +leap_wheelhouse=https://lizard.leap.se/wheels + +show_help() { + usage="Usage: $0 [--testing] [--use-leap-wheels]\n --testing\t\tInstall dependencies from requirements-testing.pip\n +\t\t\tOtherwise, it will install requirements.pip\n +--use-leap-wheels\tUse wheels from leap.se" + echo -e $usage + + exit 1 +} + +process_arguments() { + testing=false + while [ "$#" -gt 0 ]; do + # From http://stackoverflow.com/a/31443098 + case "$1" in + --help) show_help;; + --testing) testing=true; shift 1;; + --use-leap-wheels) use_leap_wheels=true; shift 1;; + + -h) show_help;; + -*) echo "unknown option: $1" >&2; exit 1;; + esac + done +} + +return_wheelhouse() { + if $use_leap_wheels ; then + WHEELHOUSE=$leap_wheelhouse + elif [ "$WHEELHOUSE" = "" ]; then + WHEELHOUSE=$HOME/wheelhouse + fi + + # Tested with bash and zsh + if [[ $WHEELHOUSE != http* && ! -d "$WHEELHOUSE" ]]; then + mkdir $WHEELHOUSE + fi + + echo "$WHEELHOUSE" +} + +return_install_options() { + wheelhouse=`return_wheelhouse` + install_options="-U --find-links=$wheelhouse" + if $use_leap_wheels ; then + install_options="$install_options --trusted-host lizard.leap.se" + fi + + echo $install_options +} + +return_insecure_flags() { + for insecure_package in $insecure_packages; do + flags="$flags --allow-external $insecure_package --allow-unverified $insecure_package" + done + + echo $flags +} + +return_packages() { + if $testing ; then + packages="-r pkg/requirements-testing.pip" + else + packages="-r pkg/requirements.pip" + fi + + echo $packages +} + +process_arguments $@ +install_options=`return_install_options` +insecure_flags=`return_insecure_flags` +packages=`return_packages` + +pip install -U wheel +pip install $install_options pip +pip install $install_options $insecure_flags $packages diff --git a/client/pkg/requirements-latest.pip b/client/pkg/requirements-latest.pip new file mode 100644 index 00000000..a629aa57 --- /dev/null +++ b/client/pkg/requirements-latest.pip @@ -0,0 +1,8 @@ +--index-url https://pypi.python.org/simple/ + +--allow-external u1db --allow-unverified u1db +--allow-external dirspec --allow-unverified dirspec + +-e 'git+https://github.com/pixelated-project/leap_pycommon.git@develop#egg=leap.common' +-e '../common' +-e . diff --git a/client/pkg/requirements-leap.pip b/client/pkg/requirements-leap.pip new file mode 100644 index 00000000..c5fbcd5f --- /dev/null +++ b/client/pkg/requirements-leap.pip @@ -0,0 +1,2 @@ +leap.common>=0.4.1 +leap.soledad.common>=0.6.5 diff --git a/client/pkg/requirements-testing.pip b/client/pkg/requirements-testing.pip new file mode 100644 index 00000000..94ab6e8e --- /dev/null +++ b/client/pkg/requirements-testing.pip @@ -0,0 +1 @@ +pep8 diff --git a/client/pkg/requirements.pip b/client/pkg/requirements.pip index 90277fc1..4569b1c4 100644 --- a/client/pkg/requirements.pip +++ b/client/pkg/requirements.pip @@ -1,5 +1,4 @@ -pysqlcipher -simplejson +pysqlcipher>2.6.3 u1db scrypt pycryptopp @@ -8,17 +7,5 @@ pycryptopp # repos instead. chardet zope.proxy - -# -# leap deps -# - -leap.soledad.common>=0.6.0 - -# -# XXX things to fix yet: -# - -# this is not strictly needed by us, but we need it -# until u1db adds it to its release as a dep. +twisted oauth diff --git a/client/pkg/utils.py b/client/pkg/utils.py index deace14b..e4253549 100644 --- a/client/pkg/utils.py +++ b/client/pkg/utils.py @@ -14,16 +14,30 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. - """ Utils to help in the setup process """ - import os import re import sys +def is_develop_mode(): + """ + Returns True if we're calling the setup script using the argument for + setuptools development mode. + + This avoids messing up with dependency pinning and order, the + responsibility of installing the leap dependencies is left to the + developer. + """ + args = sys.argv + devflags = "setup.py", "develop" + if (args[0], args[1]) == devflags: + return True + return False + + def get_reqs_from_files(reqfiles): """ Returns the contents of the top requirement file listed as a @@ -58,9 +72,9 @@ def parse_requirements(reqfiles=['requirements.txt', if re.match(r'\s*-e\s+', line): pass # do not try to do anything with externals on vcs - #requirements.append(re.sub(r'\s*-e\s+.*#egg=(.*)$', r'\1', - #line)) - # http://foo.bar/baz/foobar/zipball/master#egg=foobar + # requirements.append(re.sub(r'\s*-e\s+.*#egg=(.*)$', r'\1', + # line)) + # http://foo.bar/baz/foobar/zipball/master#egg=foobar elif re.match(r'\s*https?:', line): requirements.append(re.sub(r'\s*https?:.*#egg=(.*)$', r'\1', line)) diff --git a/client/setup.cfg b/client/setup.cfg new file mode 100644 index 00000000..6b530888 --- /dev/null +++ b/client/setup.cfg @@ -0,0 +1,7 @@ +[pep8] +exclude = versioneer.py,_version.py,ddocs.py,*.egg,build +ignore = E731 + +[flake8] +exclude = versioneer.py,_version.py,ddocs.py,*.egg,build +ignore = E731 diff --git a/client/setup.py b/client/setup.py index c3e4936f..dc555641 100644 --- a/client/setup.py +++ b/client/setup.py @@ -20,6 +20,9 @@ setup file for leap.soledad.client import re from setuptools import setup from setuptools import find_packages +from setuptools import Command + +from pkg import utils import versioneer versioneer.versionfile_source = 'src/leap/soledad/client/_version.py' @@ -27,8 +30,6 @@ versioneer.versionfile_build = 'leap/soledad/client/_version.py' versioneer.tag_prefix = '' # tags are like 1.2.0 versioneer.parentdir_prefix = 'leap.soledad.client-' -from pkg import utils - trove_classifiers = ( "Development Status :: 3 - Alpha", @@ -60,9 +61,6 @@ if len(_version_short) > 0: cmdclass = versioneer.get_cmdclass() -from setuptools import Command - - class freeze_debianver(Command): """ Freezes the version in a debian branch. @@ -106,8 +104,25 @@ def get_versions(default={}, verbose=False): cmdclass["freeze_debianver"] = freeze_debianver + # XXX add ref to docs +requirements = utils.parse_requirements() + +if utils.is_develop_mode(): + print + print ("[WARNING] Skipping leap-specific dependencies " + "because development mode is detected.") + print ("[WARNING] You can install " + "the latest published versions with " + "'pip install -r pkg/requirements-leap.pip'") + print ("[WARNING] Or you can instead do 'python setup.py develop' " + "from the parent folder of each one of them.") + print +else: + requirements += utils.parse_requirements( + reqfiles=["pkg/requirements-leap.pip"]) + setup( name='leap.soledad.client', version=VERSION, @@ -130,6 +145,6 @@ setup( namespace_packages=["leap", "leap.soledad"], packages=find_packages('src'), package_dir={'': 'src'}, - install_requires=utils.parse_requirements(), + install_requires=requirements, extras_require={'signaling': ['leap.common>=0.3.0']}, ) diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 07255406..245a8971 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -16,1351 +16,12 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. """ Soledad - Synchronization Of Locally Encrypted Data Among Devices. - -Soledad is the part of LEAP that manages storage and synchronization of -application data. It is built on top of U1DB reference Python API and -implements (1) a SQLCipher backend for local storage in the client, (2) a -SyncTarget that encrypts data before syncing, and (3) a CouchDB backend for -remote storage in the server side. -""" -import binascii -import errno -import httplib -import logging -import os -import socket -import ssl -import urlparse -import hmac - -from hashlib import sha256 - -try: - import cchardet as chardet -except ImportError: - import chardet - -from u1db.remote import http_client -from u1db.remote.ssl_match_hostname import match_hostname - -import scrypt -import simplejson as json - -from leap.common.config import get_path_prefix -from leap.soledad.common import ( - SHARED_DB_NAME, - soledad_assert, - soledad_assert_type -) -from leap.soledad.common.errors import ( - InvalidTokenError, - NotLockedError, - AlreadyLockedError, - LockTimedOutError, -) -from leap.soledad.common.crypto import ( - MacMethods, - UnknownMacMethod, - WrongMac, - MAC_KEY, - MAC_METHOD_KEY, -) -from leap.soledad.client.events import ( - SOLEDAD_CREATING_KEYS, - SOLEDAD_DONE_CREATING_KEYS, - SOLEDAD_DOWNLOADING_KEYS, - SOLEDAD_DONE_DOWNLOADING_KEYS, - SOLEDAD_UPLOADING_KEYS, - SOLEDAD_DONE_UPLOADING_KEYS, - SOLEDAD_NEW_DATA_TO_SYNC, - SOLEDAD_DONE_DATA_SYNC, - signal, -) -from leap.soledad.common.document import SoledadDocument -from leap.soledad.client.crypto import SoledadCrypto -from leap.soledad.client.shared_db import SoledadSharedDatabase -from leap.soledad.client.sqlcipher import open as sqlcipher_open -from leap.soledad.client.sqlcipher import SQLCipherDatabase -from leap.soledad.client.target import SoledadSyncTarget - - -logger = logging.getLogger(name=__name__) - - -# -# Constants -# - -SOLEDAD_CERT = None """ -Path to the certificate file used to certify the SSL connection between -Soledad client and server. -""" - - -# -# Soledad: local encrypted storage and remote encrypted sync. -# - -class NoStorageSecret(Exception): - """ - Raised when trying to use a storage secret but none is available. - """ - pass - - -class PassphraseTooShort(Exception): - """ - Raised when trying to change the passphrase but the provided passphrase is - too short. - """ - - -class BootstrapSequenceError(Exception): - """ - Raised when an attempt to generate a secret and store it in a recovery - documents on server failed. - """ - - -class Soledad(object): - """ - Soledad provides encrypted data storage and sync. - - A Soledad instance is used to store and retrieve data in a local encrypted - database and synchronize this database with Soledad server. - - This class is also responsible for bootstrapping users' account by - creating cryptographic secrets and/or storing/fetching them on Soledad - server. - - Soledad uses C{leap.common.events} to signal events. The possible events - to be signaled are: - - SOLEDAD_CREATING_KEYS: emitted during bootstrap sequence when key - generation starts. - SOLEDAD_DONE_CREATING_KEYS: emitted during bootstrap sequence when key - generation finishes. - SOLEDAD_UPLOADING_KEYS: emitted during bootstrap sequence when soledad - starts sending keys to server. - SOLEDAD_DONE_UPLOADING_KEYS: emitted during bootstrap sequence when - soledad finishes sending keys to server. - SOLEDAD_DOWNLOADING_KEYS: emitted during bootstrap sequence when - soledad starts to retrieve keys from server. - SOLEDAD_DONE_DOWNLOADING_KEYS: emitted during bootstrap sequence when - soledad finishes downloading keys from server. - SOLEDAD_NEW_DATA_TO_SYNC: emitted upon call to C{need_sync()} when - there's indeed new data to be synchronized between local database - replica and server's replica. - SOLEDAD_DONE_DATA_SYNC: emitted inside C{sync()} method when it has - finished synchronizing with remote replica. - """ - - LOCAL_DATABASE_FILE_NAME = 'soledad.u1db' - """ - The name of the local SQLCipher U1DB database file. - """ - - STORAGE_SECRETS_FILE_NAME = "soledad.json" - """ - The name of the file where the storage secrets will be stored. - """ - - GENERATED_SECRET_LENGTH = 1024 - """ - The length of the generated secret used to derive keys for symmetric - encryption for local and remote storage. - """ - - LOCAL_STORAGE_SECRET_LENGTH = 512 - """ - The length of the secret used to derive a passphrase for the SQLCipher - database. - """ - - REMOTE_STORAGE_SECRET_LENGTH = \ - GENERATED_SECRET_LENGTH - LOCAL_STORAGE_SECRET_LENGTH - """ - The length of the secret used to derive an encryption key and a MAC auth - key for remote storage. - """ - - SALT_LENGTH = 64 - """ - The length of the salt used to derive the key for the storage secret - encryption. - """ - - MINIMUM_PASSPHRASE_LENGTH = 6 - """ - The minimum length for a passphrase. The passphrase length is only checked - when the user changes her passphrase, not when she instantiates Soledad. - """ - - IV_SEPARATOR = ":" - """ - A separator used for storing the encryption initial value prepended to the - ciphertext. - """ - - UUID_KEY = 'uuid' - STORAGE_SECRETS_KEY = 'storage_secrets' - SECRET_KEY = 'secret' - CIPHER_KEY = 'cipher' - LENGTH_KEY = 'length' - KDF_KEY = 'kdf' - KDF_SALT_KEY = 'kdf_salt' - KDF_LENGTH_KEY = 'kdf_length' - KDF_SCRYPT = 'scrypt' - CIPHER_AES256 = 'aes256' - """ - Keys used to access storage secrets in recovery documents. - """ - - DEFAULT_PREFIX = os.path.join(get_path_prefix(), 'leap', 'soledad') - """ - Prefix for default values for path. - """ - - def __init__(self, uuid, passphrase, secrets_path, local_db_path, - server_url, cert_file, - auth_token=None, secret_id=None, defer_encryption=True): - """ - Initialize configuration, cryptographic keys and dbs. - - :param uuid: User's uuid. - :type uuid: str - - :param passphrase: The passphrase for locking and unlocking encryption - secrets for local and remote storage. - :type passphrase: unicode - - :param secrets_path: Path for storing encrypted key used for - symmetric encryption. - :type secrets_path: str - - :param local_db_path: Path for local encrypted storage db. - :type local_db_path: str - - :param server_url: URL for Soledad server. This is used either to sync - with the user's remote db and to interact with the - shared recovery database. - :type server_url: str - - :param cert_file: Path to the certificate of the ca used - to validate the SSL certificate used by the remote - soledad server. - :type cert_file: str - - :param auth_token: Authorization token for accessing remote databases. - :type auth_token: str - - :param secret_id: The id of the storage secret to be used. - :type secret_id: str - - :param defer_encryption: Whether to defer encryption/decryption of - documents, or do it inline while syncing. - :type defer_encryption: bool - - :raise BootstrapSequenceError: Raised when the secret generation and - storage on server sequence has failed - for some reason. - """ - # get config params - self._uuid = uuid - soledad_assert_type(passphrase, unicode) - self._passphrase = passphrase - # init crypto variables - self._secrets = {} - self._secret_id = secret_id - self._defer_encryption = defer_encryption - - self._init_config(secrets_path, local_db_path, server_url) - - self._set_token(auth_token) - self._shared_db_instance = None - # configure SSL certificate - global SOLEDAD_CERT - SOLEDAD_CERT = cert_file - # initiate bootstrap sequence - self._bootstrap() # might raise BootstrapSequenceError() - - def _init_config(self, secrets_path, local_db_path, server_url): - """ - Initialize configuration using default values for missing params. - """ - # initialize secrets_path - self._secrets_path = secrets_path - if self._secrets_path is None: - self._secrets_path = os.path.join( - self.DEFAULT_PREFIX, self.STORAGE_SECRETS_FILE_NAME) - # initialize local_db_path - self._local_db_path = local_db_path - if self._local_db_path is None: - self._local_db_path = os.path.join( - self.DEFAULT_PREFIX, self.LOCAL_DATABASE_FILE_NAME) - # initialize server_url - self._server_url = server_url - soledad_assert( - self._server_url is not None, - 'Missing URL for Soledad server.') - - # - # initialization/destruction methods - # - - def _get_or_gen_crypto_secrets(self): - """ - Retrieves or generates the crypto secrets. - - Might raise BootstrapSequenceError - """ - doc = self._get_secrets_from_shared_db() - - if doc: - logger.info( - 'Found cryptographic secrets in shared recovery ' - 'database.') - _, mac = self.import_recovery_document(doc.content) - if mac is False: - self.put_secrets_in_shared_db() - self._store_secrets() # save new secrets in local file - if self._secret_id is None: - self._set_secret_id(self._secrets.items()[0][0]) - else: - # STAGE 3 - there are no secrets in server also, so - # generate a secret and store it in remote db. - logger.info( - 'No cryptographic secrets found, creating new ' - ' secrets...') - self._set_secret_id(self._gen_secret()) - try: - self._put_secrets_in_shared_db() - except Exception as ex: - # storing generated secret in shared db failed for - # some reason, so we erase the generated secret and - # raise. - try: - os.unlink(self._secrets_path) - except OSError as e: - if e.errno != errno.ENOENT: # no such file or directory - logger.exception(e) - logger.exception(ex) - raise BootstrapSequenceError( - 'Could not store generated secret in the shared ' - 'database, bailing out...') - - def _bootstrap(self): - """ - Bootstrap local Soledad instance. - - Soledad Client bootstrap is the following sequence of stages: - - * stage 0 - local environment setup. - - directory initialization. - - crypto submodule initialization - * stage 1 - local secret loading: - - if secrets exist locally, load them. - * stage 2 - remote secret loading: - - else, if secrets exist in server, download them. - * stage 3 - secret generation: - - else, generate a new secret and store in server. - * stage 4 - database initialization. - - This method decides which bootstrap stages have already been performed - and performs the missing ones in order. - - :raise BootstrapSequenceError: Raised when the secret generation and - storage on server sequence has failed for some reason. - """ - # STAGE 0 - local environment setup - self._init_dirs() - self._crypto = SoledadCrypto(self) - - secrets_problem = None - - # STAGE 1 - verify if secrets exist locally - if not self._has_secret(): # try to load from local storage. - - # STAGE 2 - there are no secrets in local storage, so try to fetch - # encrypted secrets from server. - logger.info( - 'Trying to fetch cryptographic secrets from shared recovery ' - 'database...') - - # --- start of atomic operation in shared db --- - - # obtain lock on shared db - token = timeout = None - try: - token, timeout = self._shared_db.lock() - except AlreadyLockedError: - raise BootstrapSequenceError('Database is already locked.') - except LockTimedOutError: - raise BootstrapSequenceError('Lock operation timed out.') - - try: - self._get_or_gen_crypto_secrets() - except Exception as e: - secrets_problem = e - - # release the lock on shared db - try: - self._shared_db.unlock(token) - self._shared_db.close() - except NotLockedError: - # for some reason the lock expired. Despite that, secret - # loading or generation/storage must have been executed - # successfully, so we pass. - pass - except InvalidTokenError: - # here, our lock has not only expired but also some other - # client application has obtained a new lock and is currently - # doing its thing in the shared database. Using the same - # reasoning as above, we assume everything went smooth and - # pass. - pass - except Exception as e: - logger.error("Unhandled exception when unlocking shared " - "database.") - logger.exception(e) - - # --- end of atomic operation in shared db --- - - # STAGE 4 - local database initialization - if secrets_problem is None: - self._init_db() - else: - raise secrets_problem - - def _init_dirs(self): - """ - Create work directories. - - :raise OSError: in case file exists and is not a dir. - """ - paths = map( - lambda x: os.path.dirname(x), - [self._local_db_path, self._secrets_path]) - for path in paths: - try: - if not os.path.isdir(path): - logger.info('Creating directory: %s.' % path) - os.makedirs(path) - except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir(path): - pass - else: - raise - - def _init_db(self): - """ - Initialize the U1DB SQLCipher database for local storage. - - Currently, Soledad uses the default SQLCipher cipher, i.e. - 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key and - uses the 'raw PRAGMA key' format to handle the key to SQLCipher. - - The first C{self.REMOTE_STORAGE_SECRET_LENGTH} bytes of the storage - secret are used for remote storage encryption. We use the next - C{self.LOCAL_STORAGE_SECRET} bytes to derive a key for local storage. - From these bytes, the first C{self.SALT_LENGTH} are used as the salt - and the rest as the password for the scrypt hashing. - """ - # salt indexes - salt_start = self.REMOTE_STORAGE_SECRET_LENGTH - salt_end = salt_start + self.SALT_LENGTH - # password indexes - pwd_start = salt_end - pwd_end = salt_start + self.LOCAL_STORAGE_SECRET_LENGTH - # calculate the key for local encryption - secret = self._get_storage_secret() - key = scrypt.hash( - secret[pwd_start:pwd_end], # the password - secret[salt_start:salt_end], # the salt - buflen=32, # we need a key with 256 bits (32 bytes) - ) - - self._db = sqlcipher_open( - self._local_db_path, - binascii.b2a_hex(key), # sqlcipher only accepts the hex version - create=True, - document_factory=SoledadDocument, - crypto=self._crypto, - raw_key=True, - defer_encryption=self._defer_encryption) - - def close(self): - """ - Close underlying U1DB database. - """ - logger.debug("Closing soledad") - if hasattr(self, '_db') and isinstance( - self._db, - SQLCipherDatabase): - self._db.stop_sync() - self._db.close() - - # - # Management of secret for symmetric encryption. - # - - def _get_storage_secret(self): - """ - Return the storage secret. - - Storage secret is encrypted before being stored. This method decrypts - and returns the stored secret. - - :return: The storage secret. - :rtype: str - """ - # calculate the encryption key - key = scrypt.hash( - self._passphrase_as_string(), - # the salt is stored base64 encoded - binascii.a2b_base64( - self._secrets[self._secret_id][self.KDF_SALT_KEY]), - buflen=32, # we need a key with 256 bits (32 bytes). - ) - # recover the initial value and ciphertext - iv, ciphertext = self._secrets[self._secret_id][self.SECRET_KEY].split( - self.IV_SEPARATOR, 1) - ciphertext = binascii.a2b_base64(ciphertext) - return self._crypto.decrypt_sym(ciphertext, key, iv=iv) - - def _set_secret_id(self, secret_id): - """ - Define the id of the storage secret to be used. - - This method will also replace the secret in the crypto object. - - :param secret_id: The id of the storage secret to be used. - :type secret_id: str - """ - self._secret_id = secret_id - - def _load_secrets(self): - """ - Load storage secrets from local file. - """ - # does the file exist in disk? - if not os.path.isfile(self._secrets_path): - raise IOError('File does not exist: %s' % self._secrets_path) - # read storage secrets from file - content = None - with open(self._secrets_path, 'r') as f: - content = json.loads(f.read()) - _, mac = self.import_recovery_document(content) - if mac is False: - self._store_secrets() - self._put_secrets_in_shared_db() - # choose first secret if no secret_id was given - if self._secret_id is None: - self._set_secret_id(self._secrets.items()[0][0]) - - def _has_secret(self): - """ - Return whether there is a storage secret available for use or not. - - :return: Whether there's a storage secret for symmetric encryption. - :rtype: bool - """ - if self._secret_id is None or self._secret_id not in self._secrets: - try: - self._load_secrets() # try to load from disk - except IOError, e: - logger.warning('IOError: %s' % str(e)) - try: - self._get_storage_secret() - return True - except Exception: - return False - - def _gen_secret(self): - """ - Generate a secret for symmetric encryption and store in a local - encrypted file. - - This method emits the following signals: - - * SOLEDAD_CREATING_KEYS - * SOLEDAD_DONE_CREATING_KEYS - - A secret has the following structure: - - { - '<secret_id>': { - 'kdf': 'scrypt', - 'kdf_salt': '<b64 repr of salt>' - 'kdf_length': <key length> - 'cipher': 'aes256', - 'length': <secret length>, - 'secret': '<encrypted b64 repr of storage_secret>', - } - } - - :return: The id of the generated secret. - :rtype: str - """ - signal(SOLEDAD_CREATING_KEYS, self._uuid) - # generate random secret - secret = os.urandom(self.GENERATED_SECRET_LENGTH) - secret_id = sha256(secret).hexdigest() - # generate random salt - salt = os.urandom(self.SALT_LENGTH) - # get a 256-bit key - key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) - iv, ciphertext = self._crypto.encrypt_sym(secret, key) - self._secrets[secret_id] = { - # leap.soledad.crypto submodule uses AES256 for symmetric - # encryption. - self.KDF_KEY: self.KDF_SCRYPT, - self.KDF_SALT_KEY: binascii.b2a_base64(salt), - self.KDF_LENGTH_KEY: len(key), - self.CIPHER_KEY: self.CIPHER_AES256, - self.LENGTH_KEY: len(secret), - self.SECRET_KEY: '%s%s%s' % ( - str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), - } - self._store_secrets() - signal(SOLEDAD_DONE_CREATING_KEYS, self._uuid) - return secret_id - - def _store_secrets(self): - """ - Store secrets in C{Soledad.STORAGE_SECRETS_FILE_PATH}. - """ - with open(self._secrets_path, 'w') as f: - f.write( - json.dumps( - self.export_recovery_document())) - - def change_passphrase(self, new_passphrase): - """ - Change the passphrase that encrypts the storage secret. - - :param new_passphrase: The new passphrase. - :type new_passphrase: unicode - - :raise NoStorageSecret: Raised if there's no storage secret available. - """ - # maybe we want to add more checks to guarantee passphrase is - # reasonable? - soledad_assert_type(new_passphrase, unicode) - if len(new_passphrase) < self.MINIMUM_PASSPHRASE_LENGTH: - raise PassphraseTooShort( - 'Passphrase must be at least %d characters long!' % - self.MINIMUM_PASSPHRASE_LENGTH) - # ensure there's a secret for which the passphrase will be changed. - if not self._has_secret(): - raise NoStorageSecret() - secret = self._get_storage_secret() - # generate random salt - new_salt = os.urandom(self.SALT_LENGTH) - # get a 256-bit key - key = scrypt.hash(new_passphrase.encode('utf-8'), new_salt, buflen=32) - iv, ciphertext = self._crypto.encrypt_sym(secret, key) - # XXX update all secrets in the dict - self._secrets[self._secret_id] = { - # leap.soledad.crypto submodule uses AES256 for symmetric - # encryption. - self.KDF_KEY: self.KDF_SCRYPT, # TODO: remove hard coded kdf - self.KDF_SALT_KEY: binascii.b2a_base64(new_salt), - self.KDF_LENGTH_KEY: len(key), - self.CIPHER_KEY: self.CIPHER_AES256, - self.LENGTH_KEY: len(secret), - self.SECRET_KEY: '%s%s%s' % ( - str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), - } - self._passphrase = new_passphrase - self._store_secrets() - self._put_secrets_in_shared_db() - - # - # General crypto utility methods. - # - - @property - def _shared_db(self): - """ - Return an instance of the shared recovery database object. - - :return: The shared database. - :rtype: SoledadSharedDatabase - """ - if self._shared_db_instance is None: - self._shared_db_instance = SoledadSharedDatabase.open_database( - urlparse.urljoin(self.server_url, SHARED_DB_NAME), - self._uuid, - False, # db should exist at this point. - creds=self._creds) - return self._shared_db_instance - - def _shared_db_doc_id(self): - """ - Calculate the doc_id of the document in the shared db that stores key - material. - - :return: the hash - :rtype: str - """ - return sha256( - '%s%s' % - (self._passphrase_as_string(), self.uuid)).hexdigest() - - def _get_secrets_from_shared_db(self): - """ - Retrieve the document with encrypted key material from the shared - database. - - :return: a document with encrypted key material in its contents - :rtype: SoledadDocument - """ - signal(SOLEDAD_DOWNLOADING_KEYS, self._uuid) - db = self._shared_db - if not db: - logger.warning('No shared db found') - return - doc = db.get_doc(self._shared_db_doc_id()) - signal(SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid) - return doc - - def _put_secrets_in_shared_db(self): - """ - Assert local keys are the same as shared db's ones. - - Try to fetch keys from shared recovery database. If they already exist - in the remote db, assert that that data is the same as local data. - Otherwise, upload keys to shared recovery database. - """ - soledad_assert( - self._has_secret(), - 'Tried to send keys to server but they don\'t exist in local ' - 'storage.') - # try to get secrets doc from server, otherwise create it - doc = self._get_secrets_from_shared_db() - if doc is None: - doc = SoledadDocument( - doc_id=self._shared_db_doc_id()) - # fill doc with encrypted secrets - doc.content = self.export_recovery_document() - # upload secrets to server - signal(SOLEDAD_UPLOADING_KEYS, self._uuid) - db = self._shared_db - if not db: - logger.warning('No shared db found') - return - db.put_doc(doc) - signal(SOLEDAD_DONE_UPLOADING_KEYS, self._uuid) - - # - # Document storage, retrieval and sync. - # - - def put_doc(self, doc): - """ - Update a document in the local encrypted database. - - ============================== WARNING ============================== - This method converts the document's contents to unicode in-place. This - means that after calling C{put_doc(doc)}, the contents of the - document, i.e. C{doc.content}, might be different from before the - call. - ============================== WARNING ============================== - - :param doc: the document to update - :type doc: SoledadDocument - - :return: the new revision identifier for the document - :rtype: str - """ - doc.content = self._convert_to_unicode(doc.content) - return self._db.put_doc(doc) - - def delete_doc(self, doc): - """ - Delete a document from the local encrypted database. - - :param doc: the document to delete - :type doc: SoledadDocument - - :return: the new revision identifier for the document - :rtype: str - """ - return self._db.delete_doc(doc) - - def get_doc(self, doc_id, include_deleted=False): - """ - Retrieve a document from the local encrypted database. - - :param doc_id: the unique document identifier - :type doc_id: str - :param include_deleted: if True, deleted documents will be - returned with empty content; otherwise asking - for a deleted document will return None - :type include_deleted: bool - - :return: the document object or None - :rtype: SoledadDocument - """ - return self._db.get_doc(doc_id, include_deleted=include_deleted) - - def get_docs(self, doc_ids, check_for_conflicts=True, - include_deleted=False): - """ - Get the content for many documents. - - :param doc_ids: a list of document identifiers - :type doc_ids: list - :param check_for_conflicts: if set False, then the conflict check will - be skipped, and 'None' will be returned instead of True/False - :type check_for_conflicts: bool - - :return: iterable giving the Document object for each document id - in matching doc_ids order. - :rtype: generator - """ - return self._db.get_docs( - doc_ids, check_for_conflicts=check_for_conflicts, - include_deleted=include_deleted) - - def get_all_docs(self, include_deleted=False): - """Get the JSON content for all documents in the database. - - :param include_deleted: If set to True, deleted documents will be - returned with empty content. Otherwise deleted - documents will not be included in the results. - :return: (generation, [Document]) - The current generation of the database, followed by a list of - all the documents in the database. - """ - return self._db.get_all_docs(include_deleted) - - def _convert_to_unicode(self, content): - """ - Converts content to unicode (or all the strings in content) - - NOTE: Even though this method supports any type, it will - currently ignore contents of lists, tuple or any other - iterable than dict. We don't need support for these at the - moment - - :param content: content to convert - :type content: object - - :rtype: object - """ - if isinstance(content, unicode): - return content - elif isinstance(content, str): - result = chardet.detect(content) - default = "utf-8" - encoding = result["encoding"] or default - try: - content = content.decode(encoding) - except UnicodeError as e: - logger.error("Unicode error: {0!r}. Using 'replace'".format(e)) - content = content.decode(encoding, 'replace') - return content - else: - if isinstance(content, dict): - for key in content.keys(): - content[key] = self._convert_to_unicode(content[key]) - return content - - def create_doc(self, content, doc_id=None): - """ - Create a new document in the local encrypted database. - - :param content: the contents of the new document - :type content: dict - :param doc_id: an optional identifier specifying the document id - :type doc_id: str - - :return: the new document - :rtype: SoledadDocument - """ - return self._db.create_doc( - self._convert_to_unicode(content), doc_id=doc_id) - - def create_doc_from_json(self, json, doc_id=None): - """ - Create a new document. - - You can optionally specify the document identifier, but the document - must not already exist. See 'put_doc' if you want to override an - existing document. - If the database specifies a maximum document size and the document - exceeds it, create will fail and raise a DocumentTooBig exception. - - :param json: The JSON document string - :type json: str - :param doc_id: An optional identifier specifying the document id. - :type doc_id: - :return: The new document - :rtype: SoledadDocument - """ - return self._db.create_doc_from_json(json, doc_id=doc_id) - - def create_index(self, index_name, *index_expressions): - """ - Create an named index, which can then be queried for future lookups. - Creating an index which already exists is not an error, and is cheap. - Creating an index which does not match the index_expressions of the - existing index is an error. - Creating an index will block until the expressions have been evaluated - and the index generated. - - :param index_name: A unique name which can be used as a key prefix - :type index_name: str - :param index_expressions: index expressions defining the index - information. - :type index_expressions: dict - - Examples: - - "fieldname", or "fieldname.subfieldname" to index alphabetically - sorted on the contents of a field. - - "number(fieldname, width)", "lower(fieldname)" - """ - if self._db: - return self._db.create_index( - index_name, *index_expressions) - - def delete_index(self, index_name): - """ - Remove a named index. - - :param index_name: The name of the index we are removing - :type index_name: str - """ - if self._db: - return self._db.delete_index(index_name) - - def list_indexes(self): - """ - List the definitions of all known indexes. - - :return: A list of [('index-name', ['field', 'field2'])] definitions. - :rtype: list - """ - if self._db: - return self._db.list_indexes() - - def get_from_index(self, index_name, *key_values): - """ - Return documents that match the keys supplied. - - You must supply exactly the same number of values as have been defined - in the index. It is possible to do a prefix match by using '*' to - indicate a wildcard match. You can only supply '*' to trailing entries, - (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) - It is also possible to append a '*' to the last supplied value (eg - 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') - - :param index_name: The index to query - :type index_name: str - :param key_values: values to match. eg, if you have - an index with 3 fields then you would have: - get_from_index(index_name, val1, val2, val3) - :type key_values: tuple - :return: List of [Document] - :rtype: list - """ - if self._db: - return self._db.get_from_index(index_name, *key_values) - - def get_count_from_index(self, index_name, *key_values): - """ - Return the count of the documents that match the keys and - values supplied. - - :param index_name: The index to query - :type index_name: str - :param key_values: values to match. eg, if you have - an index with 3 fields then you would have: - get_from_index(index_name, val1, val2, val3) - :type key_values: tuple - :return: count. - :rtype: int - """ - if self._db: - return self._db.get_count_from_index(index_name, *key_values) - - def get_range_from_index(self, index_name, start_value, end_value): - """ - Return documents that fall within the specified range. - - Both ends of the range are inclusive. For both start_value and - end_value, one must supply exactly the same number of values as have - been defined in the index, or pass None. In case of a single column - index, a string is accepted as an alternative for a tuple with a single - value. It is possible to do a prefix match by using '*' to indicate - a wildcard match. You can only supply '*' to trailing entries, (eg - 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also - possible to append a '*' to the last supplied value (eg 'val*', '*', - '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') - - :param index_name: The index to query - :type index_name: str - :param start_values: tuples of values that define the lower bound of - the range. eg, if you have an index with 3 fields then you would - have: (val1, val2, val3) - :type start_values: tuple - :param end_values: tuples of values that define the upper bound of the - range. eg, if you have an index with 3 fields then you would have: - (val1, val2, val3) - :type end_values: tuple - :return: List of [Document] - :rtype: list - """ - if self._db: - return self._db.get_range_from_index( - index_name, start_value, end_value) - - def get_index_keys(self, index_name): - """ - Return all keys under which documents are indexed in this index. - - :param index_name: The index to query - :type index_name: str - :return: [] A list of tuples of indexed keys. - :rtype: list - """ - if self._db: - return self._db.get_index_keys(index_name) - - def get_doc_conflicts(self, doc_id): - """ - Get the list of conflicts for the given document. - - :param doc_id: the document id - :type doc_id: str - - :return: a list of the document entries that are conflicted - :rtype: list - """ - if self._db: - return self._db.get_doc_conflicts(doc_id) - - def resolve_doc(self, doc, conflicted_doc_revs): - """ - Mark a document as no longer conflicted. - - :param doc: a document with the new content to be inserted. - :type doc: SoledadDocument - :param conflicted_doc_revs: a list of revisions that the new content - supersedes. - :type conflicted_doc_revs: list - """ - if self._db: - return self._db.resolve_doc(doc, conflicted_doc_revs) - - def sync(self, defer_decryption=True): - """ - Synchronize the local encrypted replica with a remote replica. - - This method blocks until a syncing lock is acquired, so there are no - attempts of concurrent syncs from the same client replica. - - :param url: the url of the target replica to sync with - :type url: str - - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. - :type defer_decryption: bool - - :return: The local generation before the synchronisation was - performed. - :rtype: str - """ - if self._db: - try: - local_gen = self._db.sync( - urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), - creds=self._creds, autocreate=False, - defer_decryption=defer_decryption) - signal(SOLEDAD_DONE_DATA_SYNC, self._uuid) - return local_gen - except Exception as e: - logger.error("Soledad exception when syncing: %s" % str(e)) - - def stop_sync(self): - """ - Stop the current syncing process. - """ - if self._db: - self._db.stop_sync() - - def need_sync(self, url): - """ - Return if local db replica differs from remote url's replica. - - :param url: The remote replica to compare with local replica. - :type url: str - - :return: Whether remote replica and local replica differ. - :rtype: bool - """ - target = SoledadSyncTarget( - url, self._db._get_replica_uid(), creds=self._creds, - crypto=self._crypto) - info = target.get_sync_info(self._db._get_replica_uid()) - # compare source generation with target's last known source generation - if self._db._get_generation() != info[4]: - signal(SOLEDAD_NEW_DATA_TO_SYNC, self._uuid) - return True - return False - - @property - def syncing(self): - """ - Property, True if the syncer is syncing. - """ - return self._db.syncing - - def _set_token(self, token): - """ - Set the authentication token for remote database access. - - Build the credentials dictionary with the following format: - - self._{ - 'token': { - 'uuid': '<uuid>' - 'token': '<token>' - } - - :param token: The authentication token. - :type token: str - """ - self._creds = { - 'token': { - 'uuid': self._uuid, - 'token': token, - } - } - - def _get_token(self): - """ - Return current token from credentials dictionary. - """ - return self._creds['token']['token'] - - token = property(_get_token, _set_token, doc='The authentication Token.') - - # - # Recovery document export and import methods - # - - def export_recovery_document(self): - """ - Export the storage secrets. - - A recovery document has the following structure: - - { - 'storage_secrets': { - '<storage_secret id>': { - 'kdf': 'scrypt', - 'kdf_salt': '<b64 repr of salt>' - 'kdf_length': <key length> - 'cipher': 'aes256', - 'length': <secret length>, - 'secret': '<encrypted storage_secret>', - }, - }, - 'kdf': 'scrypt', - 'kdf_salt': '<b64 repr of salt>', - 'kdf_length: <key length>, - '_mac_method': 'hmac', - '_mac': '<mac>' - } - - Note that multiple storage secrets might be stored in one recovery - document. This method will also calculate a MAC of a string - representation of the secrets dictionary. - - :return: The recovery document. - :rtype: dict - """ - # create salt and key for calculating MAC - salt = os.urandom(self.SALT_LENGTH) - key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) - data = { - self.STORAGE_SECRETS_KEY: self._secrets, - self.KDF_KEY: self.KDF_SCRYPT, - self.KDF_SALT_KEY: binascii.b2a_base64(salt), - self.KDF_LENGTH_KEY: len(key), - MAC_METHOD_KEY: MacMethods.HMAC, - MAC_KEY: hmac.new( - key, - json.dumps(self._secrets), - sha256).hexdigest(), - } - return data - - def import_recovery_document(self, data): - """ - Import storage secrets for symmetric encryption and uuid (if present) - from a recovery document. - - Note that this method does not store the imported data on disk. For - that, use C{self._store_secrets()}. - - :param data: The recovery document. - :type data: dict - - :return: A tuple containing the number of imported secrets and whether - there was MAC informationa available for authenticating. - :rtype: (int, bool) - """ - soledad_assert(self.STORAGE_SECRETS_KEY in data) - # check mac of the recovery document - mac = None - if MAC_KEY in data: - soledad_assert(data[MAC_KEY] is not None) - soledad_assert(MAC_METHOD_KEY in data) - soledad_assert(self.KDF_KEY in data) - soledad_assert(self.KDF_SALT_KEY in data) - soledad_assert(self.KDF_LENGTH_KEY in data) - if data[MAC_METHOD_KEY] == MacMethods.HMAC: - key = scrypt.hash( - self._passphrase_as_string(), - binascii.a2b_base64(data[self.KDF_SALT_KEY]), - buflen=32) - mac = hmac.new( - key, - json.dumps(data[self.STORAGE_SECRETS_KEY]), - sha256).hexdigest() - else: - raise UnknownMacMethod('Unknown MAC method: %s.' % - data[MAC_METHOD_KEY]) - if mac != data[MAC_KEY]: - raise WrongMac('Could not authenticate recovery document\'s ' - 'contents.') - # include secrets in the secret pool. - secrets = 0 - for secret_id, secret_data in data[self.STORAGE_SECRETS_KEY].items(): - if secret_id not in self._secrets: - secrets += 1 - self._secrets[secret_id] = secret_data - return secrets, mac - - # - # Setters/getters - # - - def _get_uuid(self): - return self._uuid - - uuid = property(_get_uuid, doc='The user uuid.') - - def _get_secret_id(self): - return self._secret_id - - secret_id = property( - _get_secret_id, - doc='The active secret id.') - - def _get_secrets_path(self): - return self._secrets_path - - secrets_path = property( - _get_secrets_path, - doc='The path for the file containing the encrypted symmetric secret.') - - def _get_local_db_path(self): - return self._local_db_path - - local_db_path = property( - _get_local_db_path, - doc='The path for the local database replica.') - - def _get_server_url(self): - return self._server_url - - server_url = property( - _get_server_url, - doc='The URL of the Soledad server.') - - storage_secret = property( - _get_storage_secret, - doc='The secret used for symmetric encryption.') - - def _get_passphrase(self): - return self._passphrase - - passphrase = property( - _get_passphrase, - doc='The passphrase for locking and unlocking encryption secrets for ' - 'local and remote storage.') - - def _passphrase_as_string(self): - return self._passphrase.encode('utf-8') - - -# ---------------------------------------------------------------------------- -# Monkey patching u1db to be able to provide a custom SSL cert -# ---------------------------------------------------------------------------- - -# We need a more reasonable timeout (in seconds) -SOLEDAD_TIMEOUT = 120 - - -class VerifiedHTTPSConnection(httplib.HTTPSConnection): - """ - HTTPSConnection verifying server side certificates. - """ - # derived from httplib.py - - def connect(self): - """ - Connect to a host on a given (SSL) port. - """ - try: - source = self.source_address - sock = socket.create_connection((self.host, self.port), - SOLEDAD_TIMEOUT, source) - except AttributeError: - # source_address was introduced in 2.7 - sock = socket.create_connection((self.host, self.port), - SOLEDAD_TIMEOUT) - if self._tunnel_host: - self.sock = sock - self._tunnel() - - highest_supported = ssl.PROTOCOL_SSLv23 - - try: - # needs python 2.7.9+ - # negotiate the best available version, - # but explicitely disabled bad ones. - ctx = ssl.SSLContext(highest_supported) - ctx.options |= ssl.OP_NO_SSLv2 - ctx.options |= ssl.OP_NO_SSLv3 - - ctx.load_verify_locations(cafile=SOLEDAD_CERT) - ctx.verify_mode = ssl.CERT_REQUIRED - self.sock = ctx.wrap_socket(sock) - - except AttributeError: - self.sock = ssl.wrap_socket( - sock, ca_certs=SOLEDAD_CERT, cert_reqs=ssl.CERT_REQUIRED, - ssl_version=highest_supported) - - match_hostname(self.sock.getpeercert(), self.host) - - -old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection -http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection - - -__all__ = ['soledad_assert', 'Soledad'] +from leap.soledad.client.api import Soledad +from leap.soledad.common import soledad_assert from ._version import get_versions __version__ = get_versions()['version'] del get_versions + +__all__ = ['soledad_assert', 'Soledad', '__version__'] diff --git a/client/src/leap/soledad/client/_version.py b/client/src/leap/soledad/client/_version.py index 5b2e62d4..0f54bae6 100644 --- a/client/src/leap/soledad/client/_version.py +++ b/client/src/leap/soledad/client/_version.py @@ -1,4 +1,3 @@ - # This file was generated by the `freeze_debianver` command in setup.py # Using 'versioneer.py' (0.7+) from # revision-control system data, or from the parent directory name of an @@ -7,7 +6,3 @@ version_version = '0.6.5' version_full = '963717dc05824bcb4c69b912d948647ece3cf3aa' - - -def get_versions(default={}, verbose=False): - return {'version': version_version, 'full': version_full} diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py new file mode 100644 index 00000000..237159bd --- /dev/null +++ b/client/src/leap/soledad/client/adbapi.py @@ -0,0 +1,294 @@ +# -*- coding: utf-8 -*- +# adbapi.py +# Copyright (C) 2013, 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +An asyncrhonous interface to soledad using sqlcipher backend. +It uses twisted.enterprise.adbapi. +""" +import re +import os +import sys +import logging + +from functools import partial +from threading import BoundedSemaphore + +from twisted.enterprise import adbapi +from twisted.python import log +from zope.proxy import ProxyBase, setProxiedObject +from pysqlcipher.dbapi2 import OperationalError +from pysqlcipher.dbapi2 import DatabaseError + +from leap.soledad.common.errors import DatabaseAccessError + +from leap.soledad.client import sqlcipher as soledad_sqlcipher +from leap.soledad.client.pragmas import set_init_pragmas + + +logger = logging.getLogger(name=__name__) + + +DEBUG_SQL = os.environ.get("LEAP_DEBUG_SQL") +if DEBUG_SQL: + log.startLogging(sys.stdout) + +""" +How long the SQLCipher connection should wait for the lock to go away until +raising an exception. +""" +SQLCIPHER_CONNECTION_TIMEOUT = 10 + +""" +How many times a SQLCipher query should be retried in case of timeout. +""" +SQLCIPHER_MAX_RETRIES = 10 + + +def getConnectionPool(opts, openfun=None, driver="pysqlcipher", + sync_enc_pool=None): + """ + Return a connection pool. + + :param opts: + Options for the SQLCipher connection. + :type opts: SQLCipherOptions + :param openfun: + Callback invoked after every connect() on the underlying DB-API + object. + :type openfun: callable + :param driver: + The connection driver. + :type driver: str + + :return: A U1DB connection pool. + :rtype: U1DBConnectionPool + """ + if openfun is None and driver == "pysqlcipher": + openfun = partial(set_init_pragmas, opts=opts) + return U1DBConnectionPool( + "%s.dbapi2" % driver, opts=opts, sync_enc_pool=sync_enc_pool, + database=opts.path, check_same_thread=False, cp_openfun=openfun, + timeout=SQLCIPHER_CONNECTION_TIMEOUT) + + +class U1DBConnection(adbapi.Connection): + """ + A wrapper for a U1DB connection instance. + """ + + u1db_wrapper = soledad_sqlcipher.SoledadSQLCipherWrapper + """ + The U1DB wrapper to use. + """ + + def __init__(self, pool, sync_enc_pool, init_u1db=False): + """ + :param pool: The pool of connections to that owns this connection. + :type pool: adbapi.ConnectionPool + :param init_u1db: Wether the u1db database should be initialized. + :type init_u1db: bool + """ + self.init_u1db = init_u1db + self._sync_enc_pool = sync_enc_pool + try: + adbapi.Connection.__init__(self, pool) + except DatabaseError: + raise DatabaseAccessError('Could not open sqlcipher database') + + def reconnect(self): + """ + Reconnect to the U1DB database. + """ + if self._connection is not None: + self._pool.disconnect(self._connection) + self._connection = self._pool.connect() + + if self.init_u1db: + self._u1db = self.u1db_wrapper( + self._connection, + self._pool.opts, + self._sync_enc_pool) + + def __getattr__(self, name): + """ + Route the requested attribute either to the U1DB wrapper or to the + connection. + + :param name: The name of the attribute. + :type name: str + """ + if name.startswith('u1db_'): + attr = re.sub('^u1db_', '', name) + return getattr(self._u1db, attr) + else: + return getattr(self._connection, name) + + +class U1DBTransaction(adbapi.Transaction): + """ + A wrapper for a U1DB 'cursor' object. + """ + + def __getattr__(self, name): + """ + Route the requested attribute either to the U1DB wrapper of the + connection or to the actual connection cursor. + + :param name: The name of the attribute. + :type name: str + """ + if name.startswith('u1db_'): + attr = re.sub('^u1db_', '', name) + return getattr(self._connection._u1db, attr) + else: + return getattr(self._cursor, name) + + +class U1DBConnectionPool(adbapi.ConnectionPool): + """ + Represent a pool of connections to an U1DB database. + """ + + connectionFactory = U1DBConnection + transactionFactory = U1DBTransaction + + def __init__(self, *args, **kwargs): + """ + Initialize the connection pool. + """ + # extract soledad-specific objects from keyword arguments + self.opts = kwargs.pop("opts") + self._sync_enc_pool = kwargs.pop("sync_enc_pool") + try: + adbapi.ConnectionPool.__init__(self, *args, **kwargs) + except DatabaseError: + raise DatabaseAccessError('Could not open sqlcipher database') + + # all u1db connections, hashed by thread-id + self._u1dbconnections = {} + + # The replica uid, primed by the connections on init. + self.replica_uid = ProxyBase(None) + + conn = self.connectionFactory( + self, self._sync_enc_pool, init_u1db=True) + replica_uid = conn._u1db._real_replica_uid + setProxiedObject(self.replica_uid, replica_uid) + + def runU1DBQuery(self, meth, *args, **kw): + """ + Execute a U1DB query in a thread, using a pooled connection. + + Concurrent threads trying to update the same database may timeout + because of other threads holding the database lock. Because of this, + we will retry SQLCIPHER_MAX_RETRIES times and fail after that. + + :param meth: The U1DB wrapper method name. + :type meth: str + + :return: a Deferred which will fire the return value of + 'self._runU1DBQuery(Transaction(...), *args, **kw)', or a Failure. + :rtype: twisted.internet.defer.Deferred + """ + meth = "u1db_%s" % meth + semaphore = BoundedSemaphore(SQLCIPHER_MAX_RETRIES - 1) + + def _run_interaction(): + return self.runInteraction( + self._runU1DBQuery, meth, *args, **kw) + + def _errback(failure): + failure.trap(OperationalError) + if failure.getErrorMessage() == "database is locked": + should_retry = semaphore.acquire(False) + if should_retry: + logger.warning( + "Database operation timed out while waiting for " + "lock, trying again...") + return _run_interaction() + return failure + + d = _run_interaction() + d.addErrback(_errback) + return d + + def _runU1DBQuery(self, trans, meth, *args, **kw): + """ + Execute a U1DB query. + + :param trans: An U1DB transaction. + :type trans: adbapi.Transaction + :param meth: the U1DB wrapper method name. + :type meth: str + """ + meth = getattr(trans, meth) + return meth(*args, **kw) + # XXX should return a fetchall? + + # XXX add _runOperation too + + def _runInteraction(self, interaction, *args, **kw): + """ + Interact with the database and return the result. + + :param interaction: + A callable object whose first argument is an + L{adbapi.Transaction}. + :type interaction: callable + :return: a Deferred which will fire the return value of + 'interaction(Transaction(...), *args, **kw)', or a Failure. + :rtype: twisted.internet.defer.Deferred + """ + tid = self.threadID() + u1db = self._u1dbconnections.get(tid) + conn = self.connectionFactory( + self, self._sync_enc_pool, init_u1db=not bool(u1db)) + + if self.replica_uid is None: + replica_uid = conn._u1db._real_replica_uid + setProxiedObject(self.replica_uid, replica_uid) + + if u1db is None: + self._u1dbconnections[tid] = conn._u1db + else: + conn._u1db = u1db + + trans = self.transactionFactory(self, conn) + try: + result = interaction(trans, *args, **kw) + trans.close() + conn.commit() + return result + except: + excType, excValue, excTraceback = sys.exc_info() + try: + conn.rollback() + except: + log.err(None, "Rollback failed") + raise excType, excValue, excTraceback + + def finalClose(self): + """ + A final close, only called by the shutdown trigger. + """ + self.shutdownID = None + self.threadpool.stop() + self.running = False + for conn in self.connections.values(): + self._close(conn) + for u1db in self._u1dbconnections.values(): + self._close(u1db) + self.connections.clear() diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py new file mode 100644 index 00000000..a6a98551 --- /dev/null +++ b/client/src/leap/soledad/client/api.py @@ -0,0 +1,1001 @@ +# -*- coding: utf-8 -*- +# api.py +# Copyright (C) 2013, 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Soledad - Synchronization Of Locally Encrypted Data Among Devices. + +This module holds the public api for Soledad. + +Soledad is the part of LEAP that manages storage and synchronization of +application data. It is built on top of U1DB reference Python API and +implements (1) a SQLCipher backend for local storage in the client, (2) a +SyncTarget that encrypts data before syncing, and (3) a CouchDB backend for +remote storage in the server side. +""" +import binascii +import errno +import httplib +import logging +import os +import socket +import ssl +import urlparse + +try: + import cchardet as chardet +except ImportError: + import chardet +from itertools import chain + +from StringIO import StringIO +from collections import defaultdict +from u1db.remote import http_client +from u1db.remote.ssl_match_hostname import match_hostname +from zope.interface import implements + +from leap.common.config import get_path_prefix +from leap.common.plugins import collect_plugins +from twisted.internet.defer import DeferredLock + +from leap.soledad.common import SHARED_DB_NAME +from leap.soledad.common import soledad_assert +from leap.soledad.common import soledad_assert_type + +from leap.soledad.client import adbapi +from leap.soledad.client import events as soledad_events +from leap.soledad.client import interfaces as soledad_interfaces +from leap.soledad.client.crypto import SoledadCrypto +from leap.soledad.client.secrets import SoledadSecrets +from leap.soledad.client.shared_db import SoledadSharedDatabase +from leap.soledad.client import sqlcipher +from leap.soledad.client import encdecpool + +logger = logging.getLogger(name=__name__) + +# +# Constants +# + +""" +Path to the certificate file used to certify the SSL connection between +Soledad client and server. +""" +SOLEDAD_CERT = None + + +class Soledad(object): + """ + Soledad provides encrypted data storage and sync. + + A Soledad instance is used to store and retrieve data in a local encrypted + database and synchronize this database with Soledad server. + + This class is also responsible for bootstrapping users' account by + creating cryptographic secrets and/or storing/fetching them on Soledad + server. + + Soledad uses ``leap.common.events`` to signal events. The possible events + to be signaled are: + + SOLEDAD_CREATING_KEYS: emitted during bootstrap sequence when key + generation starts. + SOLEDAD_DONE_CREATING_KEYS: emitted during bootstrap sequence when key + generation finishes. + SOLEDAD_UPLOADING_KEYS: emitted during bootstrap sequence when soledad + starts sending keys to server. + SOLEDAD_DONE_UPLOADING_KEYS: emitted during bootstrap sequence when + soledad finishes sending keys to server. + SOLEDAD_DOWNLOADING_KEYS: emitted during bootstrap sequence when + soledad starts to retrieve keys from server. + SOLEDAD_DONE_DOWNLOADING_KEYS: emitted during bootstrap sequence when + soledad finishes downloading keys from server. + SOLEDAD_NEW_DATA_TO_SYNC: emitted upon call to C{need_sync()} when + there's indeed new data to be synchronized between local database + replica and server's replica. + SOLEDAD_DONE_DATA_SYNC: emitted inside C{sync()} method when it has + finished synchronizing with remote replica. + """ + implements(soledad_interfaces.ILocalStorage, + soledad_interfaces.ISyncableStorage, + soledad_interfaces.ISecretsStorage) + + local_db_file_name = 'soledad.u1db' + secrets_file_name = "soledad.json" + default_prefix = os.path.join(get_path_prefix(), 'leap', 'soledad') + + """ + A dictionary that holds locks which avoid multiple sync attempts from the + same database replica. The dictionary indexes are the paths to each local + db, so we guarantee that only one sync happens for a local db at a time. + """ + _sync_lock = defaultdict(DeferredLock) + + def __init__(self, uuid, passphrase, secrets_path, local_db_path, + server_url, cert_file, shared_db=None, + auth_token=None, defer_encryption=False, syncable=True): + """ + Initialize configuration, cryptographic keys and dbs. + + :param uuid: User's uuid. + :type uuid: str + + :param passphrase: + The passphrase for locking and unlocking encryption secrets for + local and remote storage. + :type passphrase: unicode + + :param secrets_path: + Path for storing encrypted key used for symmetric encryption. + :type secrets_path: str + + :param local_db_path: Path for local encrypted storage db. + :type local_db_path: str + + :param server_url: + URL for Soledad server. This is used either to sync with the user's + remote db and to interact with the shared recovery database. + :type server_url: str + + :param cert_file: + Path to the certificate of the ca used to validate the SSL + certificate used by the remote soledad server. + :type cert_file: str + + :param shared_db: + The shared database. + :type shared_db: HTTPDatabase + + :param auth_token: + Authorization token for accessing remote databases. + :type auth_token: str + + :param defer_encryption: + Whether to defer encryption/decryption of documents, or do it + inline while syncing. + :type defer_encryption: bool + + :param syncable: + If set to ``False``, this database will not attempt to synchronize + with remote replicas (default is ``True``) + :type syncable: bool + + :raise BootstrapSequenceError: + Raised when the secret initialization sequence (i.e. retrieval + from server or generation and storage on server) has failed for + some reason. + """ + # store config params + self._uuid = uuid + self._passphrase = passphrase + self._local_db_path = local_db_path + self._server_url = server_url + self._defer_encryption = defer_encryption + self._secrets_path = None + self._sync_enc_pool = None + + self.shared_db = shared_db + + # configure SSL certificate + global SOLEDAD_CERT + SOLEDAD_CERT = cert_file + + self._set_token(auth_token) + + self._init_config_with_defaults() + self._init_working_dirs() + + self._secrets_path = secrets_path + + # Initialize shared recovery database + self.init_shared_db(server_url, uuid, self._creds, syncable=syncable) + + # The following can raise BootstrapSequenceError, that will be + # propagated upwards. + self._init_secrets() + + self._crypto = SoledadCrypto(self._secrets.remote_storage_secret) + self._init_u1db_sqlcipher_backend() + + if syncable: + self._init_u1db_syncer() + + # + # initialization/destruction methods + # + def _init_config_with_defaults(self): + """ + Initialize configuration using default values for missing params. + """ + soledad_assert_type(self._passphrase, unicode) + + def initialize(attr, val): + return ((getattr(self, attr, None) is None) and + setattr(self, attr, val)) + + initialize("_secrets_path", os.path.join( + self.default_prefix, self.secrets_file_name)) + initialize("_local_db_path", os.path.join( + self.default_prefix, self.local_db_file_name)) + # initialize server_url + soledad_assert(self._server_url is not None, + 'Missing URL for Soledad server.') + + def _init_working_dirs(self): + """ + Create work directories. + + :raise OSError: in case file exists and is not a dir. + """ + paths = map(lambda x: os.path.dirname(x), [ + self._local_db_path, self._secrets_path]) + for path in paths: + create_path_if_not_exists(path) + + def _init_secrets(self): + """ + Initialize Soledad secrets. + """ + self._secrets = SoledadSecrets( + self.uuid, self._passphrase, self._secrets_path, + self.shared_db) + self._secrets.bootstrap() + + def _init_u1db_sqlcipher_backend(self): + """ + Initialize the U1DB SQLCipher database for local storage. + + Instantiates a modified twisted adbapi that will maintain a threadpool + with a u1db-sqclipher connection for each thread, and will return + deferreds for each u1db query. + + Currently, Soledad uses the default SQLCipher cipher, i.e. + 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key, + and internally the SQLCipherDatabase initialization uses the 'raw + PRAGMA key' format to handle the key to SQLCipher. + """ + tohex = binascii.b2a_hex + # sqlcipher only accepts the hex version + key = tohex(self._secrets.get_local_storage_key()) + sync_db_key = tohex(self._secrets.get_sync_db_key()) + + opts = sqlcipher.SQLCipherOptions( + self._local_db_path, key, + is_raw_key=True, create=True, + defer_encryption=self._defer_encryption, + sync_db_key=sync_db_key, + ) + self._sqlcipher_opts = opts + + # the sync_db is used both for deferred encryption and decryption, so + # we want to initialize it anyway to allow for all combinations of + # deferred encryption and decryption configurations. + self._initialize_sync_db(opts) + self._dbpool = adbapi.getConnectionPool( + opts, sync_enc_pool=self._sync_enc_pool) + + def _init_u1db_syncer(self): + """ + Initialize the U1DB synchronizer. + """ + replica_uid = self._dbpool.replica_uid + self._dbsyncer = sqlcipher.SQLCipherU1DBSync( + self._sqlcipher_opts, self._crypto, replica_uid, + SOLEDAD_CERT, + defer_encryption=self._defer_encryption, + sync_db=self._sync_db, + sync_enc_pool=self._sync_enc_pool) + + # + # Closing methods + # + + def close(self): + """ + Close underlying U1DB database. + """ + logger.debug("Closing soledad") + self._dbpool.close() + if getattr(self, '_dbsyncer', None): + self._dbsyncer.close() + # close the sync database + if self._sync_db: + self._sync_db.close() + self._sync_db = None + if self._defer_encryption: + self._sync_enc_pool.stop() + + # + # ILocalStorage + # + + def _defer(self, meth, *args, **kw): + """ + Defer a method to be run on a U1DB connection pool. + + :param meth: A method to defer to the U1DB connection pool. + :type meth: callable + :return: A deferred. + :rtype: twisted.internet.defer.Deferred + """ + return self._dbpool.runU1DBQuery(meth, *args, **kw) + + def put_doc(self, doc): + """ + Update a document. + + If the document currently has conflicts, put will fail. + If the database specifies a maximum document size and the document + exceeds it, put will fail and raise a DocumentTooBig exception. + + ============================== WARNING ============================== + This method converts the document's contents to unicode in-place. This + means that after calling `put_doc(doc)`, the contents of the + document, i.e. `doc.content`, might be different from before the + call. + ============================== WARNING ============================== + + :param doc: A document with new content. + :type doc: leap.soledad.common.document.SoledadDocument + :return: A deferred whose callback will be invoked with the new + revision identifier for the document. The document object will + also be updated. + :rtype: twisted.internet.defer.Deferred + """ + doc.content = _convert_to_unicode(doc.content) + return self._defer("put_doc", doc) + + def delete_doc(self, doc): + """ + Mark a document as deleted. + + Will abort if the current revision doesn't match doc.rev. + This will also set doc.content to None. + + :param doc: A document to be deleted. + :type doc: leap.soledad.common.document.SoledadDocument + :return: A deferred. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("delete_doc", doc) + + def get_doc(self, doc_id, include_deleted=False): + """ + Get the JSON string for the given document. + + :param doc_id: The unique document identifier + :type doc_id: str + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise asking for a deleted + document will return None. + :type include_deleted: bool + :return: A deferred whose callback will be invoked with a document + object. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer( + "get_doc", doc_id, include_deleted=include_deleted) + + def get_docs( + self, doc_ids, check_for_conflicts=True, include_deleted=False): + """ + Get the JSON content for many documents. + + :param doc_ids: A list of document identifiers. + :type doc_ids: list + :param check_for_conflicts: If set to False, then the conflict check + will be skipped, and 'None' will be returned instead of True/False. + :type check_for_conflicts: bool + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted documents will not + be included in the results. + :type include_deleted: bool + :return: A deferred whose callback will be invoked with an iterable + giving the document object for each document id in matching + doc_ids order. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer( + "get_docs", doc_ids, check_for_conflicts=check_for_conflicts, + include_deleted=include_deleted) + + def get_all_docs(self, include_deleted=False): + """ + Get the JSON content for all documents in the database. + + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted documents will not + be included in the results. + :type include_deleted: bool + + :return: A deferred which, when fired, will pass the a tuple + containing (generation, [Document]) to the callback, with the + current generation of the database, followed by a list of all the + documents in the database. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("get_all_docs", include_deleted) + + def create_doc(self, content, doc_id=None): + """ + Create a new document. + + You can optionally specify the document identifier, but the document + must not already exist. See 'put_doc' if you want to override an + existing document. + If the database specifies a maximum document size and the document + exceeds it, create will fail and raise a DocumentTooBig exception. + + :param content: A Python dictionary. + :type content: dict + :param doc_id: An optional identifier specifying the document id. + :type doc_id: str + :return: A deferred whose callback will be invoked with a document. + :rtype: twisted.internet.defer.Deferred + """ + # TODO we probably should pass an optional "encoding" parameter to + # create_doc (and probably to put_doc too). There are cases (mail + # payloads for example) in which we already have the encoding in the + # headers, so we don't need to guess it. + return self._defer( + "create_doc", _convert_to_unicode(content), doc_id=doc_id) + + def create_doc_from_json(self, json, doc_id=None): + """ + Create a new document. + + You can optionally specify the document identifier, but the document + must not already exist. See 'put_doc' if you want to override an + existing document. + If the database specifies a maximum document size and the document + exceeds it, create will fail and raise a DocumentTooBig exception. + + :param json: The JSON document string + :type json: dict + :param doc_id: An optional identifier specifying the document id. + :type doc_id: str + :return: A deferred whose callback will be invoked with a document. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("create_doc_from_json", json, doc_id=doc_id) + + def create_index(self, index_name, *index_expressions): + """ + Create a named index, which can then be queried for future lookups. + + Creating an index which already exists is not an error, and is cheap. + Creating an index which does not match the index_expressions of the + existing index is an error. + Creating an index will block until the expressions have been evaluated + and the index generated. + + :param index_name: A unique name which can be used as a key prefix + :type index_name: str + :param index_expressions: index expressions defining the index + information. + + Examples: + + "fieldname", or "fieldname.subfieldname" to index alphabetically + sorted on the contents of a field. + + "number(fieldname, width)", "lower(fieldname)" + :type index_expresions: list of str + :return: A deferred. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("create_index", index_name, *index_expressions) + + def delete_index(self, index_name): + """ + Remove a named index. + + :param index_name: The name of the index we are removing + :type index_name: str + :return: A deferred. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("delete_index", index_name) + + def list_indexes(self): + """ + List the definitions of all known indexes. + + :return: A deferred whose callback will be invoked with a list of + [('index-name', ['field', 'field2'])] definitions. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("list_indexes") + + def get_from_index(self, index_name, *key_values): + """ + Return documents that match the keys supplied. + + You must supply exactly the same number of values as have been defined + in the index. It is possible to do a prefix match by using '*' to + indicate a wildcard match. You can only supply '*' to trailing entries, + (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) + It is also possible to append a '*' to the last supplied value (eg + 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: list + :return: A deferred whose callback will be invoked with a list of + [Document]. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("get_from_index", index_name, *key_values) + + def get_count_from_index(self, index_name, *key_values): + """ + Return the count for a given combination of index_name + and key values. + + Extension method made from similar methods in u1db version 13.09 + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: A deferred whose callback will be invoked with the count. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("get_count_from_index", index_name, *key_values) + + def get_range_from_index(self, index_name, start_value, end_value): + """ + Return documents that fall within the specified range. + + Both ends of the range are inclusive. For both start_value and + end_value, one must supply exactly the same number of values as have + been defined in the index, or pass None. In case of a single column + index, a string is accepted as an alternative for a tuple with a single + value. It is possible to do a prefix match by using '*' to indicate + a wildcard match. You can only supply '*' to trailing entries, (eg + 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also + possible to append a '*' to the last supplied value (eg 'val*', '*', + '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :type index_name: str + :param start_values: tuples of values that define the lower bound of + the range. eg, if you have an index with 3 fields then you would + have: (val1, val2, val3) + :type start_values: tuple + :param end_values: tuples of values that define the upper bound of the + range. eg, if you have an index with 3 fields then you would have: + (val1, val2, val3) + :type end_values: tuple + :return: A deferred whose callback will be invoked with a list of + [Document]. + :rtype: twisted.internet.defer.Deferred + """ + + return self._defer( + "get_range_from_index", index_name, start_value, end_value) + + def get_index_keys(self, index_name): + """ + Return all keys under which documents are indexed in this index. + + :param index_name: The index to query + :type index_name: str + :return: A deferred whose callback will be invoked with a list of + tuples of indexed keys. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("get_index_keys", index_name) + + def get_doc_conflicts(self, doc_id): + """ + Get the list of conflicts for the given document. + + The order of the conflicts is such that the first entry is the value + that would be returned by "get_doc". + + :param doc_id: The unique document identifier + :type doc_id: str + :return: A deferred whose callback will be invoked with a list of the + Document entries that are conflicted. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("get_doc_conflicts", doc_id) + + def resolve_doc(self, doc, conflicted_doc_revs): + """ + Mark a document as no longer conflicted. + + We take the list of revisions that the client knows about that it is + superseding. This may be a different list from the actual current + conflicts, in which case only those are removed as conflicted. This + may fail if the conflict list is significantly different from the + supplied information. (sync could have happened in the background from + the time you GET_DOC_CONFLICTS until the point where you RESOLVE) + + :param doc: A Document with the new content to be inserted. + :type doc: SoledadDocument + :param conflicted_doc_revs: A list of revisions that the new content + supersedes. + :type conflicted_doc_revs: list(str) + :return: A deferred. + :rtype: twisted.internet.defer.Deferred + """ + return self._defer("resolve_doc", doc, conflicted_doc_revs) + + @property + def local_db_path(self): + return self._local_db_path + + @property + def uuid(self): + return self._uuid + + # + # ISyncableStorage + # + + def sync(self, defer_decryption=True): + """ + Synchronize documents with the server replica. + + This method uses a lock to prevent multiple concurrent sync processes + over the same local db file. + + :param defer_decryption: + Whether to defer decryption of documents, or do it inline while + syncing. + :type defer_decryption: bool + + :return: A deferred lock that will run the actual sync process when + the lock is acquired, and which will fire with with the local + generation before the synchronization was performed. + :rtype: twisted.internet.defer.Deferred + """ + d = self.sync_lock.run( + self._sync, + defer_decryption) + return d + + def _sync(self, defer_decryption): + """ + Synchronize documents with the server replica. + + :param defer_decryption: + Whether to defer decryption of documents, or do it inline while + syncing. + :type defer_decryption: bool + + :return: A deferred whose callback will be invoked with the local + generation before the synchronization was performed. + :rtype: twisted.internet.defer.Deferred + """ + sync_url = urlparse.urljoin(self._server_url, 'user-%s' % self.uuid) + d = self._dbsyncer.sync( + sync_url, + creds=self._creds, + defer_decryption=defer_decryption) + + def _sync_callback(local_gen): + self._last_received_docs = docs = self._dbsyncer.received_docs + + # Post-Sync Hooks + if docs: + iface = soledad_interfaces.ISoledadPostSyncPlugin + suitable_plugins = collect_plugins(iface) + for plugin in suitable_plugins: + watched = plugin.watched_doc_types + r = [filter( + lambda s: s.startswith(preffix), + docs) for preffix in watched] + filtered = list(chain(*r)) + plugin.process_received_docs(filtered) + + return local_gen + + def _sync_errback(failure): + s = StringIO() + failure.printDetailedTraceback(file=s) + msg = "Soledad exception when syncing!\n" + s.getvalue() + logger.error(msg) + return failure + + def _emit_done_data_sync(passthrough): + soledad_events.emit( + soledad_events.SOLEDAD_DONE_DATA_SYNC, self.uuid) + return passthrough + + d.addCallbacks(_sync_callback, _sync_errback) + d.addBoth(_emit_done_data_sync) + return d + + @property + def sync_lock(self): + """ + Class based lock to prevent concurrent syncs using the same local db + file. + + :return: A shared lock based on this instance's db file path. + :rtype: DeferredLock + """ + return self._sync_lock[self._local_db_path] + + @property + def syncing(self): + """ + Return wether Soledad is currently synchronizing with the server. + + :return: Wether Soledad is currently synchronizing with the server. + :rtype: bool + """ + return self.sync_lock.locked + + def _set_token(self, token): + """ + Set the authentication token for remote database access. + + Internally, this builds the credentials dictionary with the following + format: + + { + 'token': { + 'uuid': '<uuid>' + 'token': '<token>' + } + } + + :param token: The authentication token. + :type token: str + """ + self._creds = { + 'token': { + 'uuid': self.uuid, + 'token': token, + } + } + + def _get_token(self): + """ + Return current token from credentials dictionary. + """ + return self._creds['token']['token'] + + token = property(_get_token, _set_token, doc='The authentication Token.') + + def _initialize_sync_db(self, opts): + """ + Initialize the Symmetrically-Encrypted document to be synced database, + and the queue to communicate with subprocess workers. + + :param opts: + :type opts: SQLCipherOptions + """ + soledad_assert(opts.sync_db_key is not None) + sync_db_path = None + if opts.path != ":memory:": + sync_db_path = "%s-sync" % opts.path + else: + sync_db_path = ":memory:" + + # we copy incoming options because the opts object might be used + # somewhere else + sync_opts = sqlcipher.SQLCipherOptions.copy( + opts, path=sync_db_path, create=True) + self._sync_db = sqlcipher.getConnectionPool( + sync_opts, extra_queries=self._sync_db_extra_init) + if self._defer_encryption: + # initialize syncing queue encryption pool + self._sync_enc_pool = encdecpool.SyncEncrypterPool( + self._crypto, self._sync_db) + self._sync_enc_pool.start() + + @property + def _sync_db_extra_init(self): + """ + Queries for creating tables for the local sync documents db if needed. + They are passed as extra initialization to initialize_sqlciphjer_db + + :rtype: tuple of strings + """ + maybe_create = "CREATE TABLE IF NOT EXISTS %s (%s)" + encr = encdecpool.SyncEncrypterPool + decr = encdecpool.SyncDecrypterPool + sql_encr_table_query = (maybe_create % ( + encr.TABLE_NAME, encr.FIELD_NAMES)) + sql_decr_table_query = (maybe_create % ( + decr.TABLE_NAME, decr.FIELD_NAMES)) + return (sql_encr_table_query, sql_decr_table_query) + + # + # ISecretsStorage + # + + def init_shared_db(self, server_url, uuid, creds, syncable=True): + """ + Initialize the shared database. + + :param server_url: URL of the remote database. + :type server_url: str + :param uuid: The user's unique id. + :type uuid: str + :param creds: A tuple containing the authentication method and + credentials. + :type creds: tuple + :param syncable: + If syncable is False, the database will not attempt to sync against + a remote replica. + :type syncable: bool + """ + # only case this is False is for testing purposes + if self.shared_db is None: + shared_db_url = urlparse.urljoin(server_url, SHARED_DB_NAME) + self.shared_db = SoledadSharedDatabase.open_database( + shared_db_url, + uuid, + creds=creds, + syncable=syncable) + + @property + def storage_secret(self): + """ + Return the secret used for local storage encryption. + + :return: The secret used for local storage encryption. + :rtype: str + """ + return self._secrets.storage_secret + + @property + def remote_storage_secret(self): + """ + Return the secret used for encryption of remotely stored data. + + :return: The secret used for remote storage encryption. + :rtype: str + """ + return self._secrets.remote_storage_secret + + @property + def secrets(self): + """ + Return the secrets object. + + :return: The secrets object. + :rtype: SoledadSecrets + """ + return self._secrets + + def change_passphrase(self, new_passphrase): + """ + Change the passphrase that encrypts the storage secret. + + :param new_passphrase: The new passphrase. + :type new_passphrase: unicode + + :raise NoStorageSecret: Raised if there's no storage secret available. + """ + self._secrets.change_passphrase(new_passphrase) + + # + # Raw SQLCIPHER Queries + # + + def raw_sqlcipher_query(self, *args, **kw): + """ + Run a raw sqlcipher query in the local database, and return a deferred + that will be fired with the result. + """ + return self._dbpool.runQuery(*args, **kw) + + def raw_sqlcipher_operation(self, *args, **kw): + """ + Run a raw sqlcipher operation in the local database, and return a + deferred that will be fired with None. + """ + return self._dbpool.runOperation(*args, **kw) + + +def _convert_to_unicode(content): + """ + Convert content to unicode (or all the strings in content). + + NOTE: Even though this method supports any type, it will + currently ignore contents of lists, tuple or any other + iterable than dict. We don't need support for these at the + moment + + :param content: content to convert + :type content: object + + :rtype: object + """ + # Chardet doesn't guess very well with some smallish payloads. + # This parameter might need some empirical tweaking. + CUTOFF_CONFIDENCE = 0.90 + + if isinstance(content, unicode): + return content + elif isinstance(content, str): + encoding = "utf-8" + result = chardet.detect(content) + if result["confidence"] > CUTOFF_CONFIDENCE: + encoding = result["encoding"] + try: + content = content.decode(encoding) + except UnicodeError as e: + logger.error("Unicode error: {0!r}. Using 'replace'".format(e)) + content = content.decode(encoding, 'replace') + return content + else: + if isinstance(content, dict): + for key in content.keys(): + content[key] = _convert_to_unicode(content[key]) + return content + + +def create_path_if_not_exists(path): + try: + if not os.path.isdir(path): + logger.info('Creating directory: %s.' % path) + os.makedirs(path) + except OSError as exc: + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + +# ---------------------------------------------------------------------------- +# Monkey patching u1db to be able to provide a custom SSL cert +# ---------------------------------------------------------------------------- + +# We need a more reasonable timeout (in seconds) +SOLEDAD_TIMEOUT = 120 + + +class VerifiedHTTPSConnection(httplib.HTTPSConnection): + """ + HTTPSConnection verifying server side certificates. + """ + # derived from httplib.py + + def connect(self): + """ + Connect to a host on a given (SSL) port. + """ + try: + source = self.source_address + sock = socket.create_connection((self.host, self.port), + SOLEDAD_TIMEOUT, source) + except AttributeError: + # source_address was introduced in 2.7 + sock = socket.create_connection((self.host, self.port), + SOLEDAD_TIMEOUT) + if self._tunnel_host: + self.sock = sock + self._tunnel() + + self.sock = ssl.wrap_socket(sock, + ca_certs=SOLEDAD_CERT, + cert_reqs=ssl.CERT_REQUIRED) + match_hostname(self.sock.getpeercert(), self.host) + + +old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection +http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index 7133f804..90ad656e 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -23,31 +23,13 @@ import hmac import hashlib import json import logging -import multiprocessing -import threading from pycryptopp.cipher.aes import AES -from pycryptopp.cipher.xsalsa20 import XSalsa20 -from zope.proxy import sameProxiedObjects from leap.soledad.common import soledad_assert from leap.soledad.common import soledad_assert_type -from leap.soledad.common.document import SoledadDocument - - -from leap.soledad.common.crypto import ( - EncryptionSchemes, - UnknownEncryptionScheme, - MacMethods, - UnknownMacMethod, - WrongMac, - ENC_JSON_KEY, - ENC_SCHEME_KEY, - ENC_METHOD_KEY, - ENC_IV_KEY, - MAC_KEY, - MAC_METHOD_KEY, -) +from leap.soledad.common import crypto + logger = logging.getLogger(__name__) @@ -55,91 +37,41 @@ logger = logging.getLogger(__name__) MAC_KEY_LENGTH = 64 -class EncryptionMethods(object): - """ - Representation of encryption methods that can be used. - """ - - AES_256_CTR = 'aes-256-ctr' - XSALSA20 = 'xsalsa20' - -# -# Exceptions -# - - -class DocumentNotEncrypted(Exception): - """ - Raised for failures in document encryption. - """ - pass - - -class UnknownEncryptionMethod(Exception): - """ - Raised when trying to encrypt/decrypt with unknown method. +def encrypt_sym(data, key): """ - pass - - -class NoSymmetricSecret(Exception): - """ - Raised when trying to get a hashed passphrase. - """ - - -def encrypt_sym(data, key, method): - """ - Encrypt C{data} using a {password}. - - Currently, the only encryption methods supported are AES-256 in CTR - mode and XSalsa20. + Encrypt data using AES-256 cipher in CTR mode. :param data: The data to be encrypted. :type data: str - :param key: The key used to encrypt C{data} (must be 256 bits long). + :param key: The key used to encrypt data (must be 256 bits long). :type key: str - :param method: The encryption method to use. - :type method: str - :return: A tuple with the initial value and the encrypted data. + :return: A tuple with the initialization vector and the encrypted data. :rtype: (long, str) """ soledad_assert_type(key, str) - soledad_assert( len(key) == 32, # 32 x 8 = 256 bits. 'Wrong key size: %s bits (must be 256 bits long).' % (len(key) * 8)) - iv = None - # AES-256 in CTR mode - if method == EncryptionMethods.AES_256_CTR: - iv = os.urandom(16) - ciphertext = AES(key=key, iv=iv).process(data) - # XSalsa20 - elif method == EncryptionMethods.XSALSA20: - iv = os.urandom(24) - ciphertext = XSalsa20(key=key, iv=iv).process(data) - else: - # raise if method is unknown - raise UnknownEncryptionMethod('Unkwnown method: %s' % method) + + iv = os.urandom(16) + ciphertext = AES(key=key, iv=iv).process(data) + return binascii.b2a_base64(iv), ciphertext -def decrypt_sym(data, key, method, **kwargs): +def decrypt_sym(data, key, iv): """ - Decrypt data using symmetric secret. - - Currently, the only encryption method supported is AES-256 CTR mode. + Decrypt some data previously encrypted using AES-256 cipher in CTR mode. :param data: The data to be decrypted. :type data: str - :param key: The key used to decrypt C{data} (must be 256 bits long). + :param key: The symmetric key used to decrypt data (must be 256 bits + long). :type key: str - :param method: The encryption method to use. - :type method: str - :param kwargs: Other parameters specific to each encryption method. - :type kwargs: dict + :param iv: The initialization vector. + :type iv: long :return: The decrypted data. :rtype: str @@ -149,19 +81,8 @@ def decrypt_sym(data, key, method, **kwargs): soledad_assert( len(key) == 32, # 32 x 8 = 256 bits. 'Wrong key size: %s (must be 256 bits long).' % len(key)) - soledad_assert( - 'iv' in kwargs, - '%s needs an initial value.' % method) - # AES-256 in CTR mode - if method == EncryptionMethods.AES_256_CTR: - return AES( - key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) - elif method == EncryptionMethods.XSALSA20: - return XSalsa20( - key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) - - # raise if method is unknown - raise UnknownEncryptionMethod('Unkwnown method: %s' % method) + return AES( + key=key, iv=binascii.a2b_base64(iv)).process(data) def doc_mac_key(doc_id, secret): @@ -176,17 +97,13 @@ def doc_mac_key(doc_id, secret): :param doc_id: The id of the document. :type doc_id: str - :param secret: soledad secret storage - :type secret: Soledad.storage_secret + :param secret: The Soledad storage secret + :type secret: str :return: The key. :rtype: str - - :raise NoSymmetricSecret: if no symmetric secret was supplied. """ - if secret is None: - raise NoSymmetricSecret() - + soledad_assert(secret is not None) return hmac.new( secret[:MAC_KEY_LENGTH], doc_id, @@ -198,25 +115,17 @@ class SoledadCrypto(object): General cryptographic functionality encapsulated in a object that can be passed along. """ - def __init__(self, soledad): + def __init__(self, secret): """ Initialize the crypto object. - :param soledad: A Soledad instance for key lookup. - :type soledad: leap.soledad.Soledad + :param secret: The Soledad remote storage secret. + :type secret: str """ - self._soledad = soledad + self._secret = secret - def encrypt_sym(self, data, key, - method=EncryptionMethods.AES_256_CTR): - return encrypt_sym(data, key, method) - - def decrypt_sym(self, data, key, - method=EncryptionMethods.AES_256_CTR, **kwargs): - return decrypt_sym(data, key, method, **kwargs) - - def doc_mac_key(self, doc_id, secret): - return doc_mac_key(doc_id, self.secret) + def doc_mac_key(self, doc_id): + return doc_mac_key(doc_id, self._secret) def doc_passphrase(self, doc_id): """ @@ -224,7 +133,7 @@ class SoledadCrypto(object): The password is derived using HMAC having sha256 as underlying hash function. The key used for HMAC are the first - C{soledad.REMOTE_STORAGE_SECRET_KENGTH} bytes of Soledad's storage + C{soledad.REMOTE_STORAGE_SECRET_LENGTH} bytes of Soledad's storage secret stripped from the first MAC_KEY_LENGTH characters. The HMAC message is C{doc_id}. @@ -234,34 +143,50 @@ class SoledadCrypto(object): :return: The passphrase. :rtype: str - - :raise NoSymmetricSecret: if no symmetric secret was supplied. """ - if self.secret is None: - raise NoSymmetricSecret() + soledad_assert(self._secret is not None) return hmac.new( - self.secret[ - MAC_KEY_LENGTH: - self._soledad.REMOTE_STORAGE_SECRET_LENGTH], + self._secret[MAC_KEY_LENGTH:], doc_id, hashlib.sha256).digest() - # - # secret setters/getters - # + def encrypt_doc(self, doc): + """ + Wrapper around encrypt_docstr that accepts the document as argument. + + :param doc: the document. + :type doc: SoledadDocument + """ + key = self.doc_passphrase(doc.doc_id) - def _get_secret(self): - return self._soledad.storage_secret + return encrypt_docstr( + doc.get_json(), doc.doc_id, doc.rev, key, self._secret) + + def decrypt_doc(self, doc): + """ + Wrapper around decrypt_doc_dict that accepts the document as argument. + + :param doc: the document. + :type doc: SoledadDocument + + :return: json string with the decrypted document + :rtype: str + """ + key = self.doc_passphrase(doc.doc_id) + return decrypt_doc_dict( + doc.content, doc.doc_id, doc.rev, key, self._secret) + + @property + def secret(self): + return self._secret - secret = property( - _get_secret, doc='The secret used for symmetric encryption') # # Crypto utilities for a SoledadDocument. # - -def mac_doc(doc_id, doc_rev, ciphertext, mac_method, secret): +def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv, + mac_method, secret): """ Calculate a MAC for C{doc} using C{ciphertext}. @@ -277,38 +202,38 @@ def mac_doc(doc_id, doc_rev, ciphertext, mac_method, secret): :type doc_rev: str :param ciphertext: The content of the document. :type ciphertext: str + :param enc_scheme: The encryption scheme. + :type enc_scheme: str + :param enc_method: The encryption method. + :type enc_method: str + :param enc_iv: The encryption initialization vector. + :type enc_iv: str :param mac_method: The MAC method to use. :type mac_method: str - :param secret: soledad secret - :type secret: Soledad.secret_storage + :param secret: The Soledad storage secret + :type secret: str :return: The calculated MAC. :rtype: str - """ - if mac_method == MacMethods.HMAC: - return hmac.new( - doc_mac_key(doc_id, secret), - str(doc_id) + str(doc_rev) + ciphertext, - hashlib.sha256).digest() - # raise if we do not know how to handle this MAC method - raise UnknownMacMethod('Unknown MAC method: %s.' % mac_method) - - -def encrypt_doc(crypto, doc): - """ - Wrapper around encrypt_docstr that accepts a crypto object and the document - as arguments. - :param crypto: a soledad crypto object. - :type crypto: SoledadCrypto - :param doc: the document. - :type doc: SoledadDocument - """ - key = crypto.doc_passphrase(doc.doc_id) - secret = crypto.secret - - return encrypt_docstr( - doc.get_json(), doc.doc_id, doc.rev, key, secret) + :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown. + """ + try: + soledad_assert(mac_method == crypto.MacMethods.HMAC) + except AssertionError: + raise crypto.UnknownMacMethodError + template = "{doc_id}{doc_rev}{ciphertext}{enc_scheme}{enc_method}{enc_iv}" + content = template.format( + doc_id=doc_id, + doc_rev=doc_rev, + ciphertext=ciphertext, + enc_scheme=enc_scheme, + enc_method=enc_method, + enc_iv=enc_iv) + return hmac.new( + doc_mac_key(doc_id, secret), + content, + hashlib.sha256).digest() def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): @@ -319,12 +244,12 @@ def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): string representing the following: { - ENC_JSON_KEY: '<encrypted doc JSON string>', - ENC_SCHEME_KEY: 'symkey', - ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR, - ENC_IV_KEY: '<the initial value used to encrypt>', + crypto.ENC_JSON_KEY: '<encrypted doc JSON string>', + crypto.ENC_SCHEME_KEY: 'symkey', + crypto.ENC_METHOD_KEY: crypto.EncryptionMethods.AES_256_CTR, + crypto.ENC_IV_KEY: '<the initial value used to encrypt>', MAC_KEY: '<mac>' - MAC_METHOD_KEY: 'hmac' + crypto.MAC_METHOD_KEY: 'hmac' } :param docstr: A representation of the document to be encrypted. @@ -339,72 +264,115 @@ def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): :param key: The key used to encrypt ``data`` (must be 256 bits long). :type key: str - :param secret: The Soledad secret (used for MAC auth). + :param secret: The Soledad storage secret (used for MAC auth). :type secret: str :return: The JSON serialization of the dict representing the encrypted content. :rtype: str """ - # encrypt content using AES-256 CTR mode - iv, ciphertext = encrypt_sym( + enc_scheme = crypto.EncryptionSchemes.SYMKEY + enc_method = crypto.EncryptionMethods.AES_256_CTR + mac_method = crypto.MacMethods.HMAC + enc_iv, ciphertext = encrypt_sym( str(docstr), # encryption/decryption routines expect str - key, method=EncryptionMethods.AES_256_CTR) + key) + mac = binascii.b2a_hex( # store the mac as hex. + mac_doc( + doc_id, + doc_rev, + ciphertext, + enc_scheme, + enc_method, + enc_iv, + mac_method, + secret)) # Return a representation for the encrypted content. In the following, we # convert binary data to hexadecimal representation so the JSON # serialization does not complain about what it tries to serialize. hex_ciphertext = binascii.b2a_hex(ciphertext) + logger.debug("Encrypting doc: %s" % doc_id) return json.dumps({ - ENC_JSON_KEY: hex_ciphertext, - ENC_SCHEME_KEY: EncryptionSchemes.SYMKEY, - ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR, - ENC_IV_KEY: iv, - MAC_KEY: binascii.b2a_hex(mac_doc( # store the mac as hex. - doc_id, doc_rev, ciphertext, - MacMethods.HMAC, secret)), - MAC_METHOD_KEY: MacMethods.HMAC, + crypto.ENC_JSON_KEY: hex_ciphertext, + crypto.ENC_SCHEME_KEY: enc_scheme, + crypto.ENC_METHOD_KEY: enc_method, + crypto.ENC_IV_KEY: enc_iv, + crypto.MAC_KEY: mac, + crypto.MAC_METHOD_KEY: mac_method, }) -def decrypt_doc(crypto, doc): +def _verify_doc_mac(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, + enc_iv, mac_method, secret, doc_mac): """ - Wrapper around decrypt_doc_dict that accepts a crypto object and the - document as arguments. + Verify that C{doc_mac} is a correct MAC for the given document. - :param crypto: a soledad crypto object. - :type crypto: SoledadCrypto - :param doc: the document. - :type doc: SoledadDocument + :param doc_id: The id of the document. + :type doc_id: str + :param doc_rev: The revision of the document. + :type doc_rev: str + :param ciphertext: The content of the document. + :type ciphertext: str + :param enc_scheme: The encryption scheme. + :type enc_scheme: str + :param enc_method: The encryption method. + :type enc_method: str + :param enc_iv: The encryption initialization vector. + :type enc_iv: str + :param mac_method: The MAC method to use. + :type mac_method: str + :param secret: The Soledad storage secret + :type secret: str + :param doc_mac: The MAC to be verified against. + :type doc_mac: str - :return: json string with the decrypted document - :rtype: str + :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown. + :raise crypto.WrongMacError: Raised when MAC could not be verified. """ - key = crypto.doc_passphrase(doc.doc_id) - secret = crypto.secret - return decrypt_doc_dict(doc.content, doc.doc_id, doc.rev, key, secret) + calculated_mac = mac_doc( + doc_id, + doc_rev, + ciphertext, + enc_scheme, + enc_method, + enc_iv, + mac_method, + secret) + # we compare mac's hashes to avoid possible timing attacks that might + # exploit python's builtin comparison operator behaviour, which fails + # immediatelly when non-matching bytes are found. + doc_mac_hash = hashlib.sha256( + binascii.a2b_hex( # the mac is stored as hex + doc_mac)).digest() + calculated_mac_hash = hashlib.sha256(calculated_mac).digest() + + if doc_mac_hash != calculated_mac_hash: + logger.warning("Wrong MAC while decrypting doc...") + raise crypto.WrongMacError("Could not authenticate document's " + "contents.") def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): """ - Decrypt C{doc}'s content. + Decrypt a symmetrically encrypted C{doc}'s content. Return the JSON string representation of the document's decrypted content. The passed doc_dict argument should have the following structure: { - ENC_JSON_KEY: '<enc_blob>', - ENC_SCHEME_KEY: '<enc_scheme>', - ENC_METHOD_KEY: '<enc_method>', - ENC_IV_KEY: '<initial value used to encrypt>', # (optional) + crypto.ENC_JSON_KEY: '<enc_blob>', + crypto.ENC_SCHEME_KEY: '<enc_scheme>', + crypto.ENC_METHOD_KEY: '<enc_method>', + crypto.ENC_IV_KEY: '<initial value used to encrypt>', # (optional) MAC_KEY: '<mac>' - MAC_METHOD_KEY: 'hmac' + crypto.MAC_METHOD_KEY: 'hmac' } C{enc_blob} is the encryption of the JSON serialization of the document's content. For now Soledad just deals with documents whose C{enc_scheme} is - EncryptionSchemes.SYMKEY and C{enc_method} is - EncryptionMethods.AES_256_CTR. + crypto.EncryptionSchemes.SYMKEY and C{enc_method} is + crypto.EncryptionMethods.AES_256_CTR. :param doc_dict: The content of the document to be decrypted. :type doc_dict: dict @@ -418,53 +386,40 @@ def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret): :param key: The key used to encrypt ``data`` (must be 256 bits long). :type key: str - :param secret: - :type secret: + :param secret: The Soledad storage secret. + :type secret: str :return: The JSON serialization of the decrypted content. :rtype: str + + :raise UnknownEncryptionMethodError: Raised when trying to decrypt from an + unknown encryption method. """ - soledad_assert(ENC_JSON_KEY in doc_dict) - soledad_assert(ENC_SCHEME_KEY in doc_dict) - soledad_assert(ENC_METHOD_KEY in doc_dict) - soledad_assert(MAC_KEY in doc_dict) - soledad_assert(MAC_METHOD_KEY in doc_dict) - - # verify MAC - ciphertext = binascii.a2b_hex( # content is stored as hex. - doc_dict[ENC_JSON_KEY]) - mac = mac_doc( - doc_id, doc_rev, - ciphertext, - doc_dict[MAC_METHOD_KEY], secret) - # we compare mac's hashes to avoid possible timing attacks that might - # exploit python's builtin comparison operator behaviour, which fails - # immediatelly when non-matching bytes are found. - doc_mac_hash = hashlib.sha256( - binascii.a2b_hex( # the mac is stored as hex - doc_dict[MAC_KEY])).digest() - calculated_mac_hash = hashlib.sha256(mac).digest() + # assert document dictionary structure + expected_keys = set([ + crypto.ENC_JSON_KEY, + crypto.ENC_SCHEME_KEY, + crypto.ENC_METHOD_KEY, + crypto.ENC_IV_KEY, + crypto.MAC_KEY, + crypto.MAC_METHOD_KEY, + ]) + soledad_assert(expected_keys.issubset(set(doc_dict.keys()))) - if doc_mac_hash != calculated_mac_hash: - logger.warning("Wrong MAC while decrypting doc...") - raise WrongMac('Could not authenticate document\'s contents.') - # decrypt doc's content - enc_scheme = doc_dict[ENC_SCHEME_KEY] - plainjson = None - if enc_scheme == EncryptionSchemes.SYMKEY: - enc_method = doc_dict[ENC_METHOD_KEY] - if enc_method == EncryptionMethods.AES_256_CTR: - soledad_assert(ENC_IV_KEY in doc_dict) - plainjson = decrypt_sym( - ciphertext, key, - method=enc_method, - iv=doc_dict[ENC_IV_KEY]) - else: - raise UnknownEncryptionMethod(enc_method) - else: - raise UnknownEncryptionScheme(enc_scheme) - - return plainjson + ciphertext = binascii.a2b_hex(doc_dict[crypto.ENC_JSON_KEY]) + enc_scheme = doc_dict[crypto.ENC_SCHEME_KEY] + enc_method = doc_dict[crypto.ENC_METHOD_KEY] + enc_iv = doc_dict[crypto.ENC_IV_KEY] + doc_mac = doc_dict[crypto.MAC_KEY] + mac_method = doc_dict[crypto.MAC_METHOD_KEY] + + soledad_assert(enc_scheme == crypto.EncryptionSchemes.SYMKEY) + + _verify_doc_mac( + doc_id, doc_rev, ciphertext, enc_scheme, enc_method, + enc_iv, mac_method, secret, doc_mac) + + return decrypt_sym(ciphertext, key, enc_iv) def is_symmetrically_encrypted(doc): @@ -476,534 +431,8 @@ def is_symmetrically_encrypted(doc): :rtype: bool """ - if doc.content and ENC_SCHEME_KEY in doc.content: - if doc.content[ENC_SCHEME_KEY] == EncryptionSchemes.SYMKEY: + if doc.content and crypto.ENC_SCHEME_KEY in doc.content: + if doc.content[crypto.ENC_SCHEME_KEY] \ + == crypto.EncryptionSchemes.SYMKEY: return True return False - - -# -# Encrypt/decrypt pools of workers -# - -class SyncEncryptDecryptPool(object): - """ - Base class for encrypter/decrypter pools. - """ - WORKERS = 5 - - def __init__(self, crypto, sync_db, write_lock): - """ - Initialize the pool of encryption-workers. - - :param crypto: A SoledadCryto instance to perform the encryption. - :type crypto: leap.soledad.crypto.SoledadCrypto - - :param sync_db: a database connection handle - :type sync_db: handle - - :param write_lock: a write lock for controlling concurrent access - to the sync_db - :type write_lock: threading.Lock - """ - self._pool = multiprocessing.Pool(self.WORKERS) - self._crypto = crypto - self._sync_db = sync_db - self._sync_db_write_lock = write_lock - - def close(self): - """ - Cleanly close the pool of workers. - """ - logger.debug("Closing %s" % (self.__class__.__name__,)) - self._pool.close() - try: - self._pool.join() - except Exception: - pass - - def terminate(self): - """ - Terminate the pool of workers. - """ - logger.debug("Terminating %s" % (self.__class__.__name__,)) - self._pool.terminate() - - -def encrypt_doc_task(doc_id, doc_rev, content, key, secret): - """ - Encrypt the content of the given document. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The serialized content of the document. - :type content: str - :param key: The encryption key. - :type key: str - :param secret: The Soledad secret (used for MAC auth). - :type secret: str - - :return: A tuple containing the doc id, revision and encrypted content. - :rtype: tuple(str, str, str) - """ - encrypted_content = encrypt_docstr( - content, doc_id, doc_rev, key, secret) - return doc_id, doc_rev, encrypted_content - - -class SyncEncrypterPool(SyncEncryptDecryptPool): - """ - Pool of workers that spawn subprocesses to execute the symmetric encryption - of documents to be synced. - """ - # TODO implement throttling to reduce cpu usage?? - WORKERS = 5 - TABLE_NAME = "docs_tosync" - FIELD_NAMES = "doc_id, rev, content" - - def encrypt_doc(self, doc, workers=True): - """ - Symmetrically encrypt a document. - - :param doc: The document with contents to be encrypted. - :type doc: SoledadDocument - - :param workers: Whether to defer the decryption to the multiprocess - pool of workers. Useful for debugging purposes. - :type workers: bool - """ - soledad_assert(self._crypto is not None, "need a crypto object") - docstr = doc.get_json() - key = self._crypto.doc_passphrase(doc.doc_id) - secret = self._crypto.secret - args = doc.doc_id, doc.rev, docstr, key, secret - - try: - if workers: - res = self._pool.apply_async( - encrypt_doc_task, args, - callback=self.encrypt_doc_cb) - else: - # encrypt inline - res = encrypt_doc_task(*args) - self.encrypt_doc_cb(res) - - except Exception as exc: - logger.exception(exc) - - def encrypt_doc_cb(self, result): - """ - Insert results of encryption routine into the local sync database. - - :param result: A tuple containing the doc id, revision and encrypted - content. - :type result: tuple(str, str, str) - """ - doc_id, doc_rev, content = result - self.insert_encrypted_local_doc(doc_id, doc_rev, content) - - def insert_encrypted_local_doc(self, doc_id, doc_rev, content): - """ - Insert the contents of the encrypted doc into the local sync - database. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The serialized content of the document. - :type content: str - :param content: The encrypted document. - :type content: str - """ - sql_del = "DELETE FROM '%s' WHERE doc_id=?" % (self.TABLE_NAME,) - sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?)" % (self.TABLE_NAME,) - - con = self._sync_db - with self._sync_db_write_lock: - with con: - con.execute(sql_del, (doc_id, )) - con.execute(sql_ins, (doc_id, doc_rev, content)) - - -def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret): - """ - Decrypt the content of the given document. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The encrypted content of the document. - :type content: str - :param gen: The generation corresponding to the modification of that - document. - :type gen: int - :param trans_id: The transaction id corresponding to the modification of - that document. - :type trans_id: str - :param key: The encryption key. - :type key: str - :param secret: The Soledad secret (used for MAC auth). - :type secret: str - - :return: A tuple containing the doc id, revision and encrypted content. - :rtype: tuple(str, str, str) - """ - decrypted_content = decrypt_doc_dict( - content, doc_id, doc_rev, key, secret) - return doc_id, doc_rev, decrypted_content, gen, trans_id - - -def get_insertable_docs_by_gen(expected, got): - """ - Return a list of documents ready to be inserted. This list is computed - by aligning the expected list with the already gotten docs, and returning - the maximum number of docs that can be processed in the expected order - before finding a gap. - - :param expected: A list of generations to be inserted. - :type expected: list - - :param got: A dictionary whose values are the docs to be inserted. - :type got: dict - """ - ordered = [got.get(i) for i in expected] - if None in ordered: - return ordered[:ordered.index(None)] - else: - return ordered - - -class SyncDecrypterPool(SyncEncryptDecryptPool): - """ - Pool of workers that spawn subprocesses to execute the symmetric decryption - of documents that were received. - - The decryption of the received documents is done in two steps: - - 1. All the encrypted docs are collected, together with their generation - and transaction-id - 2. The docs are enqueued for decryption. When completed, they are - inserted following the generation order. - """ - # TODO implement throttling to reduce cpu usage?? - TABLE_NAME = "docs_received" - FIELD_NAMES = "doc_id, rev, content, gen, trans_id" - - write_encrypted_lock = threading.Lock() - - def __init__(self, *args, **kwargs): - """ - Initialize the decrypter pool, and setup a dict for putting the - results of the decrypted docs until they are picked by the insert - routine that gets them in order. - """ - self._insert_doc_cb = kwargs.pop("insert_doc_cb") - SyncEncryptDecryptPool.__init__(self, *args, **kwargs) - self.decrypted_docs = {} - self.source_replica_uid = None - - def set_source_replica_uid(self, source_replica_uid): - """ - Set the source replica uid for this decrypter pool instance. - - :param source_replica_uid: The uid of the source replica. - :type source_replica_uid: str - """ - self.source_replica_uid = source_replica_uid - - def insert_encrypted_received_doc(self, doc_id, doc_rev, content, - gen, trans_id): - """ - Insert a received message with encrypted content, to be decrypted later - on. - - :param doc_id: The Document ID. - :type doc_id: str - :param doc_rev: The Document Revision - :param doc_rev: str - :param content: the Content of the document - :type content: str - :param gen: the Document Generation - :type gen: int - :param trans_id: Transaction ID - :type trans_id: str - """ - docstr = json.dumps(content) - sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?)" % ( - self.TABLE_NAME,) - - con = self._sync_db - with self._sync_db_write_lock: - with con: - con.execute(sql_ins, (doc_id, doc_rev, docstr, gen, trans_id)) - - def insert_marker_for_received_doc(self, doc_id, doc_rev, gen): - """ - Insert a marker with the document id, revision and generation on the - sync db. This document does not have an encrypted payload, so the - content has already been inserted into the decrypted_docs dictionary - from where it can be picked following generation order. - We need to leave here the marker to be able to calculate the expected - insertion order for a synchronization batch. - - :param doc_id: The Document ID. - :type doc_id: str - :param doc_rev: The Document Revision - :param doc_rev: str - :param gen: the Document Generation - :type gen: int - """ - sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?)" % ( - self.TABLE_NAME,) - con = self._sync_db - with self._sync_db_write_lock: - with con: - con.execute(sql_ins, (doc_id, doc_rev, '', gen, '')) - - def insert_received_doc(self, doc_id, doc_rev, content, gen, trans_id): - """ - Insert a document that is not symmetrically encrypted. - We store it in the staging area (the decrypted_docs dictionary) to be - picked up in order as the preceding documents are decrypted. - - :param doc_id: The Document ID. - :type doc_id: str - :param doc_rev: The Document Revision - :param doc_rev: str - :param content: the Content of the document - :type content: str - :param gen: the Document Generation - :type gen: int - :param trans_id: Transaction ID - :type trans_id: str - """ - # XXX this need a deeper review / testing. - # I believe that what I'm doing here is prone to problems - # if the sync is interrupted (ie, client crash) in the worst possible - # moment. We would need a recover strategy in that case - # (or, insert the document in the table all the same, but with a flag - # saying if the document is sym-encrypted or not), - content = json.dumps(content) - result = doc_id, doc_rev, content, gen, trans_id - self.decrypted_docs[gen] = result - self.insert_marker_for_received_doc(doc_id, doc_rev, gen) - - def delete_encrypted_received_doc(self, doc_id, doc_rev): - """ - Delete a encrypted received doc after it was inserted into the local - db. - - :param doc_id: Document ID. - :type doc_id: str - :param doc_rev: Document revision. - :type doc_rev: str - """ - sql_del = "DELETE FROM '%s' WHERE doc_id=? AND rev=?" % ( - self.TABLE_NAME,) - con = self._sync_db - with self._sync_db_write_lock: - with con: - con.execute(sql_del, (doc_id, doc_rev)) - - def decrypt_doc(self, doc_id, rev, source_replica_uid, workers=True): - """ - Symmetrically decrypt a document. - - :param doc_id: The ID for the document with contents to be encrypted. - :type doc: str - :param rev: The revision of the document. - :type rev: str - :param source_replica_uid: - :type source_replica_uid: str - - :param workers: Whether to defer the decryption to the multiprocess - pool of workers. Useful for debugging purposes. - :type workers: bool - """ - self.source_replica_uid = source_replica_uid - - # insert_doc_cb is a proxy object that gets updated with the right - # insert function only when the sync_target invokes the sync_exchange - # method. so, if we don't still have a non-empty callback, we refuse - # to proceed. - if sameProxiedObjects(self._insert_doc_cb.get(source_replica_uid), - None): - logger.debug("Sync decrypter pool: no insert_doc_cb() yet.") - return - - # XXX move to get_doc function... - c = self._sync_db.cursor() - sql = "SELECT * FROM '%s' WHERE doc_id=? AND rev=?" % ( - self.TABLE_NAME,) - try: - c.execute(sql, (doc_id, rev)) - res = c.fetchone() - except Exception as exc: - logger.warning("Error getting docs from syncdb: %r" % (exc,)) - return - if res is None: - logger.debug("Doc %s:%s does not exist in sync db" % (doc_id, rev)) - return - - soledad_assert(self._crypto is not None, "need a crypto object") - try: - doc_id, rev, docstr, gen, trans_id = res - except ValueError: - logger.warning("Wrong entry in sync db") - return - - if len(docstr) == 0: - # not encrypted payload - return - - try: - content = json.loads(docstr) - except TypeError: - logger.warning("Wrong type while decoding json: %s" % repr(docstr)) - return - - key = self._crypto.doc_passphrase(doc_id) - secret = self._crypto.secret - args = doc_id, rev, content, gen, trans_id, key, secret - - try: - if workers: - # Ouch. This is sent to the workers asynchronously, so - # we have no way of logging errors. We'd have to inspect - # lingering results by querying successful / get() over them... - # Or move the heck out of it to twisted. - res = self._pool.apply_async( - decrypt_doc_task, args, - callback=self.decrypt_doc_cb) - else: - # decrypt inline - res = decrypt_doc_task(*args) - self.decrypt_doc_cb(res) - - except Exception as exc: - logger.exception(exc) - - def decrypt_doc_cb(self, result): - """ - Temporarily store the decryption result in a dictionary where it will - be picked by process_decrypted. - - :param result: A tuple containing the doc id, revision and encrypted - content. - :type result: tuple(str, str, str) - """ - doc_id, rev, content, gen, trans_id = result - logger.debug("Sync decrypter pool: decrypted doc %s: %s %s" % (doc_id, rev, gen)) - self.decrypted_docs[gen] = result - - def get_docs_by_generation(self): - """ - Get all documents in the received table from the sync db, - ordered by generation. - - :return: list of doc_id, rev, generation - """ - c = self._sync_db.cursor() - sql = "SELECT doc_id, rev, gen FROM %s ORDER BY gen" % ( - self.TABLE_NAME,) - c.execute(sql) - return c.fetchall() - - def count_received_encrypted_docs(self): - """ - Count how many documents we have in the table for received and - encrypted docs. - - :return: The count of documents. - :rtype: int - """ - if self._sync_db is None: - logger.warning("cannot return count with null sync_db") - return - c = self._sync_db.cursor() - sql = "SELECT COUNT(*) FROM %s" % (self.TABLE_NAME,) - c.execute(sql) - res = c.fetchone() - if res is not None: - return res[0] - else: - return 0 - - def decrypt_received_docs(self): - """ - Get all the encrypted documents from the sync database and dispatch a - decrypt worker to decrypt each one of them. - """ - docs_by_generation = self.get_docs_by_generation() - logger.debug("Sync decrypter pool: There are %d documents to " \ - "decrypt." % len(docs_by_generation)) - for doc_id, rev, gen in filter(None, docs_by_generation): - self.decrypt_doc(doc_id, rev, self.source_replica_uid) - - def process_decrypted(self): - """ - Process the already decrypted documents, and insert as many documents - as can be taken from the expected order without finding a gap. - - :return: Whether we have processed all the pending docs. - :rtype: bool - """ - # Acquire the lock to avoid processing while we're still - # getting data from the syncing stream, to avoid InvalidGeneration - # problems. - with self.write_encrypted_lock: - already_decrypted = self.decrypted_docs - docs = self.get_docs_by_generation() - docs = filter(lambda entry: len(entry) > 0, docs) - expected = [gen for doc_id, rev, gen in docs] - docs_to_insert = get_insertable_docs_by_gen( - expected, already_decrypted) - for doc_fields in docs_to_insert: - self.insert_decrypted_local_doc(*doc_fields) - remaining = self.count_received_encrypted_docs() - return remaining == 0 - - def insert_decrypted_local_doc(self, doc_id, doc_rev, content, - gen, trans_id): - """ - Insert the decrypted document into the local sqlcipher database. - Makes use of the passed callback `return_doc_cb` passed to the caller - by u1db sync. - - :param doc_id: The document id. - :type doc_id: str - :param doc_rev: The document revision. - :type doc_rev: str - :param content: The serialized content of the document. - :type content: str - :param gen: The generation corresponding to the modification of that - document. - :type gen: int - :param trans_id: The transaction id corresponding to the modification - of that document. - :type trans_id: str - """ - # could pass source_replica in params for callback chain - insert_fun = self._insert_doc_cb[self.source_replica_uid] - logger.debug("Sync decrypter pool: inserting doc in local db: " \ - "%s:%s %s" % (doc_id, doc_rev, gen)) - try: - # convert deleted documents to avoid error on document creation - if content == 'null': - content = None - doc = SoledadDocument(doc_id, doc_rev, content) - insert_fun(doc, int(gen), trans_id) - except Exception as exc: - logger.error("Sync decrypter pool: error while inserting " - "decrypted doc into local db.") - logger.exception(exc) - - else: - # If no errors found, remove it from the local temporary dict - # and from the received database. - self.decrypted_docs.pop(gen) - self.delete_encrypted_received_doc(doc_id, doc_rev) diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py new file mode 100644 index 00000000..2ad98767 --- /dev/null +++ b/client/src/leap/soledad/client/encdecpool.py @@ -0,0 +1,810 @@ +# -*- coding: utf-8 -*- +# encdecpool.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +""" +A pool of encryption/decryption concurrent and parallel workers for using +during synchronization. +""" + + +import multiprocessing +import Queue +import json +import logging + +from twisted.internet import reactor +from twisted.internet import defer +from twisted.python import log + +from leap.soledad.common.document import SoledadDocument +from leap.soledad.common import soledad_assert + +from leap.soledad.client.crypto import encrypt_docstr +from leap.soledad.client.crypto import decrypt_doc_dict + + +logger = logging.getLogger(__name__) + + +# +# Encrypt/decrypt pools of workers +# + +class SyncEncryptDecryptPool(object): + """ + Base class for encrypter/decrypter pools. + """ + + # TODO implement throttling to reduce cpu usage?? + WORKERS = multiprocessing.cpu_count() + + def __init__(self, crypto, sync_db): + """ + Initialize the pool of encryption-workers. + + :param crypto: A SoledadCryto instance to perform the encryption. + :type crypto: leap.soledad.crypto.SoledadCrypto + + :param sync_db: A database connection handle + :type sync_db: pysqlcipher.dbapi2.Connection + """ + self._crypto = crypto + self._sync_db = sync_db + self._pool = None + self._delayed_call = None + self._started = False + + def start(self): + self._create_pool() + self._started = True + + def stop(self): + self._started = False + self._destroy_pool() + # maybe cancel the next delayed call + if self._delayed_call \ + and not self._delayed_call.called: + self._delayed_call.cancel() + + @property + def running(self): + return self._started + + def _create_pool(self): + self._pool = multiprocessing.Pool(self.WORKERS) + + def _destroy_pool(self): + """ + Cleanly close the pool of workers. + """ + logger.debug("Closing %s" % (self.__class__.__name__,)) + self._pool.close() + try: + self._pool.join() + except Exception: + pass + + def terminate(self): + """ + Terminate the pool of workers. + """ + logger.debug("Terminating %s" % (self.__class__.__name__,)) + self._pool.terminate() + + def _runOperation(self, query, *args): + """ + Run an operation on the sync db. + + :param query: The query to be executed. + :type query: str + :param args: A list of query arguments. + :type args: list + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + return self._sync_db.runOperation(query, *args) + + def _runQuery(self, query, *args): + """ + Run a query on the sync db. + + :param query: The query to be executed. + :type query: str + :param args: A list of query arguments. + :type args: list + + :return: A deferred that will fire with the results of the database + query. + :rtype: twisted.internet.defer.Deferred + """ + return self._sync_db.runQuery(query, *args) + + +def encrypt_doc_task(doc_id, doc_rev, content, key, secret): + """ + Encrypt the content of the given document. + + :param doc_id: The document id. + :type doc_id: str + :param doc_rev: The document revision. + :type doc_rev: str + :param content: The serialized content of the document. + :type content: str + :param key: The encryption key. + :type key: str + :param secret: The Soledad storage secret (used for MAC auth). + :type secret: str + + :return: A tuple containing the doc id, revision and encrypted content. + :rtype: tuple(str, str, str) + """ + encrypted_content = encrypt_docstr( + content, doc_id, doc_rev, key, secret) + return doc_id, doc_rev, encrypted_content + + +class SyncEncrypterPool(SyncEncryptDecryptPool): + """ + Pool of workers that spawn subprocesses to execute the symmetric encryption + of documents to be synced. + """ + TABLE_NAME = "docs_tosync" + FIELD_NAMES = "doc_id PRIMARY KEY, rev, content" + + ENCRYPT_LOOP_PERIOD = 2 + + def __init__(self, *args, **kwargs): + """ + Initialize the sync encrypter pool. + """ + SyncEncryptDecryptPool.__init__(self, *args, **kwargs) + self._encr_queue = defer.DeferredQueue() + # TODO delete already synced files from database + + def start(self): + """ + Start the encrypter pool. + """ + SyncEncryptDecryptPool.start(self) + logger.debug("Starting the encryption loop...") + reactor.callWhenRunning(self._maybe_encrypt_and_recurse) + + def stop(self): + """ + Stop the encrypter pool. + """ + # close the sync queue + if self._encr_queue: + q = self._encr_queue + for d in q.pending: + d.cancel() + del q + self._encr_queue = None + + SyncEncryptDecryptPool.stop(self) + + def enqueue_doc_for_encryption(self, doc): + """ + Enqueue a document for encryption. + + :param doc: The document to be encrypted. + :type doc: SoledadDocument + """ + try: + self._encr_queue.put(doc) + except Queue.Full: + # do not asynchronously encrypt this file if the queue is full + pass + + @defer.inlineCallbacks + def _maybe_encrypt_and_recurse(self): + """ + Process one document from the encryption queue. + + Asynchronously encrypt a document that will then be stored in the sync + db. Processed documents will be read by the SoledadSyncTarget during + the sync_exchange. + """ + try: + while self.running: + doc = yield self._encr_queue.get() + self._encrypt_doc(doc) + except defer.QueueUnderflow: + self._delayed_call = reactor.callLater( + self.ENCRYPT_LOOP_PERIOD, + self._maybe_encrypt_and_recurse) + + def _encrypt_doc(self, doc): + """ + Symmetrically encrypt a document. + + :param doc: The document with contents to be encrypted. + :type doc: SoledadDocument + + :param workers: Whether to defer the decryption to the multiprocess + pool of workers. Useful for debugging purposes. + :type workers: bool + """ + soledad_assert(self._crypto is not None, "need a crypto object") + docstr = doc.get_json() + key = self._crypto.doc_passphrase(doc.doc_id) + secret = self._crypto.secret + args = doc.doc_id, doc.rev, docstr, key, secret + # encrypt asynchronously + self._pool.apply_async( + encrypt_doc_task, args, + callback=self._encrypt_doc_cb) + + def _encrypt_doc_cb(self, result): + """ + Insert results of encryption routine into the local sync database. + + :param result: A tuple containing the doc id, revision and encrypted + content. + :type result: tuple(str, str, str) + """ + doc_id, doc_rev, content = result + return self._insert_encrypted_local_doc(doc_id, doc_rev, content) + + def _insert_encrypted_local_doc(self, doc_id, doc_rev, content): + """ + Insert the contents of the encrypted doc into the local sync + database. + + :param doc_id: The document id. + :type doc_id: str + :param doc_rev: The document revision. + :type doc_rev: str + :param content: The serialized content of the document. + :type content: str + """ + query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?)" \ + % (self.TABLE_NAME,) + return self._runOperation(query, (doc_id, doc_rev, content)) + + @defer.inlineCallbacks + def get_encrypted_doc(self, doc_id, doc_rev): + """ + Get an encrypted document from the sync db. + + :param doc_id: The id of the document. + :type doc_id: str + :param doc_rev: The revision of the document. + :type doc_rev: str + + :return: A deferred that will fire with the encrypted content of the + document or None if the document was not found in the sync + db. + :rtype: twisted.internet.defer.Deferred + """ + query = "SELECT content FROM %s WHERE doc_id=? and rev=?" \ + % self.TABLE_NAME + result = yield self._runQuery(query, (doc_id, doc_rev)) + if result: + logger.debug("Found doc on sync db: %s" % doc_id) + val = result.pop() + defer.returnValue(val[0]) + logger.debug("Did not find doc on sync db: %s" % doc_id) + defer.returnValue(None) + + def delete_encrypted_doc(self, doc_id, doc_rev): + """ + Delete an encrypted document from the sync db. + + :param doc_id: The id of the document. + :type doc_id: str + :param doc_rev: The revision of the document. + :type doc_rev: str + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + query = "DELETE FROM %s WHERE doc_id=? and rev=?" \ + % self.TABLE_NAME + self._runOperation(query, (doc_id, doc_rev)) + + +def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret, + idx): + """ + Decrypt the content of the given document. + + :param doc_id: The document id. + :type doc_id: str + :param doc_rev: The document revision. + :type doc_rev: str + :param content: The encrypted content of the document. + :type content: str + :param gen: The generation corresponding to the modification of that + document. + :type gen: int + :param trans_id: The transaction id corresponding to the modification of + that document. + :type trans_id: str + :param key: The encryption key. + :type key: str + :param secret: The Soledad storage secret (used for MAC auth). + :type secret: str + :param idx: The index of this document in the current sync process. + :type idx: int + + :return: A tuple containing the doc id, revision and encrypted content. + :rtype: tuple(str, str, str) + """ + decrypted_content = decrypt_doc_dict(content, doc_id, doc_rev, key, secret) + return doc_id, doc_rev, decrypted_content, gen, trans_id, idx + + +class SyncDecrypterPool(SyncEncryptDecryptPool): + """ + Pool of workers that spawn subprocesses to execute the symmetric decryption + of documents that were received. + + The decryption of the received documents is done in two steps: + + 1. Encrypted documents are stored in the sync db by the actual soledad + sync loop. + 2. The soledad sync loop tells us how many documents we should expect + to process. + 3. We start a decrypt-and-process loop: + + a. Encrypted documents are fetched. + b. Encrypted documents are decrypted. + c. The longest possible list of decrypted documents are inserted + in the soledad db (this depends on which documents have already + arrived and which documents have already been decrypte, because + the order of insertion in the local soledad db matters). + d. Processed documents are deleted from the database. + + 4. When we have processed as many documents as we should, the loop + finishes. + """ + TABLE_NAME = "docs_received" + FIELD_NAMES = "doc_id PRIMARY KEY, rev, content, gen, " \ + "trans_id, encrypted, idx" + + """ + Period of recurrence of the periodic decrypting task, in seconds. + """ + DECRYPT_LOOP_PERIOD = 0.5 + + def __init__(self, *args, **kwargs): + """ + Initialize the decrypter pool, and setup a dict for putting the + results of the decrypted docs until they are picked by the insert + routine that gets them in order. + + :param insert_doc_cb: A callback for inserting received documents from + target. If not overriden, this will call u1db + insert_doc_from_target in synchronizer, which + implements the TAKE OTHER semantics. + :type insert_doc_cb: function + :param source_replica_uid: The source replica uid, used to find the + correct callback for inserting documents. + :type source_replica_uid: str + """ + self._insert_doc_cb = kwargs.pop("insert_doc_cb") + self.source_replica_uid = kwargs.pop("source_replica_uid") + + SyncEncryptDecryptPool.__init__(self, *args, **kwargs) + + self._docs_to_process = None + self._processed_docs = 0 + self._last_inserted_idx = 0 + self._decrypting_docs = [] + + # a list that holds the asynchronous decryption results so they can be + # collected when they are ready + self._async_results = [] + + # initialize db and make sure any database operation happens after + # db initialization + self._deferred_init = self._init_db() + self._wait_init_db('_runOperation', '_runQuery') + + def _wait_init_db(self, *methods): + """ + Methods that need to wait for db initialization. + + :param methods: methods that need to wait for initialization + :type methods: tuple(str) + """ + self._waiting = [] + self._stored = {} + + def _restore(_): + for method in self._stored: + setattr(self, method, self._stored[method]) + for d in self._waiting: + d.callback(None) + + def _makeWrapper(method): + def wrapper(*args, **kw): + d = defer.Deferred() + d.addCallback(lambda _: self._stored[method](*args, **kw)) + self._waiting.append(d) + return d + return wrapper + + for method in methods: + self._stored[method] = getattr(self, method) + setattr(self, method, _makeWrapper(method)) + + self._deferred_init.addCallback(_restore) + + def start(self, docs_to_process): + """ + Set the number of documents we expect to process. + + This should be called by the during the sync exchange process as soon + as we know how many documents are arriving from the server. + + :param docs_to_process: The number of documents to process. + :type docs_to_process: int + """ + SyncEncryptDecryptPool.start(self) + self._docs_to_process = docs_to_process + self._deferred = defer.Deferred() + reactor.callWhenRunning(self._launch_decrypt_and_recurse) + + def _launch_decrypt_and_recurse(self): + d = self._decrypt_and_recurse() + d.addErrback(self._errback) + + def _errback(self, failure): + log.err(failure) + self._deferred.errback(failure) + self._processed_docs = 0 + self._last_inserted_idx = 0 + + @property + def deferred(self): + """ + Deferred that will be fired when the decryption loop has finished + processing all the documents. + """ + return self._deferred + + def insert_encrypted_received_doc( + self, doc_id, doc_rev, content, gen, trans_id, idx): + """ + Insert a received message with encrypted content, to be decrypted later + on. + + :param doc_id: The document ID. + :type doc_id: str + :param doc_rev: The document Revision + :param doc_rev: str + :param content: The content of the document + :type content: dict + :param gen: The document Generation + :type gen: int + :param trans_id: Transaction ID + :type trans_id: str + :param idx: The index of this document in the current sync process. + :type idx: int + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + docstr = json.dumps(content) + query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \ + % self.TABLE_NAME + return self._runOperation( + query, (doc_id, doc_rev, docstr, gen, trans_id, 1, idx)) + + def insert_received_doc( + self, doc_id, doc_rev, content, gen, trans_id, idx): + """ + Insert a document that is not symmetrically encrypted. + We store it in the staging area (the decrypted_docs dictionary) to be + picked up in order as the preceding documents are decrypted. + + :param doc_id: The document id + :type doc_id: str + :param doc_rev: The document revision + :param doc_rev: str or dict + :param content: The content of the document + :type content: dict + :param gen: The document generation + :type gen: int + :param trans_id: The transaction id + :type trans_id: str + :param idx: The index of this document in the current sync process. + :type idx: int + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + if not isinstance(content, str): + content = json.dumps(content) + query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \ + % self.TABLE_NAME + return self._runOperation( + query, (doc_id, doc_rev, content, gen, trans_id, 0, idx)) + + def _delete_received_doc(self, doc_id): + """ + Delete a received doc after it was inserted into the local db. + + :param doc_id: Document ID. + :type doc_id: str + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + query = "DELETE FROM '%s' WHERE doc_id=?" \ + % self.TABLE_NAME + return self._runOperation(query, (doc_id,)) + + def _async_decrypt_doc(self, doc_id, rev, content, gen, trans_id, idx): + """ + Dispatch an asynchronous document decrypting routine and save the + result object. + + :param doc_id: The ID for the document with contents to be encrypted. + :type doc: str + :param rev: The revision of the document. + :type rev: str + :param content: The serialized content of the document. + :type content: str + :param gen: The generation corresponding to the modification of that + document. + :type gen: int + :param trans_id: The transaction id corresponding to the modification + of that document. + :type trans_id: str + :param idx: The index of this document in the current sync process. + :type idx: int + """ + soledad_assert(self._crypto is not None, "need a crypto object") + + content = json.loads(content) + key = self._crypto.doc_passphrase(doc_id) + secret = self._crypto.secret + args = doc_id, rev, content, gen, trans_id, key, secret, idx + # decrypt asynchronously + self._async_results.append( + self._pool.apply_async( + decrypt_doc_task, args)) + + def _decrypt_doc_cb(self, result): + """ + Store the decryption result in the sync db from where it will later be + picked by _process_decrypted_docs. + + :param result: A tuple containing the document's id, revision, + content, generation, transaction id and sync index. + :type result: tuple(str, str, str, int, str, int) + + :return: A deferred that will fire after the document has been + inserted in the sync db. + :rtype: twisted.internet.defer.Deferred + """ + doc_id, rev, content, gen, trans_id, idx = result + logger.debug("Sync decrypter pool: decrypted doc %s: %s %s %s" + % (doc_id, rev, gen, trans_id)) + self._decrypting_docs.remove((doc_id, rev)) + return self.insert_received_doc( + doc_id, rev, content, gen, trans_id, idx) + + def _get_docs(self, encrypted=None, order_by='idx', order='ASC'): + """ + Get documents from the received docs table in the sync db. + + :param encrypted: If not None, only return documents with encrypted + field equal to given parameter. + :type encrypted: bool or None + :param order_by: The name of the field to order results. + :type order_by: str + :param order: Whether the order should be ASC or DESC. + :type order: str + + :return: A deferred that will fire with the results of the database + query. + :rtype: twisted.internet.defer.Deferred + """ + query = "SELECT doc_id, rev, content, gen, trans_id, encrypted, " \ + "idx FROM %s" % self.TABLE_NAME + if encrypted is not None: + query += " WHERE encrypted = %d" % int(encrypted) + query += " ORDER BY %s %s" % (order_by, order) + return self._runQuery(query) + + @defer.inlineCallbacks + def _get_insertable_docs(self): + """ + Return a list of non-encrypted documents ready to be inserted. + + :return: A deferred that will fire with the list of insertable + documents. + :rtype: twisted.internet.defer.Deferred + """ + # here, we fetch the list of decrypted documents and compare with the + # index of the last succesfully processed document. + decrypted_docs = yield self._get_docs(encrypted=False) + insertable = [] + last_idx = self._last_inserted_idx + for doc_id, rev, content, gen, trans_id, encrypted, idx in \ + decrypted_docs: + # XXX for some reason, a document might not have been deleted from + # the database. This is a bug. In this point, already + # processed documents should have been removed from the sync + # database and we should not have to skip them here. We need + # to find out why this is happening, fix, and remove the + # skipping below. + if (idx < last_idx + 1): + continue + if (idx != last_idx + 1): + break + insertable.append((doc_id, rev, content, gen, trans_id, idx)) + last_idx += 1 + defer.returnValue(insertable) + + @defer.inlineCallbacks + def _async_decrypt_received_docs(self): + """ + Get all the encrypted documents from the sync database and dispatch a + decrypt worker to decrypt each one of them. + + :return: A deferred that will fire after all documents have been + decrypted and inserted back in the sync db. + :rtype: twisted.internet.defer.Deferred + """ + docs = yield self._get_docs(encrypted=True) + for doc_id, rev, content, gen, trans_id, _, idx in docs: + if (doc_id, rev) not in self._decrypting_docs: + self._decrypting_docs.append((doc_id, rev)) + self._async_decrypt_doc( + doc_id, rev, content, gen, trans_id, idx) + + @defer.inlineCallbacks + def _process_decrypted_docs(self): + """ + Fetch as many decrypted documents as can be taken from the expected + order and insert them in the local replica. + + :return: A deferred that will fire with the list of inserted + documents. + :rtype: twisted.internet.defer.Deferred + """ + insertable = yield self._get_insertable_docs() + for doc_fields in insertable: + self._insert_decrypted_local_doc(*doc_fields) + defer.returnValue(insertable) + + def _delete_processed_docs(self, inserted): + """ + Delete from the sync db documents that have been processed. + + :param inserted: List of documents inserted in the previous process + step. + :type inserted: list + + :return: A list of deferreds that will fire when each operation in the + database has finished. + :rtype: twisted.internet.defer.DeferredList + """ + deferreds = [] + for doc_id, doc_rev, _, _, _, _ in inserted: + deferreds.append( + self._delete_received_doc(doc_id)) + if not deferreds: + return defer.succeed(None) + return defer.gatherResults(deferreds) + + def _insert_decrypted_local_doc(self, doc_id, doc_rev, content, + gen, trans_id, idx): + """ + Insert the decrypted document into the local replica. + + Make use of the passed callback `insert_doc_cb` passed to the caller + by u1db sync. + + :param doc_id: The document id. + :type doc_id: str + :param doc_rev: The document revision. + :type doc_rev: str + :param content: The serialized content of the document. + :type content: str + :param gen: The generation corresponding to the modification of that + document. + :type gen: int + :param trans_id: The transaction id corresponding to the modification + of that document. + :type trans_id: str + """ + # could pass source_replica in params for callback chain + logger.debug("Sync decrypter pool: inserting doc in local db: " + "%s:%s %s" % (doc_id, doc_rev, gen)) + + # convert deleted documents to avoid error on document creation + if content == 'null': + content = None + doc = SoledadDocument(doc_id, doc_rev, content) + gen = int(gen) + self._insert_doc_cb(doc, gen, trans_id) + + # store info about processed docs + self._last_inserted_idx = idx + self._processed_docs += 1 + + def _init_db(self): + """ + Empty the received docs table of the sync database. + + :return: A deferred that will fire when the operation in the database + has finished. + :rtype: twisted.internet.defer.Deferred + """ + query = "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,) + return self._runOperation(query) + + def _collect_async_decryption_results(self): + """ + Collect the results of the asynchronous doc decryptions and re-raise + any exception raised by a multiprocessing async decryption call. + + :raise Exception: Raised if an async call has raised an exception. + """ + async_results = self._async_results[:] + for res in async_results: + if res.ready(): + self._decrypt_doc_cb(res.get()) # might raise an exception! + self._async_results.remove(res) + + @defer.inlineCallbacks + def _decrypt_and_recurse(self): + """ + Decrypt the documents received from remote replica and insert them + into the local one. + + This method implicitelly returns a defferred (see the decorator + above). It should only be called by _launch_decrypt_and_process(). + because this way any exceptions raised here will be stored by the + errback attached to the deferred returned. + + :return: A deferred which will fire after all decrypt, process and + delete operations have been executed. + :rtype: twisted.internet.defer.Deferred + """ + processed = self._processed_docs + pending = self._docs_to_process + + if processed < pending: + yield self._async_decrypt_received_docs() + self._collect_async_decryption_results() + docs = yield self._process_decrypted_docs() + yield self._delete_processed_docs(docs) + # recurse + self._delayed_call = reactor.callLater( + self.DECRYPT_LOOP_PERIOD, + self._launch_decrypt_and_recurse) + else: + self._finish() + + def _finish(self): + self._deferred.callback(None) + self._processed_docs = 0 + self._last_inserted_idx = 0 diff --git a/client/src/leap/soledad/client/events.py b/client/src/leap/soledad/client/events.py index c4c09ac5..b1379521 100644 --- a/client/src/leap/soledad/client/events.py +++ b/client/src/leap/soledad/client/events.py @@ -20,39 +20,35 @@ Signaling functions. """ +from leap.common.events import emit +from leap.common.events import catalog -SOLEDAD_CREATING_KEYS = 'Creating keys...' -SOLEDAD_DONE_CREATING_KEYS = 'Done creating keys.' -SOLEDAD_DOWNLOADING_KEYS = 'Downloading keys...' -SOLEDAD_DONE_DOWNLOADING_KEYS = 'Done downloading keys.' -SOLEDAD_UPLOADING_KEYS = 'Uploading keys...' -SOLEDAD_DONE_UPLOADING_KEYS = 'Done uploading keys.' -SOLEDAD_NEW_DATA_TO_SYNC = 'New data available.' -SOLEDAD_DONE_DATA_SYNC = 'Done data sync.' -SOLEDAD_SYNC_SEND_STATUS = 'Sync: sent one document.' -SOLEDAD_SYNC_RECEIVE_STATUS = 'Sync: received one document.' -# we want to use leap.common.events to emits signals, if it is available. -try: - from leap.common import events - from leap.common.events import signal - SOLEDAD_CREATING_KEYS = events.proto.SOLEDAD_CREATING_KEYS - SOLEDAD_DONE_CREATING_KEYS = events.proto.SOLEDAD_DONE_CREATING_KEYS - SOLEDAD_DOWNLOADING_KEYS = events.proto.SOLEDAD_DOWNLOADING_KEYS - SOLEDAD_DONE_DOWNLOADING_KEYS = \ - events.proto.SOLEDAD_DONE_DOWNLOADING_KEYS - SOLEDAD_UPLOADING_KEYS = events.proto.SOLEDAD_UPLOADING_KEYS - SOLEDAD_DONE_UPLOADING_KEYS = \ - events.proto.SOLEDAD_DONE_UPLOADING_KEYS - SOLEDAD_NEW_DATA_TO_SYNC = events.proto.SOLEDAD_NEW_DATA_TO_SYNC - SOLEDAD_DONE_DATA_SYNC = events.proto.SOLEDAD_DONE_DATA_SYNC - SOLEDAD_SYNC_SEND_STATUS = events.proto.SOLEDAD_SYNC_SEND_STATUS - SOLEDAD_SYNC_RECEIVE_STATUS = events.proto.SOLEDAD_SYNC_RECEIVE_STATUS +SOLEDAD_CREATING_KEYS = catalog.SOLEDAD_CREATING_KEYS +SOLEDAD_DONE_CREATING_KEYS = catalog.SOLEDAD_DONE_CREATING_KEYS +SOLEDAD_DOWNLOADING_KEYS = catalog.SOLEDAD_DOWNLOADING_KEYS +SOLEDAD_DONE_DOWNLOADING_KEYS = \ + catalog.SOLEDAD_DONE_DOWNLOADING_KEYS +SOLEDAD_UPLOADING_KEYS = catalog.SOLEDAD_UPLOADING_KEYS +SOLEDAD_DONE_UPLOADING_KEYS = \ + catalog.SOLEDAD_DONE_UPLOADING_KEYS +SOLEDAD_NEW_DATA_TO_SYNC = catalog.SOLEDAD_NEW_DATA_TO_SYNC +SOLEDAD_DONE_DATA_SYNC = catalog.SOLEDAD_DONE_DATA_SYNC +SOLEDAD_SYNC_SEND_STATUS = catalog.SOLEDAD_SYNC_SEND_STATUS +SOLEDAD_SYNC_RECEIVE_STATUS = catalog.SOLEDAD_SYNC_RECEIVE_STATUS -except ImportError: - # we define a fake signaling function and fake signal constants that will - # allow for logging signaling attempts in case leap.common.events is not - # available. - def signal(signal, content=""): - logger.info("Would signal: %s - %s." % (str(signal), content)) +__all__ = [ + "catalog", + "emit", + "SOLEDAD_CREATING_KEYS", + "SOLEDAD_DONE_CREATING_KEYS", + "SOLEDAD_DOWNLOADING_KEYS", + "SOLEDAD_DONE_DOWNLOADING_KEYS", + "SOLEDAD_UPLOADING_KEYS", + "SOLEDAD_DONE_UPLOADING_KEYS", + "SOLEDAD_NEW_DATA_TO_SYNC", + "SOLEDAD_DONE_DATA_SYNC", + "SOLEDAD_SYNC_SEND_STATUS", + "SOLEDAD_SYNC_RECEIVE_STATUS", +] diff --git a/client/src/leap/soledad/client/examples/README b/client/src/leap/soledad/client/examples/README new file mode 100644 index 00000000..3aed8377 --- /dev/null +++ b/client/src/leap/soledad/client/examples/README @@ -0,0 +1,4 @@ +Right now, you can find here both an example of use +and the benchmarking scripts. +TODO move benchmark scripts to root scripts/ folder, +and leave here only a minimal example. diff --git a/client/src/leap/soledad/client/examples/benchmarks/.gitignore b/client/src/leap/soledad/client/examples/benchmarks/.gitignore new file mode 100644 index 00000000..2211df63 --- /dev/null +++ b/client/src/leap/soledad/client/examples/benchmarks/.gitignore @@ -0,0 +1 @@ +*.txt diff --git a/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh new file mode 100755 index 00000000..1995eee1 --- /dev/null +++ b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh @@ -0,0 +1,3 @@ +#!/bin/sh +mkdir tmp +wget http://www.gutenberg.org/cache/epub/101/pg101.txt -O hacker_crackdown.txt diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py new file mode 100644 index 00000000..08775580 --- /dev/null +++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- +# measure_index_times.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Measure u1db retrieval times for different u1db index situations. +""" +from __future__ import print_function +from functools import partial +import datetime +import hashlib +import os +import sys + +import u1db +from twisted.internet import defer, reactor + +from leap.soledad.client import adbapi +from leap.soledad.client.sqlcipher import SQLCipherOptions + + +folder = os.environ.get("TMPDIR", "tmp") +numdocs = int(os.environ.get("DOCS", "1000")) +silent = os.environ.get("SILENT", False) +tmpdb = os.path.join(folder, "test.soledad") + + +sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt") +sample_path = os.path.join(os.curdir, sample_file) + +try: + with open(sample_file) as f: + SAMPLE = f.readlines() +except Exception: + print("[!] Problem opening sample file. Did you download " + "the sample, or correctly set 'SAMPLE' env var?") + sys.exit(1) + +if numdocs > len(SAMPLE): + print("[!] Sorry! The requested DOCS number is larger than " + "the num of lines in our sample file") + sys.exit(1) + + +def debug(*args): + if not silent: + print(*args) + +debug("[+] db path:", tmpdb) +debug("[+] num docs", numdocs) + +if os.path.isfile(tmpdb): + debug("[+] Removing existing db file...") + os.remove(tmpdb) + +start_time = datetime.datetime.now() + +opts = SQLCipherOptions(tmpdb, "secret", create=True) +dbpool = adbapi.getConnectionPool(opts) + + +def createDoc(doc): + return dbpool.runU1DBQuery("create_doc", doc) + +db_indexes = { + 'by-chash': ['chash'], + 'by-number': ['number']} + + +def create_indexes(_): + deferreds = [] + for index, definition in db_indexes.items(): + d = dbpool.runU1DBQuery("create_index", index, *definition) + deferreds.append(d) + return defer.gatherResults(deferreds) + + +class TimeWitness(object): + def __init__(self, init_time): + self.init_time = init_time + + def get_time_count(self): + return datetime.datetime.now() - self.init_time + + +def get_from_index(_): + init_time = datetime.datetime.now() + debug("GETTING FROM INDEX...", init_time) + + def printValue(res, time): + print("RESULT->", res) + print("Index Query Took: ", time.get_time_count()) + return res + + d = dbpool.runU1DBQuery( + "get_from_index", "by-chash", + # "1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5") + # XXX this is line 89 from the hacker crackdown... + # Should accept any other optional hash as an enviroment variable. + "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469") + d.addCallback(printValue, TimeWitness(init_time)) + return d + + +def getAllDocs(): + return dbpool.runU1DBQuery("get_all_docs") + + +def errBack(e): + debug("[!] ERROR FOUND!!!") + e.printTraceback() + reactor.stop() + + +def countDocs(_): + debug("counting docs...") + d = getAllDocs() + d.addCallbacks(printResult, errBack) + d.addCallbacks(allDone, errBack) + return d + + +def printResult(r, **kwargs): + if kwargs: + debug(*kwargs.values()) + elif isinstance(r, u1db.Document): + debug(r.doc_id, r.content['number']) + else: + len_results = len(r[1]) + debug("GOT %s results" % len(r[1])) + + if len_results == numdocs: + debug("ALL GOOD") + else: + debug("[!] MISSING DOCS!!!!!") + raise ValueError("We didn't expect this result len") + + +def allDone(_): + debug("ALL DONE!") + + end_time = datetime.datetime.now() + print((end_time - start_time).total_seconds()) + reactor.stop() + + +def insert_docs(_): + deferreds = [] + for i in range(numdocs): + payload = SAMPLE[i] + chash = hashlib.sha256(payload).hexdigest() + doc = {"number": i, "payload": payload, 'chash': chash} + d = createDoc(doc) + d.addCallbacks(partial(printResult, i=i, chash=chash, payload=payload), + lambda e: e.printTraceback()) + deferreds.append(d) + return defer.gatherResults(deferreds, consumeErrors=True) + +d = create_indexes(None) +d.addCallback(insert_docs) +d.addCallback(get_from_index) +d.addCallback(countDocs) + +reactor.run() diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py new file mode 100644 index 00000000..9deba136 --- /dev/null +++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- +# measure_index_times.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Measure u1db retrieval times for different u1db index situations. +""" +from __future__ import print_function +from functools import partial +import datetime +import hashlib +import os +import sys + +import u1db +from twisted.internet import defer, reactor + +from leap.soledad.client import adbapi +from leap.soledad.client.sqlcipher import SQLCipherOptions + + +folder = os.environ.get("TMPDIR", "tmp") +numdocs = int(os.environ.get("DOCS", "1000")) +silent = os.environ.get("SILENT", False) +tmpdb = os.path.join(folder, "test.soledad") + + +sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt") +sample_path = os.path.join(os.curdir, sample_file) + +try: + with open(sample_file) as f: + SAMPLE = f.readlines() +except Exception: + print("[!] Problem opening sample file. Did you download " + "the sample, or correctly set 'SAMPLE' env var?") + sys.exit(1) + +if numdocs > len(SAMPLE): + print("[!] Sorry! The requested DOCS number is larger than " + "the num of lines in our sample file") + sys.exit(1) + + +def debug(*args): + if not silent: + print(*args) + +debug("[+] db path:", tmpdb) +debug("[+] num docs", numdocs) + +if os.path.isfile(tmpdb): + debug("[+] Removing existing db file...") + os.remove(tmpdb) + +start_time = datetime.datetime.now() + +opts = SQLCipherOptions(tmpdb, "secret", create=True) +dbpool = adbapi.getConnectionPool(opts) + + +def createDoc(doc, doc_id): + return dbpool.runU1DBQuery("create_doc", doc, doc_id=doc_id) + +db_indexes = { + 'by-chash': ['chash'], + 'by-number': ['number']} + + +def create_indexes(_): + deferreds = [] + for index, definition in db_indexes.items(): + d = dbpool.runU1DBQuery("create_index", index, *definition) + deferreds.append(d) + return defer.gatherResults(deferreds) + + +class TimeWitness(object): + def __init__(self, init_time): + self.init_time = init_time + + def get_time_count(self): + return datetime.datetime.now() - self.init_time + + +def get_from_index(_): + init_time = datetime.datetime.now() + debug("GETTING FROM INDEX...", init_time) + + def printValue(res, time): + print("RESULT->", res) + print("Index Query Took: ", time.get_time_count()) + return res + + d = dbpool.runU1DBQuery( + "get_doc", + # "1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5") + # XXX this is line 89 from the hacker crackdown... + # Should accept any other optional hash as an enviroment variable. + "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469") + d.addCallback(printValue, TimeWitness(init_time)) + return d + + +def getAllDocs(): + return dbpool.runU1DBQuery("get_all_docs") + + +def errBack(e): + debug("[!] ERROR FOUND!!!") + e.printTraceback() + reactor.stop() + + +def countDocs(_): + debug("counting docs...") + d = getAllDocs() + d.addCallbacks(printResult, errBack) + d.addCallbacks(allDone, errBack) + return d + + +def printResult(r, **kwargs): + if kwargs: + debug(*kwargs.values()) + elif isinstance(r, u1db.Document): + debug(r.doc_id, r.content['number']) + else: + len_results = len(r[1]) + debug("GOT %s results" % len(r[1])) + + if len_results == numdocs: + debug("ALL GOOD") + else: + debug("[!] MISSING DOCS!!!!!") + raise ValueError("We didn't expect this result len") + + +def allDone(_): + debug("ALL DONE!") + + end_time = datetime.datetime.now() + print((end_time - start_time).total_seconds()) + reactor.stop() + + +def insert_docs(_): + deferreds = [] + for i in range(numdocs): + payload = SAMPLE[i] + chash = hashlib.sha256(payload).hexdigest() + doc = {"number": i, "payload": payload, 'chash': chash} + d = createDoc(doc, doc_id=chash) + d.addCallbacks(partial(printResult, i=i, chash=chash, payload=payload), + lambda e: e.printTraceback()) + deferreds.append(d) + return defer.gatherResults(deferreds, consumeErrors=True) + +d = create_indexes(None) +d.addCallback(insert_docs) +d.addCallback(get_from_index) +d.addCallback(countDocs) + +reactor.run() diff --git a/client/src/leap/soledad/client/examples/compare.txt b/client/src/leap/soledad/client/examples/compare.txt new file mode 100644 index 00000000..19a1325a --- /dev/null +++ b/client/src/leap/soledad/client/examples/compare.txt @@ -0,0 +1,8 @@ +TIMES=100 TMPDIR=/media/sdb5/leap python use_adbapi.py 1.34s user 0.16s system 53% cpu 2.832 total +TIMES=100 TMPDIR=/media/sdb5/leap python use_api.py 1.22s user 0.14s system 62% cpu 2.181 total + +TIMES=1000 TMPDIR=/media/sdb5/leap python use_api.py 2.18s user 0.34s system 27% cpu 9.213 total +TIMES=1000 TMPDIR=/media/sdb5/leap python use_adbapi.py 2.40s user 0.34s system 39% cpu 7.004 total + +TIMES=5000 TMPDIR=/media/sdb5/leap python use_api.py 6.63s user 1.27s system 13% cpu 57.882 total +TIMES=5000 TMPDIR=/media/sdb5/leap python use_adbapi.py 6.84s user 1.26s system 36% cpu 22.367 total diff --git a/client/src/leap/soledad/client/examples/manifest.phk b/client/src/leap/soledad/client/examples/manifest.phk new file mode 100644 index 00000000..2c86c07d --- /dev/null +++ b/client/src/leap/soledad/client/examples/manifest.phk @@ -0,0 +1,50 @@ +The Hacker's Manifesto + +The Hacker's Manifesto +by: The Mentor + +Another one got caught today, it's all over the papers. "Teenager +Arrested in Computer Crime Scandal", "Hacker Arrested after Bank +Tampering." "Damn kids. They're all alike." But did you, in your +three-piece psychology and 1950's technobrain, ever take a look behind +the eyes of the hacker? Did you ever wonder what made him tick, what +forces shaped him, what may have molded him? I am a hacker, enter my +world. Mine is a world that begins with school. I'm smarter than most of +the other kids, this crap they teach us bores me. "Damn underachiever. +They're all alike." I'm in junior high or high school. I've listened to +teachers explain for the fifteenth time how to reduce a fraction. I +understand it. "No, Ms. Smith, I didn't show my work. I did it in +my head." "Damn kid. Probably copied it. They're all alike." I made a +discovery today. I found a computer. Wait a second, this is cool. It does +what I want it to. If it makes a mistake, it's because I screwed it up. +Not because it doesn't like me, or feels threatened by me, or thinks I'm +a smart ass, or doesn't like teaching and shouldn't be here. Damn kid. +All he does is play games. They're all alike. And then it happened... a +door opened to a world... rushing through the phone line like heroin +through an addict's veins, an electronic pulse is sent out, a refuge from +the day-to-day incompetencies is sought... a board is found. "This is +it... this is where I belong..." I know everyone here... even if I've +never met them, never talked to them, may never hear from them again... I +know you all... Damn kid. Tying up the phone line again. They're all +alike... You bet your ass we're all alike... we've been spoon-fed baby +food at school when we hungered for steak... the bits of meat that you +did let slip through were pre-chewed and tasteless. We've been dominated +by sadists, or ignored by the apathetic. The few that had something to +teach found us willing pupils, but those few are like drops of water in +the desert. This is our world now... the world of the electron and the +switch, the beauty of the baud. We make use of a service already existing +without paying for what could be dirt-cheap if it wasn't run by +profiteering gluttons, and you call us criminals. We explore... and you +call us criminals. We seek after knowledge... and you call us criminals. +We exist without skin color, without nationality, without religious +bias... and you call us criminals. You build atomic bombs, you wage wars, +you murder, cheat, and lie to us and try to make us believe it's for our +own good, yet we're the criminals. Yes, I am a criminal. My crime is that +of curiosity. My crime is that of judging people by what they say and +think, not what they look like. My crime is that of outsmarting you, +something that you will never forgive me for. I am a hacker, and this is +my manifesto. You may stop this individual, but you can't stop us all... +after all, we're all alike. + +This was the last published file written by The Mentor. Shortly after +releasing it, he was busted by the FBI. The Mentor, sadly missed. diff --git a/client/src/leap/soledad/client/examples/plot-async-db.py b/client/src/leap/soledad/client/examples/plot-async-db.py new file mode 100644 index 00000000..018a1a1d --- /dev/null +++ b/client/src/leap/soledad/client/examples/plot-async-db.py @@ -0,0 +1,45 @@ +import csv +from matplotlib import pyplot as plt + +FILE = "bench.csv" + +# config the plot +plt.xlabel('number of inserts') +plt.ylabel('time (seconds)') +plt.title('SQLCipher parallelization') + +kwargs = { + 'linewidth': 1.0, + 'linestyle': '-', +} + +series = (('sync', 'r'), + ('async', 'g')) + +data = {'mark': [], + 'sync': [], + 'async': []} + +with open(FILE, 'rb') as csvfile: + series_reader = csv.reader(csvfile, delimiter=',') + for m, s, a in series_reader: + data['mark'].append(int(m)) + data['sync'].append(float(s)) + data['async'].append(float(a)) + +xmax = max(data['mark']) +xmin = min(data['mark']) +ymax = max(data['sync'] + data['async']) +ymin = min(data['sync'] + data['async']) + +for run in series: + name = run[0] + color = run[1] + plt.plot(data['mark'], data[name], label=name, color=color, **kwargs) + +plt.axes().annotate("", xy=(xmax, ymax)) +plt.axes().annotate("", xy=(xmin, ymin)) + +plt.grid() +plt.legend() +plt.show() diff --git a/client/src/leap/soledad/client/examples/run_benchmark.py b/client/src/leap/soledad/client/examples/run_benchmark.py new file mode 100644 index 00000000..61621e89 --- /dev/null +++ b/client/src/leap/soledad/client/examples/run_benchmark.py @@ -0,0 +1,29 @@ +""" +Run a mini-benchmark between regular api and dbapi +""" +import commands +import os +import time + +TMPDIR = os.environ.get("TMPDIR", "/tmp") +CSVFILE = 'bench.csv' + +cmd = "SILENT=1 TIMES={times} TMPDIR={tmpdir} python ./use_{version}api.py" + + +def parse_time(r): + return r.split('\n')[-1] + +with open(CSVFILE, 'w') as log: + + for times in range(0, 10000, 500): + cmd1 = cmd.format(times=times, tmpdir=TMPDIR, version="") + sync_time = parse_time(commands.getoutput(cmd1)) + + cmd2 = cmd.format(times=times, tmpdir=TMPDIR, version="adb") + async_time = parse_time(commands.getoutput(cmd2)) + + print times, sync_time, async_time + log.write("%s, %s, %s\n" % (times, sync_time, async_time)) + log.flush() + time.sleep(2) diff --git a/client/src/leap/soledad/client/examples/soledad_sync.py b/client/src/leap/soledad/client/examples/soledad_sync.py new file mode 100644 index 00000000..63077ee3 --- /dev/null +++ b/client/src/leap/soledad/client/examples/soledad_sync.py @@ -0,0 +1,63 @@ +from leap.bitmask.config.providerconfig import ProviderConfig +from leap.bitmask.crypto.srpauth import SRPAuth +from leap.soledad.client import Soledad +from twisted.internet import reactor +import logging +logging.basicConfig(level=logging.DEBUG) + + +# EDIT THIS -------------------------------------------- +user = u"USERNAME" +uuid = u"USERUUID" +_pass = u"USERPASS" +server_url = "https://soledad.server.example.org:2323" +# EDIT THIS -------------------------------------------- + +secrets_path = "/tmp/%s.secrets" % uuid +local_db_path = "/tmp/%s.soledad" % uuid +cert_file = "/tmp/cacert.pem" +provider_config = '/tmp/cdev.json' + + +provider = ProviderConfig() +provider.load(provider_config) + +soledad = None + + +def printStuff(r): + print r + + +def printErr(err): + logging.exception(err.value) + + +def init_soledad(_): + token = srpauth.get_token() + print "token", token + + global soledad + soledad = Soledad(uuid, _pass, secrets_path, local_db_path, + server_url, cert_file, + auth_token=token, defer_encryption=False) + + def getall(_): + d = soledad.get_all_docs() + return d + + d1 = soledad.create_doc({"test": 42}) + d1.addCallback(getall) + d1.addCallbacks(printStuff, printErr) + + d2 = soledad.sync() + d2.addCallbacks(printStuff, printErr) + d2.addBoth(lambda r: reactor.stop()) + + +srpauth = SRPAuth(provider) + +d = srpauth.authenticate(user, _pass) +d.addCallbacks(init_soledad, printErr) + +reactor.run() diff --git a/client/src/leap/soledad/client/examples/use_adbapi.py b/client/src/leap/soledad/client/examples/use_adbapi.py new file mode 100644 index 00000000..d7bd21f2 --- /dev/null +++ b/client/src/leap/soledad/client/examples/use_adbapi.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +# use_adbapi.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Example of use of the asynchronous soledad api. +""" +from __future__ import print_function +import datetime +import os + +import u1db +from twisted.internet import defer, reactor + +from leap.soledad.client import adbapi +from leap.soledad.client.sqlcipher import SQLCipherOptions + + +folder = os.environ.get("TMPDIR", "tmp") +times = int(os.environ.get("TIMES", "1000")) +silent = os.environ.get("SILENT", False) + +tmpdb = os.path.join(folder, "test.soledad") + + +def debug(*args): + if not silent: + print(*args) + +debug("[+] db path:", tmpdb) +debug("[+] times", times) + +if os.path.isfile(tmpdb): + debug("[+] Removing existing db file...") + os.remove(tmpdb) + +start_time = datetime.datetime.now() + +opts = SQLCipherOptions(tmpdb, "secret", create=True) +dbpool = adbapi.getConnectionPool(opts) + + +def createDoc(doc): + return dbpool.runU1DBQuery("create_doc", doc) + + +def getAllDocs(): + return dbpool.runU1DBQuery("get_all_docs") + + +def countDocs(_): + debug("counting docs...") + d = getAllDocs() + d.addCallbacks(printResult, lambda e: e.printTraceback()) + d.addBoth(allDone) + + +def printResult(r): + if isinstance(r, u1db.Document): + debug(r.doc_id, r.content['number']) + else: + len_results = len(r[1]) + debug("GOT %s results" % len(r[1])) + + if len_results == times: + debug("ALL GOOD") + else: + raise ValueError("We didn't expect this result len") + + +def allDone(_): + debug("ALL DONE!") + if silent: + end_time = datetime.datetime.now() + print((end_time - start_time).total_seconds()) + reactor.stop() + +deferreds = [] +payload = open('manifest.phk').read() + +for i in range(times): + doc = {"number": i, "payload": payload} + d = createDoc(doc) + d.addCallbacks(printResult, lambda e: e.printTraceback()) + deferreds.append(d) + + +all_done = defer.gatherResults(deferreds, consumeErrors=True) +all_done.addCallback(countDocs) + +reactor.run() diff --git a/client/src/leap/soledad/client/examples/use_api.py b/client/src/leap/soledad/client/examples/use_api.py new file mode 100644 index 00000000..e2501c98 --- /dev/null +++ b/client/src/leap/soledad/client/examples/use_api.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# use_api.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Example of use of the soledad api. +""" +from __future__ import print_function +import datetime +import os + +from leap.soledad.client import sqlcipher +from leap.soledad.client.sqlcipher import SQLCipherOptions + + +folder = os.environ.get("TMPDIR", "tmp") +times = int(os.environ.get("TIMES", "1000")) +silent = os.environ.get("SILENT", False) + +tmpdb = os.path.join(folder, "test.soledad") + + +def debug(*args): + if not silent: + print(*args) + +debug("[+] db path:", tmpdb) +debug("[+] times", times) + +if os.path.isfile(tmpdb): + debug("[+] Removing existing db file...") + os.remove(tmpdb) + +start_time = datetime.datetime.now() + +opts = SQLCipherOptions(tmpdb, "secret", create=True) +db = sqlcipher.SQLCipherDatabase(opts) + + +def allDone(): + debug("ALL DONE!") + +payload = open('manifest.phk').read() + +for i in range(times): + doc = {"number": i, "payload": payload} + d = db.create_doc(doc) + debug(d.doc_id, d.content['number']) + +debug("Count", len(db.get_all_docs()[1])) +if silent: + end_time = datetime.datetime.now() + print((end_time - start_time).total_seconds()) + +allDone() diff --git a/client/src/leap/soledad/client/http_target.py b/client/src/leap/soledad/client/http_target.py new file mode 100644 index 00000000..a6ef2b0d --- /dev/null +++ b/client/src/leap/soledad/client/http_target.py @@ -0,0 +1,711 @@ +# -*- coding: utf-8 -*- +# http_target.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +""" +A U1DB backend for encrypting data before sending to server and decrypting +after receiving. +""" + + +import json +import base64 +import logging +import warnings + +from uuid import uuid4 + +from twisted.internet import defer +from twisted.web.error import Error +from twisted.web.client import _ReadBodyProtocol +from twisted.web.client import PartialDownloadError +from twisted.web._newclient import ResponseDone +from twisted.web._newclient import PotentialDataLoss + +from u1db import errors +from u1db import SyncTarget +from u1db.remote import utils +from u1db.remote import http_errors + +from leap.common.http import HTTPClient + +from leap.soledad.common.document import SoledadDocument +from leap.soledad.common.errors import InvalidAuthTokenError + +from leap.soledad.client.crypto import is_symmetrically_encrypted +from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS +from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS +from leap.soledad.client.events import emit +from leap.soledad.client.encdecpool import SyncDecrypterPool + + +logger = logging.getLogger(__name__) + + +# we want to make sure that HTTP errors will raise appropriate u1db errors, +# that is, fire errbacks with the appropriate failures, in the context of +# twisted. Because of that, we redefine the http body reader used by the HTTP +# client below. + +class ReadBodyProtocol(_ReadBodyProtocol): + + def __init__(self, response, deferred): + """ + Initialize the protocol, additionally storing the response headers. + """ + _ReadBodyProtocol.__init__( + self, response.code, response.phrase, deferred) + self.headers = response.headers + + # ---8<--- snippet from u1db.remote.http_client, modified to use errbacks + def _error(self, respdic): + descr = respdic.get("error") + exc_cls = errors.wire_description_to_exc.get(descr) + if exc_cls is not None: + message = respdic.get("message") + self.deferred.errback(exc_cls(message)) + # ---8<--- end of snippet from u1db.remote.http_client + + def connectionLost(self, reason): + """ + Deliver the accumulated response bytes to the waiting L{Deferred}, if + the response body has been completely received without error. + """ + if reason.check(ResponseDone): + + body = b''.join(self.dataBuffer) + + # ---8<--- snippet from u1db.remote.http_client + if self.status in (200, 201): + self.deferred.callback(body) + elif self.status in http_errors.ERROR_STATUSES: + try: + respdic = json.loads(body) + except ValueError: + self.deferred.errback( + errors.HTTPError(self.status, body, self.headers)) + else: + self._error(respdic) + # special cases + elif self.status == 503: + self.deferred.errback(errors.Unavailable(body, self.headers)) + else: + self.deferred.errback( + errors.HTTPError(self.status, body, self.headers)) + # ---8<--- end of snippet from u1db.remote.http_client + + elif reason.check(PotentialDataLoss): + self.deferred.errback( + PartialDownloadError(self.status, self.message, + b''.join(self.dataBuffer))) + else: + self.deferred.errback(reason) + + +def readBody(response): + """ + Get the body of an L{IResponse} and return it as a byte string. + + This is a helper function for clients that don't want to incrementally + receive the body of an HTTP response. + + @param response: The HTTP response for which the body will be read. + @type response: L{IResponse} provider + + @return: A L{Deferred} which will fire with the body of the response. + Cancelling it will close the connection to the server immediately. + """ + def cancel(deferred): + """ + Cancel a L{readBody} call, close the connection to the HTTP server + immediately, if it is still open. + + @param deferred: The cancelled L{defer.Deferred}. + """ + abort = getAbort() + if abort is not None: + abort() + + d = defer.Deferred(cancel) + protocol = ReadBodyProtocol(response, d) + + def getAbort(): + return getattr(protocol.transport, 'abortConnection', None) + + response.deliverBody(protocol) + + if protocol.transport is not None and getAbort() is None: + warnings.warn( + 'Using readBody with a transport that does not have an ' + 'abortConnection method', + category=DeprecationWarning, + stacklevel=2) + + return d + + +class SoledadHTTPSyncTarget(SyncTarget): + + """ + A SyncTarget that encrypts data before sending and decrypts data after + receiving. + + Normally encryption will have been written to the sync database upon + document modification. The sync database is also used to write temporarily + the parsed documents that the remote send us, before being decrypted and + written to the main database. + """ + + def __init__(self, url, source_replica_uid, creds, crypto, cert_file, + sync_db=None, sync_enc_pool=None): + """ + Initialize the sync target. + + :param url: The server sync url. + :type url: str + :param source_replica_uid: The source replica uid which we use when + deferring decryption. + :type source_replica_uid: str + :param creds: A dictionary containing the uuid and token. + :type creds: creds + :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt + document contents when syncing. + :type crypto: soledad.crypto.SoledadCrypto + :param cert_file: Path to the certificate of the ca used to validate + the SSL certificate used by the remote soledad + server. + :type cert_file: str + :param sync_db: Optional. handler for the db with the symmetric + encryption of the syncing documents. If + None, encryption will be done in-place, + instead of retreiving it from the dedicated + database. + :type sync_db: Sqlite handler + :param sync_enc_pool: The encryption pool to use to defer encryption. + If None is passed the encryption will not be + deferred. + :type sync_enc_pool: leap.soledad.client.encdecpool.SyncEncrypterPool + """ + if url.endswith("/"): + url = url[:-1] + self._url = str(url) + "/sync-from/" + str(source_replica_uid) + self.source_replica_uid = source_replica_uid + self._auth_header = None + self.set_creds(creds) + self._crypto = crypto + self._sync_db = sync_db + self._sync_enc_pool = sync_enc_pool + self._insert_doc_cb = None + # asynchronous encryption/decryption attributes + self._decryption_callback = None + self._sync_decr_pool = None + self._http = HTTPClient(cert_file) + + def close(self): + self._http.close() + + def set_creds(self, creds): + """ + Update credentials. + + :param creds: A dictionary containing the uuid and token. + :type creds: dict + """ + uuid = creds['token']['uuid'] + token = creds['token']['token'] + auth = '%s:%s' % (uuid, token) + b64_token = base64.b64encode(auth) + self._auth_header = {'Authorization': ['Token %s' % b64_token]} + + @property + def _defer_encryption(self): + return self._sync_enc_pool is not None + + # + # SyncTarget API + # + + @defer.inlineCallbacks + def get_sync_info(self, source_replica_uid): + """ + Return information about known state of remote database. + + Return the replica_uid and the current database generation of the + remote database, and its last-seen database generation for the client + replica. + + :param source_replica_uid: The client-size replica uid. + :type source_replica_uid: str + + :return: A deferred which fires with (target_replica_uid, + target_replica_generation, target_trans_id, + source_replica_last_known_generation, + source_replica_last_known_transaction_id) + :rtype: twisted.internet.defer.Deferred + """ + raw = yield self._http_request(self._url, headers=self._auth_header) + res = json.loads(raw) + defer.returnValue(( + res['target_replica_uid'], + res['target_replica_generation'], + res['target_replica_transaction_id'], + res['source_replica_generation'], + res['source_transaction_id'] + )) + + def record_sync_info( + self, source_replica_uid, source_replica_generation, + source_replica_transaction_id): + """ + Record tip information for another replica. + + After sync_exchange has been processed, the caller will have + received new content from this replica. This call allows the + source replica instigating the sync to inform us what their + generation became after applying the documents we returned. + + This is used to allow future sync operations to not need to repeat data + that we just talked about. It also means that if this is called at the + wrong time, there can be database records that will never be + synchronized. + + :param source_replica_uid: The identifier for the source replica. + :type source_replica_uid: str + :param source_replica_generation: The database generation for the + source replica. + :type source_replica_generation: int + :param source_replica_transaction_id: The transaction id associated + with the source replica + generation. + :type source_replica_transaction_id: str + + :return: A deferred which fires with the result of the query. + :rtype: twisted.internet.defer.Deferred + """ + data = json.dumps({ + 'generation': source_replica_generation, + 'transaction_id': source_replica_transaction_id + }) + headers = self._auth_header.copy() + headers.update({'content-type': ['application/json']}) + return self._http_request( + self._url, + method='PUT', + headers=headers, + body=data) + + @defer.inlineCallbacks + def sync_exchange(self, docs_by_generation, source_replica_uid, + last_known_generation, last_known_trans_id, + insert_doc_cb, ensure_callback=None, + defer_decryption=True, sync_id=None): + """ + Find out which documents the remote database does not know about, + encrypt and send them. After that, receive documents from the remote + database. + + :param docs_by_generations: A list of (doc_id, generation, trans_id) + of local documents that were changed since + the last local generation the remote + replica knows about. + :type docs_by_generations: list of tuples + + :param source_replica_uid: The uid of the source replica. + :type source_replica_uid: str + + :param last_known_generation: Target's last known generation. + :type last_known_generation: int + + :param last_known_trans_id: Target's last known transaction id. + :type last_known_trans_id: str + + :param insert_doc_cb: A callback for inserting received documents from + target. If not overriden, this will call u1db + insert_doc_from_target in synchronizer, which + implements the TAKE OTHER semantics. + :type insert_doc_cb: function + + :param ensure_callback: A callback that ensures we know the target + replica uid if the target replica was just + created. + :type ensure_callback: function + + :param defer_decryption: Whether to defer the decryption process using + the intermediate database. If False, + decryption will be done inline. + :type defer_decryption: bool + + :return: A deferred which fires with the new generation and + transaction id of the target replica. + :rtype: twisted.internet.defer.Deferred + """ + + self._ensure_callback = ensure_callback + + if sync_id is None: + sync_id = str(uuid4()) + self.source_replica_uid = source_replica_uid + + # save a reference to the callback so we can use it after decrypting + self._insert_doc_cb = insert_doc_cb + + gen_after_send, trans_id_after_send = yield self._send_docs( + docs_by_generation, + last_known_generation, + last_known_trans_id, + sync_id) + + cur_target_gen, cur_target_trans_id = yield self._receive_docs( + last_known_generation, last_known_trans_id, + ensure_callback, sync_id, + defer_decryption=defer_decryption) + + # update gen and trans id info in case we just sent and did not + # receive docs. + if gen_after_send is not None and gen_after_send > cur_target_gen: + cur_target_gen = gen_after_send + cur_target_trans_id = trans_id_after_send + + defer.returnValue([cur_target_gen, cur_target_trans_id]) + + # + # methods to send docs + # + + def _prepare(self, comma, entries, **dic): + entry = comma + '\r\n' + json.dumps(dic) + entries.append(entry) + return len(entry) + + @defer.inlineCallbacks + def _send_docs(self, docs_by_generation, last_known_generation, + last_known_trans_id, sync_id): + + if not docs_by_generation: + defer.returnValue([None, None]) + + headers = self._auth_header.copy() + headers.update({'content-type': ['application/x-soledad-sync-put']}) + # add remote replica metadata to the request + first_entries = ['['] + self._prepare( + '', first_entries, + last_known_generation=last_known_generation, + last_known_trans_id=last_known_trans_id, + sync_id=sync_id, + ensure=self._ensure_callback is not None) + idx = 0 + total = len(docs_by_generation) + for doc, gen, trans_id in docs_by_generation: + idx += 1 + result = yield self._send_one_doc( + headers, first_entries, doc, + gen, trans_id, total, idx) + if self._defer_encryption: + self._sync_enc_pool.delete_encrypted_doc( + doc.doc_id, doc.rev) + + msg = "%d/%d" % (idx, total) + content = {'sent': idx, 'total': total} + emit(SOLEDAD_SYNC_SEND_STATUS, content) + logger.debug("Sync send status: %s" % msg) + + response_dict = json.loads(result)[0] + gen_after_send = response_dict['new_generation'] + trans_id_after_send = response_dict['new_transaction_id'] + defer.returnValue([gen_after_send, trans_id_after_send]) + + @defer.inlineCallbacks + def _send_one_doc(self, headers, first_entries, doc, gen, trans_id, + number_of_docs, doc_idx): + entries = first_entries[:] + # add the document to the request + content = yield self._encrypt_doc(doc) + self._prepare( + ',', entries, + id=doc.doc_id, rev=doc.rev, content=content, gen=gen, + trans_id=trans_id, number_of_docs=number_of_docs, + doc_idx=doc_idx) + entries.append('\r\n]') + data = ''.join(entries) + result = yield self._http_request( + self._url, + method='POST', + headers=headers, + body=data) + defer.returnValue(result) + + def _encrypt_doc(self, doc): + d = None + if doc.is_tombstone(): + d = defer.succeed(None) + elif not self._defer_encryption: + # fallback case, for tests + d = defer.succeed(self._crypto.encrypt_doc(doc)) + else: + + def _maybe_encrypt_doc_inline(doc_json): + if doc_json is None: + # the document is not marked as tombstone, but we got + # nothing from the sync db. As it is not encrypted + # yet, we force inline encryption. + return self._crypto.encrypt_doc(doc) + return doc_json + + d = self._sync_enc_pool.get_encrypted_doc(doc.doc_id, doc.rev) + d.addCallback(_maybe_encrypt_doc_inline) + return d + + # + # methods to receive doc + # + + @defer.inlineCallbacks + def _receive_docs(self, last_known_generation, last_known_trans_id, + ensure_callback, sync_id, defer_decryption): + + self._queue_for_decrypt = defer_decryption \ + and self._sync_db is not None + + new_generation = last_known_generation + new_transaction_id = last_known_trans_id + + if self._queue_for_decrypt: + logger.debug( + "Soledad sync: will queue received docs for decrypting.") + + if defer_decryption: + self._setup_sync_decr_pool() + + headers = self._auth_header.copy() + headers.update({'content-type': ['application/x-soledad-sync-get']}) + + # --------------------------------------------------------------------- + # maybe receive the first document + # --------------------------------------------------------------------- + + # we fetch the first document before fetching the rest because we need + # to know the total number of documents to be received, and this + # information comes as metadata to each request. + + doc = yield self._receive_one_doc( + headers, last_known_generation, last_known_trans_id, + sync_id, 0) + self._received_docs = 0 + number_of_changes, ngen, ntrans = self._insert_received_doc(doc, 1, 1) + + # update the target gen and trans_id in case a document was received + if ngen: + new_generation = ngen + new_transaction_id = ntrans + + if defer_decryption: + self._sync_decr_pool.start(number_of_changes) + + # --------------------------------------------------------------------- + # maybe receive the rest of the documents + # --------------------------------------------------------------------- + + # launch many asynchronous fetches and inserts of received documents + # in the temporary sync db. Will wait for all results before + # continuing. + + received = 1 + deferreds = [] + while received < number_of_changes: + d = self._receive_one_doc( + headers, last_known_generation, + last_known_trans_id, sync_id, received) + d.addCallback( + self._insert_received_doc, + received + 1, # the index of the current received doc + number_of_changes) + deferreds.append(d) + received += 1 + results = yield defer.gatherResults(deferreds) + + # get generation and transaction id of target after insertions + if deferreds: + _, new_generation, new_transaction_id = results.pop() + + # --------------------------------------------------------------------- + # wait for async decryption to finish + # --------------------------------------------------------------------- + + if defer_decryption: + yield self._sync_decr_pool.deferred + self._sync_decr_pool.stop() + + defer.returnValue([new_generation, new_transaction_id]) + + def _receive_one_doc(self, headers, last_known_generation, + last_known_trans_id, sync_id, received): + entries = ['['] + # add remote replica metadata to the request + self._prepare( + '', entries, + last_known_generation=last_known_generation, + last_known_trans_id=last_known_trans_id, + sync_id=sync_id, + ensure=self._ensure_callback is not None) + # inform server of how many documents have already been received + self._prepare( + ',', entries, received=received) + entries.append('\r\n]') + # send headers + return self._http_request( + self._url, + method='POST', + headers=headers, + body=''.join(entries)) + + def _insert_received_doc(self, response, idx, total): + """ + Insert a received document into the local replica. + + :param response: The body and headers of the response. + :type response: tuple(str, dict) + :param idx: The index count of the current operation. + :type idx: int + :param total: The total number of operations. + :type total: int + """ + new_generation, new_transaction_id, number_of_changes, doc_id, \ + rev, content, gen, trans_id = \ + self._parse_received_doc_response(response) + if doc_id is not None: + # decrypt incoming document and insert into local database + # ------------------------------------------------------------- + # symmetric decryption of document's contents + # ------------------------------------------------------------- + # If arriving content was symmetrically encrypted, we decrypt it. + # We do it inline if defer_decryption flag is False or no sync_db + # was defined, otherwise we defer it writing it to the received + # docs table. + doc = SoledadDocument(doc_id, rev, content) + if is_symmetrically_encrypted(doc): + if self._queue_for_decrypt: + self._sync_decr_pool.insert_encrypted_received_doc( + doc.doc_id, doc.rev, doc.content, gen, trans_id, + idx) + else: + # defer_decryption is False or no-sync-db fallback + doc.set_json(self._crypto.decrypt_doc(doc)) + self._insert_doc_cb(doc, gen, trans_id) + else: + # not symmetrically encrypted doc, insert it directly + # or save it in the decrypted stage. + if self._queue_for_decrypt: + self._sync_decr_pool.insert_received_doc( + doc.doc_id, doc.rev, doc.content, gen, trans_id, + idx) + else: + self._insert_doc_cb(doc, gen, trans_id) + # ------------------------------------------------------------- + # end of symmetric decryption + # ------------------------------------------------------------- + self._received_docs += 1 + msg = "%d/%d" % (self._received_docs, total) + content = {'received': self._received_docs, 'total': total} + emit(SOLEDAD_SYNC_RECEIVE_STATUS, content) + logger.debug("Sync receive status: %s" % msg) + return number_of_changes, new_generation, new_transaction_id + + def _parse_received_doc_response(self, response): + """ + Parse the response from the server containing the received document. + + :param response: The body and headers of the response. + :type response: tuple(str, dict) + + :return: (new_gen, new_trans_id, number_of_changes, doc_id, rev, + content, gen, trans_id) + :rtype: tuple + """ + # decode incoming stream + parts = response.splitlines() + if not parts or parts[0] != '[' or parts[-1] != ']': + raise errors.BrokenSyncStream + data = parts[1:-1] + # decode metadata + try: + line, comma = utils.check_and_strip_comma(data[0]) + metadata = None + except (IndexError): + raise errors.BrokenSyncStream + try: + metadata = json.loads(line) + new_generation = metadata['new_generation'] + new_transaction_id = metadata['new_transaction_id'] + number_of_changes = metadata['number_of_changes'] + except (ValueError, KeyError): + raise errors.BrokenSyncStream + # make sure we have replica_uid from fresh new dbs + if self._ensure_callback and 'replica_uid' in metadata: + self._ensure_callback(metadata['replica_uid']) + # parse incoming document info + doc_id = None + rev = None + content = None + gen = None + trans_id = None + if number_of_changes > 0: + try: + entry = json.loads(data[1]) + doc_id = entry['id'] + rev = entry['rev'] + content = entry['content'] + gen = entry['gen'] + trans_id = entry['trans_id'] + except (IndexError, KeyError): + raise errors.BrokenSyncStream + return new_generation, new_transaction_id, number_of_changes, \ + doc_id, rev, content, gen, trans_id + + def _setup_sync_decr_pool(self): + """ + Set up the SyncDecrypterPool for deferred decryption. + """ + if self._sync_decr_pool is None and self._sync_db is not None: + # initialize syncing queue decryption pool + self._sync_decr_pool = SyncDecrypterPool( + self._crypto, + self._sync_db, + insert_doc_cb=self._insert_doc_cb, + source_replica_uid=self.source_replica_uid) + + def _http_request(self, url, method='GET', body=None, headers={}): + d = self._http.request(url, method, body, headers, readBody) + d.addErrback(_unauth_to_invalid_token_error) + return d + + +def _unauth_to_invalid_token_error(failure): + """ + An errback to translate unauthorized errors to our own invalid token + class. + + :param failure: The original failure. + :type failure: twisted.python.failure.Failure + + :return: Either the original failure or an invalid auth token error. + :rtype: twisted.python.failure.Failure + """ + failure.trap(Error) + if failure.getErrorMessage() == "401 Unauthorized": + raise InvalidAuthTokenError + return failure diff --git a/client/src/leap/soledad/client/interfaces.py b/client/src/leap/soledad/client/interfaces.py new file mode 100644 index 00000000..14b34d24 --- /dev/null +++ b/client/src/leap/soledad/client/interfaces.py @@ -0,0 +1,393 @@ +# -*- coding: utf-8 -*- +# interfaces.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Interfaces used by the Soledad Client. +""" +from zope.interface import Interface, Attribute + +# +# Plugins +# + + +class ISoledadPostSyncPlugin(Interface): + """ + I implement the minimal methods and attributes for a plugin that can be + called after a soledad synchronization has ended. + """ + + def process_received_docs(self, doc_id_list): + """ + Do something with the passed list of doc_ids received after the last + sync. + + :param doc_id_list: a list of strings for the received doc_ids + """ + + watched_doc_types = Attribute(""" + a tuple of the watched doc types for this plugin. So far, the + `doc-types` convention is just the preffix of the doc_id, which is + basically its first character, followed by a dash. So, for instance, + `M-` is used for meta-docs in mail, and `F-` is used for flag-docs in + mail. For now there's no central register of all the doc-types + used.""") + + +# +# Soledad storage +# + +class ILocalStorage(Interface): + """ + I implement core methods for the u1db local storage of documents and + indexes. + """ + local_db_path = Attribute( + "The path for the local database replica") + local_db_file_name = Attribute( + "The name of the local SQLCipher U1DB database file") + uuid = Attribute("The user uuid") + default_prefix = Attribute( + "Prefix for default values for path") + + def put_doc(self, doc): + """ + Update a document in the local encrypted database. + + :param doc: the document to update + :type doc: SoledadDocument + + :return: + a deferred that will fire with the new revision identifier for + the document + :rtype: Deferred + """ + + def delete_doc(self, doc): + """ + Delete a document from the local encrypted database. + + :param doc: the document to delete + :type doc: SoledadDocument + + :return: + a deferred that will fire with ... + :rtype: Deferred + """ + + def get_doc(self, doc_id, include_deleted=False): + """ + Retrieve a document from the local encrypted database. + + :param doc_id: the unique document identifier + :type doc_id: str + :param include_deleted: + if True, deleted documents will be returned with empty content; + otherwise asking for a deleted document will return None + :type include_deleted: bool + + :return: + A deferred that will fire with the document object, containing a + SoledadDocument, or None if it could not be found + :rtype: Deferred + """ + + def get_docs(self, doc_ids, check_for_conflicts=True, + include_deleted=False): + """ + Get the content for many documents. + + :param doc_ids: a list of document identifiers + :type doc_ids: list + :param check_for_conflicts: if set False, then the conflict check will + be skipped, and 'None' will be returned instead of True/False + :type check_for_conflicts: bool + + :return: + A deferred that will fire with an iterable giving the Document + object for each document id in matching doc_ids order. + :rtype: Deferred + """ + + def get_all_docs(self, include_deleted=False): + """ + Get the JSON content for all documents in the database. + + :param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted + documents will not be included in the results. + :return: + A deferred that will fire with (generation, [Document]): that is, + the current generation of the database, followed by a list of all + the documents in the database. + :rtype: Deferred + """ + + def create_doc(self, content, doc_id=None): + """ + Create a new document in the local encrypted database. + + :param content: the contents of the new document + :type content: dict + :param doc_id: an optional identifier specifying the document id + :type doc_id: str + + :return: + A deferred tht will fire with the new document (SoledadDocument + instance). + :rtype: Deferred + """ + + def create_doc_from_json(self, json, doc_id=None): + """ + Create a new document. + + You can optionally specify the document identifier, but the document + must not already exist. See 'put_doc' if you want to override an + existing document. + If the database specifies a maximum document size and the document + exceeds it, create will fail and raise a DocumentTooBig exception. + + :param json: The JSON document string + :type json: str + :param doc_id: An optional identifier specifying the document id. + :type doc_id: + :return: + A deferred that will fire with the new document (A SoledadDocument + instance) + :rtype: Deferred + """ + + def create_index(self, index_name, *index_expressions): + """ + Create an named index, which can then be queried for future lookups. + Creating an index which already exists is not an error, and is cheap. + Creating an index which does not match the index_expressions of the + existing index is an error. + Creating an index will block until the expressions have been evaluated + and the index generated. + + :param index_name: A unique name which can be used as a key prefix + :type index_name: str + :param index_expressions: + index expressions defining the index information. + :type index_expressions: dict + + Examples: + + "fieldname", or "fieldname.subfieldname" to index alphabetically + sorted on the contents of a field. + + "number(fieldname, width)", "lower(fieldname)" + """ + + def delete_index(self, index_name): + """ + Remove a named index. + + :param index_name: The name of the index we are removing + :type index_name: str + """ + + def list_indexes(self): + """ + List the definitions of all known indexes. + + :return: A list of [('index-name', ['field', 'field2'])] definitions. + :rtype: Deferred + """ + + def get_from_index(self, index_name, *key_values): + """ + Return documents that match the keys supplied. + + You must supply exactly the same number of values as have been defined + in the index. It is possible to do a prefix match by using '*' to + indicate a wildcard match. You can only supply '*' to trailing entries, + (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) + It is also possible to append a '*' to the last supplied value (eg + 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: List of [Document] + :rtype: list + """ + + def get_count_from_index(self, index_name, *key_values): + """ + Return the count of the documents that match the keys and + values supplied. + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: count. + :rtype: int + """ + + def get_range_from_index(self, index_name, start_value, end_value): + """ + Return documents that fall within the specified range. + + Both ends of the range are inclusive. For both start_value and + end_value, one must supply exactly the same number of values as have + been defined in the index, or pass None. In case of a single column + index, a string is accepted as an alternative for a tuple with a single + value. It is possible to do a prefix match by using '*' to indicate + a wildcard match. You can only supply '*' to trailing entries, (eg + 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also + possible to append a '*' to the last supplied value (eg 'val*', '*', + '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + :param index_name: The index to query + :type index_name: str + :param start_values: tuples of values that define the lower bound of + the range. eg, if you have an index with 3 fields then you would + have: (val1, val2, val3) + :type start_values: tuple + :param end_values: tuples of values that define the upper bound of the + range. eg, if you have an index with 3 fields then you would have: + (val1, val2, val3) + :type end_values: tuple + :return: A deferred that will fire with a list of [Document] + :rtype: Deferred + """ + + def get_index_keys(self, index_name): + """ + Return all keys under which documents are indexed in this index. + + :param index_name: The index to query + :type index_name: str + :return: + A deferred that will fire with a list of tuples of indexed keys. + :rtype: Deferred + """ + + def get_doc_conflicts(self, doc_id): + """ + Get the list of conflicts for the given document. + + :param doc_id: the document id + :type doc_id: str + + :return: + A deferred that will fire with a list of the document entries that + are conflicted. + :rtype: Deferred + """ + + def resolve_doc(self, doc, conflicted_doc_revs): + """ + Mark a document as no longer conflicted. + + :param doc: a document with the new content to be inserted. + :type doc: SoledadDocument + :param conflicted_doc_revs: + A deferred that will fire with a list of revisions that the new + content supersedes. + :type conflicted_doc_revs: list + """ + + +class ISyncableStorage(Interface): + """ + I implement methods to synchronize with a remote replica. + """ + replica_uid = Attribute("The uid of the local replica") + syncing = Attribute( + "Property, True if the syncer is syncing.") + token = Attribute("The authentication Token.") + + def sync(self, defer_decryption=True): + """ + Synchronize the local encrypted replica with a remote replica. + + This method blocks until a syncing lock is acquired, so there are no + attempts of concurrent syncs from the same client replica. + + :param url: the url of the target replica to sync with + :type url: str + + :param defer_decryption: + Whether to defer the decryption process using the intermediate + database. If False, decryption will be done inline. + :type defer_decryption: bool + + :return: + A deferred that will fire with the local generation before the + synchronisation was performed. + :rtype: str + """ + + def stop_sync(self): + """ + Stop the current syncing process. + """ + + +class ISecretsStorage(Interface): + """ + I implement methods needed for initializing and accessing secrets, that are + synced against the Shared Recovery Database. + """ + secrets_file_name = Attribute( + "The name of the file where the storage secrets will be stored") + + storage_secret = Attribute("") + remote_storage_secret = Attribute("") + shared_db = Attribute("The shared db object") + + # XXX this used internally from secrets, so it might be good to preserve + # as a public boundary with other components. + + # We should also probably document its interface. + secrets = Attribute("A SoledadSecrets object containing access to secrets") + + def init_shared_db(self, server_url, uuid, creds): + """ + Initialize the shared recovery database. + + :param server_url: + :type server_url: + :param uuid: + :type uuid: + :param creds: + :type creds: + """ + + def change_passphrase(self, new_passphrase): + """ + Change the passphrase that encrypts the storage secret. + + :param new_passphrase: The new passphrase. + :type new_passphrase: unicode + + :raise NoStorageSecret: Raised if there's no storage secret available. + """ + + # XXX not in use. Uncomment if we ever decide to allow + # multiple secrets. + # secret_id = Attribute("The id of the storage secret to be used") diff --git a/client/src/leap/soledad/client/pragmas.py b/client/src/leap/soledad/client/pragmas.py new file mode 100644 index 00000000..55397d10 --- /dev/null +++ b/client/src/leap/soledad/client/pragmas.py @@ -0,0 +1,379 @@ +# -*- coding: utf-8 -*- +# pragmas.py +# Copyright (C) 2013, 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Different pragmas used in the initialization of the SQLCipher database. +""" +import logging +import string +import threading +import os + +from leap.soledad.common import soledad_assert + + +logger = logging.getLogger(__name__) + + +_db_init_lock = threading.Lock() + + +def set_init_pragmas(conn, opts=None, extra_queries=None): + """ + Set the initialization pragmas. + + This includes the crypto pragmas, and any other options that must + be passed early to sqlcipher db. + """ + soledad_assert(opts is not None) + extra_queries = [] if extra_queries is None else extra_queries + with _db_init_lock: + # only one execution path should initialize the db + _set_init_pragmas(conn, opts, extra_queries) + + +def _set_init_pragmas(conn, opts, extra_queries): + + sync_off = os.environ.get('LEAP_SQLITE_NOSYNC') + memstore = os.environ.get('LEAP_SQLITE_MEMSTORE') + nowal = os.environ.get('LEAP_SQLITE_NOWAL') + + set_crypto_pragmas(conn, opts) + + if not nowal: + set_write_ahead_logging(conn) + if sync_off: + set_synchronous_off(conn) + else: + set_synchronous_normal(conn) + if memstore: + set_mem_temp_store(conn) + + for query in extra_queries: + conn.cursor().execute(query) + + +def set_crypto_pragmas(db_handle, sqlcipher_opts): + """ + Set cryptographic params (key, cipher, KDF number of iterations and + cipher page size). + + :param db_handle: + :type db_handle: + :param sqlcipher_opts: options for the SQLCipherDatabase + :type sqlcipher_opts: SQLCipherOpts instance + """ + # XXX assert CryptoOptions + opts = sqlcipher_opts + _set_key(db_handle, opts.key, opts.is_raw_key) + _set_cipher(db_handle, opts.cipher) + _set_kdf_iter(db_handle, opts.kdf_iter) + _set_cipher_page_size(db_handle, opts.cipher_page_size) + + +def _set_key(db_handle, key, is_raw_key): + """ + Set the ``key`` for use with the database. + + The process of creating a new, encrypted database is called 'keying' + the database. SQLCipher uses just-in-time key derivation at the point + it is first needed for an operation. This means that the key (and any + options) must be set before the first operation on the database. As + soon as the database is touched (e.g. SELECT, CREATE TABLE, UPDATE, + etc.) and pages need to be read or written, the key is prepared for + use. + + Implementation Notes: + + * PRAGMA key should generally be called as the first operation on a + database. + + :param key: The key for use with the database. + :type key: str + :param is_raw_key: + Whether C{key} is a raw 64-char hex string or a passphrase that should + be hashed to obtain the encyrption key. + :type is_raw_key: bool + """ + if is_raw_key: + _set_key_raw(db_handle, key) + else: + _set_key_passphrase(db_handle, key) + + +def _set_key_passphrase(db_handle, passphrase): + """ + Set a passphrase for encryption key derivation. + + The key itself can be a passphrase, which is converted to a key using + PBKDF2 key derivation. The result is used as the encryption key for + the database. By using this method, there is no way to alter the KDF; + if you want to do so you should use a raw key instead and derive the + key using your own KDF. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param passphrase: The passphrase used to derive the encryption key. + :type passphrase: str + """ + db_handle.cursor().execute("PRAGMA key = '%s'" % passphrase) + + +def _set_key_raw(db_handle, key): + """ + Set a raw hexadecimal encryption key. + + It is possible to specify an exact byte sequence using a blob literal. + With this method, it is the calling application's responsibility to + ensure that the data provided is a 64 character hex string, which will + be converted directly to 32 bytes (256 bits) of key data. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param key: A 64 character hex string. + :type key: str + """ + if not all(c in string.hexdigits for c in key): + raise NotAnHexString(key) + db_handle.cursor().execute('PRAGMA key = "x\'%s"' % key) + + +def _set_cipher(db_handle, cipher='aes-256-cbc'): + """ + Set the cipher and mode to use for symmetric encryption. + + SQLCipher uses aes-256-cbc as the default cipher and mode of + operation. It is possible to change this, though not generally + recommended, using PRAGMA cipher. + + SQLCipher makes direct use of libssl, so all cipher options available + to libssl are also available for use with SQLCipher. See `man enc` for + OpenSSL's supported ciphers. + + Implementation Notes: + + * PRAGMA cipher must be called after PRAGMA key and before the first + actual database operation or it will have no effect. + + * If a non-default value is used PRAGMA cipher to create a database, + it must also be called every time that database is opened. + + * SQLCipher does not implement its own encryption. Instead it uses the + widely available and peer-reviewed OpenSSL libcrypto for all + cryptographic functions. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param cipher: The cipher and mode to use. + :type cipher: str + """ + db_handle.cursor().execute("PRAGMA cipher = '%s'" % cipher) + + +def _set_kdf_iter(db_handle, kdf_iter=4000): + """ + Set the number of iterations for the key derivation function. + + SQLCipher uses PBKDF2 key derivation to strengthen the key and make it + resistent to brute force and dictionary attacks. The default + configuration uses 4000 PBKDF2 iterations (effectively 16,000 SHA1 + operations). PRAGMA kdf_iter can be used to increase or decrease the + number of iterations used. + + Implementation Notes: + + * PRAGMA kdf_iter must be called after PRAGMA key and before the first + actual database operation or it will have no effect. + + * If a non-default value is used PRAGMA kdf_iter to create a database, + it must also be called every time that database is opened. + + * It is not recommended to reduce the number of iterations if a + passphrase is in use. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param kdf_iter: The number of iterations to use. + :type kdf_iter: int + """ + db_handle.cursor().execute("PRAGMA kdf_iter = '%d'" % kdf_iter) + + +def _set_cipher_page_size(db_handle, cipher_page_size=1024): + """ + Set the page size of the encrypted database. + + SQLCipher 2 introduced the new PRAGMA cipher_page_size that can be + used to adjust the page size for the encrypted database. The default + page size is 1024 bytes, but it can be desirable for some applications + to use a larger page size for increased performance. For instance, + some recent testing shows that increasing the page size can noticeably + improve performance (5-30%) for certain queries that manipulate a + large number of pages (e.g. selects without an index, large inserts in + a transaction, big deletes). + + To adjust the page size, call the pragma immediately after setting the + key for the first time and each subsequent time that you open the + database. + + Implementation Notes: + + * PRAGMA cipher_page_size must be called after PRAGMA key and before + the first actual database operation or it will have no effect. + + * If a non-default value is used PRAGMA cipher_page_size to create a + database, it must also be called every time that database is opened. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param cipher_page_size: The page size. + :type cipher_page_size: int + """ + db_handle.cursor().execute( + "PRAGMA cipher_page_size = '%d'" % cipher_page_size) + + +# XXX UNUSED ? +def set_rekey(db_handle, new_key, is_raw_key): + """ + Change the key of an existing encrypted database. + + To change the key on an existing encrypted database, it must first be + unlocked with the current encryption key. Once the database is + readable and writeable, PRAGMA rekey can be used to re-encrypt every + page in the database with a new key. + + * PRAGMA rekey must be called after PRAGMA key. It can be called at any + time once the database is readable. + + * PRAGMA rekey can not be used to encrypted a standard SQLite + database! It is only useful for changing the key on an existing + database. + + * Previous versions of SQLCipher provided a PRAGMA rekey_cipher and + code>PRAGMA rekey_kdf_iter. These are deprecated and should not be + used. Instead, use sqlcipher_export(). + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param new_key: The new key. + :type new_key: str + :param is_raw_key: Whether C{password} is a raw 64-char hex string or a + passphrase that should be hashed to obtain the encyrption + key. + :type is_raw_key: bool + """ + if is_raw_key: + _set_rekey_raw(db_handle, new_key) + else: + _set_rekey_passphrase(db_handle, new_key) + + +def _set_rekey_passphrase(db_handle, passphrase): + """ + Change the passphrase for encryption key derivation. + + The key itself can be a passphrase, which is converted to a key using + PBKDF2 key derivation. The result is used as the encryption key for + the database. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param passphrase: The passphrase used to derive the encryption key. + :type passphrase: str + """ + db_handle.cursor().execute("PRAGMA rekey = '%s'" % passphrase) + + +def _set_rekey_raw(db_handle, key): + """ + Change the raw hexadecimal encryption key. + + It is possible to specify an exact byte sequence using a blob literal. + With this method, it is the calling application's responsibility to + ensure that the data provided is a 64 character hex string, which will + be converted directly to 32 bytes (256 bits) of key data. + + :param db_handle: A handle to the SQLCipher database. + :type db_handle: pysqlcipher.Connection + :param key: A 64 character hex string. + :type key: str + """ + if not all(c in string.hexdigits for c in key): + raise NotAnHexString(key) + db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % key) + + +def set_synchronous_off(db_handle): + """ + Change the setting of the "synchronous" flag to OFF. + """ + logger.debug("SQLCIPHER: SETTING SYNCHRONOUS OFF") + db_handle.cursor().execute('PRAGMA synchronous=OFF') + + +def set_synchronous_normal(db_handle): + """ + Change the setting of the "synchronous" flag to NORMAL. + """ + logger.debug("SQLCIPHER: SETTING SYNCHRONOUS NORMAL") + db_handle.cursor().execute('PRAGMA synchronous=NORMAL') + + +def set_mem_temp_store(db_handle): + """ + Use a in-memory store for temporary tables. + """ + logger.debug("SQLCIPHER: SETTING TEMP_STORE MEMORY") + db_handle.cursor().execute('PRAGMA temp_store=MEMORY') + + +def set_write_ahead_logging(db_handle): + """ + Enable write-ahead logging, and set the autocheckpoint to 50 pages. + + Setting the autocheckpoint to a small value, we make the reads not + suffer too much performance degradation. + + From the sqlite docs: + + "There is a tradeoff between average read performance and average write + performance. To maximize the read performance, one wants to keep the + WAL as small as possible and hence run checkpoints frequently, perhaps + as often as every COMMIT. To maximize write performance, one wants to + amortize the cost of each checkpoint over as many writes as possible, + meaning that one wants to run checkpoints infrequently and let the WAL + grow as large as possible before each checkpoint. The decision of how + often to run checkpoints may therefore vary from one application to + another depending on the relative read and write performance + requirements of the application. The default strategy is to run a + checkpoint once the WAL reaches 1000 pages" + """ + logger.debug("SQLCIPHER: SETTING WRITE-AHEAD LOGGING") + db_handle.cursor().execute('PRAGMA journal_mode=WAL') + + # The optimum value can still use a little bit of tuning, but we favor + # small sizes of the WAL file to get fast reads, since we assume that + # the writes will be quick enough to not block too much. + + db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50') + + +class NotAnHexString(Exception): + """ + Raised when trying to (raw) key the database with a non-hex string. + """ + pass diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py new file mode 100644 index 00000000..ee3aacdb --- /dev/null +++ b/client/src/leap/soledad/client/secrets.py @@ -0,0 +1,739 @@ +# -*- coding: utf-8 -*- +# secrets.py +# Copyright (C) 2014 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +""" +Soledad secrets handling. +""" + + +import os +import scrypt +import logging +import binascii +import errno +import json + +from hashlib import sha256 + +from leap.soledad.common import soledad_assert +from leap.soledad.common import soledad_assert_type +from leap.soledad.common import document +from leap.soledad.common import errors +from leap.soledad.client import events +from leap.soledad.client.crypto import encrypt_sym, decrypt_sym + + +logger = logging.getLogger(name=__name__) + + +# +# Exceptions +# + + +class SecretsException(Exception): + + """ + Generic exception type raised by this module. + """ + + +class NoStorageSecret(SecretsException): + + """ + Raised when trying to use a storage secret but none is available. + """ + pass + + +class PassphraseTooShort(SecretsException): + + """ + Raised when trying to change the passphrase but the provided passphrase is + too short. + """ + + +class BootstrapSequenceError(SecretsException): + + """ + Raised when an attempt to generate a secret and store it in a recovery + document on server failed. + """ + + +# +# Secrets handler +# + +class SoledadSecrets(object): + + """ + Soledad secrets handler. + + The first C{self.REMOTE_STORAGE_SECRET_LENGTH} bytes of the storage + secret are used for remote storage encryption. We use the next + C{self.LOCAL_STORAGE_SECRET} bytes to derive a key for local storage. + From these bytes, the first C{self.SALT_LENGTH} bytes are used as the + salt and the rest as the password for the scrypt hashing. + """ + + LOCAL_STORAGE_SECRET_LENGTH = 512 + """ + The length, in bytes, of the secret used to derive a passphrase for the + SQLCipher database. + """ + + REMOTE_STORAGE_SECRET_LENGTH = 512 + """ + The length, in bytes, of the secret used to derive an encryption key for + remote storage. + """ + + SALT_LENGTH = 64 + """ + The length, in bytes, of the salt used to derive the key for the storage + secret encryption. + """ + + GEN_SECRET_LENGTH = LOCAL_STORAGE_SECRET_LENGTH \ + + REMOTE_STORAGE_SECRET_LENGTH \ + + SALT_LENGTH # for sync db + """ + The length, in bytes, of the secret to be generated. This includes local + and remote secrets, and the salt for deriving the sync db secret. + """ + + MINIMUM_PASSPHRASE_LENGTH = 6 + """ + The minimum length, in bytes, for a passphrase. The passphrase length is + only checked when the user changes her passphrase, not when she + instantiates Soledad. + """ + + IV_SEPARATOR = ":" + """ + A separator used for storing the encryption initial value prepended to the + ciphertext. + """ + + UUID_KEY = 'uuid' + STORAGE_SECRETS_KEY = 'storage_secrets' + ACTIVE_SECRET_KEY = 'active_secret' + SECRET_KEY = 'secret' + CIPHER_KEY = 'cipher' + LENGTH_KEY = 'length' + KDF_KEY = 'kdf' + KDF_SALT_KEY = 'kdf_salt' + KDF_LENGTH_KEY = 'kdf_length' + KDF_SCRYPT = 'scrypt' + CIPHER_AES256 = 'aes256' + """ + Keys used to access storage secrets in recovery documents. + """ + + def __init__(self, uuid, passphrase, secrets_path, shared_db): + """ + Initialize the secrets manager. + + :param uuid: User's unique id. + :type uuid: str + :param passphrase: The passphrase for locking and unlocking encryption + secrets for local and remote storage. + :type passphrase: unicode + :param secrets_path: Path for storing encrypted key used for + symmetric encryption. + :type secrets_path: str + :param shared_db: The shared database that stores user secrets. + :type shared_db: leap.soledad.client.shared_db.SoledadSharedDatabase + """ + # XXX removed since not in use + # We will pick the first secret available. + # param secret_id: The id of the storage secret to be used. + + self._uuid = uuid + self._passphrase = passphrase + self._secrets_path = secrets_path + self._shared_db = shared_db + self._secrets = {} + + self._secret_id = None + + def bootstrap(self): + """ + Bootstrap secrets. + + Soledad secrets bootstrap is the following sequence of stages: + + * stage 1 - local secret loading: + - if secrets exist locally, load them. + * stage 2 - remote secret loading: + - else, if secrets exist in server, download them. + * stage 3 - secret generation: + - else, generate a new secret and store in server. + + This method decides which bootstrap stages have already been performed + and performs the missing ones in order. + + :raise BootstrapSequenceError: Raised when the secret generation and + storage on server sequence has failed for some reason. + """ + # STAGE 1 - verify if secrets exist locally + if not self._has_secret(): # try to load from local storage. + + # STAGE 2 - there are no secrets in local storage, so try to fetch + # encrypted secrets from server. + logger.info( + 'Trying to fetch cryptographic secrets from shared recovery ' + 'database...') + + # --- start of atomic operation in shared db --- + + # obtain lock on shared db + token = timeout = None + try: + token, timeout = self._shared_db.lock() + except errors.AlreadyLockedError: + raise BootstrapSequenceError('Database is already locked.') + except errors.LockTimedOutError: + raise BootstrapSequenceError('Lock operation timed out.') + + self._get_or_gen_crypto_secrets() + + # release the lock on shared db + try: + self._shared_db.unlock(token) + self._shared_db.close() + except errors.NotLockedError: + # for some reason the lock expired. Despite that, secret + # loading or generation/storage must have been executed + # successfully, so we pass. + pass + except errors.InvalidTokenError: + # here, our lock has not only expired but also some other + # client application has obtained a new lock and is currently + # doing its thing in the shared database. Using the same + # reasoning as above, we assume everything went smooth and + # pass. + pass + except Exception as e: + logger.error("Unhandled exception when unlocking shared " + "database.") + logger.exception(e) + + # --- end of atomic operation in shared db --- + + def _has_secret(self): + """ + Return whether there is a storage secret available for use or not. + + :return: Whether there's a storage secret for symmetric encryption. + :rtype: bool + """ + logger.info("Checking if there's a secret in local storage...") + if (self._secret_id is None or self._secret_id not in self._secrets) \ + and os.path.isfile(self._secrets_path): + try: + self._load_secrets() # try to load from disk + except IOError as e: + logger.warning( + 'IOError while loading secrets from disk: %s' % str(e)) + + if self.storage_secret is not None: + logger.info("Found a secret in local storage.") + return True + + logger.info("Could not find a secret in local storage.") + return False + + def _load_secrets(self): + """ + Load storage secrets from local file. + """ + # read storage secrets from file + content = None + with open(self._secrets_path, 'r') as f: + content = json.loads(f.read()) + _, active_secret = self._import_recovery_document(content) + # choose first secret if no secret_id was given + if self._secret_id is None: + if active_secret is None: + self.set_secret_id(self._secrets.items()[0][0]) + else: + self.set_secret_id(active_secret) + # enlarge secret if needed + enlarged = False + if len(self._secrets[self._secret_id]) < self.GEN_SECRET_LENGTH: + gen_len = self.GEN_SECRET_LENGTH \ + - len(self._secrets[self._secret_id]) + new_piece = os.urandom(gen_len) + self._secrets[self._secret_id] += new_piece + enlarged = True + # store and save in shared db if needed + if enlarged: + self._store_secrets() + self._put_secrets_in_shared_db() + + def _get_or_gen_crypto_secrets(self): + """ + Retrieves or generates the crypto secrets. + + :raises BootstrapSequenceError: Raised when unable to store secrets in + shared database. + """ + if self._shared_db.syncable: + doc = self._get_secrets_from_shared_db() + else: + doc = None + + if doc is not None: + logger.info( + 'Found cryptographic secrets in shared recovery ' + 'database.') + _, active_secret = self._import_recovery_document(doc.content) + self._store_secrets() # save new secrets in local file + if self._secret_id is None: + if active_secret is None: + self.set_secret_id(self._secrets.items()[0][0]) + else: + self.set_secret_id(active_secret) + else: + # STAGE 3 - there are no secrets in server also, so + # generate a secret and store it in remote db. + logger.info( + 'No cryptographic secrets found, creating new ' + ' secrets...') + self.set_secret_id(self._gen_secret()) + + if self._shared_db.syncable: + try: + self._put_secrets_in_shared_db() + except Exception as ex: + # storing generated secret in shared db failed for + # some reason, so we erase the generated secret and + # raise. + try: + os.unlink(self._secrets_path) + except OSError as e: + if e.errno != errno.ENOENT: + # no such file or directory + logger.exception(e) + logger.exception(ex) + raise BootstrapSequenceError( + 'Could not store generated secret in the shared ' + 'database, bailing out...') + + # + # Shared DB related methods + # + + def _shared_db_doc_id(self): + """ + Calculate the doc_id of the document in the shared db that stores key + material. + + :return: the hash + :rtype: str + """ + return sha256( + '%s%s' % + (self._passphrase_as_string(), self._uuid)).hexdigest() + + def _export_recovery_document(self): + """ + Export the storage secrets. + + A recovery document has the following structure: + + { + 'storage_secrets': { + '<storage_secret id>': { + 'cipher': 'aes256', + 'length': <secret length>, + 'secret': '<encrypted storage_secret>', + }, + }, + 'active_secret': '<secret_id>', + } + + Note that multiple storage secrets might be stored in one recovery + document. + + :return: The recovery document. + :rtype: dict + """ + # encrypt secrets + encrypted_secrets = {} + for secret_id in self._secrets: + encrypted_secrets[secret_id] = self._encrypt_storage_secret( + self._secrets[secret_id]) + # create the recovery document + data = { + self.STORAGE_SECRETS_KEY: encrypted_secrets, + self.ACTIVE_SECRET_KEY: self._secret_id, + } + return data + + def _import_recovery_document(self, data): + """ + Import storage secrets for symmetric encryption and uuid (if present) + from a recovery document. + + Note that this method does not store the imported data on disk. For + that, use C{self._store_secrets()}. + + :param data: The recovery document. + :type data: dict + + :return: A tuple containing the number of imported secrets and the + secret_id of the last active secret. + :rtype: (int, str) + """ + soledad_assert(self.STORAGE_SECRETS_KEY in data) + # include secrets in the secret pool. + secret_count = 0 + secrets = data[self.STORAGE_SECRETS_KEY].items() + active_secret = None + # XXX remove check for existence of key (included for backwards + # compatibility) + if self.ACTIVE_SECRET_KEY in data: + active_secret = data[self.ACTIVE_SECRET_KEY] + for secret_id, encrypted_secret in secrets: + if secret_id not in self._secrets: + try: + self._secrets[secret_id] = \ + self._decrypt_storage_secret(encrypted_secret) + secret_count += 1 + except SecretsException as e: + logger.error("Failed to decrypt storage secret: %s" + % str(e)) + return secret_count, active_secret + + def _get_secrets_from_shared_db(self): + """ + Retrieve the document with encrypted key material from the shared + database. + + :return: a document with encrypted key material in its contents + :rtype: document.SoledadDocument + """ + events.emit(events.SOLEDAD_DOWNLOADING_KEYS, self._uuid) + db = self._shared_db + if not db: + logger.warning('No shared db found') + return + doc = db.get_doc(self._shared_db_doc_id()) + events.emit(events.SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid) + return doc + + def _put_secrets_in_shared_db(self): + """ + Assert local keys are the same as shared db's ones. + + Try to fetch keys from shared recovery database. If they already exist + in the remote db, assert that that data is the same as local data. + Otherwise, upload keys to shared recovery database. + """ + soledad_assert( + self._has_secret(), + 'Tried to send keys to server but they don\'t exist in local ' + 'storage.') + # try to get secrets doc from server, otherwise create it + doc = self._get_secrets_from_shared_db() + if doc is None: + doc = document.SoledadDocument( + doc_id=self._shared_db_doc_id()) + # fill doc with encrypted secrets + doc.content = self._export_recovery_document() + # upload secrets to server + events.emit(events.SOLEDAD_UPLOADING_KEYS, self._uuid) + db = self._shared_db + if not db: + logger.warning('No shared db found') + return + db.put_doc(doc) + events.emit(events.SOLEDAD_DONE_UPLOADING_KEYS, self._uuid) + + # + # Management of secret for symmetric encryption. + # + + def _decrypt_storage_secret(self, encrypted_secret_dict): + """ + Decrypt the storage secret. + + Storage secret is encrypted before being stored. This method decrypts + and returns the decrypted storage secret. + + :param encrypted_secret_dict: The encrypted storage secret. + :type encrypted_secret_dict: dict + + :return: The decrypted storage secret. + :rtype: str + + :raise SecretsException: Raised in case the decryption of the storage + secret fails for some reason. + """ + # calculate the encryption key + if encrypted_secret_dict[self.KDF_KEY] != self.KDF_SCRYPT: + raise SecretsException("Unknown KDF in stored secret.") + key = scrypt.hash( + self._passphrase_as_string(), + # the salt is stored base64 encoded + binascii.a2b_base64( + encrypted_secret_dict[self.KDF_SALT_KEY]), + buflen=32, # we need a key with 256 bits (32 bytes). + ) + if encrypted_secret_dict[self.KDF_LENGTH_KEY] != len(key): + raise SecretsException("Wrong length of decryption key.") + if encrypted_secret_dict[self.CIPHER_KEY] != self.CIPHER_AES256: + raise SecretsException("Unknown cipher in stored secret.") + # recover the initial value and ciphertext + iv, ciphertext = encrypted_secret_dict[self.SECRET_KEY].split( + self.IV_SEPARATOR, 1) + ciphertext = binascii.a2b_base64(ciphertext) + decrypted_secret = decrypt_sym(ciphertext, key, iv) + if encrypted_secret_dict[self.LENGTH_KEY] != len(decrypted_secret): + raise SecretsException("Wrong length of decrypted secret.") + return decrypted_secret + + def _encrypt_storage_secret(self, decrypted_secret): + """ + Encrypt the storage secret. + + An encrypted secret has the following structure: + + { + '<secret_id>': { + 'kdf': 'scrypt', + 'kdf_salt': '<b64 repr of salt>' + 'kdf_length': <key length> + 'cipher': 'aes256', + 'length': <secret length>, + 'secret': '<encrypted b64 repr of storage_secret>', + } + } + + :param decrypted_secret: The decrypted storage secret. + :type decrypted_secret: str + + :return: The encrypted storage secret. + :rtype: dict + """ + # generate random salt + salt = os.urandom(self.SALT_LENGTH) + # get a 256-bit key + key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32) + iv, ciphertext = encrypt_sym(decrypted_secret, key) + encrypted_secret_dict = { + # leap.soledad.crypto submodule uses AES256 for symmetric + # encryption. + self.KDF_KEY: self.KDF_SCRYPT, + self.KDF_SALT_KEY: binascii.b2a_base64(salt), + self.KDF_LENGTH_KEY: len(key), + self.CIPHER_KEY: self.CIPHER_AES256, + self.LENGTH_KEY: len(decrypted_secret), + self.SECRET_KEY: '%s%s%s' % ( + str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), + } + return encrypted_secret_dict + + @property + def storage_secret(self): + """ + Return the storage secret. + + :return: The decrypted storage secret. + :rtype: str + """ + return self._secrets.get(self._secret_id) + + def set_secret_id(self, secret_id): + """ + Define the id of the storage secret to be used. + + This method will also replace the secret in the crypto object. + + :param secret_id: The id of the storage secret to be used. + :type secret_id: str + """ + self._secret_id = secret_id + + def _gen_secret(self): + """ + Generate a secret for symmetric encryption and store in a local + encrypted file. + + This method emits the following events.signals: + + * SOLEDAD_CREATING_KEYS + * SOLEDAD_DONE_CREATING_KEYS + + :return: The id of the generated secret. + :rtype: str + """ + events.emit(events.SOLEDAD_CREATING_KEYS, self._uuid) + # generate random secret + secret = os.urandom(self.GEN_SECRET_LENGTH) + secret_id = sha256(secret).hexdigest() + self._secrets[secret_id] = secret + self._store_secrets() + events.emit(events.SOLEDAD_DONE_CREATING_KEYS, self._uuid) + return secret_id + + def _store_secrets(self): + """ + Store secrets in C{Soledad.STORAGE_SECRETS_FILE_PATH}. + """ + with open(self._secrets_path, 'w') as f: + f.write( + json.dumps( + self._export_recovery_document())) + + def change_passphrase(self, new_passphrase): + """ + Change the passphrase that encrypts the storage secret. + + :param new_passphrase: The new passphrase. + :type new_passphrase: unicode + + :raise NoStorageSecret: Raised if there's no storage secret available. + """ + # TODO: maybe we want to add more checks to guarantee passphrase is + # reasonable? + soledad_assert_type(new_passphrase, unicode) + if len(new_passphrase) < self.MINIMUM_PASSPHRASE_LENGTH: + raise PassphraseTooShort( + 'Passphrase must be at least %d characters long!' % + self.MINIMUM_PASSPHRASE_LENGTH) + # ensure there's a secret for which the passphrase will be changed. + if not self._has_secret(): + raise NoStorageSecret() + self._passphrase = new_passphrase + self._store_secrets() + self._put_secrets_in_shared_db() + + # + # Setters and getters + # + + @property + def secret_id(self): + return self._secret_id + + def _get_secrets_path(self): + return self._secrets_path + + def _set_secrets_path(self, secrets_path): + self._secrets_path = secrets_path + + secrets_path = property( + _get_secrets_path, + _set_secrets_path, + doc='The path for the file containing the encrypted symmetric secret.') + + @property + def passphrase(self): + """ + Return the passphrase for locking and unlocking encryption secrets for + local and remote storage. + """ + return self._passphrase + + def _passphrase_as_string(self): + return self._passphrase.encode('utf-8') + + # + # remote storage secret + # + + @property + def remote_storage_secret(self): + """ + Return the secret for remote storage. + """ + key_start = 0 + key_end = self.REMOTE_STORAGE_SECRET_LENGTH + return self.storage_secret[key_start:key_end] + + # + # local storage key + # + + def _get_local_storage_secret(self): + """ + Return the local storage secret. + + :return: The local storage secret. + :rtype: str + """ + secret_len = self.REMOTE_STORAGE_SECRET_LENGTH + lsecret_len = self.LOCAL_STORAGE_SECRET_LENGTH + pwd_start = secret_len + self.SALT_LENGTH + pwd_end = secret_len + lsecret_len + return self.storage_secret[pwd_start:pwd_end] + + def _get_local_storage_salt(self): + """ + Return the local storage salt. + + :return: The local storage salt. + :rtype: str + """ + salt_start = self.REMOTE_STORAGE_SECRET_LENGTH + salt_end = salt_start + self.SALT_LENGTH + return self.storage_secret[salt_start:salt_end] + + def get_local_storage_key(self): + """ + Return the local storage key derived from the local storage secret. + + :return: The key for protecting the local database. + :rtype: str + """ + return scrypt.hash( + password=self._get_local_storage_secret(), + salt=self._get_local_storage_salt(), + buflen=32, # we need a key with 256 bits (32 bytes) + ) + + # + # sync db key + # + + def _get_sync_db_salt(self): + """ + Return the salt for sync db. + """ + salt_start = self.LOCAL_STORAGE_SECRET_LENGTH \ + + self.REMOTE_STORAGE_SECRET_LENGTH + salt_end = salt_start + self.SALT_LENGTH + return self.storage_secret[salt_start:salt_end] + + def get_sync_db_key(self): + """ + Return the key for protecting the sync database. + + :return: The key for protecting the sync database. + :rtype: str + """ + return scrypt.hash( + password=self._get_local_storage_secret(), + salt=self._get_sync_db_salt(), + buflen=32, # we need a key with 256 bits (32 bytes) + ) diff --git a/client/src/leap/soledad/client/shared_db.py b/client/src/leap/soledad/client/shared_db.py index 52e51c6f..6abf8ea3 100644 --- a/client/src/leap/soledad/client/shared_db.py +++ b/client/src/leap/soledad/client/shared_db.py @@ -14,19 +14,11 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. - - """ A shared database for storing/retrieving encrypted key material. """ - -import simplejson as json - - from u1db.remote import http_database - -from leap.soledad.common import SHARED_DB_LOCK_DOC_ID_PREFIX from leap.soledad.client.auth import TokenBasedAuth @@ -34,6 +26,9 @@ from leap.soledad.client.auth import TokenBasedAuth # Soledad shared database # ---------------------------------------------------------------------------- +# TODO could have a hierarchy of soledad exceptions. + + class NoTokenForAuth(Exception): """ No token was found for token-based authentication. @@ -46,6 +41,12 @@ class Unauthorized(Exception): """ +class ImproperlyConfiguredError(Exception): + """ + Wrong parameters in the database configuration. + """ + + class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): """ This is a shared recovery database that enables users to store their @@ -54,6 +55,10 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): # TODO: prevent client from messing with the shared DB. # TODO: define and document API. + # If syncable is False, the database will not attempt to sync against + # a remote replica. Default is True. + syncable = True + # # Token auth methods. # @@ -90,9 +95,7 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): # @staticmethod - def open_database(url, uuid, create, creds=None): - # TODO: users should not be able to create the shared database, so we - # have to remove this from here in the future. + def open_database(url, uuid, creds=None, syncable=True): """ Open a Soledad shared database. @@ -100,17 +103,23 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): :type url: str :param uuid: The user's unique id. :type uuid: str - :param create: Should the database be created if it does not already - exist? - :type create: bool - :param token: An authentication token for accessing the shared db. - :type token: str + :param creds: A tuple containing the authentication method and + credentials. + :type creds: tuple + :param syncable: + If syncable is False, the database will not attempt to sync against + a remote replica. + :type syncable: bool :return: The shared database in the given url. :rtype: SoledadSharedDatabase """ + # XXX fix below, doesn't work with tests. + # if syncable and not url.startswith('https://'): + # raise ImproperlyConfiguredError( + # "Remote soledad server must be an https URI") db = SoledadSharedDatabase(url, uuid, creds=creds) - db.open(create) + db.syncable = syncable return db @staticmethod @@ -153,9 +162,12 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): :raise HTTPError: Raised if any HTTP error occurs. """ - res, headers = self._request_json('PUT', ['lock', self._uuid], - body={}) - return res['token'], res['timeout'] + if self.syncable: + res, headers = self._request_json( + 'PUT', ['lock', self._uuid], body={}) + return res['token'], res['timeout'] + else: + return None, None def unlock(self, token): """ @@ -166,5 +178,6 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): :raise HTTPError: """ - res, headers = self._request_json('DELETE', ['lock', self._uuid], - params={'token': token}) + if self.syncable: + _, _ = self._request_json( + 'DELETE', ['lock', self._uuid], params={'token': token}) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index f9bed049..2151884a 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -42,264 +42,114 @@ SQLCipher 1.1 databases, we do not implement them as all SQLCipher databases handled by Soledad should be created by SQLCipher >= 2.0. """ import logging -import multiprocessing import os -import sqlite3 -import string -import threading -import time import json +import u1db + +from u1db import errors as u1db_errors +from u1db.backends import sqlite_backend from hashlib import sha256 -from contextlib import contextmanager -from collections import defaultdict +from functools import partial -from pysqlcipher import dbapi2 -from u1db.backends import sqlite_backend -from u1db import errors as u1db_errors -from taskthread import TimerTask +from pysqlcipher import dbapi2 as sqlcipher_dbapi2 + +from twisted.internet import reactor +from twisted.internet import defer +from twisted.enterprise import adbapi -from leap.soledad.client.crypto import SyncEncrypterPool, SyncDecrypterPool -from leap.soledad.client.target import SoledadSyncTarget -from leap.soledad.client.target import PendingReceivedDocsSyncError +from leap.soledad.client.http_target import SoledadHTTPSyncTarget from leap.soledad.client.sync import SoledadSynchronizer + +from leap.soledad.client import pragmas from leap.soledad.common.document import SoledadDocument logger = logging.getLogger(__name__) -# Monkey-patch u1db.backends.sqlite_backend with pysqlcipher.dbapi2 -sqlite_backend.dbapi2 = dbapi2 - -# It seems that, as long as we are not using old sqlite versions, serialized -# mode is enabled by default at compile time. So accessing db connections from -# different threads should be safe, as long as no attempt is made to use them -# from multiple threads with no locking. -# See https://sqlite.org/threadsafe.html -# and http://bugs.python.org/issue16509 - -SQLITE_CHECK_SAME_THREAD = False - -# We set isolation_level to None to setup autocommit mode. -# See: http://docs.python.org/2/library/sqlite3.html#controlling-transactions -# This avoids problems with sequential operations using the same soledad object -# trying to open new transactions -# (The error was: -# OperationalError:cannot start a transaction within a transaction.) -SQLITE_ISOLATION_LEVEL = None - - -def open(path, password, create=True, document_factory=None, crypto=None, - raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, - cipher_page_size=1024, defer_encryption=False): - """Open a database at the given location. - - Will raise u1db.errors.DatabaseDoesNotExist if create=False and the - database does not already exist. - - :param path: The filesystem path for the database to open. - :type path: str - :param create: True/False, should the database be created if it doesn't - already exist? - :param create: bool - :param document_factory: A function that will be called with the same - parameters as Document.__init__. - :type document_factory: callable - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto - :param raw_key: Whether C{password} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. - :type raw_key: bool - :param cipher: The cipher and mode to use. - :type cipher: str - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int - :param cipher_page_size: The page size. - :type cipher_page_size: int - :param defer_encryption: Whether to defer encryption/decryption of - documents, or do it inline while syncing. - :type defer_encryption: bool - - :return: An instance of Database. - :rtype SQLCipherDatabase - """ - return SQLCipherDatabase.open_database( - path, password, create=create, document_factory=document_factory, - crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, - cipher_page_size=cipher_page_size, defer_encryption=defer_encryption) +# Monkey-patch u1db.backends.sqlite_backend with pysqlcipher.dbapi2 +sqlite_backend.dbapi2 = sqlcipher_dbapi2 -# -# Exceptions -# -class DatabaseIsNotEncrypted(Exception): - """ - Exception raised when trying to open non-encrypted databases. +def initialize_sqlcipher_db(opts, on_init=None, check_same_thread=True): """ - pass - + Initialize a SQLCipher database. -class NotAnHexString(Exception): - """ - Raised when trying to (raw) key the database with a non-hex string. + :param opts: + :type opts: SQLCipherOptions + :param on_init: a tuple of queries to be executed on initialization + :type on_init: tuple + :return: pysqlcipher.dbapi2.Connection """ - pass - - -# -# The SQLCipher database -# + # Note: There seemed to be a bug in sqlite 3.5.9 (with python2.6) + # where without re-opening the database on Windows, it + # doesn't see the transaction that was just committed + # Removing from here now, look at the pysqlite implementation if the + # bug shows up in windows. -class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): - """ - A U1DB implementation that uses SQLCipher as its persistence layer. - """ - defer_encryption = False + if not os.path.isfile(opts.path) and not opts.create: + raise u1db_errors.DatabaseDoesNotExist() - _index_storage_value = 'expand referenced encrypted' - k_lock = threading.Lock() - create_doc_lock = threading.Lock() - update_indexes_lock = threading.Lock() - _sync_watcher = None - _sync_enc_pool = None + conn = sqlcipher_dbapi2.connect( + opts.path, check_same_thread=check_same_thread) + pragmas.set_init_pragmas(conn, opts, extra_queries=on_init) + return conn - """ - The name of the local symmetrically encrypted documents to - sync database file. - """ - LOCAL_SYMMETRIC_SYNC_FILE_NAME = 'sync.u1db' - """ - A dictionary that hold locks which avoid multiple sync attempts from the - same database replica. - """ - encrypting_lock = threading.Lock() +def initialize_sqlcipher_adbapi_db(opts, extra_queries=None): + from leap.soledad.client import sqlcipher_adbapi + return sqlcipher_adbapi.getConnectionPool( + opts, extra_queries=extra_queries) - """ - Period or recurrence of the periodic encrypting task, in seconds. - """ - ENCRYPT_TASK_PERIOD = 1 - syncing_lock = defaultdict(threading.Lock) +class SQLCipherOptions(object): """ - A dictionary that hold locks which avoid multiple sync attempts from the - same database replica. + A container with options for the initialization of an SQLCipher database. """ - def __init__(self, sqlcipher_file, password, document_factory=None, - crypto=None, raw_key=False, cipher='aes-256-cbc', - kdf_iter=4000, cipher_page_size=1024): + @classmethod + def copy(cls, source, path=None, key=None, create=None, + is_raw_key=None, cipher=None, kdf_iter=None, + cipher_page_size=None, defer_encryption=None, sync_db_key=None): """ - Connect to an existing SQLCipher database, creating a new sqlcipher - database file if needed. - - :param sqlcipher_file: The path for the SQLCipher file. - :type sqlcipher_file: str - :param password: The password that protects the SQLCipher db. - :type password: str - :param document_factory: A function that will be called with the same - parameters as Document.__init__. - :type document_factory: callable - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto - :param raw_key: Whether password is a raw 64-char hex string or a - passphrase that should be hashed to obtain the - encyrption key. - :type raw_key: bool - :param cipher: The cipher and mode to use. - :type cipher: str - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int - :param cipher_page_size: The page size. - :type cipher_page_size: int + Return a copy of C{source} with parameters different than None + replaced by new values. """ - # ensure the db is encrypted if the file already exists - if os.path.exists(sqlcipher_file): - self.assert_db_is_encrypted( - sqlcipher_file, password, raw_key, cipher, kdf_iter, - cipher_page_size) + local_vars = locals() + args = [] + kwargs = {} - # connect to the sqlcipher database - with self.k_lock: - self._db_handle = dbapi2.connect( - sqlcipher_file, - isolation_level=SQLITE_ISOLATION_LEVEL, - check_same_thread=SQLITE_CHECK_SAME_THREAD) - # set SQLCipher cryptographic parameters - self._set_crypto_pragmas( - self._db_handle, password, raw_key, cipher, kdf_iter, - cipher_page_size) - if os.environ.get('LEAP_SQLITE_NOSYNC'): - self._pragma_synchronous_off(self._db_handle) + for name in ["path", "key"]: + val = local_vars[name] + if val is not None: + args.append(val) else: - self._pragma_synchronous_normal(self._db_handle) - if os.environ.get('LEAP_SQLITE_MEMSTORE'): - self._pragma_mem_temp_store(self._db_handle) - - # Disabled for 0.6.x branch. See #5562 - # self._pragma_write_ahead_logging(self._db_handle) - - self._real_replica_uid = None - self._ensure_schema() - self._crypto = crypto - - self._sync_db = None - self._sync_db_write_lock = None - self._sync_enc_pool = None + args.append(getattr(source, name)) - if self.defer_encryption: - if sqlcipher_file != ":memory:": - self._sync_db_path = "%s-sync" % sqlcipher_file + for name in ["create", "is_raw_key", "cipher", "kdf_iter", + "cipher_page_size", "defer_encryption", "sync_db_key"]: + val = local_vars[name] + if val is not None: + kwargs[name] = val else: - self._sync_db_path = ":memory:" - - # initialize sync db - self._init_sync_db() - - # initialize syncing queue encryption pool - self._sync_enc_pool = SyncEncrypterPool( - self._crypto, self._sync_db, self._sync_db_write_lock) - self._sync_watcher = TimerTask(self._encrypt_syncing_docs, - self.ENCRYPT_TASK_PERIOD) - self._sync_watcher.start() - - def factory(doc_id=None, rev=None, json='{}', has_conflicts=False, - syncable=True): - return SoledadDocument(doc_id=doc_id, rev=rev, json=json, - has_conflicts=has_conflicts, - syncable=syncable) - self.set_document_factory(factory) - # we store syncers in a dictionary indexed by the target URL. We also - # store a hash of the auth info in case auth info expires and we need - # to rebuild the syncer for that target. The final self._syncers - # format is the following: - # - # self._syncers = {'<url>': ('<auth_hash>', syncer), ...} - self._syncers = {} - - @classmethod - def _open_database(cls, sqlcipher_file, password, document_factory=None, - crypto=None, raw_key=False, cipher='aes-256-cbc', - kdf_iter=4000, cipher_page_size=1024, - defer_encryption=False): - """ - Open a SQLCipher database. - - :param sqlcipher_file: The path for the SQLCipher file. - :type sqlcipher_file: str - :param password: The password that protects the SQLCipher db. - :type password: str - :param document_factory: A function that will be called with the same - parameters as Document.__init__. - :type document_factory: callable - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto - :param raw_key: Whether C{password} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. + kwargs[name] = getattr(source, name) + + return SQLCipherOptions(*args, **kwargs) + + def __init__(self, path, key, create=True, is_raw_key=False, + cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024, + defer_encryption=False, sync_db_key=None): + """ + :param path: The filesystem path for the database to open. + :type path: str + :param create: + True/False, should the database be created if it doesn't + already exist? + :param create: bool + :param is_raw_key: + Whether ``password`` is a raw 64-char hex string or a passphrase + that should be hashed to obtain the encyrption key. :type raw_key: bool :param cipher: The cipher and mode to use. :type cipher: str @@ -307,233 +157,93 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :type kdf_iter: int :param cipher_page_size: The page size. :type cipher_page_size: int - :param defer_encryption: Whether to defer encryption/decryption of - documents, or do it inline while syncing. + :param defer_encryption: + Whether to defer encryption/decryption of documents, or do it + inline while syncing. :type defer_encryption: bool - - :return: The database object. - :rtype: SQLCipherDatabase """ - cls.defer_encryption = defer_encryption - if not os.path.isfile(sqlcipher_file): - raise u1db_errors.DatabaseDoesNotExist() - - tries = 2 - # Note: There seems to be a bug in sqlite 3.5.9 (with python2.6) - # where without re-opening the database on Windows, it - # doesn't see the transaction that was just committed - while True: - - with cls.k_lock: - db_handle = dbapi2.connect( - sqlcipher_file, - check_same_thread=SQLITE_CHECK_SAME_THREAD) - - try: - # set cryptographic params - cls._set_crypto_pragmas( - db_handle, password, raw_key, cipher, kdf_iter, - cipher_page_size) - c = db_handle.cursor() - # XXX if we use it here, it should be public - v, err = cls._which_index_storage(c) - except Exception as exc: - logger.warning("ERROR OPENING DATABASE!") - logger.debug("error was: %r" % exc) - v, err = None, exc - finally: - db_handle.close() - if v is not None: - break - # possibly another process is initializing it, wait for it to be - # done - if tries == 0: - raise err # go for the richest error? - tries -= 1 - time.sleep(cls.WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL) - return SQLCipherDatabase._sqlite_registry[v]( - sqlcipher_file, password, document_factory=document_factory, - crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, - cipher_page_size=cipher_page_size) + self.path = path + self.key = key + self.is_raw_key = is_raw_key + self.create = create + self.cipher = cipher + self.kdf_iter = kdf_iter + self.cipher_page_size = cipher_page_size + self.defer_encryption = defer_encryption + self.sync_db_key = sync_db_key - @classmethod - def open_database(cls, sqlcipher_file, password, create, backend_cls=None, - document_factory=None, crypto=None, raw_key=False, - cipher='aes-256-cbc', kdf_iter=4000, - cipher_page_size=1024, defer_encryption=False): + def __str__(self): """ - Open a SQLCipher database. - - :param sqlcipher_file: The path for the SQLCipher file. - :type sqlcipher_file: str - - :param password: The password that protects the SQLCipher db. - :type password: str - - :param create: Should the datbase be created if it does not already - exist? - :type create: bool - - :param backend_cls: A class to use as backend. - :type backend_cls: type - - :param document_factory: A function that will be called with the same - parameters as Document.__init__. - :type document_factory: callable - - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto + Return string representation of options, for easy debugging. - :param raw_key: Whether C{password} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the - encyrption key. - :type raw_key: bool + :return: String representation of options. + :rtype: str + """ + attr_names = filter(lambda a: not a.startswith('_'), dir(self)) + attr_str = [] + for a in attr_names: + attr_str.append(a + "=" + str(getattr(self, a))) + name = self.__class__.__name__ + return "%s(%s)" % (name, ', '.join(attr_str)) - :param cipher: The cipher and mode to use. - :type cipher: str - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int +# +# The SQLCipher database +# - :param cipher_page_size: The page size. - :type cipher_page_size: int +class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): + """ + A U1DB implementation that uses SQLCipher as its persistence layer. + """ + defer_encryption = False - :param defer_encryption: Whether to defer encryption/decryption of - documents, or do it inline while syncing. - :type defer_encryption: bool + # The attribute _index_storage_value will be used as the lookup key for the + # implementation of the SQLCipher storage backend. + _index_storage_value = 'expand referenced encrypted' - :return: The database object. - :rtype: SQLCipherDatabase + def __init__(self, opts): """ - cls.defer_encryption = defer_encryption - try: - return cls._open_database( - sqlcipher_file, password, document_factory=document_factory, - crypto=crypto, raw_key=raw_key, cipher=cipher, - kdf_iter=kdf_iter, cipher_page_size=cipher_page_size, - defer_encryption=defer_encryption) - except u1db_errors.DatabaseDoesNotExist: - if not create: - raise - # TODO: remove backend class from here. - if backend_cls is None: - # default is SQLCipherPartialExpandDatabase - backend_cls = SQLCipherDatabase - return backend_cls( - sqlcipher_file, password, document_factory=document_factory, - crypto=crypto, raw_key=raw_key, cipher=cipher, - kdf_iter=kdf_iter, cipher_page_size=cipher_page_size) - - def sync(self, url, creds=None, autocreate=True, defer_decryption=True): - """ - Synchronize documents with remote replica exposed at url. - - There can be at most one instance syncing the same database replica at - the same time, so this method will block until the syncing lock can be - acquired. + Connect to an existing SQLCipher database, creating a new sqlcipher + database file if needed. - :param url: The url of the target replica to sync with. - :type url: str - :param creds: optional dictionary giving credentials. - to authorize the operation with the server. - :type creds: dict - :param autocreate: Ask the target to create the db if non-existent. - :type autocreate: bool - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. - :type defer_decryption: bool + *** IMPORTANT *** - :return: The local generation before the synchronisation was performed. - :rtype: int - """ - res = None - # the following context manager blocks until the syncing lock can be - # acquired. - with self.syncer(url, creds=creds) as syncer: - - # XXX could mark the critical section here... - try: - if defer_decryption and not self.defer_encryption: - logger.warning("Can't defer decryption without first having " - "created a sync db. Falling back to normal " - "syncing mode.") - defer_decryption = False - res = syncer.sync(autocreate=autocreate, - defer_decryption=defer_decryption) - - except PendingReceivedDocsSyncError: - logger.warning("Local sync db is not clear, skipping sync...") - return - - return res - - def stop_sync(self): - """ - Interrupt all ongoing syncs. - """ - for url in self._syncers: - _, syncer = self._syncers[url] - syncer.stop() + Don't forget to close the database after use by calling the close() + method otherwise some resources might not be freed and you may + experience several kinds of leakages. - @contextmanager - def syncer(self, url, creds=None): - """ - Accesor for synchronizer. + *** IMPORTANT *** - As we reuse the same synchronizer for every sync, there can be only - one instance synchronizing the same database replica at the same time. - Because of that, this method blocks until the syncing lock can be - acquired. + :param opts: options for initialization of the SQLCipher database. + :type opts: SQLCipherOptions """ - with SQLCipherDatabase.syncing_lock[self._get_replica_uid()]: - syncer = self._get_syncer(url, creds=creds) - yield syncer + # ensure the db is encrypted if the file already exists + if os.path.isfile(opts.path): + _assert_db_is_encrypted(opts) - @property - def syncing(self): - lock = SQLCipherDatabase.syncing_lock[self._get_replica_uid()] - acquired_lock = lock.acquire(False) - if acquired_lock is False: - return True - lock.release() - return False + # connect to the sqlcipher database + self._db_handle = initialize_sqlcipher_db(opts) - def _get_syncer(self, url, creds=None): - """ - Get a synchronizer for C{url} using C{creds}. + # TODO --------------------------------------------------- + # Everything else in this initialization has to be factored + # out, so it can be used from SoledadSQLCipherWrapper.__init__ + # too. + # --------------------------------------------------------- - :param url: The url of the target replica to sync with. - :type url: str - :param creds: optional dictionary giving credentials. - to authorize the operation with the server. - :type creds: dict + self._ensure_schema() + self.set_document_factory(soledad_doc_factory) + self._prime_replica_uid() - :return: A synchronizer. - :rtype: Synchronizer + def _prime_replica_uid(self): """ - # we want to store at most one syncer for each url, so we also store a - # hash of the connection credentials and replace the stored syncer for - # a certain url if credentials have changed. - h = sha256(json.dumps([url, creds])).hexdigest() - cur_h, syncer = self._syncers.get(url, (None, None)) - if syncer is None or h != cur_h: - wlock = self._sync_db_write_lock - syncer = SoledadSynchronizer( - self, - SoledadSyncTarget(url, - self._replica_uid, - creds=creds, - crypto=self._crypto, - sync_db=self._sync_db, - sync_db_write_lock=wlock)) - self._syncers[url] = (h, syncer) - # in order to reuse the same synchronizer multiple times we have to - # reset its state (i.e. the number of documents received from target - # and inserted in the local replica). - syncer.num_inserted = 0 - return syncer + In the u1db implementation, _replica_uid is a property + that returns the value in _real_replica_uid, and does + a db query if no value found. + Here we prime the replica uid during initialization so + that we don't have to wait for the query afterwards. + """ + self._real_replica_uid = None + self._get_replica_uid() def _extra_schema_init(self, c): """ @@ -550,63 +260,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): 'ALTER TABLE document ' 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') - def _init_sync_db(self): - """ - Initialize the Symmetrically-Encrypted document to be synced database, - and the queue to communicate with subprocess workers. - """ - self._sync_db = sqlite3.connect(self._sync_db_path, - check_same_thread=False) - - self._sync_db_write_lock = threading.Lock() - self._create_sync_db_tables() - self.sync_queue = multiprocessing.Queue() - - def _create_sync_db_tables(self): - """ - Create tables for the local sync documents db if needed. - """ - encr = SyncEncrypterPool - decr = SyncDecrypterPool - sql_encr = ("CREATE TABLE IF NOT EXISTS %s (%s)" % ( - encr.TABLE_NAME, encr.FIELD_NAMES)) - sql_decr = ("CREATE TABLE IF NOT EXISTS %s (%s)" % ( - decr.TABLE_NAME, decr.FIELD_NAMES)) - - with self._sync_db_write_lock: - with self._sync_db: - self._sync_db.execute(sql_encr) - self._sync_db.execute(sql_decr) - - # - # Symmetric encryption of syncing docs - # - - def _encrypt_syncing_docs(self): - """ - Process the syncing queue and send the documents there - to be encrypted in the sync db. They will be read by the - SoledadSyncTarget during the sync_exchange. - - Called periodical from the TimerTask self._sync_watcher. - """ - lock = self.encrypting_lock - # optional wait flag used to avoid blocking - if not lock.acquire(False): - return - else: - queue = self.sync_queue - try: - while not queue.empty(): - doc = queue.get_nowait() - self._sync_enc_pool.encrypt_doc(doc) - - except Exception as exc: - logger.error("Error while encrypting docs to sync") - logger.exception(exc) - finally: - lock.release() - # # Document operations # @@ -622,12 +275,81 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :return: The new document revision. :rtype: str """ - doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc( - self, doc) + doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(self, doc) if self.defer_encryption: - self.sync_queue.put_nowait(doc) + # TODO move to api? + self._sync_enc_pool.enqueue_doc_for_encryption(doc) return doc_rev + # + # SQLCipher API methods + # + + # Extra query methods: extensions to the base u1db sqlite implmentation. + + def get_count_from_index(self, index_name, *key_values): + """ + Return the count for a given combination of index_name + and key values. + + Extension method made from similar methods in u1db version 13.09 + + :param index_name: The index to query + :type index_name: str + :param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + :type key_values: tuple + :return: count. + :rtype: int + """ + c = self._db_handle.cursor() + definition = self._get_index_definition(index_name) + + if len(key_values) != len(definition): + raise u1db_errors.InvalidValueForIndex() + tables = ["document_fields d%d" % i for i in range(len(definition))] + novalue_where = ["d.doc_id = d%d.doc_id" + " AND d%d.field_name = ?" + % (i, i) for i in range(len(definition))] + exact_where = [novalue_where[i] + (" AND d%d.value = ?" % (i,)) + for i in range(len(definition))] + args = [] + where = [] + for idx, (field, value) in enumerate(zip(definition, key_values)): + args.append(field) + where.append(exact_where[idx]) + args.append(value) + + tables = ["document_fields d%d" % i for i in range(len(definition))] + statement = ( + "SELECT COUNT(*) FROM document d, %s WHERE %s " % ( + ', '.join(tables), + ' AND '.join(where), + )) + try: + c.execute(statement, tuple(args)) + except sqlcipher_dbapi2.OperationalError, e: + raise sqlcipher_dbapi2.OperationalError( + str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args)) + res = c.fetchall() + return res[0][0] + + def close(self): + """ + Close db connections. + """ + # TODO should be handled by adbapi instead + # TODO syncdb should be stopped first + + if logger is not None: # logger might be none if called from __del__ + logger.debug("SQLCipher backend: closing") + + # close the actual database + if getattr(self, '_db_handle', False): + self._db_handle.close() + self._db_handle = None + # indexes def _put_and_update_indexes(self, old_doc, doc): @@ -639,13 +361,11 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :param doc: The new version of the document. :type doc: u1db.Document """ - with self.update_indexes_lock: - sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes( - self, old_doc, doc) - c = self._db_handle.cursor() - c.execute('UPDATE document SET syncable=? ' - 'WHERE doc_id=?', - (doc.syncable, doc.doc_id)) + sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes( + self, old_doc, doc) + c = self._db_handle.cursor() + c.execute('UPDATE document SET syncable=? WHERE doc_id=?', + (doc.syncable, doc.doc_id)) def _get_doc(self, doc_id, check_for_conflicts=False): """ @@ -665,438 +385,311 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self, doc_id, check_for_conflicts) if doc: c = self._db_handle.cursor() - c.execute('SELECT syncable FROM document ' - 'WHERE doc_id=?', + c.execute('SELECT syncable FROM document WHERE doc_id=?', (doc.doc_id,)) result = c.fetchone() doc.syncable = bool(result[0]) return doc - # - # SQLCipher API methods - # - - @classmethod - def assert_db_is_encrypted(cls, sqlcipher_file, key, raw_key, cipher, - kdf_iter, cipher_page_size): - """ - Assert that C{sqlcipher_file} contains an encrypted database. - - When opening an existing database, PRAGMA key will not immediately - throw an error if the key provided is incorrect. To test that the - database can be successfully opened with the provided key, it is - necessary to perform some operation on the database (i.e. read from - it) and confirm it is success. - - The easiest way to do this is select off the sqlite_master table, - which will attempt to read the first page of the database and will - parse the schema. - - :param sqlcipher_file: The path for the SQLCipher file. - :type sqlcipher_file: str - :param key: The key that protects the SQLCipher db. - :type key: str - :param raw_key: Whether C{key} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. - :type raw_key: bool - :param cipher: The cipher and mode to use. - :type cipher: str - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int - :param cipher_page_size: The page size. - :type cipher_page_size: int + def __del__(self): """ - try: - # try to open an encrypted database with the regular u1db - # backend should raise a DatabaseError exception. - sqlite_backend.SQLitePartialExpandDatabase(sqlcipher_file) - raise DatabaseIsNotEncrypted() - except dbapi2.DatabaseError: - # assert that we can access it using SQLCipher with the given - # key - with cls.k_lock: - db_handle = dbapi2.connect( - sqlcipher_file, - isolation_level=SQLITE_ISOLATION_LEVEL, - check_same_thread=SQLITE_CHECK_SAME_THREAD) - cls._set_crypto_pragmas( - db_handle, key, raw_key, cipher, - kdf_iter, cipher_page_size) - db_handle.cursor().execute( - 'SELECT count(*) FROM sqlite_master') + Free resources when deleting or garbage collecting the database. - @classmethod - def _set_crypto_pragmas(cls, db_handle, key, raw_key, cipher, kdf_iter, - cipher_page_size): - """ - Set cryptographic params (key, cipher, KDF number of iterations and - cipher page size). + This is only here to minimze problems if someone ever forgets to call + the close() method after using the database; you should not rely on + garbage collecting to free up the database resources. """ - cls._pragma_key(db_handle, key, raw_key) - cls._pragma_cipher(db_handle, cipher) - cls._pragma_kdf_iter(db_handle, kdf_iter) - cls._pragma_cipher_page_size(db_handle, cipher_page_size) + self.close() - @classmethod - def _pragma_key(cls, db_handle, key, raw_key): - """ - Set the C{key} for use with the database. - The process of creating a new, encrypted database is called 'keying' - the database. SQLCipher uses just-in-time key derivation at the point - it is first needed for an operation. This means that the key (and any - options) must be set before the first operation on the database. As - soon as the database is touched (e.g. SELECT, CREATE TABLE, UPDATE, - etc.) and pages need to be read or written, the key is prepared for - use. +class SQLCipherU1DBSync(SQLCipherDatabase): + """ + Soledad syncer implementation. + """ - Implementation Notes: + """ + The name of the local symmetrically encrypted documents to + sync database file. + """ + LOCAL_SYMMETRIC_SYNC_FILE_NAME = 'sync.u1db' - * PRAGMA key should generally be called as the first operation on a - database. + """ + Period or recurrence of the Looping Call that will do the encryption to the + syncdb (in seconds). + """ + ENCRYPT_LOOP_PERIOD = 1 - :param key: The key for use with the database. - :type key: str - :param raw_key: Whether C{key} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. - :type raw_key: bool - """ - if raw_key: - cls._pragma_key_raw(db_handle, key) - else: - cls._pragma_key_passphrase(db_handle, key) + def __init__(self, opts, soledad_crypto, replica_uid, cert_file, + defer_encryption=False, sync_db=None, sync_enc_pool=None): - @classmethod - def _pragma_key_passphrase(cls, db_handle, passphrase): - """ - Set a passphrase for encryption key derivation. - - The key itself can be a passphrase, which is converted to a key using - PBKDF2 key derivation. The result is used as the encryption key for - the database. By using this method, there is no way to alter the KDF; - if you want to do so you should use a raw key instead and derive the - key using your own KDF. - - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param passphrase: The passphrase used to derive the encryption key. - :type passphrase: str - """ - db_handle.cursor().execute("PRAGMA key = '%s'" % passphrase) + self._opts = opts + self._path = opts.path + self._crypto = soledad_crypto + self.__replica_uid = replica_uid + self._cert_file = cert_file + self._sync_enc_pool = sync_enc_pool - @classmethod - def _pragma_key_raw(cls, db_handle, key): - """ - Set a raw hexadecimal encryption key. + self._sync_db = sync_db + + # we store syncers in a dictionary indexed by the target URL. We also + # store a hash of the auth info in case auth info expires and we need + # to rebuild the syncer for that target. The final self._syncers + # format is the following: + # + # self._syncers = {'<url>': ('<auth_hash>', syncer), ...} - It is possible to specify an exact byte sequence using a blob literal. - With this method, it is the calling application's responsibility to - ensure that the data provided is a 64 character hex string, which will - be converted directly to 32 bytes (256 bits) of key data. + self._syncers = {} - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param key: A 64 character hex string. - :type key: str - """ - if not all(c in string.hexdigits for c in key): - raise NotAnHexString(key) - db_handle.cursor().execute('PRAGMA key = "x\'%s"' % key) + # Storage for the documents received during a sync + self.received_docs = [] - @classmethod - def _pragma_cipher(cls, db_handle, cipher='aes-256-cbc'): - """ - Set the cipher and mode to use for symmetric encryption. + self.running = False - SQLCipher uses aes-256-cbc as the default cipher and mode of - operation. It is possible to change this, though not generally - recommended, using PRAGMA cipher. + self._reactor = reactor + self._reactor.callWhenRunning(self._start) - SQLCipher makes direct use of libssl, so all cipher options available - to libssl are also available for use with SQLCipher. See `man enc` for - OpenSSL's supported ciphers. + self._db_handle = None + self._initialize_main_db() - Implementation Notes: + self.shutdownID = None - * PRAGMA cipher must be called after PRAGMA key and before the first - actual database operation or it will have no effect. + @property + def _replica_uid(self): + return str(self.__replica_uid) - * If a non-default value is used PRAGMA cipher to create a database, - it must also be called every time that database is opened. + def _start(self): + if not self.running: + self.shutdownID = self._reactor.addSystemEventTrigger( + 'during', 'shutdown', self.finalClose) + self.running = True - * SQLCipher does not implement its own encryption. Instead it uses the - widely available and peer-reviewed OpenSSL libcrypto for all - cryptographic functions. + def _initialize_main_db(self): + self._db_handle = initialize_sqlcipher_db( + self._opts, check_same_thread=False) + self._real_replica_uid = None + self._ensure_schema() + self.set_document_factory(soledad_doc_factory) - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param cipher: The cipher and mode to use. - :type cipher: str + @defer.inlineCallbacks + def sync(self, url, creds=None, defer_decryption=True): """ - db_handle.cursor().execute("PRAGMA cipher = '%s'" % cipher) + Synchronize documents with remote replica exposed at url. - @classmethod - def _pragma_kdf_iter(cls, db_handle, kdf_iter=4000): + It is not safe to initiate more than one sync process and let them run + concurrently. It is responsibility of the caller to ensure that there + are no concurrent sync processes running. This is currently controlled + by the main Soledad object because it may also run post-sync hooks, + which should be run while the lock is locked. + + :param url: The url of the target replica to sync with. + :type url: str + :param creds: optional dictionary giving credentials to authorize the + operation with the server. + :type creds: dict + :param defer_decryption: + Whether to defer the decryption process using the intermediate + database. If False, decryption will be done inline. + :type defer_decryption: bool + + :return: + A Deferred, that will fire with the local generation (type `int`) + before the synchronisation was performed. + :rtype: Deferred """ - Set the number of iterations for the key derivation function. + syncer = self._get_syncer(url, creds=creds) + local_gen_before_sync = yield syncer.sync( + defer_decryption=defer_decryption) + self.received_docs = syncer.received_docs + defer.returnValue(local_gen_before_sync) - SQLCipher uses PBKDF2 key derivation to strengthen the key and make it - resistent to brute force and dictionary attacks. The default - configuration uses 4000 PBKDF2 iterations (effectively 16,000 SHA1 - operations). PRAGMA kdf_iter can be used to increase or decrease the - number of iterations used. + def _get_syncer(self, url, creds=None): + """ + Get a synchronizer for ``url`` using ``creds``. - Implementation Notes: + :param url: The url of the target replica to sync with. + :type url: str + :param creds: optional dictionary giving credentials. + to authorize the operation with the server. + :type creds: dict - * PRAGMA kdf_iter must be called after PRAGMA key and before the first - actual database operation or it will have no effect. + :return: A synchronizer. + :rtype: Synchronizer + """ + # we want to store at most one syncer for each url, so we also store a + # hash of the connection credentials and replace the stored syncer for + # a certain url if credentials have changed. + h = sha256(json.dumps([url, creds])).hexdigest() + cur_h, syncer = self._syncers.get(url, (None, None)) + if syncer is None or h != cur_h: + syncer = SoledadSynchronizer( + self, + SoledadHTTPSyncTarget( + url, + # XXX is the replica_uid ready? + self._replica_uid, + creds=creds, + crypto=self._crypto, + cert_file=self._cert_file, + sync_db=self._sync_db, + sync_enc_pool=self._sync_enc_pool)) + self._syncers[url] = (h, syncer) + # in order to reuse the same synchronizer multiple times we have to + # reset its state (i.e. the number of documents received from target + # and inserted in the local replica). + syncer.num_inserted = 0 + return syncer - * If a non-default value is used PRAGMA kdf_iter to create a database, - it must also be called every time that database is opened. + # + # Symmetric encryption of syncing docs + # - * It is not recommended to reduce the number of iterations if a - passphrase is in use. + def get_generation(self): + # FIXME + # XXX this SHOULD BE a callback + return self._get_generation() - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param kdf_iter: The number of iterations to use. - :type kdf_iter: int + def finalClose(self): + """ + This should only be called by the shutdown trigger. """ - db_handle.cursor().execute("PRAGMA kdf_iter = '%d'" % kdf_iter) + self.shutdownID = None + self.running = False - @classmethod - def _pragma_cipher_page_size(cls, db_handle, cipher_page_size=1024): + def close(self): + """ + Close the syncer and syncdb orderly """ - Set the page size of the encrypted database. + # close all open syncers + for url in self._syncers.keys(): + _, syncer = self._syncers[url] + syncer.close() + del self._syncers[url] - SQLCipher 2 introduced the new PRAGMA cipher_page_size that can be - used to adjust the page size for the encrypted database. The default - page size is 1024 bytes, but it can be desirable for some applications - to use a larger page size for increased performance. For instance, - some recent testing shows that increasing the page size can noticeably - improve performance (5-30%) for certain queries that manipulate a - large number of pages (e.g. selects without an index, large inserts in - a transaction, big deletes). - To adjust the page size, call the pragma immediately after setting the - key for the first time and each subsequent time that you open the - database. +class U1DBSQLiteBackend(sqlite_backend.SQLitePartialExpandDatabase): + """ + A very simple wrapper for u1db around sqlcipher backend. - Implementation Notes: + Instead of initializing the database on the fly, it just uses an existing + connection that is passed to it in the initializer. - * PRAGMA cipher_page_size must be called after PRAGMA key and before - the first actual database operation or it will have no effect. + It can be used in tests and debug runs to initialize the adbapi with plain + sqlite connections, decoupled from the sqlcipher layer. + """ - * If a non-default value is used PRAGMA cipher_page_size to create a - database, it must also be called every time that database is opened. + def __init__(self, conn): + self._db_handle = conn + self._real_replica_uid = None + self._ensure_schema() + self._factory = u1db.Document - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param cipher_page_size: The page size. - :type cipher_page_size: int - """ - db_handle.cursor().execute( - "PRAGMA cipher_page_size = '%d'" % cipher_page_size) - @classmethod - def _pragma_rekey(cls, db_handle, new_key, raw_key): - """ - Change the key of an existing encrypted database. - - To change the key on an existing encrypted database, it must first be - unlocked with the current encryption key. Once the database is - readable and writeable, PRAGMA rekey can be used to re-encrypt every - page in the database with a new key. - - * PRAGMA rekey must be called after PRAGMA key. It can be called at any - time once the database is readable. - - * PRAGMA rekey can not be used to encrypted a standard SQLite - database! It is only useful for changing the key on an existing - database. - - * Previous versions of SQLCipher provided a PRAGMA rekey_cipher and - code>PRAGMA rekey_kdf_iter. These are deprecated and should not be - used. Instead, use sqlcipher_export(). - - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param new_key: The new key. - :type new_key: str - :param raw_key: Whether C{password} is a raw 64-char hex string or a - passphrase that should be hashed to obtain the encyrption key. - :type raw_key: bool - """ - # XXX change key param! - if raw_key: - cls._pragma_rekey_raw(db_handle, key) - else: - cls._pragma_rekey_passphrase(db_handle, key) +class SoledadSQLCipherWrapper(SQLCipherDatabase): + """ + A wrapper for u1db that uses the Soledad-extended sqlcipher backend. - @classmethod - def _pragma_rekey_passphrase(cls, db_handle, passphrase): - """ - Change the passphrase for encryption key derivation. + Instead of initializing the database on the fly, it just uses an existing + connection that is passed to it in the initializer. - The key itself can be a passphrase, which is converted to a key using - PBKDF2 key derivation. The result is used as the encryption key for - the database. + It can be used from adbapi to initialize a soledad database after + getting a regular connection to a sqlcipher database. + """ + def __init__(self, conn, opts, sync_enc_pool): + self._db_handle = conn + self._real_replica_uid = None + self._ensure_schema() + self.set_document_factory(soledad_doc_factory) + self._prime_replica_uid() + self.defer_encryption = opts.defer_encryption + self._sync_enc_pool = sync_enc_pool - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param passphrase: The passphrase used to derive the encryption key. - :type passphrase: str - """ - db_handle.cursor().execute("PRAGMA rekey = '%s'" % passphrase) - @classmethod - def _pragma_rekey_raw(cls, db_handle, key): - """ - Change the raw hexadecimal encryption key. +def _assert_db_is_encrypted(opts): + """ + Assert that the sqlcipher file contains an encrypted database. - It is possible to specify an exact byte sequence using a blob literal. - With this method, it is the calling application's responsibility to - ensure that the data provided is a 64 character hex string, which will - be converted directly to 32 bytes (256 bits) of key data. + When opening an existing database, PRAGMA key will not immediately + throw an error if the key provided is incorrect. To test that the + database can be successfully opened with the provided key, it is + necessary to perform some operation on the database (i.e. read from + it) and confirm it is success. - :param db_handle: A handle to the SQLCipher database. - :type db_handle: pysqlcipher.Connection - :param key: A 64 character hex string. - :type key: str - """ - if not all(c in string.hexdigits for c in key): - raise NotAnHexString(key) - # XXX change passphrase param! - db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % passphrase) + The easiest way to do this is select off the sqlite_master table, + which will attempt to read the first page of the database and will + parse the schema. - @classmethod - def _pragma_synchronous_off(cls, db_handle): - """ - Change the setting of the "synchronous" flag to OFF. - """ - logger.debug("SQLCIPHER: SETTING SYNCHRONOUS OFF") - db_handle.cursor().execute('PRAGMA synchronous=OFF') + :param opts: + """ + # We try to open an encrypted database with the regular u1db + # backend should raise a DatabaseError exception. + # If the regular backend succeeds, then we need to stop because + # the database was not properly initialized. + try: + sqlite_backend.SQLitePartialExpandDatabase(opts.path) + except sqlcipher_dbapi2.DatabaseError: + # assert that we can access it using SQLCipher with the given + # key + dummy_query = ('SELECT count(*) FROM sqlite_master',) + initialize_sqlcipher_db(opts, on_init=dummy_query) + else: + raise DatabaseIsNotEncrypted() - @classmethod - def _pragma_synchronous_normal(cls, db_handle): - """ - Change the setting of the "synchronous" flag to NORMAL. - """ - logger.debug("SQLCIPHER: SETTING SYNCHRONOUS NORMAL") - db_handle.cursor().execute('PRAGMA synchronous=NORMAL') +# +# Exceptions +# - @classmethod - def _pragma_mem_temp_store(cls, db_handle): - """ - Use a in-memory store for temporary tables. - """ - logger.debug("SQLCIPHER: SETTING TEMP_STORE MEMORY") - db_handle.cursor().execute('PRAGMA temp_store=MEMORY') - @classmethod - def _pragma_write_ahead_logging(cls, db_handle): - """ - Enable write-ahead logging, and set the autocheckpoint to 50 pages. - - Setting the autocheckpoint to a small value, we make the reads not - suffer too much performance degradation. - - From the sqlite docs: - - "There is a tradeoff between average read performance and average write - performance. To maximize the read performance, one wants to keep the - WAL as small as possible and hence run checkpoints frequently, perhaps - as often as every COMMIT. To maximize write performance, one wants to - amortize the cost of each checkpoint over as many writes as possible, - meaning that one wants to run checkpoints infrequently and let the WAL - grow as large as possible before each checkpoint. The decision of how - often to run checkpoints may therefore vary from one application to - another depending on the relative read and write performance - requirements of the application. The default strategy is to run a - checkpoint once the WAL reaches 1000 pages" - """ - logger.debug("SQLCIPHER: SETTING WRITE-AHEAD LOGGING") - db_handle.cursor().execute('PRAGMA journal_mode=WAL') - # The optimum value can still use a little bit of tuning, but we favor - # small sizes of the WAL file to get fast reads, since we assume that - # the writes will be quick enough to not block too much. +class DatabaseIsNotEncrypted(Exception): + """ + Exception raised when trying to open non-encrypted databases. + """ + pass - # TODO - # As a further improvement, we might want to set autocheckpoint to 0 - # here and do the checkpoints manually in a separate thread, to avoid - # any blocks in the main thread (we should run a loopingcall from here) - db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50') - # Extra query methods: extensions to the base sqlite implmentation. +def soledad_doc_factory(doc_id=None, rev=None, json='{}', has_conflicts=False, + syncable=True): + """ + Return a default Soledad Document. + Used in the initialization for SQLCipherDatabase + """ + return SoledadDocument(doc_id=doc_id, rev=rev, json=json, + has_conflicts=has_conflicts, syncable=syncable) - def get_count_from_index(self, index_name, *key_values): - """ - Returns the count for a given combination of index_name - and key values. +sqlite_backend.SQLiteDatabase.register_implementation(SQLCipherDatabase) - Extension method made from similar methods in u1db version 13.09 - :param index_name: The index to query - :type index_name: str - :param key_values: values to match. eg, if you have - an index with 3 fields then you would have: - get_from_index(index_name, val1, val2, val3) - :type key_values: tuple - :return: count. - :rtype: int - """ - c = self._db_handle.cursor() - definition = self._get_index_definition(index_name) +# +# twisted.enterprise.adbapi SQLCipher implementation +# - if len(key_values) != len(definition): - raise u1db_errors.InvalidValueForIndex() - tables = ["document_fields d%d" % i for i in range(len(definition))] - novalue_where = ["d.doc_id = d%d.doc_id" - " AND d%d.field_name = ?" - % (i, i) for i in range(len(definition))] - exact_where = [novalue_where[i] - + (" AND d%d.value = ?" % (i,)) - for i in range(len(definition))] - args = [] - where = [] - for idx, (field, value) in enumerate(zip(definition, key_values)): - args.append(field) - where.append(exact_where[idx]) - args.append(value) +SQLCIPHER_CONNECTION_TIMEOUT = 10 - tables = ["document_fields d%d" % i for i in range(len(definition))] - statement = ( - "SELECT COUNT(*) FROM document d, %s WHERE %s " % ( - ', '.join(tables), - ' AND '.join(where), - )) - try: - c.execute(statement, tuple(args)) - except dbapi2.OperationalError, e: - raise dbapi2.OperationalError( - str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args)) - res = c.fetchall() - return res[0][0] - def close(self): - """ - Close db_handle and close syncer. - """ - logger.debug("Sqlcipher backend: closing") - if self._sync_watcher is not None: - self._sync_watcher.stop() - self._sync_watcher.shutdown() - for url in self._syncers: - _, syncer = self._syncers[url] - syncer.close() - if self._sync_enc_pool is not None: - self._sync_enc_pool.close() - if self._db_handle is not None: - self._db_handle.close() +def getConnectionPool(opts, extra_queries=None): + openfun = partial( + pragmas.set_init_pragmas, + opts=opts, + extra_queries=extra_queries) + return SQLCipherConnectionPool( + database=opts.path, + check_same_thread=False, + cp_openfun=openfun, + timeout=SQLCIPHER_CONNECTION_TIMEOUT) - @property - def replica_uid(self): - return self._get_replica_uid() +class SQLCipherConnection(adbapi.Connection): + pass -sqlite_backend.SQLiteDatabase.register_implementation(SQLCipherDatabase) + +class SQLCipherTransaction(adbapi.Transaction): + pass + + +class SQLCipherConnectionPool(adbapi.ConnectionPool): + + connectionFactory = SQLCipherConnection + transactionFactory = SQLCipherTransaction + + def __init__(self, *args, **kwargs): + adbapi.ConnectionPool.__init__( + self, "pysqlcipher.dbapi2", *args, **kwargs) diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 5d545a77..110baa0a 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -14,26 +14,12 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. - - """ Soledad synchronization utilities. - - -Extend u1db Synchronizer with the ability to: - - * Defer the update of the known replica uid until all the decryption of - the incoming messages has been processed. - - * Be interrupted and recovered. """ - - -import json - import logging -import traceback -from threading import Lock + +from twisted.internet import defer from u1db import errors from u1db.sync import Synchronizer @@ -53,16 +39,10 @@ class SoledadSynchronizer(Synchronizer): Also modified to allow for interrupting the synchronization process. """ + received_docs = [] - syncing_lock = Lock() - - def stop(self): - """ - Stop the current sync in progress. - """ - self.sync_target.stop() - - def sync(self, autocreate=False, defer_decryption=True): + @defer.inlineCallbacks + def sync(self, defer_decryption=True): """ Synchronize documents between source and target. @@ -74,45 +54,24 @@ class SoledadSynchronizer(Synchronizer): This is done to allow the ongoing parallel decryption of the incoming docs to proceed without `InvalidGeneration` conflicts. - :param autocreate: Whether the target replica should be created or not. - :type autocreate: bool :param defer_decryption: Whether to defer the decryption process using the intermediate database. If False, decryption will be done inline. :type defer_decryption: bool - """ - self.syncing_lock.acquire() - try: - return self._sync(autocreate=autocreate, - defer_decryption=defer_decryption) - except Exception: - # re-raising the exceptions to let syqlcipher.sync catch them - # (and re-create the syncer instance if needed) - raise - finally: - self.release_syncing_lock() - - def _sync(self, autocreate=False, defer_decryption=True): - """ - Helper function, called from the main `sync` method. - See `sync` docstring. + + :return: A deferred which will fire after the sync has finished with + the local generation before the synchronization was performed. + :rtype: twisted.internet.defer.Deferred """ sync_target = self.sync_target + self.received_docs = [] # get target identifier, its current generation, # and its last-seen database generation for this source ensure_callback = None - try: - (self.target_replica_uid, target_gen, target_trans_id, - target_my_gen, target_my_trans_id) = \ - sync_target.get_sync_info(self.source._replica_uid) - except errors.DatabaseDoesNotExist: - if not autocreate: - raise - # will try to ask sync_exchange() to create the db - self.target_replica_uid = None - target_gen, target_trans_id = (0, '') - target_my_gen, target_my_trans_id = (0, '') + (self.target_replica_uid, target_gen, target_trans_id, + target_my_gen, target_my_trans_id) = yield \ + sync_target.get_sync_info(self.source._replica_uid) logger.debug( "Soledad target sync info:\n" @@ -120,9 +79,10 @@ class SoledadSynchronizer(Synchronizer): " target generation: %d\n" " target trans id: %s\n" " target my gen: %d\n" - " target my trans_id: %s" + " target my trans_id: %s\n" + " source replica_uid: %s\n" % (self.target_replica_uid, target_gen, target_trans_id, - target_my_gen, target_my_trans_id)) + target_my_gen, target_my_trans_id, self.source._replica_uid)) # make sure we'll have access to target replica uid once it exists if self.target_replica_uid is None: @@ -140,7 +100,7 @@ class SoledadSynchronizer(Synchronizer): # what's changed since that generation and this current gen my_gen, _, changes = self.source.whats_changed(target_my_gen) - logger.debug("Soledad sync: there are %d documents to send." \ + logger.debug("Soledad sync: there are %d documents to send." % len(changes)) # get source last-seen database generation for the target @@ -152,63 +112,57 @@ class SoledadSynchronizer(Synchronizer): self.target_replica_uid) logger.debug( "Soledad source sync info:\n" - " source target gen: %d\n" - " source target trans_id: %s" + " last target gen known to source: %d\n" + " last target trans_id known to source: %s" % (target_last_known_gen, target_last_known_trans_id)) # validate transaction ids if not changes and target_last_known_gen == target_gen: if target_trans_id != target_last_known_trans_id: raise errors.InvalidTransactionId - return my_gen + defer.returnValue(my_gen) # prepare to send all the changed docs changed_doc_ids = [doc_id for doc_id, _, _ in changes] docs_to_send = self.source.get_docs( changed_doc_ids, check_for_conflicts=False, include_deleted=True) + ids_sent = [] docs_by_generation = [] idx = 0 for doc in docs_to_send: _, gen, trans = changes[idx] docs_by_generation.append((doc, gen, trans)) idx += 1 + ids_sent.append(doc.doc_id) # exchange documents and try to insert the returned ones with # the target, return target synced-up-to gen. - # - # The sync_exchange method may be interrupted, in which case it will - # return a tuple of Nones. - try: - new_gen, new_trans_id = sync_target.sync_exchange( - docs_by_generation, self.source._replica_uid, - target_last_known_gen, target_last_known_trans_id, - self._insert_doc_from_target, ensure_callback=ensure_callback, - defer_decryption=defer_decryption) - logger.debug( - "Soledad source sync info after sync exchange:\n" - " source target gen: %d\n" - " source target trans_id: %s" - % (new_gen, new_trans_id)) - info = { - "target_replica_uid": self.target_replica_uid, - "new_gen": new_gen, - "new_trans_id": new_trans_id, - "my_gen": my_gen - } - self._syncing_info = info - if defer_decryption and not sync_target.has_syncdb(): - logger.debug("Sync target has no valid sync db, " - "aborting defer_decryption") - defer_decryption = False - self.complete_sync() - except Exception as e: - logger.error("Soledad sync error: %s" % str(e)) - logger.error(traceback.format_exc()) - sync_target.stop() - finally: - sync_target.close() - - return my_gen + new_gen, new_trans_id = yield sync_target.sync_exchange( + docs_by_generation, self.source._replica_uid, + target_last_known_gen, target_last_known_trans_id, + self._insert_doc_from_target, ensure_callback=ensure_callback, + defer_decryption=defer_decryption) + logger.debug( + "Soledad source sync info after sync exchange:\n" + " source known target gen: %d\n" + " source known target trans_id: %s" + % (new_gen, new_trans_id)) + info = { + "target_replica_uid": self.target_replica_uid, + "new_gen": new_gen, + "new_trans_id": new_trans_id, + "my_gen": my_gen + } + self._syncing_info = info + yield self.complete_sync() + + _, _, changes = self.source.whats_changed(target_my_gen) + changed_doc_ids = [doc_id for doc_id, _, _ in changes] + + just_received = list(set(changed_doc_ids) - set(ids_sent)) + self.received_docs = just_received + + defer.returnValue(my_gen) def complete_sync(self): """ @@ -216,6 +170,9 @@ class SoledadSynchronizer(Synchronizer): (a) record last known generation and transaction uid for the remote replica, and (b) make target aware of our current reached generation. + + :return: A deferred which will fire when the sync has been completed. + :rtype: twisted.internet.defer.Deferred """ logger.debug("Completing deferred last step in SYNC...") @@ -226,37 +183,29 @@ class SoledadSynchronizer(Synchronizer): info["target_replica_uid"], info["new_gen"], info["new_trans_id"]) # if gapless record current reached generation with target - self._record_sync_info_with_the_target(info["my_gen"]) - - @property - def syncing(self): - """ - Return True if a sync is ongoing, False otherwise. - :rtype: bool - """ - # XXX FIXME we need some mechanism for timeout: should cleanup and - # release if something in the syncdb-decrypt goes wrong. we could keep - # track of the release date and cleanup unrealistic sync entries after - # some time. - locked = self.syncing_lock.locked() - return locked - - def release_syncing_lock(self): - """ - Release syncing lock if it's locked. - """ - if self.syncing_lock.locked(): - self.syncing_lock.release() + return self._record_sync_info_with_the_target(info["my_gen"]) def close(self): """ - Close sync target pool of workers. + Close the synchronizer. """ - self.release_syncing_lock() self.sync_target.close() - def __del__(self): + def _record_sync_info_with_the_target(self, start_generation): """ - Cleanup: release lock. + Store local replica metadata in server. + + :param start_generation: The local generation when the sync was + started. + :type start_generation: int + + :return: A deferred which will fire when the operation has been + completed. + :rtype: twisted.internet.defer.Deferred """ - self.release_syncing_lock() + cur_gen, trans_id = self.source._get_generation_info() + if (cur_gen == start_generation + self.num_inserted and + self.num_inserted > 0): + return self.sync_target.record_sync_info( + self.source._replica_uid, cur_gen, trans_id) + return defer.succeed(None) diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py deleted file mode 100644 index 1eb84e64..00000000 --- a/client/src/leap/soledad/client/target.py +++ /dev/null @@ -1,1469 +0,0 @@ -# -*- coding: utf-8 -*- -# target.py -# Copyright (C) 2013, 2014 LEAP -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - - -""" -A U1DB backend for encrypting data before sending to server and decrypting -after receiving. -""" - - -import cStringIO -import gzip -import logging -import re -import urllib -import threading - -from collections import defaultdict -from time import sleep -from uuid import uuid4 - -import simplejson as json -from taskthread import TimerTask -from u1db import errors -from u1db.remote import utils, http_errors -from u1db.remote.http_target import HTTPSyncTarget -from u1db.remote.http_client import _encode_query_parameter, HTTPClientBase -from zope.proxy import ProxyBase -from zope.proxy import sameProxiedObjects, setProxiedObject - -from leap.soledad.common.document import SoledadDocument -from leap.soledad.client.auth import TokenBasedAuth -from leap.soledad.client.crypto import is_symmetrically_encrypted -from leap.soledad.client.crypto import encrypt_doc, decrypt_doc -from leap.soledad.client.crypto import SyncEncrypterPool, SyncDecrypterPool -from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS -from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS -from leap.soledad.client.events import signal - - -logger = logging.getLogger(__name__) - - -def _gunzip(data): - """ - Uncompress data that is gzipped. - - :param data: gzipped data - :type data: basestring - """ - buffer = cStringIO.StringIO() - buffer.write(data) - buffer.seek(0) - try: - data = gzip.GzipFile(mode='r', fileobj=buffer).read() - except Exception: - logger.warning("Error while decrypting gzipped data") - buffer.close() - return data - - -class PendingReceivedDocsSyncError(Exception): - pass - - -class DocumentSyncerThread(threading.Thread): - """ - A thread that knowns how to either send or receive a document during the - sync process. - """ - - def __init__(self, doc_syncer, release_method, failed_method, - idx, total, last_request_lock=None, last_callback_lock=None): - """ - Initialize a new syncer thread. - - :param doc_syncer: A document syncer. - :type doc_syncer: HTTPDocumentSyncer - :param release_method: A method to be called when finished running. - :type release_method: callable(DocumentSyncerThread) - :param failed_method: A method to be called when we failed. - :type failed_method: callable(DocumentSyncerThread) - :param idx: The index count of the current operation. - :type idx: int - :param total: The total number of operations. - :type total: int - :param last_request_lock: A lock to wait for before actually performing - the request. - :type last_request_lock: threading.Lock - :param last_callback_lock: A lock to wait for before actually running - the success callback. - :type last_callback_lock: threading.Lock - """ - threading.Thread.__init__(self) - self._doc_syncer = doc_syncer - self._release_method = release_method - self._failed_method = failed_method - self._idx = idx - self._total = total - self._last_request_lock = last_request_lock - self._last_callback_lock = last_callback_lock - self._response = None - self._exception = None - self._result = None - self._success = False - # a lock so we can signal when we're finished - self._request_lock = threading.Lock() - self._request_lock.acquire() - self._callback_lock = threading.Lock() - self._callback_lock.acquire() - # make thread interruptable - self._stopped = None - self._stop_lock = threading.Lock() - - def run(self): - """ - Run the HTTP request and store results. - - This method will block and wait for an eventual previous operation to - finish before actually performing the request. It also traps any - exception and register any failure with the request. - """ - with self._stop_lock: - if self._stopped is None: - self._stopped = False - else: - return - - # eventually wait for the previous thread to finish - if self._last_request_lock is not None: - self._last_request_lock.acquire() - - # bail out in case we've been interrupted - if self.stopped is True: - return - - try: - self._response = self._doc_syncer.do_request() - self._request_lock.release() - - # run success callback - if self._doc_syncer.success_callback is not None: - - # eventually wait for callback lock release - if self._last_callback_lock is not None: - self._last_callback_lock.acquire() - - # bail out in case we've been interrupted - if self._stopped is True: - return - - self._result = self._doc_syncer.success_callback( - self._idx, self._total, self._response) - self._success = True - doc_syncer = self._doc_syncer - self._release_method(self, doc_syncer) - self._doc_syncer = None - # let next thread executed its callback - self._callback_lock.release() - - # trap any exception and signal failure - except Exception as e: - self._exception = e - self._success = False - # run failure callback - if self._doc_syncer.failure_callback is not None: - - # eventually wait for callback lock release - if self._last_callback_lock is not None: - self._last_callback_lock.acquire() - - # bail out in case we've been interrupted - if self.stopped is True: - return - - self._doc_syncer.failure_callback( - self._idx, self._total, self._exception) - - self._failed_method(self) - # we do not release the callback lock here because we - # failed and so we don't want other threads to succeed. - - @property - def doc_syncer(self): - return self._doc_syncer - - @property - def response(self): - return self._response - - @property - def exception(self): - return self._exception - - @property - def callback_lock(self): - return self._callback_lock - - @property - def request_lock(self): - return self._request_lock - - @property - def success(self): - return self._success - - def stop(self): - with self._stop_lock: - self._stopped = True - - @property - def stopped(self): - with self._stop_lock: - return self._stopped - - @property - def result(self): - return self._result - - -class DocumentSyncerPool(object): - """ - A pool of reusable document syncers. - """ - - POOL_SIZE = 10 - """ - The maximum amount of syncer threads running at the same time. - """ - - def __init__(self, raw_url, raw_creds, query_string, headers, - ensure_callback, stop_method): - """ - Initialize the document syncer pool. - - :param raw_url: The complete raw URL for the HTTP request. - :type raw_url: str - :param raw_creds: The credentials for the HTTP request. - :type raw_creds: dict - :param query_string: The query string for the HTTP request. - :type query_string: str - :param headers: The headers for the HTTP request. - :type headers: dict - :param ensure_callback: A callback to ensure we have the correct - target_replica_uid, if it was just created. - :type ensure_callback: callable - - """ - # save syncer params - self._raw_url = raw_url - self._raw_creds = raw_creds - self._query_string = query_string - self._headers = headers - self._ensure_callback = ensure_callback - self._stop_method = stop_method - # pool attributes - self._failures = False - self._semaphore_pool = threading.BoundedSemaphore( - DocumentSyncerPool.POOL_SIZE) - self._pool_access_lock = threading.Lock() - self._doc_syncers = [] - self._threads = [] - - def new_syncer_thread(self, idx, total, last_request_lock=None, - last_callback_lock=None): - """ - Yield a new document syncer thread. - - :param idx: The index count of the current operation. - :type idx: int - :param total: The total number of operations. - :type total: int - :param last_request_lock: A lock to wait for before actually performing - the request. - :type last_request_lock: threading.Lock - :param last_callback_lock: A lock to wait for before actually running - the success callback. - :type last_callback_lock: threading.Lock - """ - t = None - # wait for available threads - self._semaphore_pool.acquire() - with self._pool_access_lock: - if self._failures is True: - return None - # get a syncer - doc_syncer = self._get_syncer() - # we rely on DocumentSyncerThread.run() to release the lock using - # self.release_syncer so we can launch a new thread. - t = DocumentSyncerThread( - doc_syncer, self.release_syncer, self.cancel_threads, - idx, total, - last_request_lock=last_request_lock, - last_callback_lock=last_callback_lock) - self._threads.append(t) - return t - - def _failed(self): - with self._pool_access_lock: - self._failures = True - - @property - def failures(self): - return self._failures - - def _get_syncer(self): - """ - Get a document syncer from the pool. - - This method will create a new syncer whenever there is no syncer - available in the pool. - - :return: A syncer. - :rtype: HTTPDocumentSyncer - """ - syncer = None - # get an available syncer or create a new one - try: - syncer = self._doc_syncers.pop() - except IndexError: - syncer = HTTPDocumentSyncer( - self._raw_url, self._raw_creds, self._query_string, - self._headers, self._ensure_callback) - return syncer - - def release_syncer(self, syncer_thread, doc_syncer): - """ - Return a syncer to the pool after use and check for any failures. - - :param syncer: The syncer to be returned to the pool. - :type syncer: HTTPDocumentSyncer - """ - with self._pool_access_lock: - self._doc_syncers.append(doc_syncer) - if syncer_thread.success is True: - self._threads.remove(syncer_thread) - self._semaphore_pool.release() - - def cancel_threads(self, calling_thread): - """ - Stop all threads in the pool. - """ - # stop sync - self._stop_method() - stopped = [] - # stop all threads - logger.warning("Soledad sync: cancelling sync threads...") - with self._pool_access_lock: - self._failures = True - while self._threads: - t = self._threads.pop(0) - t.stop() - self._doc_syncers.append(t.doc_syncer) - stopped.append(t) - # release locks and join - while stopped: - t = stopped.pop(0) - t.request_lock.acquire(False) # just in case - t.request_lock.release() - t.callback_lock.acquire(False) # just in case - t.callback_lock.release() - logger.warning("Soledad sync: cancelled sync threads.") - - def cleanup(self): - """ - Close and remove any syncers from the pool. - """ - with self._pool_access_lock: - while self._doc_syncers: - syncer = self._doc_syncers.pop() - syncer.close() - del syncer - - -class HTTPDocumentSyncer(HTTPClientBase, TokenBasedAuth): - - def __init__(self, raw_url, creds, query_string, headers, ensure_callback): - """ - Initialize the client. - - :param raw_url: The raw URL of the target HTTP server. - :type raw_url: str - :param creds: Authentication credentials. - :type creds: dict - :param query_string: The query string for the HTTP request. - :type query_string: str - :param headers: The headers for the HTTP request. - :type headers: dict - :param ensure_callback: A callback to ensure we have the correct - target_replica_uid, if it was just created. - :type ensure_callback: callable - """ - HTTPClientBase.__init__(self, raw_url, creds=creds) - # info needed to perform the request - self._query_string = query_string - self._headers = headers - self._ensure_callback = ensure_callback - # the actual request method - self._request_method = None - self._success_callback = None - self._failure_callback = None - - def _reset(self): - """ - Reset this document syncer so we can reuse it. - """ - self._request_method = None - self._success_callback = None - self._failure_callback = None - self._request_method = None - - def set_request_method(self, method, *args, **kwargs): - """ - Set the actual method to perform the request. - - :param method: Either 'get' or 'put'. - :type method: str - :param args: Arguments for the request method. - :type args: list - :param kwargs: Keyworded arguments for the request method. - :type kwargs: dict - """ - self._reset() - # resolve request method - if method is 'get': - self._request_method = self._get_doc - elif method is 'put': - self._request_method = self._put_doc - else: - raise Exception - # store request method args - self._args = args - self._kwargs = kwargs - - def set_success_callback(self, callback): - self._success_callback = callback - - def set_failure_callback(self, callback): - self._failure_callback = callback - - @property - def success_callback(self): - return self._success_callback - - @property - def failure_callback(self): - return self._failure_callback - - def do_request(self): - """ - Actually perform the request. - - :return: The body and headers of the response. - :rtype: tuple - """ - self._ensure_connection() - args = self._args - kwargs = self._kwargs - return self._request_method(*args, **kwargs) - - def _request(self, method, url_parts, params=None, body=None, - content_type=None): - """ - Perform an HTTP request. - - :param method: The HTTP request method. - :type method: str - :param url_parts: A list representing the request path. - :type url_parts: list - :param params: Parameters for the URL query string. - :type params: dict - :param body: The body of the request. - :type body: str - :param content-type: The content-type of the request. - :type content-type: str - - :return: The body and headers of the response. - :rtype: tuple - - :raise errors.Unavailable: Raised after a number of unsuccesful - request attempts. - :raise Exception: Raised for any other exception ocurring during the - request. - """ - - self._ensure_connection() - unquoted_url = url_query = self._url.path - if url_parts: - if not url_query.endswith('/'): - url_query += '/' - unquoted_url = url_query - url_query += '/'.join(urllib.quote(part, safe='') - for part in url_parts) - # oauth performs its own quoting - unquoted_url += '/'.join(url_parts) - encoded_params = {} - if params: - for key, value in params.items(): - key = unicode(key).encode('utf-8') - encoded_params[key] = _encode_query_parameter(value) - url_query += ('?' + urllib.urlencode(encoded_params)) - if body is not None and not isinstance(body, basestring): - body = json.dumps(body) - content_type = 'application/json' - headers = {} - if content_type: - headers['content-type'] = content_type - - # Patched: We would like to receive gzip pretty please - # ---------------------------------------------------- - headers['accept-encoding'] = "gzip" - # ---------------------------------------------------- - - headers.update( - self._sign_request(method, unquoted_url, encoded_params)) - - for delay in self._delays: - try: - self._conn.request(method, url_query, body, headers) - return self._response() - except errors.Unavailable, e: - sleep(delay) - raise e - - def _response(self): - """ - Return the response of the (possibly gzipped) HTTP request. - - :return: The body and headers of the response. - :rtype: tuple - """ - resp = self._conn.getresponse() - body = resp.read() - headers = dict(resp.getheaders()) - - # Patched: We would like to decode gzip - # ---------------------------------------------------- - encoding = headers.get('content-encoding', '') - if "gzip" in encoding: - body = _gunzip(body) - # ---------------------------------------------------- - - if resp.status in (200, 201): - return body, headers - elif resp.status in http_errors.ERROR_STATUSES: - try: - respdic = json.loads(body) - except ValueError: - pass - else: - self._error(respdic) - # special case - if resp.status == 503: - raise errors.Unavailable(body, headers) - raise errors.HTTPError(resp.status, body, headers) - - def _prepare(self, comma, entries, **dic): - """ - Prepare an entry to be sent through a syncing POST request. - - :param comma: A string to be prepended to the current entry. - :type comma: str - :param entries: A list of entries accumulated to be sent on the - request. - :type entries: list - :param dic: The data to be included in this entry. - :type dic: dict - - :return: The size of the prepared entry. - :rtype: int - """ - entry = comma + '\r\n' + json.dumps(dic) - entries.append(entry) - return len(entry) - - def _init_post_request(self, action, content_length): - """ - Initiate a syncing POST request. - - :param url: The syncing URL. - :type url: str - :param action: The syncing action, either 'get' or 'receive'. - :type action: str - :param headers: The initial headers to be sent on this request. - :type headers: dict - :param content_length: The content-length of the request. - :type content_length: int - """ - self._conn.putrequest('POST', self._query_string) - self._conn.putheader( - 'content-type', 'application/x-soledad-sync-%s' % action) - for header_name, header_value in self._headers: - self._conn.putheader(header_name, header_value) - self._conn.putheader('accept-encoding', 'gzip') - self._conn.putheader('content-length', str(content_length)) - self._conn.endheaders() - - def _get_doc(self, received, sync_id, last_known_generation, - last_known_trans_id): - """ - Get a sync document from server by means of a POST request. - - :param received: The number of documents already received in the - current sync session. - :type received: int - :param sync_id: The id for the current sync session. - :type sync_id: str - :param last_known_generation: Target's last known generation. - :type last_known_generation: int - :param last_known_trans_id: Target's last known transaction id. - :type last_known_trans_id: str - - :return: The body and headers of the response. - :rtype: tuple - """ - entries = ['['] - size = 1 - # add remote replica metadata to the request - size += self._prepare( - '', entries, - last_known_generation=last_known_generation, - last_known_trans_id=last_known_trans_id, - sync_id=sync_id, - ensure=self._ensure_callback is not None) - # inform server of how many documents have already been received - size += self._prepare( - ',', entries, received=received) - entries.append('\r\n]') - size += len(entries[-1]) - # send headers - self._init_post_request('get', size) - # get document - for entry in entries: - self._conn.send(entry) - return self._response() - - def _put_doc(self, sync_id, last_known_generation, last_known_trans_id, - id, rev, content, gen, trans_id, number_of_docs, doc_idx): - """ - Put a sync document on server by means of a POST request. - - :param sync_id: The id for the current sync session. - :type sync_id: str - :param last_known_generation: Target's last known generation. - :type last_known_generation: int - :param last_known_trans_id: Target's last known transaction id. - :type last_known_trans_id: str - :param id: The document id. - :type id: str - :param rev: The document revision. - :type rev: str - :param content: The serialized document content. - :type content: str - :param gen: The generation of the modification of the document. - :type gen: int - :param trans_id: The transaction id of the modification of the - document. - :type trans_id: str - :param number_of_docs: The total amount of documents sent on this sync - session. - :type number_of_docs: int - :param doc_idx: The index of the current document being sent. - :type doc_idx: int - - :return: The body and headers of the response. - :rtype: tuple - """ - # prepare to send the document - entries = ['['] - size = 1 - # add remote replica metadata to the request - size += self._prepare( - '', entries, - last_known_generation=last_known_generation, - last_known_trans_id=last_known_trans_id, - sync_id=sync_id, - ensure=self._ensure_callback is not None) - # add the document to the request - size += self._prepare( - ',', entries, - id=id, rev=rev, content=content, gen=gen, trans_id=trans_id, - number_of_docs=number_of_docs, doc_idx=doc_idx) - entries.append('\r\n]') - size += len(entries[-1]) - # send headers - self._init_post_request('put', size) - # send document - for entry in entries: - self._conn.send(entry) - return self._response() - - def _sign_request(self, method, url_query, params): - """ - Return an authorization header to be included in the HTTP request. - - :param method: The HTTP method. - :type method: str - :param url_query: The URL query string. - :type url_query: str - :param params: A list with encoded query parameters. - :type param: list - - :return: The Authorization header. - :rtype: list of tuple - """ - return TokenBasedAuth._sign_request(self, method, url_query, params) - - def set_token_credentials(self, uuid, token): - """ - Store given credentials so we can sign the request later. - - :param uuid: The user's uuid. - :type uuid: str - :param token: The authentication token. - :type token: str - """ - TokenBasedAuth.set_token_credentials(self, uuid, token) - - -class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): - """ - A SyncTarget that encrypts data before sending and decrypts data after - receiving. - - Normally encryption will have been written to the sync database upon - document modification. The sync database is also used to write temporarily - the parsed documents that the remote send us, before being decrypted and - written to the main database. - """ - - # will later keep a reference to the insert-doc callback - # passed to sync_exchange - _insert_doc_cb = defaultdict(lambda: ProxyBase(None)) - - """ - Period of recurrence of the periodic decrypting task, in seconds. - """ - DECRYPT_TASK_PERIOD = 0.5 - - # - # Modified HTTPSyncTarget methods. - # - - def __init__(self, url, source_replica_uid=None, creds=None, crypto=None, - sync_db=None, sync_db_write_lock=None): - """ - Initialize the SoledadSyncTarget. - - :param source_replica_uid: The source replica uid which we use when - deferring decryption. - :type source_replica_uid: str - :param url: The url of the target replica to sync with. - :type url: str - :param creds: Optional dictionary giving credentials. - to authorize the operation with the server. - :type creds: dict - :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt - document contents when syncing. - :type crypto: soledad.crypto.SoledadCrypto - :param sync_db: Optional. handler for the db with the symmetric - encryption of the syncing documents. If - None, encryption will be done in-place, - instead of retreiving it from the dedicated - database. - :type sync_db: Sqlite handler - :param sync_db_write_lock: a write lock for controlling concurrent - access to the sync_db - :type sync_db_write_lock: threading.Lock - """ - HTTPSyncTarget.__init__(self, url, creds) - self._raw_url = url - self._raw_creds = creds - self._crypto = crypto - self._stopped = True - self._stop_lock = threading.Lock() - self._sync_exchange_lock = threading.Lock() - self.source_replica_uid = source_replica_uid - self._defer_decryption = False - - # deferred decryption attributes - self._sync_db = None - self._sync_db_write_lock = None - self._decryption_callback = None - self._sync_decr_pool = None - self._sync_watcher = None - if sync_db and sync_db_write_lock is not None: - self._sync_db = sync_db - self._sync_db_write_lock = sync_db_write_lock - - def _setup_sync_decr_pool(self): - """ - Set up the SyncDecrypterPool for deferred decryption. - """ - if self._sync_decr_pool is None: - # initialize syncing queue decryption pool - self._sync_decr_pool = SyncDecrypterPool( - self._crypto, self._sync_db, - self._sync_db_write_lock, - insert_doc_cb=self._insert_doc_cb) - self._sync_decr_pool.set_source_replica_uid( - self.source_replica_uid) - - def _teardown_sync_decr_pool(self): - """ - Tear down the SyncDecrypterPool. - """ - if self._sync_decr_pool is not None: - self._sync_decr_pool.close() - self._sync_decr_pool = None - - def _setup_sync_watcher(self): - """ - Set up the sync watcher for deferred decryption. - """ - if self._sync_watcher is None: - self._sync_watcher = TimerTask( - self._decrypt_syncing_received_docs, - delay=self.DECRYPT_TASK_PERIOD) - - def _teardown_sync_watcher(self): - """ - Tear down the sync watcher. - """ - if self._sync_watcher is not None: - self._sync_watcher.stop() - self._sync_watcher.shutdown() - self._sync_watcher = None - - def _get_replica_uid(self, url): - """ - Return replica uid from the url, or None. - - :param url: the replica url - :type url: str - """ - replica_uid_match = re.findall("user-([0-9a-fA-F]+)", url) - return replica_uid_match[0] if len(replica_uid_match) > 0 else None - - @staticmethod - def connect(url, source_replica_uid=None, crypto=None): - return SoledadSyncTarget( - url, source_replica_uid=source_replica_uid, crypto=crypto) - - def _parse_received_doc_response(self, response): - """ - Parse the response from the server containing the received document. - - :param response: The body and headers of the response. - :type response: tuple(str, dict) - """ - data, _ = response - # decode incoming stream - parts = data.splitlines() - if not parts or parts[0] != '[' or parts[-1] != ']': - raise errors.BrokenSyncStream - data = parts[1:-1] - # decode metadata - line, comma = utils.check_and_strip_comma(data[0]) - metadata = None - try: - metadata = json.loads(line) - new_generation = metadata['new_generation'] - new_transaction_id = metadata['new_transaction_id'] - number_of_changes = metadata['number_of_changes'] - except (json.JSONDecodeError, KeyError): - raise errors.BrokenSyncStream - # make sure we have replica_uid from fresh new dbs - if self._ensure_callback and 'replica_uid' in metadata: - self._ensure_callback(metadata['replica_uid']) - # parse incoming document info - doc_id = None - rev = None - content = None - gen = None - trans_id = None - if number_of_changes > 0: - try: - entry = json.loads(data[1]) - doc_id = entry['id'] - rev = entry['rev'] - content = entry['content'] - gen = entry['gen'] - trans_id = entry['trans_id'] - except (IndexError, KeyError): - raise errors.BrokenSyncStream - return new_generation, new_transaction_id, number_of_changes, \ - doc_id, rev, content, gen, trans_id - - def _insert_received_doc(self, idx, total, response): - """ - Insert a received document into the local replica. - - :param idx: The index count of the current operation. - :type idx: int - :param total: The total number of operations. - :type total: int - :param response: The body and headers of the response. - :type response: tuple(str, dict) - """ - new_generation, new_transaction_id, number_of_changes, doc_id, \ - rev, content, gen, trans_id = \ - self._parse_received_doc_response(response) - if doc_id is not None: - # decrypt incoming document and insert into local database - # ------------------------------------------------------------- - # symmetric decryption of document's contents - # ------------------------------------------------------------- - # If arriving content was symmetrically encrypted, we decrypt it. - # We do it inline if defer_decryption flag is False or no sync_db - # was defined, otherwise we defer it writing it to the received - # docs table. - doc = SoledadDocument(doc_id, rev, content) - if is_symmetrically_encrypted(doc): - if self._queue_for_decrypt: - self._save_encrypted_received_doc( - doc, gen, trans_id, idx, total) - else: - # defer_decryption is False or no-sync-db fallback - doc.set_json(decrypt_doc(self._crypto, doc)) - self._return_doc_cb(doc, gen, trans_id) - else: - # not symmetrically encrypted doc, insert it directly - # or save it in the decrypted stage. - if self._queue_for_decrypt: - self._save_received_doc(doc, gen, trans_id, idx, total) - else: - self._return_doc_cb(doc, gen, trans_id) - # ------------------------------------------------------------- - # end of symmetric decryption - # ------------------------------------------------------------- - msg = "%d/%d" % (idx + 1, total) - signal(SOLEDAD_SYNC_RECEIVE_STATUS, msg) - logger.debug("Soledad sync receive status: %s" % msg) - return number_of_changes, new_generation, new_transaction_id - - def _get_remote_docs(self, url, last_known_generation, last_known_trans_id, - headers, return_doc_cb, ensure_callback, sync_id, - syncer_pool, defer_decryption=False): - """ - Fetch sync documents from the remote database and insert them in the - local database. - - If an incoming document's encryption scheme is equal to - EncryptionSchemes.SYMKEY, then this method will decrypt it with - Soledad's symmetric key. - - :param url: The syncing URL. - :type url: str - :param last_known_generation: Target's last known generation. - :type last_known_generation: int - :param last_known_trans_id: Target's last known transaction id. - :type last_known_trans_id: str - :param headers: The headers of the HTTP request. - :type headers: dict - :param return_doc_cb: A callback to insert docs from target. - :type return_doc_cb: callable - :param ensure_callback: A callback to ensure we have the correct - target_replica_uid, if it was just created. - :type ensure_callback: callable - :param sync_id: The id for the current sync session. - :type sync_id: str - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. - :type defer_decryption: bool - - :raise BrokenSyncStream: If `data` is malformed. - - :return: A dictionary representing the first line of the response got - from remote replica. - :rtype: dict - """ - # we keep a reference to the callback in case we defer the decryption - self._return_doc_cb = return_doc_cb - self._queue_for_decrypt = defer_decryption \ - and self._sync_db is not None - - new_generation = last_known_generation - new_transaction_id = last_known_trans_id - - if self._queue_for_decrypt: - logger.debug( - "Soledad sync: will queue received docs for decrypting.") - - idx = 0 - number_of_changes = 1 - - first_request = True - last_callback_lock = None - threads = [] - - # get incoming documents - while idx < number_of_changes: - # bail out if sync process was interrupted - if self.stopped is True: - break - - # launch a thread to fetch one document from target - t = syncer_pool.new_syncer_thread( - idx, number_of_changes, - last_callback_lock=last_callback_lock) - - # bail out if any thread failed - if t is None: - self.stop() - break - - t.doc_syncer.set_request_method( - 'get', idx, sync_id, last_known_generation, - last_known_trans_id) - t.doc_syncer.set_success_callback(self._insert_received_doc) - - def _failure_callback(idx, total, exception): - _failure_msg = "Soledad sync: error while getting document " \ - "%d/%d: %s" \ - % (idx + 1, total, exception) - logger.warning("%s" % _failure_msg) - logger.warning("Soledad sync: failing gracefully, will " - "recover on next sync.") - - t.doc_syncer.set_failure_callback(_failure_callback) - threads.append(t) - t.start() - last_callback_lock = t.callback_lock - idx += 1 - - # if this is the first request, wait to update the number of - # changes - if first_request is True: - t.join() - if t.success: - number_of_changes, _, _ = t.result - first_request = False - - # make sure all threads finished and we have up-to-date info - last_successful_thread = None - while threads: - # check if there are failures - t = threads.pop(0) - t.join() - if t.success: - last_successful_thread = t - - # get information about last successful thread - if last_successful_thread is not None: - body, _ = last_successful_thread.response - parsed_body = json.loads(body) - # get current target gen and trans id in case no documents were - # transferred - if len(parsed_body) == 1: - metadata = parsed_body[0] - new_generation = metadata['new_generation'] - new_transaction_id = metadata['new_transaction_id'] - # get current target gen and trans id from last transferred - # document - else: - doc_data = parsed_body[1] - new_generation = doc_data['gen'] - new_transaction_id = doc_data['trans_id'] - - return new_generation, new_transaction_id - - def sync_exchange(self, docs_by_generations, - source_replica_uid, last_known_generation, - last_known_trans_id, return_doc_cb, - ensure_callback=None, defer_decryption=True, - sync_id=None): - """ - Find out which documents the remote database does not know about, - encrypt and send them. - - This does the same as the parent's method but encrypts content before - syncing. - - :param docs_by_generations: A list of (doc_id, generation, trans_id) - of local documents that were changed since - the last local generation the remote - replica knows about. - :type docs_by_generations: list of tuples - - :param source_replica_uid: The uid of the source replica. - :type source_replica_uid: str - - :param last_known_generation: Target's last known generation. - :type last_known_generation: int - - :param last_known_trans_id: Target's last known transaction id. - :type last_known_trans_id: str - - :param return_doc_cb: A callback for inserting received documents from - target. If not overriden, this will call u1db - insert_doc_from_target in synchronizer, which - implements the TAKE OTHER semantics. - :type return_doc_cb: function - - :param ensure_callback: A callback that ensures we know the target - replica uid if the target replica was just - created. - :type ensure_callback: function - - :param defer_decryption: Whether to defer the decryption process using - the intermediate database. If False, - decryption will be done inline. - :type defer_decryption: bool - - :return: The new generation and transaction id of the target replica. - :rtype: tuple - """ - self._ensure_callback = ensure_callback - - if defer_decryption: - self._sync_exchange_lock.acquire() - self._setup_sync_decr_pool() - self._setup_sync_watcher() - self._defer_decryption = True - - self.start() - - if sync_id is None: - sync_id = str(uuid4()) - self.source_replica_uid = source_replica_uid - # let the decrypter pool access the passed callback to insert docs - setProxiedObject(self._insert_doc_cb[source_replica_uid], - return_doc_cb) - - if not self.clear_to_sync(): - raise PendingReceivedDocsSyncError - - self._ensure_connection() - if self._trace_hook: # for tests - self._trace_hook('sync_exchange') - url = '%s/sync-from/%s' % (self._url.path, source_replica_uid) - headers = self._sign_request('POST', url, {}) - - cur_target_gen = last_known_generation - cur_target_trans_id = last_known_trans_id - - # send docs - msg = "%d/%d" % (0, len(docs_by_generations)) - signal(SOLEDAD_SYNC_SEND_STATUS, msg) - logger.debug("Soledad sync send status: %s" % msg) - - defer_encryption = self._sync_db is not None - syncer_pool = DocumentSyncerPool( - self._raw_url, self._raw_creds, url, headers, ensure_callback, - self.stop) - threads = [] - last_request_lock = None - last_callback_lock = None - sent = 0 - total = len(docs_by_generations) - - synced = [] - number_of_docs = len(docs_by_generations) - - for doc, gen, trans_id in docs_by_generations: - # allow for interrupting the sync process - if self.stopped is True: - break - - # skip non-syncable docs - if isinstance(doc, SoledadDocument) and not doc.syncable: - continue - - # ------------------------------------------------------------- - # symmetric encryption of document's contents - # ------------------------------------------------------------- - doc_json = doc.get_json() - if not doc.is_tombstone(): - if not defer_encryption: - # fallback case, for tests - doc_json = encrypt_doc(self._crypto, doc) - else: - try: - doc_json = self.get_encrypted_doc_from_db( - doc.doc_id, doc.rev) - except Exception as exc: - logger.error("Error while getting " - "encrypted doc from db") - logger.exception(exc) - continue - if doc_json is None: - # Not marked as tombstone, but we got nothing - # from the sync db. As it is not encrypted yet, we - # force inline encryption. - # TODO: implement a queue to deal with these cases. - doc_json = encrypt_doc(self._crypto, doc) - # ------------------------------------------------------------- - # end of symmetric encryption - # ------------------------------------------------------------- - t = syncer_pool.new_syncer_thread( - sent + 1, total, last_request_lock=None, - last_callback_lock=last_callback_lock) - - # bail out if any thread failed - if t is None: - self.stop() - break - - # set the request method - t.doc_syncer.set_request_method( - 'put', sync_id, cur_target_gen, cur_target_trans_id, - id=doc.doc_id, rev=doc.rev, content=doc_json, gen=gen, - trans_id=trans_id, number_of_docs=number_of_docs, doc_idx=sent + 1) - # set the success calback - - def _success_callback(idx, total, response): - _success_msg = "Soledad sync send status: %d/%d" \ - % (idx, total) - signal(SOLEDAD_SYNC_SEND_STATUS, _success_msg) - logger.debug(_success_msg) - - t.doc_syncer.set_success_callback(_success_callback) - - # set the failure callback - def _failure_callback(idx, total, exception): - _failure_msg = "Soledad sync: error while sending document " \ - "%d/%d: %s" % (idx, total, exception) - logger.warning("%s" % _failure_msg) - logger.warning("Soledad sync: failing gracefully, will " - "recover on next sync.") - - t.doc_syncer.set_failure_callback(_failure_callback) - - # save thread and append - t.start() - threads.append((t, doc)) - last_request_lock = t.request_lock - last_callback_lock = t.callback_lock - sent += 1 - - # make sure all threads finished and we have up-to-date info - while threads: - # check if there are failures - t, doc = threads.pop(0) - t.join() - if t.success: - synced.append((doc.doc_id, doc.rev)) - - if defer_decryption: - self._sync_watcher.start() - - # get docs from target - if self.stopped is False: - cur_target_gen, cur_target_trans_id = self._get_remote_docs( - url, - last_known_generation, last_known_trans_id, headers, - return_doc_cb, ensure_callback, sync_id, syncer_pool, - defer_decryption=defer_decryption) - syncer_pool.cleanup() - - # delete documents from the sync database - if defer_encryption: - self.delete_encrypted_docs_from_db(synced) - - # wait for deferred decryption to finish - if defer_decryption: - while self.clear_to_sync() is False: - sleep(self.DECRYPT_TASK_PERIOD) - self._teardown_sync_watcher() - self._teardown_sync_decr_pool() - self._sync_exchange_lock.release() - - self.stop() - return cur_target_gen, cur_target_trans_id - - def start(self): - """ - Mark current sync session as running. - """ - with self._stop_lock: - self._stopped = False - - def stop(self): - """ - Mark current sync session as stopped. - - This will eventually interrupt the sync_exchange() method and return - enough information to the synchronizer so the sync session can be - recovered afterwards. - """ - with self._stop_lock: - self._stopped = True - - @property - def stopped(self): - """ - Return whether this sync session is stopped. - - :return: Whether this sync session is stopped. - :rtype: bool - """ - with self._stop_lock: - return self._stopped is True - - def get_encrypted_doc_from_db(self, doc_id, doc_rev): - """ - Retrieve encrypted document from the database of encrypted docs for - sync. - - :param doc_id: The Document id. - :type doc_id: str - - :param doc_rev: The document revision - :type doc_rev: str - """ - encr = SyncEncrypterPool - c = self._sync_db.cursor() - sql = ("SELECT content FROM %s WHERE doc_id=? and rev=?" % ( - encr.TABLE_NAME,)) - c.execute(sql, (doc_id, doc_rev)) - res = c.fetchall() - if len(res) != 0: - return res[0][0] - - def delete_encrypted_docs_from_db(self, docs_ids): - """ - Delete several encrypted documents from the database of symmetrically - encrypted docs to sync. - - :param docs_ids: an iterable with (doc_id, doc_rev) for all documents - to be deleted. - :type docs_ids: any iterable of tuples of str - """ - if docs_ids: - encr = SyncEncrypterPool - c = self._sync_db.cursor() - for doc_id, doc_rev in docs_ids: - sql = ("DELETE FROM %s WHERE doc_id=? and rev=?" % ( - encr.TABLE_NAME,)) - c.execute(sql, (doc_id, doc_rev)) - self._sync_db.commit() - - def _save_encrypted_received_doc(self, doc, gen, trans_id, idx, total): - """ - Save a symmetrically encrypted incoming document into the received - docs table in the sync db. A decryption task will pick it up - from here in turn. - - :param doc: The document to save. - :type doc: SoledadDocument - :param gen: The generation. - :type gen: str - :param trans_id: Transacion id. - :type gen: str - :param idx: The index count of the current operation. - :type idx: int - :param total: The total number of operations. - :type total: int - """ - logger.debug( - "Enqueueing doc for decryption: %d/%d." - % (idx + 1, total)) - self._sync_decr_pool.insert_encrypted_received_doc( - doc.doc_id, doc.rev, doc.content, gen, trans_id) - - def _save_received_doc(self, doc, gen, trans_id, idx, total): - """ - Save any incoming document into the received docs table in the sync db. - - :param doc: The document to save. - :type doc: SoledadDocument - :param gen: The generation. - :type gen: str - :param trans_id: Transacion id. - :type gen: str - :param idx: The index count of the current operation. - :type idx: int - :param total: The total number of operations. - :type total: int - """ - logger.debug( - "Enqueueing doc, no decryption needed: %d/%d." - % (idx + 1, total)) - self._sync_decr_pool.insert_received_doc( - doc.doc_id, doc.rev, doc.content, gen, trans_id) - - # - # Symmetric decryption of syncing docs - # - - def clear_to_sync(self): - """ - Return True if sync can proceed (ie, the received db table is empty). - :rtype: bool - """ - if self._sync_decr_pool is not None: - return self._sync_decr_pool.count_received_encrypted_docs() == 0 - else: - return True - - def set_decryption_callback(self, cb): - """ - Set callback to be called when the decryption finishes. - - :param cb: The callback to be set. - :type cb: callable - """ - self._decryption_callback = cb - - def has_decryption_callback(self): - """ - Return True if there is a decryption callback set. - :rtype: bool - """ - return self._decryption_callback is not None - - def has_syncdb(self): - """ - Return True if we have an initialized syncdb. - """ - return self._sync_db is not None - - def _decrypt_syncing_received_docs(self): - """ - Decrypt the documents received from remote replica and insert them - into the local one. - - Called periodically from TimerTask self._sync_watcher. - """ - if sameProxiedObjects( - self._insert_doc_cb.get(self.source_replica_uid), - None): - return - - decrypter = self._sync_decr_pool - decrypter.decrypt_received_docs() - done = decrypter.process_decrypted() - - def _sign_request(self, method, url_query, params): - """ - Return an authorization header to be included in the HTTP request. - - :param method: The HTTP method. - :type method: str - :param url_query: The URL query string. - :type url_query: str - :param params: A list with encoded query parameters. - :type param: list - - :return: The Authorization header. - :rtype: list of tuple - """ - return TokenBasedAuth._sign_request(self, method, url_query, params) - - def set_token_credentials(self, uuid, token): - """ - Store given credentials so we can sign the request later. - - :param uuid: The user's uuid. - :type uuid: str - :param token: The authentication token. - :type token: str - """ - TokenBasedAuth.set_token_credentials(self, uuid, token) |