summaryrefslogtreecommitdiff
path: root/client/src
diff options
context:
space:
mode:
authordrebs <drebs@leap.se>2015-06-04 11:21:40 -0300
committerdrebs <drebs@leap.se>2015-06-04 11:21:40 -0300
commit293c71080e9a21115d248e46d1a706c53cc8ee37 (patch)
tree3353672f1f770ef5f5c87ea7e3e27debe737a50b /client/src
parentfa7708e256ba56cd1e9913993d68611b4ae95824 (diff)
parent9fb1c47ca7da06d6feef6846b812aec28128ed78 (diff)
Merge tag '0.7.0'
Tag version 0.7.0. Conflicts: CHANGELOG client/src/leap/soledad/client/__init__.py client/src/leap/soledad/client/sqlcipher.py client/src/leap/soledad/client/target.py server/pkg/soledad-server
Diffstat (limited to 'client/src')
-rw-r--r--client/src/leap/soledad/client/__init__.py1347
-rw-r--r--client/src/leap/soledad/client/adbapi.py271
-rw-r--r--client/src/leap/soledad/client/api.py882
-rw-r--r--client/src/leap/soledad/client/auth.py9
-rw-r--r--client/src/leap/soledad/client/crypto.py862
-rw-r--r--client/src/leap/soledad/client/encdecpool.py746
-rw-r--r--client/src/leap/soledad/client/events.py60
-rw-r--r--client/src/leap/soledad/client/examples/README4
-rw-r--r--client/src/leap/soledad/client/examples/benchmarks/.gitignore1
-rwxr-xr-xclient/src/leap/soledad/client/examples/benchmarks/get_sample.sh3
-rw-r--r--client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py177
-rw-r--r--client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py177
-rw-r--r--client/src/leap/soledad/client/examples/compare.txt8
-rw-r--r--client/src/leap/soledad/client/examples/manifest.phk50
-rw-r--r--client/src/leap/soledad/client/examples/plot-async-db.py45
-rw-r--r--client/src/leap/soledad/client/examples/run_benchmark.py28
-rw-r--r--client/src/leap/soledad/client/examples/soledad_sync.py65
-rw-r--r--client/src/leap/soledad/client/examples/use_adbapi.py103
-rw-r--r--client/src/leap/soledad/client/examples/use_api.py67
-rw-r--r--client/src/leap/soledad/client/http_target.py622
-rw-r--r--client/src/leap/soledad/client/interfaces.py362
-rw-r--r--client/src/leap/soledad/client/pragmas.py379
-rw-r--r--client/src/leap/soledad/client/secrets.py787
-rw-r--r--client/src/leap/soledad/client/shared_db.py57
-rw-r--r--client/src/leap/soledad/client/sqlcipher.py1422
-rw-r--r--client/src/leap/soledad/client/sync.py178
-rw-r--r--client/src/leap/soledad/client/target.py1469
27 files changed, 5654 insertions, 4527 deletions
diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py
index 07255406..245a8971 100644
--- a/client/src/leap/soledad/client/__init__.py
+++ b/client/src/leap/soledad/client/__init__.py
@@ -16,1351 +16,12 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Soledad - Synchronization Of Locally Encrypted Data Among Devices.
-
-Soledad is the part of LEAP that manages storage and synchronization of
-application data. It is built on top of U1DB reference Python API and
-implements (1) a SQLCipher backend for local storage in the client, (2) a
-SyncTarget that encrypts data before syncing, and (3) a CouchDB backend for
-remote storage in the server side.
-"""
-import binascii
-import errno
-import httplib
-import logging
-import os
-import socket
-import ssl
-import urlparse
-import hmac
-
-from hashlib import sha256
-
-try:
- import cchardet as chardet
-except ImportError:
- import chardet
-
-from u1db.remote import http_client
-from u1db.remote.ssl_match_hostname import match_hostname
-
-import scrypt
-import simplejson as json
-
-from leap.common.config import get_path_prefix
-from leap.soledad.common import (
- SHARED_DB_NAME,
- soledad_assert,
- soledad_assert_type
-)
-from leap.soledad.common.errors import (
- InvalidTokenError,
- NotLockedError,
- AlreadyLockedError,
- LockTimedOutError,
-)
-from leap.soledad.common.crypto import (
- MacMethods,
- UnknownMacMethod,
- WrongMac,
- MAC_KEY,
- MAC_METHOD_KEY,
-)
-from leap.soledad.client.events import (
- SOLEDAD_CREATING_KEYS,
- SOLEDAD_DONE_CREATING_KEYS,
- SOLEDAD_DOWNLOADING_KEYS,
- SOLEDAD_DONE_DOWNLOADING_KEYS,
- SOLEDAD_UPLOADING_KEYS,
- SOLEDAD_DONE_UPLOADING_KEYS,
- SOLEDAD_NEW_DATA_TO_SYNC,
- SOLEDAD_DONE_DATA_SYNC,
- signal,
-)
-from leap.soledad.common.document import SoledadDocument
-from leap.soledad.client.crypto import SoledadCrypto
-from leap.soledad.client.shared_db import SoledadSharedDatabase
-from leap.soledad.client.sqlcipher import open as sqlcipher_open
-from leap.soledad.client.sqlcipher import SQLCipherDatabase
-from leap.soledad.client.target import SoledadSyncTarget
-
-
-logger = logging.getLogger(name=__name__)
-
-
-#
-# Constants
-#
-
-SOLEDAD_CERT = None
"""
-Path to the certificate file used to certify the SSL connection between
-Soledad client and server.
-"""
-
-
-#
-# Soledad: local encrypted storage and remote encrypted sync.
-#
-
-class NoStorageSecret(Exception):
- """
- Raised when trying to use a storage secret but none is available.
- """
- pass
-
-
-class PassphraseTooShort(Exception):
- """
- Raised when trying to change the passphrase but the provided passphrase is
- too short.
- """
-
-
-class BootstrapSequenceError(Exception):
- """
- Raised when an attempt to generate a secret and store it in a recovery
- documents on server failed.
- """
-
-
-class Soledad(object):
- """
- Soledad provides encrypted data storage and sync.
-
- A Soledad instance is used to store and retrieve data in a local encrypted
- database and synchronize this database with Soledad server.
-
- This class is also responsible for bootstrapping users' account by
- creating cryptographic secrets and/or storing/fetching them on Soledad
- server.
-
- Soledad uses C{leap.common.events} to signal events. The possible events
- to be signaled are:
-
- SOLEDAD_CREATING_KEYS: emitted during bootstrap sequence when key
- generation starts.
- SOLEDAD_DONE_CREATING_KEYS: emitted during bootstrap sequence when key
- generation finishes.
- SOLEDAD_UPLOADING_KEYS: emitted during bootstrap sequence when soledad
- starts sending keys to server.
- SOLEDAD_DONE_UPLOADING_KEYS: emitted during bootstrap sequence when
- soledad finishes sending keys to server.
- SOLEDAD_DOWNLOADING_KEYS: emitted during bootstrap sequence when
- soledad starts to retrieve keys from server.
- SOLEDAD_DONE_DOWNLOADING_KEYS: emitted during bootstrap sequence when
- soledad finishes downloading keys from server.
- SOLEDAD_NEW_DATA_TO_SYNC: emitted upon call to C{need_sync()} when
- there's indeed new data to be synchronized between local database
- replica and server's replica.
- SOLEDAD_DONE_DATA_SYNC: emitted inside C{sync()} method when it has
- finished synchronizing with remote replica.
- """
-
- LOCAL_DATABASE_FILE_NAME = 'soledad.u1db'
- """
- The name of the local SQLCipher U1DB database file.
- """
-
- STORAGE_SECRETS_FILE_NAME = "soledad.json"
- """
- The name of the file where the storage secrets will be stored.
- """
-
- GENERATED_SECRET_LENGTH = 1024
- """
- The length of the generated secret used to derive keys for symmetric
- encryption for local and remote storage.
- """
-
- LOCAL_STORAGE_SECRET_LENGTH = 512
- """
- The length of the secret used to derive a passphrase for the SQLCipher
- database.
- """
-
- REMOTE_STORAGE_SECRET_LENGTH = \
- GENERATED_SECRET_LENGTH - LOCAL_STORAGE_SECRET_LENGTH
- """
- The length of the secret used to derive an encryption key and a MAC auth
- key for remote storage.
- """
-
- SALT_LENGTH = 64
- """
- The length of the salt used to derive the key for the storage secret
- encryption.
- """
-
- MINIMUM_PASSPHRASE_LENGTH = 6
- """
- The minimum length for a passphrase. The passphrase length is only checked
- when the user changes her passphrase, not when she instantiates Soledad.
- """
-
- IV_SEPARATOR = ":"
- """
- A separator used for storing the encryption initial value prepended to the
- ciphertext.
- """
-
- UUID_KEY = 'uuid'
- STORAGE_SECRETS_KEY = 'storage_secrets'
- SECRET_KEY = 'secret'
- CIPHER_KEY = 'cipher'
- LENGTH_KEY = 'length'
- KDF_KEY = 'kdf'
- KDF_SALT_KEY = 'kdf_salt'
- KDF_LENGTH_KEY = 'kdf_length'
- KDF_SCRYPT = 'scrypt'
- CIPHER_AES256 = 'aes256'
- """
- Keys used to access storage secrets in recovery documents.
- """
-
- DEFAULT_PREFIX = os.path.join(get_path_prefix(), 'leap', 'soledad')
- """
- Prefix for default values for path.
- """
-
- def __init__(self, uuid, passphrase, secrets_path, local_db_path,
- server_url, cert_file,
- auth_token=None, secret_id=None, defer_encryption=True):
- """
- Initialize configuration, cryptographic keys and dbs.
-
- :param uuid: User's uuid.
- :type uuid: str
-
- :param passphrase: The passphrase for locking and unlocking encryption
- secrets for local and remote storage.
- :type passphrase: unicode
-
- :param secrets_path: Path for storing encrypted key used for
- symmetric encryption.
- :type secrets_path: str
-
- :param local_db_path: Path for local encrypted storage db.
- :type local_db_path: str
-
- :param server_url: URL for Soledad server. This is used either to sync
- with the user's remote db and to interact with the
- shared recovery database.
- :type server_url: str
-
- :param cert_file: Path to the certificate of the ca used
- to validate the SSL certificate used by the remote
- soledad server.
- :type cert_file: str
-
- :param auth_token: Authorization token for accessing remote databases.
- :type auth_token: str
-
- :param secret_id: The id of the storage secret to be used.
- :type secret_id: str
-
- :param defer_encryption: Whether to defer encryption/decryption of
- documents, or do it inline while syncing.
- :type defer_encryption: bool
-
- :raise BootstrapSequenceError: Raised when the secret generation and
- storage on server sequence has failed
- for some reason.
- """
- # get config params
- self._uuid = uuid
- soledad_assert_type(passphrase, unicode)
- self._passphrase = passphrase
- # init crypto variables
- self._secrets = {}
- self._secret_id = secret_id
- self._defer_encryption = defer_encryption
-
- self._init_config(secrets_path, local_db_path, server_url)
-
- self._set_token(auth_token)
- self._shared_db_instance = None
- # configure SSL certificate
- global SOLEDAD_CERT
- SOLEDAD_CERT = cert_file
- # initiate bootstrap sequence
- self._bootstrap() # might raise BootstrapSequenceError()
-
- def _init_config(self, secrets_path, local_db_path, server_url):
- """
- Initialize configuration using default values for missing params.
- """
- # initialize secrets_path
- self._secrets_path = secrets_path
- if self._secrets_path is None:
- self._secrets_path = os.path.join(
- self.DEFAULT_PREFIX, self.STORAGE_SECRETS_FILE_NAME)
- # initialize local_db_path
- self._local_db_path = local_db_path
- if self._local_db_path is None:
- self._local_db_path = os.path.join(
- self.DEFAULT_PREFIX, self.LOCAL_DATABASE_FILE_NAME)
- # initialize server_url
- self._server_url = server_url
- soledad_assert(
- self._server_url is not None,
- 'Missing URL for Soledad server.')
-
- #
- # initialization/destruction methods
- #
-
- def _get_or_gen_crypto_secrets(self):
- """
- Retrieves or generates the crypto secrets.
-
- Might raise BootstrapSequenceError
- """
- doc = self._get_secrets_from_shared_db()
-
- if doc:
- logger.info(
- 'Found cryptographic secrets in shared recovery '
- 'database.')
- _, mac = self.import_recovery_document(doc.content)
- if mac is False:
- self.put_secrets_in_shared_db()
- self._store_secrets() # save new secrets in local file
- if self._secret_id is None:
- self._set_secret_id(self._secrets.items()[0][0])
- else:
- # STAGE 3 - there are no secrets in server also, so
- # generate a secret and store it in remote db.
- logger.info(
- 'No cryptographic secrets found, creating new '
- ' secrets...')
- self._set_secret_id(self._gen_secret())
- try:
- self._put_secrets_in_shared_db()
- except Exception as ex:
- # storing generated secret in shared db failed for
- # some reason, so we erase the generated secret and
- # raise.
- try:
- os.unlink(self._secrets_path)
- except OSError as e:
- if e.errno != errno.ENOENT: # no such file or directory
- logger.exception(e)
- logger.exception(ex)
- raise BootstrapSequenceError(
- 'Could not store generated secret in the shared '
- 'database, bailing out...')
-
- def _bootstrap(self):
- """
- Bootstrap local Soledad instance.
-
- Soledad Client bootstrap is the following sequence of stages:
-
- * stage 0 - local environment setup.
- - directory initialization.
- - crypto submodule initialization
- * stage 1 - local secret loading:
- - if secrets exist locally, load them.
- * stage 2 - remote secret loading:
- - else, if secrets exist in server, download them.
- * stage 3 - secret generation:
- - else, generate a new secret and store in server.
- * stage 4 - database initialization.
-
- This method decides which bootstrap stages have already been performed
- and performs the missing ones in order.
-
- :raise BootstrapSequenceError: Raised when the secret generation and
- storage on server sequence has failed for some reason.
- """
- # STAGE 0 - local environment setup
- self._init_dirs()
- self._crypto = SoledadCrypto(self)
-
- secrets_problem = None
-
- # STAGE 1 - verify if secrets exist locally
- if not self._has_secret(): # try to load from local storage.
-
- # STAGE 2 - there are no secrets in local storage, so try to fetch
- # encrypted secrets from server.
- logger.info(
- 'Trying to fetch cryptographic secrets from shared recovery '
- 'database...')
-
- # --- start of atomic operation in shared db ---
-
- # obtain lock on shared db
- token = timeout = None
- try:
- token, timeout = self._shared_db.lock()
- except AlreadyLockedError:
- raise BootstrapSequenceError('Database is already locked.')
- except LockTimedOutError:
- raise BootstrapSequenceError('Lock operation timed out.')
-
- try:
- self._get_or_gen_crypto_secrets()
- except Exception as e:
- secrets_problem = e
-
- # release the lock on shared db
- try:
- self._shared_db.unlock(token)
- self._shared_db.close()
- except NotLockedError:
- # for some reason the lock expired. Despite that, secret
- # loading or generation/storage must have been executed
- # successfully, so we pass.
- pass
- except InvalidTokenError:
- # here, our lock has not only expired but also some other
- # client application has obtained a new lock and is currently
- # doing its thing in the shared database. Using the same
- # reasoning as above, we assume everything went smooth and
- # pass.
- pass
- except Exception as e:
- logger.error("Unhandled exception when unlocking shared "
- "database.")
- logger.exception(e)
-
- # --- end of atomic operation in shared db ---
-
- # STAGE 4 - local database initialization
- if secrets_problem is None:
- self._init_db()
- else:
- raise secrets_problem
-
- def _init_dirs(self):
- """
- Create work directories.
-
- :raise OSError: in case file exists and is not a dir.
- """
- paths = map(
- lambda x: os.path.dirname(x),
- [self._local_db_path, self._secrets_path])
- for path in paths:
- try:
- if not os.path.isdir(path):
- logger.info('Creating directory: %s.' % path)
- os.makedirs(path)
- except OSError as exc:
- if exc.errno == errno.EEXIST and os.path.isdir(path):
- pass
- else:
- raise
-
- def _init_db(self):
- """
- Initialize the U1DB SQLCipher database for local storage.
-
- Currently, Soledad uses the default SQLCipher cipher, i.e.
- 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key and
- uses the 'raw PRAGMA key' format to handle the key to SQLCipher.
-
- The first C{self.REMOTE_STORAGE_SECRET_LENGTH} bytes of the storage
- secret are used for remote storage encryption. We use the next
- C{self.LOCAL_STORAGE_SECRET} bytes to derive a key for local storage.
- From these bytes, the first C{self.SALT_LENGTH} are used as the salt
- and the rest as the password for the scrypt hashing.
- """
- # salt indexes
- salt_start = self.REMOTE_STORAGE_SECRET_LENGTH
- salt_end = salt_start + self.SALT_LENGTH
- # password indexes
- pwd_start = salt_end
- pwd_end = salt_start + self.LOCAL_STORAGE_SECRET_LENGTH
- # calculate the key for local encryption
- secret = self._get_storage_secret()
- key = scrypt.hash(
- secret[pwd_start:pwd_end], # the password
- secret[salt_start:salt_end], # the salt
- buflen=32, # we need a key with 256 bits (32 bytes)
- )
-
- self._db = sqlcipher_open(
- self._local_db_path,
- binascii.b2a_hex(key), # sqlcipher only accepts the hex version
- create=True,
- document_factory=SoledadDocument,
- crypto=self._crypto,
- raw_key=True,
- defer_encryption=self._defer_encryption)
-
- def close(self):
- """
- Close underlying U1DB database.
- """
- logger.debug("Closing soledad")
- if hasattr(self, '_db') and isinstance(
- self._db,
- SQLCipherDatabase):
- self._db.stop_sync()
- self._db.close()
-
- #
- # Management of secret for symmetric encryption.
- #
-
- def _get_storage_secret(self):
- """
- Return the storage secret.
-
- Storage secret is encrypted before being stored. This method decrypts
- and returns the stored secret.
-
- :return: The storage secret.
- :rtype: str
- """
- # calculate the encryption key
- key = scrypt.hash(
- self._passphrase_as_string(),
- # the salt is stored base64 encoded
- binascii.a2b_base64(
- self._secrets[self._secret_id][self.KDF_SALT_KEY]),
- buflen=32, # we need a key with 256 bits (32 bytes).
- )
- # recover the initial value and ciphertext
- iv, ciphertext = self._secrets[self._secret_id][self.SECRET_KEY].split(
- self.IV_SEPARATOR, 1)
- ciphertext = binascii.a2b_base64(ciphertext)
- return self._crypto.decrypt_sym(ciphertext, key, iv=iv)
-
- def _set_secret_id(self, secret_id):
- """
- Define the id of the storage secret to be used.
-
- This method will also replace the secret in the crypto object.
-
- :param secret_id: The id of the storage secret to be used.
- :type secret_id: str
- """
- self._secret_id = secret_id
-
- def _load_secrets(self):
- """
- Load storage secrets from local file.
- """
- # does the file exist in disk?
- if not os.path.isfile(self._secrets_path):
- raise IOError('File does not exist: %s' % self._secrets_path)
- # read storage secrets from file
- content = None
- with open(self._secrets_path, 'r') as f:
- content = json.loads(f.read())
- _, mac = self.import_recovery_document(content)
- if mac is False:
- self._store_secrets()
- self._put_secrets_in_shared_db()
- # choose first secret if no secret_id was given
- if self._secret_id is None:
- self._set_secret_id(self._secrets.items()[0][0])
-
- def _has_secret(self):
- """
- Return whether there is a storage secret available for use or not.
-
- :return: Whether there's a storage secret for symmetric encryption.
- :rtype: bool
- """
- if self._secret_id is None or self._secret_id not in self._secrets:
- try:
- self._load_secrets() # try to load from disk
- except IOError, e:
- logger.warning('IOError: %s' % str(e))
- try:
- self._get_storage_secret()
- return True
- except Exception:
- return False
-
- def _gen_secret(self):
- """
- Generate a secret for symmetric encryption and store in a local
- encrypted file.
-
- This method emits the following signals:
-
- * SOLEDAD_CREATING_KEYS
- * SOLEDAD_DONE_CREATING_KEYS
-
- A secret has the following structure:
-
- {
- '<secret_id>': {
- 'kdf': 'scrypt',
- 'kdf_salt': '<b64 repr of salt>'
- 'kdf_length': <key length>
- 'cipher': 'aes256',
- 'length': <secret length>,
- 'secret': '<encrypted b64 repr of storage_secret>',
- }
- }
-
- :return: The id of the generated secret.
- :rtype: str
- """
- signal(SOLEDAD_CREATING_KEYS, self._uuid)
- # generate random secret
- secret = os.urandom(self.GENERATED_SECRET_LENGTH)
- secret_id = sha256(secret).hexdigest()
- # generate random salt
- salt = os.urandom(self.SALT_LENGTH)
- # get a 256-bit key
- key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32)
- iv, ciphertext = self._crypto.encrypt_sym(secret, key)
- self._secrets[secret_id] = {
- # leap.soledad.crypto submodule uses AES256 for symmetric
- # encryption.
- self.KDF_KEY: self.KDF_SCRYPT,
- self.KDF_SALT_KEY: binascii.b2a_base64(salt),
- self.KDF_LENGTH_KEY: len(key),
- self.CIPHER_KEY: self.CIPHER_AES256,
- self.LENGTH_KEY: len(secret),
- self.SECRET_KEY: '%s%s%s' % (
- str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)),
- }
- self._store_secrets()
- signal(SOLEDAD_DONE_CREATING_KEYS, self._uuid)
- return secret_id
-
- def _store_secrets(self):
- """
- Store secrets in C{Soledad.STORAGE_SECRETS_FILE_PATH}.
- """
- with open(self._secrets_path, 'w') as f:
- f.write(
- json.dumps(
- self.export_recovery_document()))
-
- def change_passphrase(self, new_passphrase):
- """
- Change the passphrase that encrypts the storage secret.
-
- :param new_passphrase: The new passphrase.
- :type new_passphrase: unicode
-
- :raise NoStorageSecret: Raised if there's no storage secret available.
- """
- # maybe we want to add more checks to guarantee passphrase is
- # reasonable?
- soledad_assert_type(new_passphrase, unicode)
- if len(new_passphrase) < self.MINIMUM_PASSPHRASE_LENGTH:
- raise PassphraseTooShort(
- 'Passphrase must be at least %d characters long!' %
- self.MINIMUM_PASSPHRASE_LENGTH)
- # ensure there's a secret for which the passphrase will be changed.
- if not self._has_secret():
- raise NoStorageSecret()
- secret = self._get_storage_secret()
- # generate random salt
- new_salt = os.urandom(self.SALT_LENGTH)
- # get a 256-bit key
- key = scrypt.hash(new_passphrase.encode('utf-8'), new_salt, buflen=32)
- iv, ciphertext = self._crypto.encrypt_sym(secret, key)
- # XXX update all secrets in the dict
- self._secrets[self._secret_id] = {
- # leap.soledad.crypto submodule uses AES256 for symmetric
- # encryption.
- self.KDF_KEY: self.KDF_SCRYPT, # TODO: remove hard coded kdf
- self.KDF_SALT_KEY: binascii.b2a_base64(new_salt),
- self.KDF_LENGTH_KEY: len(key),
- self.CIPHER_KEY: self.CIPHER_AES256,
- self.LENGTH_KEY: len(secret),
- self.SECRET_KEY: '%s%s%s' % (
- str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)),
- }
- self._passphrase = new_passphrase
- self._store_secrets()
- self._put_secrets_in_shared_db()
-
- #
- # General crypto utility methods.
- #
-
- @property
- def _shared_db(self):
- """
- Return an instance of the shared recovery database object.
-
- :return: The shared database.
- :rtype: SoledadSharedDatabase
- """
- if self._shared_db_instance is None:
- self._shared_db_instance = SoledadSharedDatabase.open_database(
- urlparse.urljoin(self.server_url, SHARED_DB_NAME),
- self._uuid,
- False, # db should exist at this point.
- creds=self._creds)
- return self._shared_db_instance
-
- def _shared_db_doc_id(self):
- """
- Calculate the doc_id of the document in the shared db that stores key
- material.
-
- :return: the hash
- :rtype: str
- """
- return sha256(
- '%s%s' %
- (self._passphrase_as_string(), self.uuid)).hexdigest()
-
- def _get_secrets_from_shared_db(self):
- """
- Retrieve the document with encrypted key material from the shared
- database.
-
- :return: a document with encrypted key material in its contents
- :rtype: SoledadDocument
- """
- signal(SOLEDAD_DOWNLOADING_KEYS, self._uuid)
- db = self._shared_db
- if not db:
- logger.warning('No shared db found')
- return
- doc = db.get_doc(self._shared_db_doc_id())
- signal(SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid)
- return doc
-
- def _put_secrets_in_shared_db(self):
- """
- Assert local keys are the same as shared db's ones.
-
- Try to fetch keys from shared recovery database. If they already exist
- in the remote db, assert that that data is the same as local data.
- Otherwise, upload keys to shared recovery database.
- """
- soledad_assert(
- self._has_secret(),
- 'Tried to send keys to server but they don\'t exist in local '
- 'storage.')
- # try to get secrets doc from server, otherwise create it
- doc = self._get_secrets_from_shared_db()
- if doc is None:
- doc = SoledadDocument(
- doc_id=self._shared_db_doc_id())
- # fill doc with encrypted secrets
- doc.content = self.export_recovery_document()
- # upload secrets to server
- signal(SOLEDAD_UPLOADING_KEYS, self._uuid)
- db = self._shared_db
- if not db:
- logger.warning('No shared db found')
- return
- db.put_doc(doc)
- signal(SOLEDAD_DONE_UPLOADING_KEYS, self._uuid)
-
- #
- # Document storage, retrieval and sync.
- #
-
- def put_doc(self, doc):
- """
- Update a document in the local encrypted database.
-
- ============================== WARNING ==============================
- This method converts the document's contents to unicode in-place. This
- means that after calling C{put_doc(doc)}, the contents of the
- document, i.e. C{doc.content}, might be different from before the
- call.
- ============================== WARNING ==============================
-
- :param doc: the document to update
- :type doc: SoledadDocument
-
- :return: the new revision identifier for the document
- :rtype: str
- """
- doc.content = self._convert_to_unicode(doc.content)
- return self._db.put_doc(doc)
-
- def delete_doc(self, doc):
- """
- Delete a document from the local encrypted database.
-
- :param doc: the document to delete
- :type doc: SoledadDocument
-
- :return: the new revision identifier for the document
- :rtype: str
- """
- return self._db.delete_doc(doc)
-
- def get_doc(self, doc_id, include_deleted=False):
- """
- Retrieve a document from the local encrypted database.
-
- :param doc_id: the unique document identifier
- :type doc_id: str
- :param include_deleted: if True, deleted documents will be
- returned with empty content; otherwise asking
- for a deleted document will return None
- :type include_deleted: bool
-
- :return: the document object or None
- :rtype: SoledadDocument
- """
- return self._db.get_doc(doc_id, include_deleted=include_deleted)
-
- def get_docs(self, doc_ids, check_for_conflicts=True,
- include_deleted=False):
- """
- Get the content for many documents.
-
- :param doc_ids: a list of document identifiers
- :type doc_ids: list
- :param check_for_conflicts: if set False, then the conflict check will
- be skipped, and 'None' will be returned instead of True/False
- :type check_for_conflicts: bool
-
- :return: iterable giving the Document object for each document id
- in matching doc_ids order.
- :rtype: generator
- """
- return self._db.get_docs(
- doc_ids, check_for_conflicts=check_for_conflicts,
- include_deleted=include_deleted)
-
- def get_all_docs(self, include_deleted=False):
- """Get the JSON content for all documents in the database.
-
- :param include_deleted: If set to True, deleted documents will be
- returned with empty content. Otherwise deleted
- documents will not be included in the results.
- :return: (generation, [Document])
- The current generation of the database, followed by a list of
- all the documents in the database.
- """
- return self._db.get_all_docs(include_deleted)
-
- def _convert_to_unicode(self, content):
- """
- Converts content to unicode (or all the strings in content)
-
- NOTE: Even though this method supports any type, it will
- currently ignore contents of lists, tuple or any other
- iterable than dict. We don't need support for these at the
- moment
-
- :param content: content to convert
- :type content: object
-
- :rtype: object
- """
- if isinstance(content, unicode):
- return content
- elif isinstance(content, str):
- result = chardet.detect(content)
- default = "utf-8"
- encoding = result["encoding"] or default
- try:
- content = content.decode(encoding)
- except UnicodeError as e:
- logger.error("Unicode error: {0!r}. Using 'replace'".format(e))
- content = content.decode(encoding, 'replace')
- return content
- else:
- if isinstance(content, dict):
- for key in content.keys():
- content[key] = self._convert_to_unicode(content[key])
- return content
-
- def create_doc(self, content, doc_id=None):
- """
- Create a new document in the local encrypted database.
-
- :param content: the contents of the new document
- :type content: dict
- :param doc_id: an optional identifier specifying the document id
- :type doc_id: str
-
- :return: the new document
- :rtype: SoledadDocument
- """
- return self._db.create_doc(
- self._convert_to_unicode(content), doc_id=doc_id)
-
- def create_doc_from_json(self, json, doc_id=None):
- """
- Create a new document.
-
- You can optionally specify the document identifier, but the document
- must not already exist. See 'put_doc' if you want to override an
- existing document.
- If the database specifies a maximum document size and the document
- exceeds it, create will fail and raise a DocumentTooBig exception.
-
- :param json: The JSON document string
- :type json: str
- :param doc_id: An optional identifier specifying the document id.
- :type doc_id:
- :return: The new document
- :rtype: SoledadDocument
- """
- return self._db.create_doc_from_json(json, doc_id=doc_id)
-
- def create_index(self, index_name, *index_expressions):
- """
- Create an named index, which can then be queried for future lookups.
- Creating an index which already exists is not an error, and is cheap.
- Creating an index which does not match the index_expressions of the
- existing index is an error.
- Creating an index will block until the expressions have been evaluated
- and the index generated.
-
- :param index_name: A unique name which can be used as a key prefix
- :type index_name: str
- :param index_expressions: index expressions defining the index
- information.
- :type index_expressions: dict
-
- Examples:
-
- "fieldname", or "fieldname.subfieldname" to index alphabetically
- sorted on the contents of a field.
-
- "number(fieldname, width)", "lower(fieldname)"
- """
- if self._db:
- return self._db.create_index(
- index_name, *index_expressions)
-
- def delete_index(self, index_name):
- """
- Remove a named index.
-
- :param index_name: The name of the index we are removing
- :type index_name: str
- """
- if self._db:
- return self._db.delete_index(index_name)
-
- def list_indexes(self):
- """
- List the definitions of all known indexes.
-
- :return: A list of [('index-name', ['field', 'field2'])] definitions.
- :rtype: list
- """
- if self._db:
- return self._db.list_indexes()
-
- def get_from_index(self, index_name, *key_values):
- """
- Return documents that match the keys supplied.
-
- You must supply exactly the same number of values as have been defined
- in the index. It is possible to do a prefix match by using '*' to
- indicate a wildcard match. You can only supply '*' to trailing entries,
- (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.)
- It is also possible to append a '*' to the last supplied value (eg
- 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*')
-
- :param index_name: The index to query
- :type index_name: str
- :param key_values: values to match. eg, if you have
- an index with 3 fields then you would have:
- get_from_index(index_name, val1, val2, val3)
- :type key_values: tuple
- :return: List of [Document]
- :rtype: list
- """
- if self._db:
- return self._db.get_from_index(index_name, *key_values)
-
- def get_count_from_index(self, index_name, *key_values):
- """
- Return the count of the documents that match the keys and
- values supplied.
-
- :param index_name: The index to query
- :type index_name: str
- :param key_values: values to match. eg, if you have
- an index with 3 fields then you would have:
- get_from_index(index_name, val1, val2, val3)
- :type key_values: tuple
- :return: count.
- :rtype: int
- """
- if self._db:
- return self._db.get_count_from_index(index_name, *key_values)
-
- def get_range_from_index(self, index_name, start_value, end_value):
- """
- Return documents that fall within the specified range.
-
- Both ends of the range are inclusive. For both start_value and
- end_value, one must supply exactly the same number of values as have
- been defined in the index, or pass None. In case of a single column
- index, a string is accepted as an alternative for a tuple with a single
- value. It is possible to do a prefix match by using '*' to indicate
- a wildcard match. You can only supply '*' to trailing entries, (eg
- 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also
- possible to append a '*' to the last supplied value (eg 'val*', '*',
- '*' or 'val', 'val*', '*', but not 'val*', 'val', '*')
-
- :param index_name: The index to query
- :type index_name: str
- :param start_values: tuples of values that define the lower bound of
- the range. eg, if you have an index with 3 fields then you would
- have: (val1, val2, val3)
- :type start_values: tuple
- :param end_values: tuples of values that define the upper bound of the
- range. eg, if you have an index with 3 fields then you would have:
- (val1, val2, val3)
- :type end_values: tuple
- :return: List of [Document]
- :rtype: list
- """
- if self._db:
- return self._db.get_range_from_index(
- index_name, start_value, end_value)
-
- def get_index_keys(self, index_name):
- """
- Return all keys under which documents are indexed in this index.
-
- :param index_name: The index to query
- :type index_name: str
- :return: [] A list of tuples of indexed keys.
- :rtype: list
- """
- if self._db:
- return self._db.get_index_keys(index_name)
-
- def get_doc_conflicts(self, doc_id):
- """
- Get the list of conflicts for the given document.
-
- :param doc_id: the document id
- :type doc_id: str
-
- :return: a list of the document entries that are conflicted
- :rtype: list
- """
- if self._db:
- return self._db.get_doc_conflicts(doc_id)
-
- def resolve_doc(self, doc, conflicted_doc_revs):
- """
- Mark a document as no longer conflicted.
-
- :param doc: a document with the new content to be inserted.
- :type doc: SoledadDocument
- :param conflicted_doc_revs: a list of revisions that the new content
- supersedes.
- :type conflicted_doc_revs: list
- """
- if self._db:
- return self._db.resolve_doc(doc, conflicted_doc_revs)
-
- def sync(self, defer_decryption=True):
- """
- Synchronize the local encrypted replica with a remote replica.
-
- This method blocks until a syncing lock is acquired, so there are no
- attempts of concurrent syncs from the same client replica.
-
- :param url: the url of the target replica to sync with
- :type url: str
-
- :param defer_decryption: Whether to defer the decryption process using
- the intermediate database. If False,
- decryption will be done inline.
- :type defer_decryption: bool
-
- :return: The local generation before the synchronisation was
- performed.
- :rtype: str
- """
- if self._db:
- try:
- local_gen = self._db.sync(
- urlparse.urljoin(self.server_url, 'user-%s' % self._uuid),
- creds=self._creds, autocreate=False,
- defer_decryption=defer_decryption)
- signal(SOLEDAD_DONE_DATA_SYNC, self._uuid)
- return local_gen
- except Exception as e:
- logger.error("Soledad exception when syncing: %s" % str(e))
-
- def stop_sync(self):
- """
- Stop the current syncing process.
- """
- if self._db:
- self._db.stop_sync()
-
- def need_sync(self, url):
- """
- Return if local db replica differs from remote url's replica.
-
- :param url: The remote replica to compare with local replica.
- :type url: str
-
- :return: Whether remote replica and local replica differ.
- :rtype: bool
- """
- target = SoledadSyncTarget(
- url, self._db._get_replica_uid(), creds=self._creds,
- crypto=self._crypto)
- info = target.get_sync_info(self._db._get_replica_uid())
- # compare source generation with target's last known source generation
- if self._db._get_generation() != info[4]:
- signal(SOLEDAD_NEW_DATA_TO_SYNC, self._uuid)
- return True
- return False
-
- @property
- def syncing(self):
- """
- Property, True if the syncer is syncing.
- """
- return self._db.syncing
-
- def _set_token(self, token):
- """
- Set the authentication token for remote database access.
-
- Build the credentials dictionary with the following format:
-
- self._{
- 'token': {
- 'uuid': '<uuid>'
- 'token': '<token>'
- }
-
- :param token: The authentication token.
- :type token: str
- """
- self._creds = {
- 'token': {
- 'uuid': self._uuid,
- 'token': token,
- }
- }
-
- def _get_token(self):
- """
- Return current token from credentials dictionary.
- """
- return self._creds['token']['token']
-
- token = property(_get_token, _set_token, doc='The authentication Token.')
-
- #
- # Recovery document export and import methods
- #
-
- def export_recovery_document(self):
- """
- Export the storage secrets.
-
- A recovery document has the following structure:
-
- {
- 'storage_secrets': {
- '<storage_secret id>': {
- 'kdf': 'scrypt',
- 'kdf_salt': '<b64 repr of salt>'
- 'kdf_length': <key length>
- 'cipher': 'aes256',
- 'length': <secret length>,
- 'secret': '<encrypted storage_secret>',
- },
- },
- 'kdf': 'scrypt',
- 'kdf_salt': '<b64 repr of salt>',
- 'kdf_length: <key length>,
- '_mac_method': 'hmac',
- '_mac': '<mac>'
- }
-
- Note that multiple storage secrets might be stored in one recovery
- document. This method will also calculate a MAC of a string
- representation of the secrets dictionary.
-
- :return: The recovery document.
- :rtype: dict
- """
- # create salt and key for calculating MAC
- salt = os.urandom(self.SALT_LENGTH)
- key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32)
- data = {
- self.STORAGE_SECRETS_KEY: self._secrets,
- self.KDF_KEY: self.KDF_SCRYPT,
- self.KDF_SALT_KEY: binascii.b2a_base64(salt),
- self.KDF_LENGTH_KEY: len(key),
- MAC_METHOD_KEY: MacMethods.HMAC,
- MAC_KEY: hmac.new(
- key,
- json.dumps(self._secrets),
- sha256).hexdigest(),
- }
- return data
-
- def import_recovery_document(self, data):
- """
- Import storage secrets for symmetric encryption and uuid (if present)
- from a recovery document.
-
- Note that this method does not store the imported data on disk. For
- that, use C{self._store_secrets()}.
-
- :param data: The recovery document.
- :type data: dict
-
- :return: A tuple containing the number of imported secrets and whether
- there was MAC informationa available for authenticating.
- :rtype: (int, bool)
- """
- soledad_assert(self.STORAGE_SECRETS_KEY in data)
- # check mac of the recovery document
- mac = None
- if MAC_KEY in data:
- soledad_assert(data[MAC_KEY] is not None)
- soledad_assert(MAC_METHOD_KEY in data)
- soledad_assert(self.KDF_KEY in data)
- soledad_assert(self.KDF_SALT_KEY in data)
- soledad_assert(self.KDF_LENGTH_KEY in data)
- if data[MAC_METHOD_KEY] == MacMethods.HMAC:
- key = scrypt.hash(
- self._passphrase_as_string(),
- binascii.a2b_base64(data[self.KDF_SALT_KEY]),
- buflen=32)
- mac = hmac.new(
- key,
- json.dumps(data[self.STORAGE_SECRETS_KEY]),
- sha256).hexdigest()
- else:
- raise UnknownMacMethod('Unknown MAC method: %s.' %
- data[MAC_METHOD_KEY])
- if mac != data[MAC_KEY]:
- raise WrongMac('Could not authenticate recovery document\'s '
- 'contents.')
- # include secrets in the secret pool.
- secrets = 0
- for secret_id, secret_data in data[self.STORAGE_SECRETS_KEY].items():
- if secret_id not in self._secrets:
- secrets += 1
- self._secrets[secret_id] = secret_data
- return secrets, mac
-
- #
- # Setters/getters
- #
-
- def _get_uuid(self):
- return self._uuid
-
- uuid = property(_get_uuid, doc='The user uuid.')
-
- def _get_secret_id(self):
- return self._secret_id
-
- secret_id = property(
- _get_secret_id,
- doc='The active secret id.')
-
- def _get_secrets_path(self):
- return self._secrets_path
-
- secrets_path = property(
- _get_secrets_path,
- doc='The path for the file containing the encrypted symmetric secret.')
-
- def _get_local_db_path(self):
- return self._local_db_path
-
- local_db_path = property(
- _get_local_db_path,
- doc='The path for the local database replica.')
-
- def _get_server_url(self):
- return self._server_url
-
- server_url = property(
- _get_server_url,
- doc='The URL of the Soledad server.')
-
- storage_secret = property(
- _get_storage_secret,
- doc='The secret used for symmetric encryption.')
-
- def _get_passphrase(self):
- return self._passphrase
-
- passphrase = property(
- _get_passphrase,
- doc='The passphrase for locking and unlocking encryption secrets for '
- 'local and remote storage.')
-
- def _passphrase_as_string(self):
- return self._passphrase.encode('utf-8')
-
-
-# ----------------------------------------------------------------------------
-# Monkey patching u1db to be able to provide a custom SSL cert
-# ----------------------------------------------------------------------------
-
-# We need a more reasonable timeout (in seconds)
-SOLEDAD_TIMEOUT = 120
-
-
-class VerifiedHTTPSConnection(httplib.HTTPSConnection):
- """
- HTTPSConnection verifying server side certificates.
- """
- # derived from httplib.py
-
- def connect(self):
- """
- Connect to a host on a given (SSL) port.
- """
- try:
- source = self.source_address
- sock = socket.create_connection((self.host, self.port),
- SOLEDAD_TIMEOUT, source)
- except AttributeError:
- # source_address was introduced in 2.7
- sock = socket.create_connection((self.host, self.port),
- SOLEDAD_TIMEOUT)
- if self._tunnel_host:
- self.sock = sock
- self._tunnel()
-
- highest_supported = ssl.PROTOCOL_SSLv23
-
- try:
- # needs python 2.7.9+
- # negotiate the best available version,
- # but explicitely disabled bad ones.
- ctx = ssl.SSLContext(highest_supported)
- ctx.options |= ssl.OP_NO_SSLv2
- ctx.options |= ssl.OP_NO_SSLv3
-
- ctx.load_verify_locations(cafile=SOLEDAD_CERT)
- ctx.verify_mode = ssl.CERT_REQUIRED
- self.sock = ctx.wrap_socket(sock)
-
- except AttributeError:
- self.sock = ssl.wrap_socket(
- sock, ca_certs=SOLEDAD_CERT, cert_reqs=ssl.CERT_REQUIRED,
- ssl_version=highest_supported)
-
- match_hostname(self.sock.getpeercert(), self.host)
-
-
-old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection
-http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection
-
-
-__all__ = ['soledad_assert', 'Soledad']
+from leap.soledad.client.api import Soledad
+from leap.soledad.common import soledad_assert
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
+
+__all__ = ['soledad_assert', 'Soledad', '__version__']
diff --git a/client/src/leap/soledad/client/adbapi.py b/client/src/leap/soledad/client/adbapi.py
new file mode 100644
index 00000000..5b882bbe
--- /dev/null
+++ b/client/src/leap/soledad/client/adbapi.py
@@ -0,0 +1,271 @@
+# -*- coding: utf-8 -*-
+# adbapi.py
+# Copyright (C) 2013, 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+ An asynchronous interface to soledad using sqlcipher backend.
+It uses twisted.enterprise.adbapi.
+"""
+import re
+import os
+import sys
+import logging
+
+from functools import partial
+from threading import BoundedSemaphore
+
+from twisted.enterprise import adbapi
+from twisted.python import log
+from zope.proxy import ProxyBase, setProxiedObject
+from pysqlcipher.dbapi2 import OperationalError
+
+from leap.soledad.client import sqlcipher as soledad_sqlcipher
+from leap.soledad.client.pragmas import set_init_pragmas
+
+
+logger = logging.getLogger(name=__name__)
+
+
+DEBUG_SQL = os.environ.get("LEAP_DEBUG_SQL")
+if DEBUG_SQL:
+ log.startLogging(sys.stdout)
+
+"""
+How long the SQLCipher connection should wait for the lock to go away until
+raising an exception.
+"""
+SQLCIPHER_CONNECTION_TIMEOUT = 10
+
+"""
+How many times a SQLCipher query should be retried in case of timeout.
+"""
+SQLCIPHER_MAX_RETRIES = 10
+
+
+def getConnectionPool(opts, openfun=None, driver="pysqlcipher"):
+ """
+ Return a connection pool.
+
+ :param opts:
+ Options for the SQLCipher connection.
+ :type opts: SQLCipherOptions
+ :param openfun:
+ Callback invoked after every connect() on the underlying DB-API
+ object.
+ :type openfun: callable
+ :param driver:
+ The connection driver.
+ :type driver: str
+
+ :return: A U1DB connection pool.
+ :rtype: U1DBConnectionPool
+ """
+ if openfun is None and driver == "pysqlcipher":
+ openfun = partial(set_init_pragmas, opts=opts)
+ return U1DBConnectionPool(
+ "%s.dbapi2" % driver, database=opts.path,
+ check_same_thread=False, cp_openfun=openfun,
+ timeout=SQLCIPHER_CONNECTION_TIMEOUT)
+
+
+class U1DBConnection(adbapi.Connection):
+ """
+ A wrapper for a U1DB connection instance.
+ """
+
+ u1db_wrapper = soledad_sqlcipher.SoledadSQLCipherWrapper
+ """
+ The U1DB wrapper to use.
+ """
+
+ def __init__(self, pool, init_u1db=False):
+ """
+ :param pool: The pool of connections that owns this connection.
+ :type pool: adbapi.ConnectionPool
+ :param init_u1db: Whether the u1db database should be initialized.
+ :type init_u1db: bool
+ """
+ self.init_u1db = init_u1db
+ adbapi.Connection.__init__(self, pool)
+
+ def reconnect(self):
+ """
+ Reconnect to the U1DB database.
+ """
+ if self._connection is not None:
+ self._pool.disconnect(self._connection)
+ self._connection = self._pool.connect()
+
+ if self.init_u1db:
+ self._u1db = self.u1db_wrapper(self._connection)
+
+ def __getattr__(self, name):
+ """
+ Route the requested attribute either to the U1DB wrapper or to the
+ connection.
+
+ :param name: The name of the attribute.
+ :type name: str
+ """
+ if name.startswith('u1db_'):
+ attr = re.sub('^u1db_', '', name)
+ return getattr(self._u1db, attr)
+ else:
+ return getattr(self._connection, name)
+
+
+class U1DBTransaction(adbapi.Transaction):
+ """
+ A wrapper for a U1DB 'cursor' object.
+ """
+
+ def __getattr__(self, name):
+ """
+ Route the requested attribute either to the U1DB wrapper of the
+ connection or to the actual connection cursor.
+
+ :param name: The name of the attribute.
+ :type name: str
+ """
+ if name.startswith('u1db_'):
+ attr = re.sub('^u1db_', '', name)
+ return getattr(self._connection._u1db, attr)
+ else:
+ return getattr(self._cursor, name)
+
+
+class U1DBConnectionPool(adbapi.ConnectionPool):
+ """
+ Represent a pool of connections to an U1DB database.
+ """
+
+ connectionFactory = U1DBConnection
+ transactionFactory = U1DBTransaction
+
+ def __init__(self, *args, **kwargs):
+ """
+ Initialize the connection pool.
+ """
+ adbapi.ConnectionPool.__init__(self, *args, **kwargs)
+ # all u1db connections, hashed by thread-id
+ self._u1dbconnections = {}
+
+ # The replica uid, primed by the connections on init.
+ self.replica_uid = ProxyBase(None)
+
+ conn = self.connectionFactory(self, init_u1db=True)
+ replica_uid = conn._u1db._real_replica_uid
+ setProxiedObject(self.replica_uid, replica_uid)
+
+ def runU1DBQuery(self, meth, *args, **kw):
+ """
+ Execute a U1DB query in a thread, using a pooled connection.
+
+ Concurrent threads trying to update the same database may time out
+ because of other threads holding the database lock. Because of this,
+ we will retry SQLCIPHER_MAX_RETRIES times and fail after that.
+
+ :param meth: The U1DB wrapper method name.
+ :type meth: str
+
+ :return: a Deferred which will fire the return value of
+ 'self._runU1DBQuery(Transaction(...), *args, **kw)', or a Failure.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ meth = "u1db_%s" % meth
+ semaphore = BoundedSemaphore(SQLCIPHER_MAX_RETRIES - 1)
+
+ def _run_interaction():
+ return self.runInteraction(
+ self._runU1DBQuery, meth, *args, **kw)
+
+ def _errback(failure):
+ failure.trap(OperationalError)
+ if failure.getErrorMessage() == "database is locked":
+ should_retry = semaphore.acquire(False)
+ if should_retry:
+ logger.warning(
+ "Database operation timed out while waiting for "
+ "lock, trying again...")
+ return _run_interaction()
+ return failure
+
+ d = _run_interaction()
+ d.addErrback(_errback)
+ return d
+
+ def _runU1DBQuery(self, trans, meth, *args, **kw):
+ """
+ Execute a U1DB query.
+
+ :param trans: An U1DB transaction.
+ :type trans: adbapi.Transaction
+ :param meth: the U1DB wrapper method name.
+ :type meth: str
+ """
+ meth = getattr(trans, meth)
+ return meth(*args, **kw)
+
+ def _runInteraction(self, interaction, *args, **kw):
+ """
+ Interact with the database and return the result.
+
+ :param interaction:
+ A callable object whose first argument is an
+ L{adbapi.Transaction}.
+ :type interaction: callable
+ :return: a Deferred which will fire the return value of
+ 'interaction(Transaction(...), *args, **kw)', or a Failure.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ tid = self.threadID()
+ u1db = self._u1dbconnections.get(tid)
+ conn = self.connectionFactory(self, init_u1db=not bool(u1db))
+
+ if self.replica_uid is None:
+ replica_uid = conn._u1db._real_replica_uid
+ setProxiedObject(self.replica_uid, replica_uid)
+
+ if u1db is None:
+ self._u1dbconnections[tid] = conn._u1db
+ else:
+ conn._u1db = u1db
+
+ trans = self.transactionFactory(self, conn)
+ try:
+ result = interaction(trans, *args, **kw)
+ trans.close()
+ conn.commit()
+ return result
+ except:
+ excType, excValue, excTraceback = sys.exc_info()
+ try:
+ conn.rollback()
+ except:
+ log.err(None, "Rollback failed")
+ raise excType, excValue, excTraceback
+
+ def finalClose(self):
+ """
+ A final close, only called by the shutdown trigger.
+ """
+ self.shutdownID = None
+ self.threadpool.stop()
+ self.running = False
+ for conn in self.connections.values():
+ self._close(conn)
+ for u1db in self._u1dbconnections.values():
+ self._close(u1db)
+ self.connections.clear()
diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py
new file mode 100644
index 00000000..76d6acc3
--- /dev/null
+++ b/client/src/leap/soledad/client/api.py
@@ -0,0 +1,882 @@
+# -*- coding: utf-8 -*-
+# api.py
+# Copyright (C) 2013, 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Soledad - Synchronization Of Locally Encrypted Data Among Devices.
+
+This module holds the public api for Soledad.
+
+Soledad is the part of LEAP that manages storage and synchronization of
+application data. It is built on top of U1DB reference Python API and
+implements (1) a SQLCipher backend for local storage in the client, (2) a
+SyncTarget that encrypts data before syncing, and (3) a CouchDB backend for
+remote storage in the server side.
+"""
+import binascii
+import errno
+import httplib
+import logging
+import os
+import socket
+import ssl
+import urlparse
+
+try:
+ import cchardet as chardet
+except ImportError:
+ import chardet
+
+from StringIO import StringIO
+from u1db.remote import http_client
+from u1db.remote.ssl_match_hostname import match_hostname
+from zope.interface import implements
+
+from leap.common.config import get_path_prefix
+
+from leap.soledad.common import SHARED_DB_NAME
+from leap.soledad.common import soledad_assert
+from leap.soledad.common import soledad_assert_type
+
+from leap.soledad.client import adbapi
+from leap.soledad.client import events as soledad_events
+from leap.soledad.client import interfaces as soledad_interfaces
+from leap.soledad.client.crypto import SoledadCrypto
+from leap.soledad.client.secrets import SoledadSecrets
+from leap.soledad.client.shared_db import SoledadSharedDatabase
+from leap.soledad.client.sqlcipher import SQLCipherOptions, SQLCipherU1DBSync
+
+logger = logging.getLogger(name=__name__)
+
+#
+# Constants
+#
+
+"""
+Path to the certificate file used to certify the SSL connection between
+Soledad client and server.
+"""
+SOLEDAD_CERT = None
+
+
+class Soledad(object):
+ """
+ Soledad provides encrypted data storage and sync.
+
+ A Soledad instance is used to store and retrieve data in a local encrypted
+ database and synchronize this database with Soledad server.
+
+ This class is also responsible for bootstrapping users' account by
+ creating cryptographic secrets and/or storing/fetching them on Soledad
+ server.
+
+ Soledad uses ``leap.common.events`` to signal events. The possible events
+ to be signaled are:
+
+ SOLEDAD_CREATING_KEYS: emitted during bootstrap sequence when key
+ generation starts.
+ SOLEDAD_DONE_CREATING_KEYS: emitted during bootstrap sequence when key
+ generation finishes.
+ SOLEDAD_UPLOADING_KEYS: emitted during bootstrap sequence when soledad
+ starts sending keys to server.
+ SOLEDAD_DONE_UPLOADING_KEYS: emitted during bootstrap sequence when
+ soledad finishes sending keys to server.
+ SOLEDAD_DOWNLOADING_KEYS: emitted during bootstrap sequence when
+ soledad starts to retrieve keys from server.
+ SOLEDAD_DONE_DOWNLOADING_KEYS: emitted during bootstrap sequence when
+ soledad finishes downloading keys from server.
+ SOLEDAD_NEW_DATA_TO_SYNC: emitted upon call to C{need_sync()} when
+ there's indeed new data to be synchronized between local database
+ replica and server's replica.
+ SOLEDAD_DONE_DATA_SYNC: emitted inside C{sync()} method when it has
+ finished synchronizing with remote replica.
+ """
+ implements(soledad_interfaces.ILocalStorage,
+ soledad_interfaces.ISyncableStorage,
+ soledad_interfaces.ISecretsStorage)
+
+ local_db_file_name = 'soledad.u1db'
+ secrets_file_name = "soledad.json"
+ default_prefix = os.path.join(get_path_prefix(), 'leap', 'soledad')
+
+ def __init__(self, uuid, passphrase, secrets_path, local_db_path,
+ server_url, cert_file, shared_db=None,
+ auth_token=None, defer_encryption=False, syncable=True):
+ """
+ Initialize configuration, cryptographic keys and dbs.
+
+ :param uuid: User's uuid.
+ :type uuid: str
+
+ :param passphrase:
+ The passphrase for locking and unlocking encryption secrets for
+ local and remote storage.
+ :type passphrase: unicode
+
+ :param secrets_path:
+ Path for storing encrypted key used for symmetric encryption.
+ :type secrets_path: str
+
+ :param local_db_path: Path for local encrypted storage db.
+ :type local_db_path: str
+
+ :param server_url:
+ URL for Soledad server. This is used both to sync with the user's
+ remote db and to interact with the shared recovery database.
+ :type server_url: str
+
+ :param cert_file:
+ Path to the certificate of the ca used to validate the SSL
+ certificate used by the remote soledad server.
+ :type cert_file: str
+
+ :param shared_db:
+ The shared database.
+ :type shared_db: HTTPDatabase
+
+ :param auth_token:
+ Authorization token for accessing remote databases.
+ :type auth_token: str
+
+ :param defer_encryption:
+ Whether to defer encryption/decryption of documents, or do it
+ inline while syncing.
+ :type defer_encryption: bool
+
+ :param syncable:
+ If set to ``False``, this database will not attempt to synchronize
+ with remote replicas (default is ``True``)
+ :type syncable: bool
+
+ :raise BootstrapSequenceError:
+ Raised when the secret initialization sequence (i.e. retrieval
+ from server or generation and storage on server) has failed for
+ some reason.
+ """
+ # store config params
+ self._uuid = uuid
+ self._passphrase = passphrase
+ self._local_db_path = local_db_path
+ self._server_url = server_url
+ self._defer_encryption = defer_encryption
+ self._secrets_path = None
+
+ self.shared_db = shared_db
+
+ # configure SSL certificate
+ global SOLEDAD_CERT
+ SOLEDAD_CERT = cert_file
+
+ # init crypto variables
+ self._set_token(auth_token)
+ self._crypto = SoledadCrypto(self)
+
+ self._init_config_with_defaults()
+ self._init_working_dirs()
+
+ self._secrets_path = secrets_path
+
+ # Initialize shared recovery database
+ self.init_shared_db(server_url, uuid, self._creds, syncable=syncable)
+
+ # The following can raise BootstrapSequenceError, that will be
+ # propagated upwards.
+ self._init_secrets()
+ self._init_u1db_sqlcipher_backend()
+
+ if syncable:
+ self._init_u1db_syncer()
+
+ #
+ # initialization/destruction methods
+ #
+ def _init_config_with_defaults(self):
+ """
+ Initialize configuration using default values for missing params.
+ """
+ soledad_assert_type(self._passphrase, unicode)
+ initialize = lambda attr, val: getattr(
+ self, attr, None) is None and setattr(self, attr, val)
+
+ initialize("_secrets_path", os.path.join(
+ self.default_prefix, self.secrets_file_name))
+ initialize("_local_db_path", os.path.join(
+ self.default_prefix, self.local_db_file_name))
+ # initialize server_url
+ soledad_assert(self._server_url is not None,
+ 'Missing URL for Soledad server.')
+
+ def _init_working_dirs(self):
+ """
+ Create work directories.
+
+ :raise OSError: in case file exists and is not a dir.
+ """
+ paths = map(lambda x: os.path.dirname(x), [
+ self._local_db_path, self._secrets_path])
+ for path in paths:
+ create_path_if_not_exists(path)
+
+ def _init_secrets(self):
+ """
+ Initialize Soledad secrets.
+ """
+ self._secrets = SoledadSecrets(
+ self.uuid, self._passphrase, self._secrets_path,
+ self.shared_db, self._crypto)
+ self._secrets.bootstrap()
+
+ def _init_u1db_sqlcipher_backend(self):
+ """
+ Initialize the U1DB SQLCipher database for local storage.
+
+ Instantiates a modified twisted adbapi that will maintain a threadpool
+ with a u1db-sqlcipher connection for each thread, and will return
+ deferreds for each u1db query.
+
+ Currently, Soledad uses the default SQLCipher cipher, i.e.
+ 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key,
+ and internally the SQLCipherDatabase initialization uses the 'raw
+ PRAGMA key' format to handle the key to SQLCipher.
+ """
+ tohex = binascii.b2a_hex
+ # sqlcipher only accepts the hex version
+ key = tohex(self._secrets.get_local_storage_key())
+ sync_db_key = tohex(self._secrets.get_sync_db_key())
+
+ opts = SQLCipherOptions(
+ self._local_db_path, key,
+ is_raw_key=True, create=True,
+ defer_encryption=self._defer_encryption,
+ sync_db_key=sync_db_key,
+ )
+ self._sqlcipher_opts = opts
+ self._dbpool = adbapi.getConnectionPool(opts)
+
+ def _init_u1db_syncer(self):
+ """
+ Initialize the U1DB synchronizer.
+ """
+ replica_uid = self._dbpool.replica_uid
+ self._dbsyncer = SQLCipherU1DBSync(
+ self._sqlcipher_opts, self._crypto, replica_uid,
+ SOLEDAD_CERT,
+ defer_encryption=self._defer_encryption)
+
+ #
+ # Closing methods
+ #
+
+ def close(self):
+ """
+ Close underlying U1DB database.
+ """
+ logger.debug("Closing soledad")
+ self._dbpool.close()
+ if getattr(self, '_dbsyncer', None):
+ self._dbsyncer.close()
+
+ #
+ # ILocalStorage
+ #
+
+ def _defer(self, meth, *args, **kw):
+ """
+ Defer a method to be run on a U1DB connection pool.
+
+ :param meth: A method to defer to the U1DB connection pool.
+ :type meth: callable
+ :return: A deferred.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._dbpool.runU1DBQuery(meth, *args, **kw)
+
+ def put_doc(self, doc):
+ """
+ Update a document.
+
+ If the document currently has conflicts, put will fail.
+ If the database specifies a maximum document size and the document
+ exceeds it, put will fail and raise a DocumentTooBig exception.
+
+ ============================== WARNING ==============================
+ This method converts the document's contents to unicode in-place. This
+ means that after calling `put_doc(doc)`, the contents of the
+ document, i.e. `doc.content`, might be different from before the
+ call.
+ ============================== WARNING ==============================
+
+ :param doc: A document with new content.
+ :type doc: leap.soledad.common.document.SoledadDocument
+ :return: A deferred whose callback will be invoked with the new
+ revision identifier for the document. The document object will
+ also be updated.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ doc.content = _convert_to_unicode(doc.content)
+ return self._defer("put_doc", doc)
+
+ def delete_doc(self, doc):
+ """
+ Mark a document as deleted.
+
+ Will abort if the current revision doesn't match doc.rev.
+ This will also set doc.content to None.
+
+ :param doc: A document to be deleted.
+ :type doc: leap.soledad.common.document.SoledadDocument
+ :return: A deferred.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("delete_doc", doc)
+
+ def get_doc(self, doc_id, include_deleted=False):
+ """
+ Get the JSON string for the given document.
+
+ :param doc_id: The unique document identifier
+ :type doc_id: str
+ :param include_deleted: If set to True, deleted documents will be
+ returned with empty content. Otherwise asking for a deleted
+ document will return None.
+ :type include_deleted: bool
+ :return: A deferred whose callback will be invoked with a document
+ object.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer(
+ "get_doc", doc_id, include_deleted=include_deleted)
+
+ def get_docs(
+ self, doc_ids, check_for_conflicts=True, include_deleted=False):
+ """
+ Get the JSON content for many documents.
+
+ :param doc_ids: A list of document identifiers.
+ :type doc_ids: list
+ :param check_for_conflicts: If set to False, then the conflict check
+ will be skipped, and 'None' will be returned instead of True/False.
+ :type check_for_conflicts: bool
+ :param include_deleted: If set to True, deleted documents will be
+ returned with empty content. Otherwise deleted documents will not
+ be included in the results.
+ :type include_deleted: bool
+ :return: A deferred whose callback will be invoked with an iterable
+ giving the document object for each document id in matching
+ doc_ids order.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer(
+ "get_docs", doc_ids, check_for_conflicts=check_for_conflicts,
+ include_deleted=include_deleted)
+
+ def get_all_docs(self, include_deleted=False):
+ """
+ Get the JSON content for all documents in the database.
+
+ :param include_deleted: If set to True, deleted documents will be
+ returned with empty content. Otherwise deleted documents will not
+ be included in the results.
+ :type include_deleted: bool
+
+ :return: A deferred which, when fired, will pass a tuple
+ containing (generation, [Document]) to the callback, with the
+ current generation of the database, followed by a list of all the
+ documents in the database.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("get_all_docs", include_deleted)
+
+ def create_doc(self, content, doc_id=None):
+ """
+ Create a new document.
+
+ You can optionally specify the document identifier, but the document
+ must not already exist. See 'put_doc' if you want to override an
+ existing document.
+ If the database specifies a maximum document size and the document
+ exceeds it, create will fail and raise a DocumentTooBig exception.
+
+ :param content: A Python dictionary.
+ :type content: dict
+ :param doc_id: An optional identifier specifying the document id.
+ :type doc_id: str
+ :return: A deferred whose callback will be invoked with a document.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ # TODO we probably should pass an optional "encoding" parameter to
+ # create_doc (and probably to put_doc too). There are cases (mail
+ # payloads for example) in which we already have the encoding in the
+ # headers, so we don't need to guess it.
+ return self._defer(
+ "create_doc", _convert_to_unicode(content), doc_id=doc_id)
+
+ def create_doc_from_json(self, json, doc_id=None):
+ """
+ Create a new document.
+
+ You can optionally specify the document identifier, but the document
+ must not already exist. See 'put_doc' if you want to override an
+ existing document.
+ If the database specifies a maximum document size and the document
+ exceeds it, create will fail and raise a DocumentTooBig exception.
+
+ :param json: The JSON document string
+ :type json: str
+ :param doc_id: An optional identifier specifying the document id.
+ :type doc_id: str
+ :return: A deferred whose callback will be invoked with a document.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("create_doc_from_json", json, doc_id=doc_id)
+
+ def create_index(self, index_name, *index_expressions):
+ """
+ Create a named index, which can then be queried for future lookups.
+
+ Creating an index which already exists is not an error, and is cheap.
+ Creating an index which does not match the index_expressions of the
+ existing index is an error.
+ Creating an index will block until the expressions have been evaluated
+ and the index generated.
+
+ :param index_name: A unique name which can be used as a key prefix
+ :type index_name: str
+ :param index_expressions: index expressions defining the index
+ information.
+
+ Examples:
+
+ "fieldname", or "fieldname.subfieldname" to index alphabetically
+ sorted on the contents of a field.
+
+ "number(fieldname, width)", "lower(fieldname)"
+ :type index_expressions: list of str
+ :return: A deferred.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("create_index", index_name, *index_expressions)
+
+ def delete_index(self, index_name):
+ """
+ Remove a named index.
+
+ :param index_name: The name of the index we are removing
+ :type index_name: str
+ :return: A deferred.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("delete_index", index_name)
+
+ def list_indexes(self):
+ """
+ List the definitions of all known indexes.
+
+ :return: A deferred whose callback will be invoked with a list of
+ [('index-name', ['field', 'field2'])] definitions.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("list_indexes")
+
+ def get_from_index(self, index_name, *key_values):
+ """
+ Return documents that match the keys supplied.
+
+ You must supply exactly the same number of values as have been defined
+ in the index. It is possible to do a prefix match by using '*' to
+ indicate a wildcard match. You can only supply '*' to trailing entries,
+ (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.)
+ It is also possible to append a '*' to the last supplied value (eg
+ 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*')
+
+ :param index_name: The index to query
+ :type index_name: str
+ :param key_values: values to match. eg, if you have
+ an index with 3 fields then you would have:
+ get_from_index(index_name, val1, val2, val3)
+ :type key_values: list
+ :return: A deferred whose callback will be invoked with a list of
+ [Document].
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("get_from_index", index_name, *key_values)
+
+ def get_count_from_index(self, index_name, *key_values):
+ """
+ Return the count for a given combination of index_name
+ and key values.
+
+ Extension method made from similar methods in u1db version 13.09
+
+ :param index_name: The index to query
+ :type index_name: str
+ :param key_values: values to match. eg, if you have
+ an index with 3 fields then you would have:
+ get_count_from_index(index_name, val1, val2, val3)
+ :type key_values: tuple
+ :return: A deferred whose callback will be invoked with the count.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("get_count_from_index", index_name, *key_values)
+
+ def get_range_from_index(self, index_name, start_value, end_value):
+ """
+ Return documents that fall within the specified range.
+
+ Both ends of the range are inclusive. For both start_value and
+ end_value, one must supply exactly the same number of values as have
+ been defined in the index, or pass None. In case of a single column
+ index, a string is accepted as an alternative for a tuple with a single
+ value. It is possible to do a prefix match by using '*' to indicate
+ a wildcard match. You can only supply '*' to trailing entries, (eg
+ 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also
+ possible to append a '*' to the last supplied value (eg 'val*', '*',
+ '*' or 'val', 'val*', '*', but not 'val*', 'val', '*')
+
+ :param index_name: The index to query
+ :type index_name: str
+ :param start_value: tuples of values that define the lower bound of
+ the range. eg, if you have an index with 3 fields then you would
+ have: (val1, val2, val3)
+ :type start_value: tuple
+ :param end_value: tuples of values that define the upper bound of the
+ range. eg, if you have an index with 3 fields then you would have:
+ (val1, val2, val3)
+ :type end_value: tuple
+ :return: A deferred whose callback will be invoked with a list of
+ [Document].
+ :rtype: twisted.internet.defer.Deferred
+ """
+
+ return self._defer(
+ "get_range_from_index", index_name, start_value, end_value)
+
+ def get_index_keys(self, index_name):
+ """
+ Return all keys under which documents are indexed in this index.
+
+ :param index_name: The index to query
+ :type index_name: str
+ :return: A deferred whose callback will be invoked with a list of
+ tuples of indexed keys.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("get_index_keys", index_name)
+
+ def get_doc_conflicts(self, doc_id):
+ """
+ Get the list of conflicts for the given document.
+
+ The order of the conflicts is such that the first entry is the value
+ that would be returned by "get_doc".
+
+ :param doc_id: The unique document identifier
+ :type doc_id: str
+ :return: A deferred whose callback will be invoked with a list of the
+ Document entries that are conflicted.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("get_doc_conflicts", doc_id)
+
+ def resolve_doc(self, doc, conflicted_doc_revs):
+ """
+ Mark a document as no longer conflicted.
+
+ We take the list of revisions that the client knows about that it is
+ superseding. This may be a different list from the actual current
+ conflicts, in which case only those are removed as conflicted. This
+ may fail if the conflict list is significantly different from the
+ supplied information. (sync could have happened in the background from
+ the time you GET_DOC_CONFLICTS until the point where you RESOLVE)
+
+ :param doc: A Document with the new content to be inserted.
+ :type doc: SoledadDocument
+ :param conflicted_doc_revs: A list of revisions that the new content
+ supersedes.
+ :type conflicted_doc_revs: list(str)
+ :return: A deferred.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._defer("resolve_doc", doc, conflicted_doc_revs)
+
+ @property
+ def local_db_path(self):
+ return self._local_db_path
+
+ @property
+ def uuid(self):
+ return self._uuid
+
+ #
+ # ISyncableStorage
+ #
+
+ def sync(self, defer_decryption=True):
+ """
+ Synchronize documents with the server replica.
+
+ :param defer_decryption:
+ Whether to defer decryption of documents, or do it inline while
+ syncing.
+ :type defer_decryption: bool
+
+ :return: A deferred whose callback will be invoked with the local
+ generation before the synchronization was performed.
+ :rtype: twisted.internet.defer.Deferred
+ """
+
+ # -----------------------------------------------------------------
+ # TODO this needs work.
+ # Should review/write tests to check that this:
+
+ # (1) Defer to the syncer pool -- DONE (on dbsyncer)
+ # (2) Return the deferred
+ # (3) Add the callback for signaling the event (executed on reactor
+ # thread)
+ # (4) Check that the deferred is called with the local gen.
+
+ # -----------------------------------------------------------------
+
+ sync_url = urlparse.urljoin(self._server_url, 'user-%s' % self.uuid)
+ d = self._dbsyncer.sync(
+ sync_url,
+ creds=self._creds,
+ defer_decryption=defer_decryption)
+
+ def _sync_callback(local_gen):
+ soledad_events.emit(
+ soledad_events.SOLEDAD_DONE_DATA_SYNC, self.uuid)
+ return local_gen
+
+ def _sync_errback(failure):
+ s = StringIO()
+ failure.printDetailedTraceback(file=s)
+ msg = "Soledad exception when syncing!\n" + s.getvalue()
+ logger.error(msg)
+ return failure
+
+ d.addCallbacks(_sync_callback, _sync_errback)
+ return d
+
+ @property
+ def syncing(self):
+ """
+ Return whether Soledad is currently synchronizing with the server.
+
+ :return: Whether Soledad is currently synchronizing with the server.
+ :rtype: bool
+ """
+ return self._dbsyncer.syncing
+
+ def _set_token(self, token):
+ """
+ Set the authentication token for remote database access.
+
+ Internally, this builds the credentials dictionary with the following
+ format:
+
+ {
+ 'token': {
+ 'uuid': '<uuid>',
+ 'token': '<token>'
+ }
+ }
+
+ :param token: The authentication token.
+ :type token: str
+ """
+ self._creds = {
+ 'token': {
+ 'uuid': self.uuid,
+ 'token': token,
+ }
+ }
+
+ def _get_token(self):
+ """
+ Return current token from credentials dictionary.
+ """
+ return self._creds['token']['token']
+
+ token = property(_get_token, _set_token, doc='The authentication Token.')
+
+ #
+ # ISecretsStorage
+ #
+
+ def init_shared_db(self, server_url, uuid, creds, syncable=True):
+ """
+ Initialize the shared database.
+
+ :param server_url: URL of the remote database.
+ :type server_url: str
+ :param uuid: The user's unique id.
+ :type uuid: str
+ :param creds: A tuple containing the authentication method and
+ credentials.
+ :type creds: tuple
+ :param syncable:
+ If syncable is False, the database will not attempt to sync against
+ a remote replica.
+ :type syncable: bool
+ """
+ # only case this is False is for testing purposes
+ if self.shared_db is None:
+ shared_db_url = urlparse.urljoin(server_url, SHARED_DB_NAME)
+ self.shared_db = SoledadSharedDatabase.open_database(
+ shared_db_url,
+ uuid,
+ creds=creds,
+ syncable=syncable)
+
+ @property
+ def storage_secret(self):
+ """
+ Return the secret used for local storage encryption.
+
+ :return: The secret used for local storage encryption.
+ :rtype: str
+ """
+ return self._secrets.storage_secret
+
+ @property
+ def remote_storage_secret(self):
+ """
+ Return the secret used for encryption of remotely stored data.
+
+ :return: The secret used for remote storage encryption.
+ :rtype: str
+ """
+ return self._secrets.remote_storage_secret
+
+ @property
+ def secrets(self):
+ """
+ Return the secrets object.
+
+ :return: The secrets object.
+ :rtype: SoledadSecrets
+ """
+ return self._secrets
+
+ def change_passphrase(self, new_passphrase):
+ """
+ Change the passphrase that encrypts the storage secret.
+
+ :param new_passphrase: The new passphrase.
+ :type new_passphrase: unicode
+
+ :raise NoStorageSecret: Raised if there's no storage secret available.
+ """
+ self._secrets.change_passphrase(new_passphrase)
+
+ #
+ # Raw SQLCIPHER Queries
+ #
+
+ def raw_sqlcipher_query(self, *args, **kw):
+ """
+ Run a raw sqlcipher query in the local database.
+ """
+ return self._dbpool.runQuery(*args, **kw)
+
+
+def _convert_to_unicode(content):
+ """
+ Convert content to unicode (or all the strings in content).
+
+ NOTE: Even though this method supports any type, it will
+ currently ignore the contents of lists, tuples or any iterable
+ other than dict. We don't need support for these at the
+ moment.
+
+ :param content: content to convert
+ :type content: object
+
+ :rtype: object
+ """
+ # Chardet doesn't guess very well with some smallish payloads.
+ # This parameter might need some empirical tweaking.
+ CUTOFF_CONFIDENCE = 0.90
+
+ if isinstance(content, unicode):
+ return content
+ elif isinstance(content, str):
+ encoding = "utf-8"
+ result = chardet.detect(content)
+ if result["confidence"] > CUTOFF_CONFIDENCE:
+ encoding = result["encoding"]
+ try:
+ content = content.decode(encoding)
+ except UnicodeError as e:
+ logger.error("Unicode error: {0!r}. Using 'replace'".format(e))
+ content = content.decode(encoding, 'replace')
+ return content
+ else:
+ if isinstance(content, dict):
+ for key in content.keys():
+ content[key] = _convert_to_unicode(content[key])
+ return content
+
+
+def create_path_if_not_exists(path):
+ try:
+ if not os.path.isdir(path):
+ logger.info('Creating directory: %s.' % path)
+ os.makedirs(path)
+ except OSError as exc:
+ if exc.errno == errno.EEXIST and os.path.isdir(path):
+ pass
+ else:
+ raise
+
+# ----------------------------------------------------------------------------
+# Monkey patching u1db to be able to provide a custom SSL cert
+# ----------------------------------------------------------------------------
+
+# We need a more reasonable timeout (in seconds)
+SOLEDAD_TIMEOUT = 120
+
+
+class VerifiedHTTPSConnection(httplib.HTTPSConnection):
+ """
+ HTTPSConnection verifying server side certificates.
+ """
+ # derived from httplib.py
+
+ def connect(self):
+ """
+ Connect to a host on a given (SSL) port.
+ """
+ try:
+ source = self.source_address
+ sock = socket.create_connection((self.host, self.port),
+ SOLEDAD_TIMEOUT, source)
+ except AttributeError:
+ # source_address was introduced in 2.7
+ sock = socket.create_connection((self.host, self.port),
+ SOLEDAD_TIMEOUT)
+ if self._tunnel_host:
+ self.sock = sock
+ self._tunnel()
+
+ self.sock = ssl.wrap_socket(sock,
+ ca_certs=SOLEDAD_CERT,
+ cert_reqs=ssl.CERT_REQUIRED)
+ match_hostname(self.sock.getpeercert(), self.host)
+
+
+old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection
+http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection
diff --git a/client/src/leap/soledad/client/auth.py b/client/src/leap/soledad/client/auth.py
index 72ab0008..6dfabeb4 100644
--- a/client/src/leap/soledad/client/auth.py
+++ b/client/src/leap/soledad/client/auth.py
@@ -14,15 +14,13 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
"""
Methods for token-based authentication.
These methods have to be included in all classes that extend HTTPClient so
they can do token-based auth requests to the Soledad server.
"""
-
+import base64
from u1db import errors
@@ -49,7 +47,7 @@ class TokenBasedAuth(object):
Return an authorization header to be included in the HTTP request, in
the form:
- [('Authorization', 'Token <base64 encoded creds')]
+ [('Authorization', 'Token <(base64 encoded) uuid:token>')]
:param method: The HTTP method.
:type method: str
@@ -64,7 +62,8 @@ class TokenBasedAuth(object):
if 'token' in self._creds:
uuid, token = self._creds['token']
auth = '%s:%s' % (uuid, token)
- return [('Authorization', 'Token %s' % auth.encode('base64')[:-1])]
+ b64_token = base64.b64encode(auth)
+ return [('Authorization', 'Token %s' % b64_token)]
else:
raise errors.UnknownAuthMethod(
'Wrong credentials: %s' % self._creds)
diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py
index 7133f804..bdbaa8e0 100644
--- a/client/src/leap/soledad/client/crypto.py
+++ b/client/src/leap/soledad/client/crypto.py
@@ -23,31 +23,14 @@ import hmac
import hashlib
import json
import logging
-import multiprocessing
-import threading
from pycryptopp.cipher.aes import AES
from pycryptopp.cipher.xsalsa20 import XSalsa20
-from zope.proxy import sameProxiedObjects
from leap.soledad.common import soledad_assert
from leap.soledad.common import soledad_assert_type
-from leap.soledad.common.document import SoledadDocument
-
-
-from leap.soledad.common.crypto import (
- EncryptionSchemes,
- UnknownEncryptionScheme,
- MacMethods,
- UnknownMacMethod,
- WrongMac,
- ENC_JSON_KEY,
- ENC_SCHEME_KEY,
- ENC_METHOD_KEY,
- ENC_IV_KEY,
- MAC_KEY,
- MAC_METHOD_KEY,
-)
+from leap.soledad.common import crypto
+
logger = logging.getLogger(__name__)
@@ -55,37 +38,23 @@ logger = logging.getLogger(__name__)
MAC_KEY_LENGTH = 64
-class EncryptionMethods(object):
- """
- Representation of encryption methods that can be used.
+def _assert_known_encryption_method(method):
"""
+ Assert that we can encrypt/decrypt the given C{method}.
- AES_256_CTR = 'aes-256-ctr'
- XSALSA20 = 'xsalsa20'
-
-#
-# Exceptions
-#
-
-
-class DocumentNotEncrypted(Exception):
- """
- Raised for failures in document encryption.
- """
- pass
-
-
-class UnknownEncryptionMethod(Exception):
- """
- Raised when trying to encrypt/decrypt with unknown method.
- """
- pass
-
+ :param method: The encryption method to assert.
+ :type method: str
-class NoSymmetricSecret(Exception):
- """
- Raised when trying to get a hashed passphrase.
+ :raise UnknownEncryptionMethodError: Raised when C{method} is unknown.
"""
+ valid_methods = [
+ crypto.EncryptionMethods.AES_256_CTR,
+ crypto.EncryptionMethods.XSALSA20,
+ ]
+ try:
+ soledad_assert(method in valid_methods)
+ except AssertionError:
+ raise crypto.UnknownEncryptionMethodError
def encrypt_sym(data, key, method):
@@ -104,25 +73,26 @@ def encrypt_sym(data, key, method):
:return: A tuple with the initial value and the encrypted data.
:rtype: (long, str)
+
+ :raise UnknownEncryptionMethodError: Raised when C{method} is unknown.
"""
soledad_assert_type(key, str)
-
soledad_assert(
len(key) == 32, # 32 x 8 = 256 bits.
'Wrong key size: %s bits (must be 256 bits long).' %
(len(key) * 8))
+ _assert_known_encryption_method(method)
+
iv = None
# AES-256 in CTR mode
- if method == EncryptionMethods.AES_256_CTR:
+ if method == crypto.EncryptionMethods.AES_256_CTR:
iv = os.urandom(16)
ciphertext = AES(key=key, iv=iv).process(data)
# XSalsa20
- elif method == EncryptionMethods.XSALSA20:
+ elif method == crypto.EncryptionMethods.XSALSA20:
iv = os.urandom(24)
ciphertext = XSalsa20(key=key, iv=iv).process(data)
- else:
- # raise if method is unknown
- raise UnknownEncryptionMethod('Unkwnown method: %s' % method)
+
return binascii.b2a_base64(iv), ciphertext
@@ -143,6 +113,8 @@ def decrypt_sym(data, key, method, **kwargs):
:return: The decrypted data.
:rtype: str
+
+ :raise UnknownEncryptionMethodError: Raised when C{method} is unknown.
"""
soledad_assert_type(key, str)
# assert params
@@ -152,17 +124,15 @@ def decrypt_sym(data, key, method, **kwargs):
soledad_assert(
'iv' in kwargs,
'%s needs an initial value.' % method)
+ _assert_known_encryption_method(method)
# AES-256 in CTR mode
- if method == EncryptionMethods.AES_256_CTR:
+ if method == crypto.EncryptionMethods.AES_256_CTR:
return AES(
key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data)
- elif method == EncryptionMethods.XSALSA20:
+ elif method == crypto.EncryptionMethods.XSALSA20:
return XSalsa20(
key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data)
- # raise if method is unknown
- raise UnknownEncryptionMethod('Unkwnown method: %s' % method)
-
def doc_mac_key(doc_id, secret):
"""
@@ -176,17 +146,13 @@ def doc_mac_key(doc_id, secret):
:param doc_id: The id of the document.
:type doc_id: str
- :param secret: soledad secret storage
- :type secret: Soledad.storage_secret
+ :param secret: The Soledad storage secret
+ :type secret: str
:return: The key.
:rtype: str
-
- :raise NoSymmetricSecret: if no symmetric secret was supplied.
"""
- if secret is None:
- raise NoSymmetricSecret()
-
+ soledad_assert(secret is not None)
return hmac.new(
secret[:MAC_KEY_LENGTH],
doc_id,
@@ -208,11 +174,11 @@ class SoledadCrypto(object):
self._soledad = soledad
def encrypt_sym(self, data, key,
- method=EncryptionMethods.AES_256_CTR):
+ method=crypto.EncryptionMethods.AES_256_CTR):
return encrypt_sym(data, key, method)
def decrypt_sym(self, data, key,
- method=EncryptionMethods.AES_256_CTR, **kwargs):
+ method=crypto.EncryptionMethods.AES_256_CTR, **kwargs):
return decrypt_sym(data, key, method, **kwargs)
def doc_mac_key(self, doc_id, secret):
@@ -224,7 +190,7 @@ class SoledadCrypto(object):
The password is derived using HMAC having sha256 as underlying hash
function. The key used for HMAC are the first
- C{soledad.REMOTE_STORAGE_SECRET_KENGTH} bytes of Soledad's storage
+ C{soledad.REMOTE_STORAGE_SECRET_LENGTH} bytes of Soledad's storage
secret stripped from the first MAC_KEY_LENGTH characters. The HMAC
message is C{doc_id}.
@@ -234,15 +200,10 @@ class SoledadCrypto(object):
:return: The passphrase.
:rtype: str
-
- :raise NoSymmetricSecret: if no symmetric secret was supplied.
"""
- if self.secret is None:
- raise NoSymmetricSecret()
+ soledad_assert(self.secret is not None)
return hmac.new(
- self.secret[
- MAC_KEY_LENGTH:
- self._soledad.REMOTE_STORAGE_SECRET_LENGTH],
+ self.secret[MAC_KEY_LENGTH:],
doc_id,
hashlib.sha256).digest()
@@ -251,17 +212,18 @@ class SoledadCrypto(object):
#
def _get_secret(self):
- return self._soledad.storage_secret
+ return self._soledad.secrets.remote_storage_secret
secret = property(
_get_secret, doc='The secret used for symmetric encryption')
+
#
# Crypto utilities for a SoledadDocument.
#
-
-def mac_doc(doc_id, doc_rev, ciphertext, mac_method, secret):
+def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv,
+ mac_method, secret):
"""
Calculate a MAC for C{doc} using C{ciphertext}.
@@ -277,21 +239,38 @@ def mac_doc(doc_id, doc_rev, ciphertext, mac_method, secret):
:type doc_rev: str
:param ciphertext: The content of the document.
:type ciphertext: str
+ :param enc_scheme: The encryption scheme.
+ :type enc_scheme: str
+ :param enc_method: The encryption method.
+ :type enc_method: str
+ :param enc_iv: The encryption initialization vector.
+ :type enc_iv: str
:param mac_method: The MAC method to use.
:type mac_method: str
- :param secret: soledad secret
- :type secret: Soledad.secret_storage
+ :param secret: The Soledad storage secret
+ :type secret: str
:return: The calculated MAC.
:rtype: str
- """
- if mac_method == MacMethods.HMAC:
- return hmac.new(
- doc_mac_key(doc_id, secret),
- str(doc_id) + str(doc_rev) + ciphertext,
- hashlib.sha256).digest()
- # raise if we do not know how to handle this MAC method
- raise UnknownMacMethod('Unknown MAC method: %s.' % mac_method)
+
+ :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown.
+ """
+ try:
+ soledad_assert(mac_method == crypto.MacMethods.HMAC)
+ except AssertionError:
+ raise crypto.UnknownMacMethodError
+ template = "{doc_id}{doc_rev}{ciphertext}{enc_scheme}{enc_method}{enc_iv}"
+ content = template.format(
+ doc_id=doc_id,
+ doc_rev=doc_rev,
+ ciphertext=ciphertext,
+ enc_scheme=enc_scheme,
+ enc_method=enc_method,
+ enc_iv=enc_iv)
+ return hmac.new(
+ doc_mac_key(doc_id, secret),
+ content,
+ hashlib.sha256).digest()
def encrypt_doc(crypto, doc):
@@ -319,12 +298,12 @@ def encrypt_docstr(docstr, doc_id, doc_rev, key, secret):
string representing the following:
{
- ENC_JSON_KEY: '<encrypted doc JSON string>',
- ENC_SCHEME_KEY: 'symkey',
- ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR,
- ENC_IV_KEY: '<the initial value used to encrypt>',
+ crypto.ENC_JSON_KEY: '<encrypted doc JSON string>',
+ crypto.ENC_SCHEME_KEY: 'symkey',
+ crypto.ENC_METHOD_KEY: crypto.EncryptionMethods.AES_256_CTR,
+ crypto.ENC_IV_KEY: '<the initial value used to encrypt>',
MAC_KEY: '<mac>'
- MAC_METHOD_KEY: 'hmac'
+ crypto.MAC_METHOD_KEY: 'hmac'
}
:param docstr: A representation of the document to be encrypted.
@@ -339,30 +318,40 @@ def encrypt_docstr(docstr, doc_id, doc_rev, key, secret):
:param key: The key used to encrypt ``data`` (must be 256 bits long).
:type key: str
- :param secret: The Soledad secret (used for MAC auth).
+ :param secret: The Soledad storage secret (used for MAC auth).
:type secret: str
:return: The JSON serialization of the dict representing the encrypted
content.
:rtype: str
"""
- # encrypt content using AES-256 CTR mode
- iv, ciphertext = encrypt_sym(
+ enc_scheme = crypto.EncryptionSchemes.SYMKEY
+ enc_method = crypto.EncryptionMethods.AES_256_CTR
+ mac_method = crypto.MacMethods.HMAC
+ enc_iv, ciphertext = encrypt_sym(
str(docstr), # encryption/decryption routines expect str
- key, method=EncryptionMethods.AES_256_CTR)
+ key, method=enc_method)
+ mac = binascii.b2a_hex( # store the mac as hex.
+ mac_doc(
+ doc_id,
+ doc_rev,
+ ciphertext,
+ enc_scheme,
+ enc_method,
+ enc_iv,
+ mac_method,
+ secret))
# Return a representation for the encrypted content. In the following, we
# convert binary data to hexadecimal representation so the JSON
# serialization does not complain about what it tries to serialize.
hex_ciphertext = binascii.b2a_hex(ciphertext)
return json.dumps({
- ENC_JSON_KEY: hex_ciphertext,
- ENC_SCHEME_KEY: EncryptionSchemes.SYMKEY,
- ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR,
- ENC_IV_KEY: iv,
- MAC_KEY: binascii.b2a_hex(mac_doc( # store the mac as hex.
- doc_id, doc_rev, ciphertext,
- MacMethods.HMAC, secret)),
- MAC_METHOD_KEY: MacMethods.HMAC,
+ crypto.ENC_JSON_KEY: hex_ciphertext,
+ crypto.ENC_SCHEME_KEY: enc_scheme,
+ crypto.ENC_METHOD_KEY: enc_method,
+ crypto.ENC_IV_KEY: enc_iv,
+ crypto.MAC_KEY: mac,
+ crypto.MAC_METHOD_KEY: mac_method,
})
@@ -384,27 +373,77 @@ def decrypt_doc(crypto, doc):
return decrypt_doc_dict(doc.content, doc.doc_id, doc.rev, key, secret)
+def _verify_doc_mac(doc_id, doc_rev, ciphertext, enc_scheme, enc_method,
+ enc_iv, mac_method, secret, doc_mac):
+ """
+ Verify that C{doc_mac} is a correct MAC for the given document.
+
+ :param doc_id: The id of the document.
+ :type doc_id: str
+ :param doc_rev: The revision of the document.
+ :type doc_rev: str
+ :param ciphertext: The content of the document.
+ :type ciphertext: str
+ :param enc_scheme: The encryption scheme.
+ :type enc_scheme: str
+ :param enc_method: The encryption method.
+ :type enc_method: str
+ :param enc_iv: The encryption initialization vector.
+ :type enc_iv: str
+ :param mac_method: The MAC method to use.
+ :type mac_method: str
+ :param secret: The Soledad storage secret
+ :type secret: str
+ :param doc_mac: The MAC to be verified against.
+ :type doc_mac: str
+
+ :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown.
+ :raise crypto.WrongMacError: Raised when MAC could not be verified.
+ """
+ calculated_mac = mac_doc(
+ doc_id,
+ doc_rev,
+ ciphertext,
+ enc_scheme,
+ enc_method,
+ enc_iv,
+ mac_method,
+ secret)
+ # we compare mac's hashes to avoid possible timing attacks that might
+ # exploit python's builtin comparison operator behaviour, which fails
+ # immediately when non-matching bytes are found.
+ doc_mac_hash = hashlib.sha256(
+ binascii.a2b_hex( # the mac is stored as hex
+ doc_mac)).digest()
+ calculated_mac_hash = hashlib.sha256(calculated_mac).digest()
+
+ if doc_mac_hash != calculated_mac_hash:
+ logger.warning("Wrong MAC while decrypting doc...")
+ raise crypto.WrongMacError("Could not authenticate document's "
+ "contents.")
+
+
def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret):
"""
- Decrypt C{doc}'s content.
+ Decrypt a symmetrically encrypted C{doc}'s content.
Return the JSON string representation of the document's decrypted content.
The passed doc_dict argument should have the following structure:
{
- ENC_JSON_KEY: '<enc_blob>',
- ENC_SCHEME_KEY: '<enc_scheme>',
- ENC_METHOD_KEY: '<enc_method>',
- ENC_IV_KEY: '<initial value used to encrypt>', # (optional)
+ crypto.ENC_JSON_KEY: '<enc_blob>',
+ crypto.ENC_SCHEME_KEY: '<enc_scheme>',
+ crypto.ENC_METHOD_KEY: '<enc_method>',
+ crypto.ENC_IV_KEY: '<initial value used to encrypt>', # (optional)
MAC_KEY: '<mac>'
- MAC_METHOD_KEY: 'hmac'
+ crypto.MAC_METHOD_KEY: 'hmac'
}
C{enc_blob} is the encryption of the JSON serialization of the document's
content. For now Soledad just deals with documents whose C{enc_scheme} is
- EncryptionSchemes.SYMKEY and C{enc_method} is
- EncryptionMethods.AES_256_CTR.
+ crypto.EncryptionSchemes.SYMKEY and C{enc_method} is
+ crypto.EncryptionMethods.AES_256_CTR.
:param doc_dict: The content of the document to be decrypted.
:type doc_dict: dict
@@ -423,48 +462,35 @@ def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret):
:return: The JSON serialization of the decrypted content.
:rtype: str
+
+ :raise UnknownEncryptionMethodError: Raised when trying to decrypt from an
+ unknown encryption method.
"""
- soledad_assert(ENC_JSON_KEY in doc_dict)
- soledad_assert(ENC_SCHEME_KEY in doc_dict)
- soledad_assert(ENC_METHOD_KEY in doc_dict)
- soledad_assert(MAC_KEY in doc_dict)
- soledad_assert(MAC_METHOD_KEY in doc_dict)
-
- # verify MAC
- ciphertext = binascii.a2b_hex( # content is stored as hex.
- doc_dict[ENC_JSON_KEY])
- mac = mac_doc(
- doc_id, doc_rev,
- ciphertext,
- doc_dict[MAC_METHOD_KEY], secret)
- # we compare mac's hashes to avoid possible timing attacks that might
- # exploit python's builtin comparison operator behaviour, which fails
- # immediatelly when non-matching bytes are found.
- doc_mac_hash = hashlib.sha256(
- binascii.a2b_hex( # the mac is stored as hex
- doc_dict[MAC_KEY])).digest()
- calculated_mac_hash = hashlib.sha256(mac).digest()
+ # assert document dictionary structure
+ expected_keys = set([
+ crypto.ENC_JSON_KEY,
+ crypto.ENC_SCHEME_KEY,
+ crypto.ENC_METHOD_KEY,
+ crypto.ENC_IV_KEY,
+ crypto.MAC_KEY,
+ crypto.MAC_METHOD_KEY,
+ ])
+ soledad_assert(expected_keys.issubset(set(doc_dict.keys())))
- if doc_mac_hash != calculated_mac_hash:
- logger.warning("Wrong MAC while decrypting doc...")
- raise WrongMac('Could not authenticate document\'s contents.')
- # decrypt doc's content
- enc_scheme = doc_dict[ENC_SCHEME_KEY]
- plainjson = None
- if enc_scheme == EncryptionSchemes.SYMKEY:
- enc_method = doc_dict[ENC_METHOD_KEY]
- if enc_method == EncryptionMethods.AES_256_CTR:
- soledad_assert(ENC_IV_KEY in doc_dict)
- plainjson = decrypt_sym(
- ciphertext, key,
- method=enc_method,
- iv=doc_dict[ENC_IV_KEY])
- else:
- raise UnknownEncryptionMethod(enc_method)
- else:
- raise UnknownEncryptionScheme(enc_scheme)
-
- return plainjson
+ ciphertext = binascii.a2b_hex(doc_dict[crypto.ENC_JSON_KEY])
+ enc_scheme = doc_dict[crypto.ENC_SCHEME_KEY]
+ enc_method = doc_dict[crypto.ENC_METHOD_KEY]
+ enc_iv = doc_dict[crypto.ENC_IV_KEY]
+ doc_mac = doc_dict[crypto.MAC_KEY]
+ mac_method = doc_dict[crypto.MAC_METHOD_KEY]
+
+ soledad_assert(enc_scheme == crypto.EncryptionSchemes.SYMKEY)
+
+ _verify_doc_mac(
+ doc_id, doc_rev, ciphertext, enc_scheme, enc_method,
+ enc_iv, mac_method, secret, doc_mac)
+
+ return decrypt_sym(ciphertext, key, method=enc_method, iv=enc_iv)
def is_symmetrically_encrypted(doc):
@@ -476,534 +502,8 @@ def is_symmetrically_encrypted(doc):
:rtype: bool
"""
- if doc.content and ENC_SCHEME_KEY in doc.content:
- if doc.content[ENC_SCHEME_KEY] == EncryptionSchemes.SYMKEY:
+ if doc.content and crypto.ENC_SCHEME_KEY in doc.content:
+ if doc.content[crypto.ENC_SCHEME_KEY] \
+ == crypto.EncryptionSchemes.SYMKEY:
return True
return False
-
-
-#
-# Encrypt/decrypt pools of workers
-#
-
-class SyncEncryptDecryptPool(object):
- """
- Base class for encrypter/decrypter pools.
- """
- WORKERS = 5
-
- def __init__(self, crypto, sync_db, write_lock):
- """
- Initialize the pool of encryption-workers.
-
- :param crypto: A SoledadCrypto instance to perform the encryption.
- :type crypto: leap.soledad.crypto.SoledadCrypto
-
- :param sync_db: a database connection handle
- :type sync_db: handle
-
- :param write_lock: a write lock for controlling concurrent access
- to the sync_db
- :type write_lock: threading.Lock
- """
- self._pool = multiprocessing.Pool(self.WORKERS)
- self._crypto = crypto
- self._sync_db = sync_db
- self._sync_db_write_lock = write_lock
-
- def close(self):
- """
- Cleanly close the pool of workers.
- """
- logger.debug("Closing %s" % (self.__class__.__name__,))
- self._pool.close()
- try:
- self._pool.join()
- except Exception:
- pass
-
- def terminate(self):
- """
- Terminate the pool of workers.
- """
- logger.debug("Terminating %s" % (self.__class__.__name__,))
- self._pool.terminate()
-
-
-def encrypt_doc_task(doc_id, doc_rev, content, key, secret):
- """
- Encrypt the content of the given document.
-
- :param doc_id: The document id.
- :type doc_id: str
- :param doc_rev: The document revision.
- :type doc_rev: str
- :param content: The serialized content of the document.
- :type content: str
- :param key: The encryption key.
- :type key: str
- :param secret: The Soledad secret (used for MAC auth).
- :type secret: str
-
- :return: A tuple containing the doc id, revision and encrypted content.
- :rtype: tuple(str, str, str)
- """
- encrypted_content = encrypt_docstr(
- content, doc_id, doc_rev, key, secret)
- return doc_id, doc_rev, encrypted_content
-
-
-class SyncEncrypterPool(SyncEncryptDecryptPool):
- """
- Pool of workers that spawn subprocesses to execute the symmetric encryption
- of documents to be synced.
- """
- # TODO implement throttling to reduce cpu usage??
- WORKERS = 5
- TABLE_NAME = "docs_tosync"
- FIELD_NAMES = "doc_id, rev, content"
-
- def encrypt_doc(self, doc, workers=True):
- """
- Symmetrically encrypt a document.
-
- :param doc: The document with contents to be encrypted.
- :type doc: SoledadDocument
-
- :param workers: Whether to defer the encryption to the multiprocess
- pool of workers. Useful for debugging purposes.
- :type workers: bool
- """
- soledad_assert(self._crypto is not None, "need a crypto object")
- docstr = doc.get_json()
- key = self._crypto.doc_passphrase(doc.doc_id)
- secret = self._crypto.secret
- args = doc.doc_id, doc.rev, docstr, key, secret
-
- try:
- if workers:
- res = self._pool.apply_async(
- encrypt_doc_task, args,
- callback=self.encrypt_doc_cb)
- else:
- # encrypt inline
- res = encrypt_doc_task(*args)
- self.encrypt_doc_cb(res)
-
- except Exception as exc:
- logger.exception(exc)
-
- def encrypt_doc_cb(self, result):
- """
- Insert results of encryption routine into the local sync database.
-
- :param result: A tuple containing the doc id, revision and encrypted
- content.
- :type result: tuple(str, str, str)
- """
- doc_id, doc_rev, content = result
- self.insert_encrypted_local_doc(doc_id, doc_rev, content)
-
- def insert_encrypted_local_doc(self, doc_id, doc_rev, content):
- """
- Insert the contents of the encrypted doc into the local sync
- database.
-
- :param doc_id: The document id.
- :type doc_id: str
- :param doc_rev: The document revision.
- :type doc_rev: str
- :param content: The serialized content of the document.
- :type content: str
- :param content: The encrypted document.
- :type content: str
- """
- sql_del = "DELETE FROM '%s' WHERE doc_id=?" % (self.TABLE_NAME,)
- sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?)" % (self.TABLE_NAME,)
-
- con = self._sync_db
- with self._sync_db_write_lock:
- with con:
- con.execute(sql_del, (doc_id, ))
- con.execute(sql_ins, (doc_id, doc_rev, content))
-
-
-def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret):
- """
- Decrypt the content of the given document.
-
- :param doc_id: The document id.
- :type doc_id: str
- :param doc_rev: The document revision.
- :type doc_rev: str
- :param content: The encrypted content of the document.
- :type content: str
- :param gen: The generation corresponding to the modification of that
- document.
- :type gen: int
- :param trans_id: The transaction id corresponding to the modification of
- that document.
- :type trans_id: str
- :param key: The encryption key.
- :type key: str
- :param secret: The Soledad secret (used for MAC auth).
- :type secret: str
-
- :return: A tuple containing the doc id, revision and decrypted content.
- :rtype: tuple(str, str, str)
- """
- decrypted_content = decrypt_doc_dict(
- content, doc_id, doc_rev, key, secret)
- return doc_id, doc_rev, decrypted_content, gen, trans_id
-
-
-def get_insertable_docs_by_gen(expected, got):
- """
- Return a list of documents ready to be inserted. This list is computed
- by aligning the expected list with the already gotten docs, and returning
- the maximum number of docs that can be processed in the expected order
- before finding a gap.
-
- :param expected: A list of generations to be inserted.
- :type expected: list
-
- :param got: A dictionary whose values are the docs to be inserted.
- :type got: dict
- """
- ordered = [got.get(i) for i in expected]
- if None in ordered:
- return ordered[:ordered.index(None)]
- else:
- return ordered
-
-
-class SyncDecrypterPool(SyncEncryptDecryptPool):
- """
- Pool of workers that spawn subprocesses to execute the symmetric decryption
- of documents that were received.
-
- The decryption of the received documents is done in two steps:
-
- 1. All the encrypted docs are collected, together with their generation
- and transaction-id
- 2. The docs are enqueued for decryption. When completed, they are
- inserted following the generation order.
- """
- # TODO implement throttling to reduce cpu usage??
- TABLE_NAME = "docs_received"
- FIELD_NAMES = "doc_id, rev, content, gen, trans_id"
-
- write_encrypted_lock = threading.Lock()
-
- def __init__(self, *args, **kwargs):
- """
- Initialize the decrypter pool, and setup a dict for putting the
- results of the decrypted docs until they are picked by the insert
- routine that gets them in order.
- """
- self._insert_doc_cb = kwargs.pop("insert_doc_cb")
- SyncEncryptDecryptPool.__init__(self, *args, **kwargs)
- self.decrypted_docs = {}
- self.source_replica_uid = None
-
- def set_source_replica_uid(self, source_replica_uid):
- """
- Set the source replica uid for this decrypter pool instance.
-
- :param source_replica_uid: The uid of the source replica.
- :type source_replica_uid: str
- """
- self.source_replica_uid = source_replica_uid
-
- def insert_encrypted_received_doc(self, doc_id, doc_rev, content,
- gen, trans_id):
- """
- Insert a received message with encrypted content, to be decrypted later
- on.
-
- :param doc_id: The Document ID.
- :type doc_id: str
- :param doc_rev: The Document Revision
- :type doc_rev: str
- :param content: the Content of the document
- :type content: str
- :param gen: the Document Generation
- :type gen: int
- :param trans_id: Transaction ID
- :type trans_id: str
- """
- docstr = json.dumps(content)
- sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?)" % (
- self.TABLE_NAME,)
-
- con = self._sync_db
- with self._sync_db_write_lock:
- with con:
- con.execute(sql_ins, (doc_id, doc_rev, docstr, gen, trans_id))
-
- def insert_marker_for_received_doc(self, doc_id, doc_rev, gen):
- """
- Insert a marker with the document id, revision and generation on the
- sync db. This document does not have an encrypted payload, so the
- content has already been inserted into the decrypted_docs dictionary
- from where it can be picked following generation order.
- We need to leave here the marker to be able to calculate the expected
- insertion order for a synchronization batch.
-
- :param doc_id: The Document ID.
- :type doc_id: str
- :param doc_rev: The Document Revision
- :type doc_rev: str
- :param gen: the Document Generation
- :type gen: int
- """
- sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?, ?, ?)" % (
- self.TABLE_NAME,)
- con = self._sync_db
- with self._sync_db_write_lock:
- with con:
- con.execute(sql_ins, (doc_id, doc_rev, '', gen, ''))
-
- def insert_received_doc(self, doc_id, doc_rev, content, gen, trans_id):
- """
- Insert a document that is not symmetrically encrypted.
- We store it in the staging area (the decrypted_docs dictionary) to be
- picked up in order as the preceding documents are decrypted.
-
- :param doc_id: The Document ID.
- :type doc_id: str
- :param doc_rev: The Document Revision
- :type doc_rev: str
- :param content: the Content of the document
- :type content: str
- :param gen: the Document Generation
- :type gen: int
- :param trans_id: Transaction ID
- :type trans_id: str
- """
- # XXX this need a deeper review / testing.
- # I believe that what I'm doing here is prone to problems
- # if the sync is interrupted (ie, client crash) in the worst possible
- # moment. We would need a recover strategy in that case
- # (or, insert the document in the table all the same, but with a flag
- # saying if the document is sym-encrypted or not),
- content = json.dumps(content)
- result = doc_id, doc_rev, content, gen, trans_id
- self.decrypted_docs[gen] = result
- self.insert_marker_for_received_doc(doc_id, doc_rev, gen)
-
- def delete_encrypted_received_doc(self, doc_id, doc_rev):
- """
- Delete an encrypted received doc after it was inserted into the local
- db.
-
- :param doc_id: Document ID.
- :type doc_id: str
- :param doc_rev: Document revision.
- :type doc_rev: str
- """
- sql_del = "DELETE FROM '%s' WHERE doc_id=? AND rev=?" % (
- self.TABLE_NAME,)
- con = self._sync_db
- with self._sync_db_write_lock:
- with con:
- con.execute(sql_del, (doc_id, doc_rev))
-
- def decrypt_doc(self, doc_id, rev, source_replica_uid, workers=True):
- """
- Symmetrically decrypt a document.
-
- :param doc_id: The ID for the document with contents to be decrypted.
- :type doc_id: str
- :param rev: The revision of the document.
- :type rev: str
- :param source_replica_uid:
- :type source_replica_uid: str
-
- :param workers: Whether to defer the decryption to the multiprocess
- pool of workers. Useful for debugging purposes.
- :type workers: bool
- """
- self.source_replica_uid = source_replica_uid
-
- # insert_doc_cb is a proxy object that gets updated with the right
- # insert function only when the sync_target invokes the sync_exchange
- # method. so, if we don't still have a non-empty callback, we refuse
- # to proceed.
- if sameProxiedObjects(self._insert_doc_cb.get(source_replica_uid),
- None):
- logger.debug("Sync decrypter pool: no insert_doc_cb() yet.")
- return
-
- # XXX move to get_doc function...
- c = self._sync_db.cursor()
- sql = "SELECT * FROM '%s' WHERE doc_id=? AND rev=?" % (
- self.TABLE_NAME,)
- try:
- c.execute(sql, (doc_id, rev))
- res = c.fetchone()
- except Exception as exc:
- logger.warning("Error getting docs from syncdb: %r" % (exc,))
- return
- if res is None:
- logger.debug("Doc %s:%s does not exist in sync db" % (doc_id, rev))
- return
-
- soledad_assert(self._crypto is not None, "need a crypto object")
- try:
- doc_id, rev, docstr, gen, trans_id = res
- except ValueError:
- logger.warning("Wrong entry in sync db")
- return
-
- if len(docstr) == 0:
- # not encrypted payload
- return
-
- try:
- content = json.loads(docstr)
- except TypeError:
- logger.warning("Wrong type while decoding json: %s" % repr(docstr))
- return
-
- key = self._crypto.doc_passphrase(doc_id)
- secret = self._crypto.secret
- args = doc_id, rev, content, gen, trans_id, key, secret
-
- try:
- if workers:
- # Ouch. This is sent to the workers asynchronously, so
- # we have no way of logging errors. We'd have to inspect
- # lingering results by querying successful / get() over them...
- # Or move the heck out of it to twisted.
- res = self._pool.apply_async(
- decrypt_doc_task, args,
- callback=self.decrypt_doc_cb)
- else:
- # decrypt inline
- res = decrypt_doc_task(*args)
- self.decrypt_doc_cb(res)
-
- except Exception as exc:
- logger.exception(exc)
-
- def decrypt_doc_cb(self, result):
- """
- Temporarily store the decryption result in a dictionary where it will
- be picked by process_decrypted.
-
- :param result: A tuple containing the doc id, revision and encrypted
- content.
- :type result: tuple(str, str, str)
- """
- doc_id, rev, content, gen, trans_id = result
- logger.debug("Sync decrypter pool: decrypted doc %s: %s %s" % (doc_id, rev, gen))
- self.decrypted_docs[gen] = result
-
- def get_docs_by_generation(self):
- """
- Get all documents in the received table from the sync db,
- ordered by generation.
-
- :return: list of doc_id, rev, generation
- """
- c = self._sync_db.cursor()
- sql = "SELECT doc_id, rev, gen FROM %s ORDER BY gen" % (
- self.TABLE_NAME,)
- c.execute(sql)
- return c.fetchall()
-
- def count_received_encrypted_docs(self):
- """
- Count how many documents we have in the table for received and
- encrypted docs.
-
- :return: The count of documents.
- :rtype: int
- """
- if self._sync_db is None:
- logger.warning("cannot return count with null sync_db")
- return
- c = self._sync_db.cursor()
- sql = "SELECT COUNT(*) FROM %s" % (self.TABLE_NAME,)
- c.execute(sql)
- res = c.fetchone()
- if res is not None:
- return res[0]
- else:
- return 0
-
- def decrypt_received_docs(self):
- """
- Get all the encrypted documents from the sync database and dispatch a
- decrypt worker to decrypt each one of them.
- """
- docs_by_generation = self.get_docs_by_generation()
- logger.debug("Sync decrypter pool: There are %d documents to " \
- "decrypt." % len(docs_by_generation))
- for doc_id, rev, gen in filter(None, docs_by_generation):
- self.decrypt_doc(doc_id, rev, self.source_replica_uid)
-
- def process_decrypted(self):
- """
- Process the already decrypted documents, and insert as many documents
- as can be taken from the expected order without finding a gap.
-
- :return: Whether we have processed all the pending docs.
- :rtype: bool
- """
- # Acquire the lock to avoid processing while we're still
- # getting data from the syncing stream, to avoid InvalidGeneration
- # problems.
- with self.write_encrypted_lock:
- already_decrypted = self.decrypted_docs
- docs = self.get_docs_by_generation()
- docs = filter(lambda entry: len(entry) > 0, docs)
- expected = [gen for doc_id, rev, gen in docs]
- docs_to_insert = get_insertable_docs_by_gen(
- expected, already_decrypted)
- for doc_fields in docs_to_insert:
- self.insert_decrypted_local_doc(*doc_fields)
- remaining = self.count_received_encrypted_docs()
- return remaining == 0
-
- def insert_decrypted_local_doc(self, doc_id, doc_rev, content,
- gen, trans_id):
- """
- Insert the decrypted document into the local sqlcipher database.
- Makes use of the passed callback `return_doc_cb` passed to the caller
- by u1db sync.
-
- :param doc_id: The document id.
- :type doc_id: str
- :param doc_rev: The document revision.
- :type doc_rev: str
- :param content: The serialized content of the document.
- :type content: str
- :param gen: The generation corresponding to the modification of that
- document.
- :type gen: int
- :param trans_id: The transaction id corresponding to the modification
- of that document.
- :type trans_id: str
- """
- # could pass source_replica in params for callback chain
- insert_fun = self._insert_doc_cb[self.source_replica_uid]
- logger.debug("Sync decrypter pool: inserting doc in local db: " \
- "%s:%s %s" % (doc_id, doc_rev, gen))
- try:
- # convert deleted documents to avoid error on document creation
- if content == 'null':
- content = None
- doc = SoledadDocument(doc_id, doc_rev, content)
- insert_fun(doc, int(gen), trans_id)
- except Exception as exc:
- logger.error("Sync decrypter pool: error while inserting "
- "decrypted doc into local db.")
- logger.exception(exc)
-
- else:
- # If no errors found, remove it from the local temporary dict
- # and from the received database.
- self.decrypted_docs.pop(gen)
- self.delete_encrypted_received_doc(doc_id, doc_rev)
diff --git a/client/src/leap/soledad/client/encdecpool.py b/client/src/leap/soledad/client/encdecpool.py
new file mode 100644
index 00000000..d9a72b25
--- /dev/null
+++ b/client/src/leap/soledad/client/encdecpool.py
@@ -0,0 +1,746 @@
+# -*- coding: utf-8 -*-
+# encdecpool.py
+# Copyright (C) 2015 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+"""
+A pool of concurrent and parallel encryption/decryption workers for use
+during synchronization.
+"""
+
+
+import multiprocessing
+import Queue
+import json
+import logging
+
+from twisted.internet import reactor
+from twisted.internet import defer
+from twisted.internet.threads import deferToThread
+
+from leap.soledad.common.document import SoledadDocument
+from leap.soledad.common import soledad_assert
+
+from leap.soledad.client.crypto import encrypt_docstr
+from leap.soledad.client.crypto import decrypt_doc_dict
+
+
+logger = logging.getLogger(__name__)
+
+
+#
+# Encrypt/decrypt pools of workers
+#
+
+class SyncEncryptDecryptPool(object):
+ """
+ Base class for encrypter/decrypter pools.
+ """
+
+ # TODO implement throttling to reduce cpu usage??
+ WORKERS = multiprocessing.cpu_count()
+
+ def __init__(self, crypto, sync_db):
+ """
+ Initialize the pool of encryption-workers.
+
+ :param crypto: A SoledadCrypto instance to perform the encryption.
+ :type crypto: leap.soledad.crypto.SoledadCrypto
+
+ :param sync_db: A database connection handle
+ :type sync_db: pysqlcipher.dbapi2.Connection
+ """
+ self._crypto = crypto
+ self._sync_db = sync_db
+ self._pool = multiprocessing.Pool(self.WORKERS)
+
+ def close(self):
+ """
+ Cleanly close the pool of workers.
+ """
+ logger.debug("Closing %s" % (self.__class__.__name__,))
+ self._pool.close()
+ try:
+ self._pool.join()
+ except Exception:
+ pass
+
+ def terminate(self):
+ """
+ Terminate the pool of workers.
+ """
+ logger.debug("Terminating %s" % (self.__class__.__name__,))
+ self._pool.terminate()
+
+ def _runOperation(self, query, *args):
+ """
+ Run an operation on the sync db.
+
+ :param query: The query to be executed.
+ :type query: str
+ :param args: A list of query arguments.
+ :type args: list
+
+ :return: A deferred that will fire when the operation in the database
+ has finished.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._sync_db.runOperation(query, *args)
+
+ def _runQuery(self, query, *args):
+ """
+ Run a query on the sync db.
+
+ :param query: The query to be executed.
+ :type query: str
+ :param args: A list of query arguments.
+ :type args: list
+
+ :return: A deferred that will fire with the results of the database
+ query.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ return self._sync_db.runQuery(query, *args)
+
+
+def encrypt_doc_task(doc_id, doc_rev, content, key, secret):
+ """
+ Encrypt the content of the given document.
+
+ :param doc_id: The document id.
+ :type doc_id: str
+ :param doc_rev: The document revision.
+ :type doc_rev: str
+ :param content: The serialized content of the document.
+ :type content: str
+ :param key: The encryption key.
+ :type key: str
+ :param secret: The Soledad storage secret (used for MAC auth).
+ :type secret: str
+
+ :return: A tuple containing the doc id, revision and encrypted content.
+ :rtype: tuple(str, str, str)
+ """
+ encrypted_content = encrypt_docstr(
+ content, doc_id, doc_rev, key, secret)
+ return doc_id, doc_rev, encrypted_content
+
+
+class SyncEncrypterPool(SyncEncryptDecryptPool):
+ """
+ Pool of workers that spawn subprocesses to execute the symmetric encryption
+ of documents to be synced.
+ """
+ TABLE_NAME = "docs_tosync"
+ FIELD_NAMES = "doc_id PRIMARY KEY, rev, content"
+
+ ENCRYPT_LOOP_PERIOD = 0.5
+
+ def __init__(self, *args, **kwargs):
+ """
+ Initialize the sync encrypter pool.
+ """
+ SyncEncryptDecryptPool.__init__(self, *args, **kwargs)
+
+ self._stopped = False
+ self._sync_queue = multiprocessing.Queue()
+
+ # start the encryption loop
+ self._deferred_loop = deferToThread(self._encrypt_docs_loop)
+ self._deferred_loop.addCallback(
+ lambda _: logger.debug("Finished encrypter thread."))
+
+ def enqueue_doc_for_encryption(self, doc):
+ """
+ Enqueue a document for encryption.
+
+ :param doc: The document to be encrypted.
+ :type doc: SoledadDocument
+ """
+ try:
+ self.sync_queue.put_nowait(doc)
+ except multiprocessing.Queue.Full:
+ # do not asynchronously encrypt this file if the queue is full
+ pass
+
+ def _encrypt_docs_loop(self):
+ """
+ Process the syncing queue and send the documents there to be encrypted
+ in the sync db. They will be read by the SoledadSyncTarget during the
+ sync_exchange.
+ """
+ logger.debug("Starting encrypter thread.")
+ while not self._stopped:
+ try:
+ doc = self._sync_queue.get(True, self.ENCRYPT_LOOP_PERIOD)
+ self._encrypt_doc(doc)
+ except Queue.Empty:
+ pass
+
+ def _encrypt_doc(self, doc):
+ """
+ Symmetrically encrypt a document.
+
+ :param doc: The document with contents to be encrypted.
+ :type doc: SoledadDocument
+
+ :param workers: Whether to defer the decryption to the multiprocess
+ pool of workers. Useful for debugging purposes.
+ :type workers: bool
+ """
+ soledad_assert(self._crypto is not None, "need a crypto object")
+ docstr = doc.get_json()
+ key = self._crypto.doc_passphrase(doc.doc_id)
+ secret = self._crypto.secret
+ args = doc.doc_id, doc.rev, docstr, key, secret
+ # encrypt asynchronously
+ self._pool.apply_async(
+ encrypt_doc_task, args,
+ callback=self._encrypt_doc_cb)
+
+ def _encrypt_doc_cb(self, result):
+ """
+ Insert results of encryption routine into the local sync database.
+
+ :param result: A tuple containing the doc id, revision and encrypted
+ content.
+ :type result: tuple(str, str, str)
+ """
+ doc_id, doc_rev, content = result
+ return self._insert_encrypted_local_doc(doc_id, doc_rev, content)
+
+ def _insert_encrypted_local_doc(self, doc_id, doc_rev, content):
+ """
+ Insert the contents of the encrypted doc into the local sync
+ database.
+
+ :param doc_id: The document id.
+ :type doc_id: str
+ :param doc_rev: The document revision.
+ :type doc_rev: str
+ :param content: The serialized content of the document.
+ :type content: str
+ """
+ query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?)" \
+ % (self.TABLE_NAME,)
+ return self._runOperation(query, (doc_id, doc_rev, content))
+
+ @defer.inlineCallbacks
+ def get_encrypted_doc(self, doc_id, doc_rev):
+ """
+ Get an encrypted document from the sync db.
+
+ :param doc_id: The id of the document.
+ :type doc_id: str
+ :param doc_rev: The revision of the document.
+ :type doc_rev: str
+
+ :return: A deferred that will fire with the encrypted content of the
+ document or None if the document was not found in the sync
+ db.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ logger.debug("Trying to get encrypted doc from sync db: %s" % doc_id)
+ query = "SELECT content FROM %s WHERE doc_id=? and rev=?" \
+ % self.TABLE_NAME
+ result = yield self._runQuery(query, (doc_id, doc_rev))
+ if result:
+ val = result.pop()
+ defer.returnValue(val[0])
+ defer.returnValue(None)
+
+ def delete_encrypted_doc(self, doc_id, doc_rev):
+ """
+ Delete an encrypted document from the sync db.
+
+ :param doc_id: The id of the document.
+ :type doc_id: str
+ :param doc_rev: The revision of the document.
+ :type doc_rev: str
+
+ :return: A deferred that will fire when the operation in the database
+ has finished.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ query = "DELETE FROM %s WHERE doc_id=? and rev=?" \
+ % self.TABLE_NAME
+ self._runOperation(query, (doc_id, doc_rev))
+
+ def close(self):
+ """
+ Close the encrypter pool.
+ """
+ self._stopped = True
+ self._sync_queue.close()
+ q = self._sync_queue
+ del q
+ self._sync_queue = None
+
+
+def decrypt_doc_task(doc_id, doc_rev, content, gen, trans_id, key, secret,
+ idx):
+ """
+ Decrypt the content of the given document.
+
+ :param doc_id: The document id.
+ :type doc_id: str
+ :param doc_rev: The document revision.
+ :type doc_rev: str
+ :param content: The encrypted content of the document.
+ :type content: str
+ :param gen: The generation corresponding to the modification of that
+ document.
+ :type gen: int
+ :param trans_id: The transaction id corresponding to the modification of
+ that document.
+ :type trans_id: str
+ :param key: The encryption key.
+ :type key: str
+ :param secret: The Soledad storage secret (used for MAC auth).
+ :type secret: str
+ :param idx: The index of this document in the current sync process.
+ :type idx: int
+
+ :return: A tuple containing the doc id, revision and decrypted content.
+ :rtype: tuple(str, str, str)
+ """
+ decrypted_content = decrypt_doc_dict(content, doc_id, doc_rev, key, secret)
+ return doc_id, doc_rev, decrypted_content, gen, trans_id, idx
+
+
+class SyncDecrypterPool(SyncEncryptDecryptPool):
+ """
+ Pool of workers that spawn subprocesses to execute the symmetric decryption
+ of documents that were received.
+
+ The decryption of the received documents is done in the following steps:
+
+ 1. Encrypted documents are stored in the sync db by the actual soledad
+ sync loop.
+ 2. The soledad sync loop tells us how many documents we should expect
+ to process.
+ 3. We start a decrypt-and-process loop:
+
+ a. Encrypted documents are fetched.
+ b. Encrypted documents are decrypted.
+ c. The longest possible list of decrypted documents is inserted
+ in the soledad db (this depends on which documents have already
+ arrived and which documents have already been decrypted, because
+ the order of insertion in the local soledad db matters).
+ d. Processed documents are deleted from the database.
+
+ 4. When we have processed as many documents as we should, the loop
+ finishes.
+ """
+ # TODO implement throttling to reduce cpu usage??
+ TABLE_NAME = "docs_received"
+ FIELD_NAMES = "doc_id PRIMARY KEY, rev, content, gen, " \
+ "trans_id, encrypted, idx"
+
+ """
+ Period of recurrence of the periodic decrypting task, in seconds.
+ """
+ DECRYPT_LOOP_PERIOD = 0.5
+
+ def __init__(self, *args, **kwargs):
+ """
+ Initialize the decrypter pool, and setup a dict for putting the
+ results of the decrypted docs until they are picked by the insert
+ routine that gets them in order.
+
+ :param insert_doc_cb: A callback for inserting received documents from
+ target. If not overridden, this will call u1db
+ insert_doc_from_target in synchronizer, which
+ implements the TAKE OTHER semantics.
+ :type insert_doc_cb: function
+ :param source_replica_uid: The source replica uid, used to find the
+ correct callback for inserting documents.
+ :type source_replica_uid: str
+ """
+ self._insert_doc_cb = kwargs.pop("insert_doc_cb")
+ self.source_replica_uid = kwargs.pop("source_replica_uid")
+ SyncEncryptDecryptPool.__init__(self, *args, **kwargs)
+
+ self._last_inserted_idx = 0
+ self._docs_to_process = None
+ self._processed_docs = 0
+
+ self._async_results = []
+ self._failure = None
+ self._finished = False
+
+ # XXX we want to empty the database before starting, but this is an
+ # asynchronous call, so we have to somehow make sure that it is
+ # executed before any other call to the database, without
+ # blocking.
+ self._empty()
+
+ def _launch_decrypt_and_process(self):
+ d = self._decrypt_and_process_docs()
+ d.addErrback(lambda f: self._set_failure(f))
+
+ def _schedule_decrypt_and_process(self):
+ reactor.callLater(
+ self.DECRYPT_LOOP_PERIOD,
+ self._launch_decrypt_and_process)
+
+ @property
+ def failure(self):
+ return self._failure
+
+ def _set_failure(self, failure):
+ self._failure = failure
+ self._finished = True
+
+ def failed(self):
+ return bool(self._failure)
+
+ def start(self, docs_to_process):
+ """
+ Set the number of documents we expect to process.
+
+ This should be called during the sync exchange process as soon
+ as we know how many documents are arriving from the server.
+
+ :param docs_to_process: The number of documents to process.
+ :type docs_to_process: int
+ """
+ self._docs_to_process = docs_to_process
+ self._schedule_decrypt_and_process()
+
+ def insert_encrypted_received_doc(
+ self, doc_id, doc_rev, content, gen, trans_id, idx):
+ """
+ Insert a received message with encrypted content, to be decrypted later
+ on.
+
+ :param doc_id: The Document ID.
+ :type doc_id: str
+ :param doc_rev: The Document Revision
+ :type doc_rev: str
+ :param content: the Content of the document
+ :type content: str
+ :param gen: the Document Generation
+ :type gen: int
+ :param trans_id: Transaction ID
+ :type trans_id: str
+ :param idx: The index of this document in the current sync process.
+ :type idx: int
+
+ :return: A deferred that will fire when the operation in the database
+ has finished.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ docstr = json.dumps(content)
+ query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \
+ % self.TABLE_NAME
+ return self._runOperation(
+ query, (doc_id, doc_rev, docstr, gen, trans_id, 1, idx))
+
+ def insert_received_doc(
+ self, doc_id, doc_rev, content, gen, trans_id, idx):
+ """
+ Insert a document that is not symmetrically encrypted.
+ We store it in the staging area (the received docs table of the sync db)
+ to be picked up in order as the preceding documents are decrypted.
+
+ :param doc_id: The Document ID.
+ :type doc_id: str
+ :param doc_rev: The Document Revision
+ :type doc_rev: str
+ :param content: the Content of the document
+ :type content: str
+ :param gen: the Document Generation
+ :type gen: int
+ :param trans_id: Transaction ID
+ :type trans_id: str
+ :param idx: The index of this document in the current sync process.
+ :type idx: int
+
+ :return: A deferred that will fire when the operation in the database
+ has finished.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ if not isinstance(content, str):
+ content = json.dumps(content)
+ query = "INSERT OR REPLACE INTO '%s' VALUES (?, ?, ?, ?, ?, ?, ?)" \
+ % self.TABLE_NAME
+ return self._runOperation(
+ query, (doc_id, doc_rev, content, gen, trans_id, 0, idx))
+
+ def _delete_received_doc(self, doc_id):
+ """
+ Delete a received doc after it was inserted into the local db.
+
+ :param doc_id: Document ID.
+ :type doc_id: str
+
+ :return: A deferred that will fire when the operation in the database
+ has finished.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ query = "DELETE FROM '%s' WHERE doc_id=?" \
+ % self.TABLE_NAME
+ return self._runOperation(query, (doc_id,))
+
+ def _async_decrypt_doc(self, doc_id, rev, content, gen, trans_id, idx):
+ """
+ Dispatch an asynchronous document decrypting routine and save the
+ result object.
+
+ :param doc_id: The ID for the document with contents to be decrypted.
+ :type doc_id: str
+ :param rev: The revision of the document.
+ :type rev: str
+ :param content: The serialized content of the document.
+ :type content: str
+ :param gen: The generation corresponding to the modification of that
+ document.
+ :type gen: int
+ :param trans_id: The transaction id corresponding to the modification
+ of that document.
+ :type trans_id: str
+ :param idx: The index of this document in the current sync process.
+ :type idx: int
+
+ :return: A deferred that will fire after the document hasa been
+ decrypted and inserted in the sync db.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ soledad_assert(self._crypto is not None, "need a crypto object")
+
+ content = json.loads(content)
+ key = self._crypto.doc_passphrase(doc_id)
+ secret = self._crypto.secret
+ args = doc_id, rev, content, gen, trans_id, key, secret, idx
+ # decrypt asynchronously
+ self._async_results.append(
+ self._pool.apply_async(
+ decrypt_doc_task, args))
+
+    def _decrypt_doc_cb(self, result):
+        """
+        Save a decryption result in the sync db, from where
+        _process_decrypted_docs will pick it up later.
+
+        :param result: A tuple containing the document's id, revision,
+                       content, generation, transaction id and sync index.
+        :type result: tuple(str, str, str, int, str, int)
+
+        :return: Whatever ``insert_received_doc`` returns; a deferred that
+                 fires after the document has been inserted in the sync db.
+        :rtype: twisted.internet.defer.Deferred
+        """
+        doc_id, rev, content, gen, trans_id, idx = result
+        message = "Sync decrypter pool: decrypted doc %s: %s %s %s" \
+            % (doc_id, rev, gen, trans_id)
+        logger.debug(message)
+        return self.insert_received_doc(
+            doc_id, rev, content, gen, trans_id, idx)
+
+    def _get_docs(self, encrypted=None, order_by='idx', order='ASC'):
+        """
+        Query the received docs table of the sync db.
+
+        :param encrypted: If not None, restrict the results to rows whose
+                          ``encrypted`` column equals ``int(encrypted)``.
+        :type encrypted: bool or None
+        :param order_by: The name of the column used to order the results.
+        :type order_by: str
+        :param order: Either ASC or DESC.
+        :type order: str
+
+        :return: Whatever ``self._runQuery`` returns; a deferred that fires
+                 with rows of (doc_id, rev, content, gen, trans_id,
+                 encrypted, idx).
+        :rtype: twisted.internet.defer.Deferred
+        """
+        # NOTE: order_by and order are interpolated verbatim into the SQL
+        # text, so they must only ever come from trusted code.
+        pieces = [
+            "SELECT doc_id, rev, content, gen, trans_id, encrypted, "
+            "idx FROM %s" % self.TABLE_NAME]
+        if encrypted is not None:
+            pieces.append(" WHERE encrypted = %d" % int(encrypted))
+        pieces.append(" ORDER BY %s %s" % (order_by, order))
+        return self._runQuery("".join(pieces))
+
+    @defer.inlineCallbacks
+    def _get_insertable_docs(self):
+        """
+        Collect the decrypted documents that can be inserted right now.
+
+        Documents must be inserted in sync order, so only the unbroken run
+        of indexes that directly follows the last inserted index qualifies.
+
+        :return: A deferred that will fire with a list of
+                 (doc_id, rev, content, gen, trans_id, idx) tuples.
+        :rtype: twisted.internet.defer.Deferred
+        """
+        rows = yield self._get_docs(encrypted=False)
+        ready = []
+        # index the next insertable document is required to have
+        expected = self._last_inserted_idx + 1
+        for doc_id, rev, content, gen, trans_id, _encrypted, idx in rows:
+            # XXX a document that was already processed may still be in the
+            #     sync database at this point. That is a bug: processed
+            #     documents should have been removed and no skipping should
+            #     be needed. The cause has to be found and fixed, and this
+            #     skip removed.
+            if idx < expected:
+                continue
+            # a gap in the sequence: later documents have to wait
+            if idx != expected:
+                break
+            ready.append((doc_id, rev, content, gen, trans_id, idx))
+            expected += 1
+        defer.returnValue(ready)
+
+    @defer.inlineCallbacks
+    def _async_decrypt_received_docs(self):
+        """
+        Fetch every encrypted document from the sync database and dispatch
+        an asynchronous decryption for each of them.
+
+        :return: A deferred that fires once all the encrypted rows have
+                 been handed to ``_async_decrypt_doc``.
+        :rtype: twisted.internet.defer.Deferred
+        """
+        encrypted_rows = yield self._get_docs(encrypted=True)
+        for row in encrypted_rows:
+            # the sixth column is the ``encrypted`` flag, not needed here
+            doc_id, rev, content, gen, trans_id, _, idx = row
+            self._async_decrypt_doc(
+                doc_id, rev, content, gen, trans_id, idx)
+
+    @defer.inlineCallbacks
+    def _process_decrypted_docs(self):
+        """
+        Insert in the local replica as many decrypted documents as the
+        expected order allows.
+
+        :return: A deferred that will fire with the list of documents
+                 passed on for insertion.
+        :rtype: twisted.internet.defer.Deferred
+        """
+        ready = yield self._get_insertable_docs()
+        for fields in ready:
+            self._insert_decrypted_local_doc(*fields)
+        defer.returnValue(ready)
+
+    def _delete_processed_docs(self, inserted):
+        """
+        Remove from the sync db the documents that were already processed.
+
+        :param inserted: List of documents inserted in the previous process
+                         step, as 6-tuples whose first item is the doc id.
+        :type inserted: list
+
+        :return: A deferred that fires when every delete operation has
+                 finished (immediately if there was nothing to delete).
+        :rtype: twisted.internet.defer.Deferred
+        """
+        pending = [
+            self._delete_received_doc(doc_id)
+            for doc_id, _, _, _, _, _ in inserted]
+        if pending:
+            return defer.gatherResults(pending)
+        return defer.succeed(None)
+
+    def _insert_decrypted_local_doc(self, doc_id, doc_rev, content,
+                                    gen, trans_id, idx):
+        """
+        Insert one decrypted document into the local replica.
+
+        The insertion is delegated to ``self._insert_doc_cb``, the callback
+        handed to this object by u1db sync.
+
+        :param doc_id: The document id.
+        :type doc_id: str
+        :param doc_rev: The document revision.
+        :type doc_rev: str
+        :param content: The serialized content of the document.
+        :type content: str
+        :param gen: The generation corresponding to the modification of that
+                    document.
+        :type gen: int
+        :param trans_id: The transaction id corresponding to the modification
+                         of that document.
+        :type trans_id: str
+        :param idx: The index of this document in the current sync process.
+        :type idx: int
+        """
+        # could pass source_replica in params for callback chain
+        message = "Sync decrypter pool: inserting doc in local db: " \
+            "%s:%s %s" % (doc_id, doc_rev, gen)
+        logger.debug(message)
+
+        # a deleted document arrives as the string 'null'; turn it into None
+        # to avoid an error on document creation
+        doc_content = None if content == 'null' else content
+        self._insert_doc_cb(
+            SoledadDocument(doc_id, doc_rev, doc_content),
+            int(gen),
+            trans_id)
+
+        # bookkeeping used to decide what can be inserted next and when
+        # the whole batch is done
+        self._last_inserted_idx = idx
+        self._processed_docs += 1
+
+    def _empty(self):
+        """
+        Delete every row of the received docs table of the sync database.
+
+        :return: Whatever ``self._runOperation`` returns; a deferred that
+                 fires when the database operation has finished.
+        :rtype: twisted.internet.defer.Deferred
+        """
+        return self._runOperation(
+            "DELETE FROM %s WHERE 1" % (self.TABLE_NAME,))
+
+    def _collect_async_decryption_results(self):
+        """
+        Collect the results of the asynchronous doc decryptions that are
+        already finished and store them in the sync db.
+
+        Any exception raised by a multiprocessing async decryption call is
+        re-raised here by ``res.get()``.
+
+        The deferreds returned by ``_decrypt_doc_cb`` are gathered and
+        returned, so that a caller which yields on this method really waits
+        for the sync db inserts (and sees their failures) before it goes on
+        to look for insertable documents.
+
+        :return: A deferred that fires when every result collected in this
+                 pass has been inserted in the sync db (immediately if no
+                 result was ready).
+        :rtype: twisted.internet.defer.Deferred
+
+        :raise Exception: Raised if an async call has raised an exception.
+        """
+        pending_inserts = []
+        # iterate over a copy, because finished results are removed from
+        # the original list while looping
+        for res in self._async_results[:]:
+            if not res.ready():
+                continue
+            # get() might raise an exception!
+            pending_inserts.append(self._decrypt_doc_cb(res.get()))
+            self._async_results.remove(res)
+        if not pending_inserts:
+            return defer.succeed(None)
+        return defer.gatherResults(pending_inserts)
+
+ @defer.inlineCallbacks
+ def _decrypt_and_process_docs(self):
+ """
+ Decrypt the documents received from remote replica and insert them
+ into the local one.
+
+ This method implicitly returns a deferred (see the decorator
+ above). It should only be called by _launch_decrypt_and_process(),
+ because this way any exceptions raised here will be stored by the
+ errback attached to the deferred returned.
+
+ :return: A deferred which will fire after all decrypt, process and
+ delete operations have been executed.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ if not self.failed():
+ if self._processed_docs < self._docs_to_process:
+ # one pass: dispatch decryptions, collect the finished ones, insert
+ # what is in order and drop the inserted docs from the sync db
+ yield self._async_decrypt_received_docs()
+ yield self._collect_async_decryption_results()
+ docs = yield self._process_decrypted_docs()
+ yield self._delete_processed_docs(docs)
+ # recurse
+ # NOTE(review): _schedule_decrypt_and_process is defined outside this
+ # view; presumably it arranges for another pass -- confirm.
+ self._schedule_decrypt_and_process()
+ else:
+ self._finished = True
+
+    def has_finished(self):
+        """
+        Tell whether the decrypter has finished its work.
+
+        :return: The current value of the ``_finished`` flag.
+        """
+        finished = self._finished
+        return finished
diff --git a/client/src/leap/soledad/client/events.py b/client/src/leap/soledad/client/events.py
index c4c09ac5..b1379521 100644
--- a/client/src/leap/soledad/client/events.py
+++ b/client/src/leap/soledad/client/events.py
@@ -20,39 +20,35 @@
Signaling functions.
"""
+from leap.common.events import emit
+from leap.common.events import catalog
-SOLEDAD_CREATING_KEYS = 'Creating keys...'
-SOLEDAD_DONE_CREATING_KEYS = 'Done creating keys.'
-SOLEDAD_DOWNLOADING_KEYS = 'Downloading keys...'
-SOLEDAD_DONE_DOWNLOADING_KEYS = 'Done downloading keys.'
-SOLEDAD_UPLOADING_KEYS = 'Uploading keys...'
-SOLEDAD_DONE_UPLOADING_KEYS = 'Done uploading keys.'
-SOLEDAD_NEW_DATA_TO_SYNC = 'New data available.'
-SOLEDAD_DONE_DATA_SYNC = 'Done data sync.'
-SOLEDAD_SYNC_SEND_STATUS = 'Sync: sent one document.'
-SOLEDAD_SYNC_RECEIVE_STATUS = 'Sync: received one document.'
-# we want to use leap.common.events to emits signals, if it is available.
-try:
- from leap.common import events
- from leap.common.events import signal
- SOLEDAD_CREATING_KEYS = events.proto.SOLEDAD_CREATING_KEYS
- SOLEDAD_DONE_CREATING_KEYS = events.proto.SOLEDAD_DONE_CREATING_KEYS
- SOLEDAD_DOWNLOADING_KEYS = events.proto.SOLEDAD_DOWNLOADING_KEYS
- SOLEDAD_DONE_DOWNLOADING_KEYS = \
- events.proto.SOLEDAD_DONE_DOWNLOADING_KEYS
- SOLEDAD_UPLOADING_KEYS = events.proto.SOLEDAD_UPLOADING_KEYS
- SOLEDAD_DONE_UPLOADING_KEYS = \
- events.proto.SOLEDAD_DONE_UPLOADING_KEYS
- SOLEDAD_NEW_DATA_TO_SYNC = events.proto.SOLEDAD_NEW_DATA_TO_SYNC
- SOLEDAD_DONE_DATA_SYNC = events.proto.SOLEDAD_DONE_DATA_SYNC
- SOLEDAD_SYNC_SEND_STATUS = events.proto.SOLEDAD_SYNC_SEND_STATUS
- SOLEDAD_SYNC_RECEIVE_STATUS = events.proto.SOLEDAD_SYNC_RECEIVE_STATUS
+SOLEDAD_CREATING_KEYS = catalog.SOLEDAD_CREATING_KEYS
+SOLEDAD_DONE_CREATING_KEYS = catalog.SOLEDAD_DONE_CREATING_KEYS
+SOLEDAD_DOWNLOADING_KEYS = catalog.SOLEDAD_DOWNLOADING_KEYS
+SOLEDAD_DONE_DOWNLOADING_KEYS = \
+ catalog.SOLEDAD_DONE_DOWNLOADING_KEYS
+SOLEDAD_UPLOADING_KEYS = catalog.SOLEDAD_UPLOADING_KEYS
+SOLEDAD_DONE_UPLOADING_KEYS = \
+ catalog.SOLEDAD_DONE_UPLOADING_KEYS
+SOLEDAD_NEW_DATA_TO_SYNC = catalog.SOLEDAD_NEW_DATA_TO_SYNC
+SOLEDAD_DONE_DATA_SYNC = catalog.SOLEDAD_DONE_DATA_SYNC
+SOLEDAD_SYNC_SEND_STATUS = catalog.SOLEDAD_SYNC_SEND_STATUS
+SOLEDAD_SYNC_RECEIVE_STATUS = catalog.SOLEDAD_SYNC_RECEIVE_STATUS
-except ImportError:
- # we define a fake signaling function and fake signal constants that will
- # allow for logging signaling attempts in case leap.common.events is not
- # available.
- def signal(signal, content=""):
- logger.info("Would signal: %s - %s." % (str(signal), content))
+__all__ = [
+ "catalog",
+ "emit",
+ "SOLEDAD_CREATING_KEYS",
+ "SOLEDAD_DONE_CREATING_KEYS",
+ "SOLEDAD_DOWNLOADING_KEYS",
+ "SOLEDAD_DONE_DOWNLOADING_KEYS",
+ "SOLEDAD_UPLOADING_KEYS",
+ "SOLEDAD_DONE_UPLOADING_KEYS",
+ "SOLEDAD_NEW_DATA_TO_SYNC",
+ "SOLEDAD_DONE_DATA_SYNC",
+ "SOLEDAD_SYNC_SEND_STATUS",
+ "SOLEDAD_SYNC_RECEIVE_STATUS",
+]
diff --git a/client/src/leap/soledad/client/examples/README b/client/src/leap/soledad/client/examples/README
new file mode 100644
index 00000000..3aed8377
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/README
@@ -0,0 +1,4 @@
+Right now, this directory contains both a usage example
+and the benchmarking scripts.
+TODO: move the benchmark scripts to the root scripts/ folder,
+and leave only a minimal example here.
diff --git a/client/src/leap/soledad/client/examples/benchmarks/.gitignore b/client/src/leap/soledad/client/examples/benchmarks/.gitignore
new file mode 100644
index 00000000..2211df63
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/.gitignore
@@ -0,0 +1 @@
+*.txt
diff --git a/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh
new file mode 100755
index 00000000..1995eee1
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Create the scratch directory and download the sample text used by the
+# benchmarks. -p keeps a second run from failing on an existing directory.
+mkdir -p tmp
+wget http://www.gutenberg.org/cache/epub/101/pg101.txt -O hacker_crackdown.txt
diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py
new file mode 100644
index 00000000..7fa1e38f
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py
@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+# measure_index_times.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Measure u1db retrieval times for different u1db index situations.
+"""
+from __future__ import print_function
+from functools import partial
+import datetime
+import hashlib
+import os
+import sys
+
+import u1db
+from twisted.internet import defer, reactor
+
+from leap.soledad.client import adbapi
+from leap.soledad.client.sqlcipher import SQLCipherOptions
+
+
+folder = os.environ.get("TMPDIR", "tmp")
+numdocs = int(os.environ.get("DOCS", "1000"))
+silent = os.environ.get("SILENT", False)
+tmpdb = os.path.join(folder, "test.soledad")
+
+
+sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt")
+sample_path = os.path.join(os.curdir, sample_file)
+
+try:
+ with open(sample_file) as f:
+ SAMPLE = f.readlines()
+except Exception:
+ print("[!] Problem opening sample file. Did you download "
+ "the sample, or correctly set 'SAMPLE' env var?")
+ sys.exit(1)
+
+if numdocs > len(SAMPLE):
+ print("[!] Sorry! The requested DOCS number is larger than "
+ "the num of lines in our sample file")
+ sys.exit(1)
+
+
+def debug(*args):
+    """Print the given values, unless the SILENT setting is on."""
+    if silent:
+        return
+    print(*args)
+
+debug("[+] db path:", tmpdb)
+debug("[+] num docs", numdocs)
+
+if os.path.isfile(tmpdb):
+ debug("[+] Removing existing db file...")
+ os.remove(tmpdb)
+
+start_time = datetime.datetime.now()
+
+opts = SQLCipherOptions(tmpdb, "secret", create=True)
+dbpool = adbapi.getConnectionPool(opts)
+
+
+def createDoc(doc):
+    """
+    Run the u1db ``create_doc`` operation for ``doc`` through the
+    connection pool and return what the pool returns (callers attach
+    callbacks to it).
+    """
+    pending = dbpool.runU1DBQuery("create_doc", doc)
+    return pending
+
+db_indexes = {
+ 'by-chash': ['chash'],
+ 'by-number': ['number']}
+
+
+def create_indexes(_):
+    """
+    Create every index listed in ``db_indexes``.
+
+    The argument is ignored; it lets the function be used as a callback.
+    Returns a deferred that fires when all the indexes have been created.
+    """
+    pending = [
+        dbpool.runU1DBQuery("create_index", name, *fields)
+        for name, fields in db_indexes.items()]
+    return defer.gatherResults(pending)
+
+
+class TimeWitness(object):
+    """Remember a starting instant and report the time elapsed since it."""
+
+    def __init__(self, init_time):
+        # init_time: the datetime from which elapsed time is measured
+        self.init_time = init_time
+
+    def get_time_count(self):
+        """Return the timedelta between now and the starting instant."""
+        now = datetime.datetime.now()
+        return now - self.init_time
+
+
+def get_from_index(_):
+    """
+    Query the ``by-chash`` index for one fixed hash and report how long the
+    query took.
+
+    The argument is ignored; it lets the function be used as a callback.
+    Returns the deferred of the query, which fires with the query result.
+    """
+    started = datetime.datetime.now()
+    debug("GETTING FROM INDEX...", started)
+
+    def report(res, witness):
+        print("RESULT->", res)
+        print("Index Query Took: ", witness.get_time_count())
+        return res
+
+    # XXX this is the hash of line 89 from the hacker crackdown...
+    # Should accept any other optional hash as an environment variable.
+    # (formerly tried hash:
+    # 1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5)
+    query = dbpool.runU1DBQuery(
+        "get_from_index", "by-chash",
+        "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469")
+    query.addCallback(report, TimeWitness(started))
+    return query
+
+
+def getAllDocs():
+    """Run the u1db ``get_all_docs`` operation through the connection pool."""
+    pending = dbpool.runU1DBQuery("get_all_docs")
+    return pending
+
+
+def errBack(e):
+    """
+    Errback: announce the error, print the traceback of failure ``e`` and
+    stop the reactor.
+    """
+    failure = e
+    debug("[!] ERROR FOUND!!!")
+    failure.printTraceback()
+    reactor.stop()
+
+
+def countDocs(_):
+    """
+    Fetch all the documents, check their count and finish the run.
+
+    The argument is ignored; it lets the function be used as a callback.
+    Returns the deferred of the whole chain.
+    """
+    debug("counting docs...")
+    d = getAllDocs()
+    # A single errback at the end of the chain: a failure anywhere skips
+    # the remaining callbacks and reaches errBack exactly once. With
+    # addCallbacks(x, errBack) at every step, errBack's None result put the
+    # chain back on the callback path, so allDone ran too and called
+    # reactor.stop() a second time.
+    d.addCallback(printResult)
+    d.addCallback(allDone)
+    d.addErrback(errBack)
+    return d
+
+
+def printResult(r, **kwargs):
+    """
+    Report a query result.
+
+    With keyword arguments, only their values are printed. A single u1db
+    document is printed as its id and number. Anything else is treated as
+    a ``get_all_docs`` result, whose second item is the list of documents:
+    its length is checked against ``numdocs``.
+
+    :raise ValueError: if the number of documents is not ``numdocs``.
+    """
+    if kwargs:
+        debug(*kwargs.values())
+        return
+    if isinstance(r, u1db.Document):
+        debug(r.doc_id, r.content['number'])
+        return
+
+    len_results = len(r[1])
+    debug("GOT %s results" % len_results)
+    if len_results != numdocs:
+        debug("[!] MISSING DOCS!!!!!")
+        raise ValueError("We didn't expect this result len")
+    debug("ALL GOOD")
+
+
+def allDone(_):
+    """
+    Final callback: print the total run time in seconds and stop the
+    reactor. The argument is ignored.
+    """
+    debug("ALL DONE!")
+
+    # the elapsed time is printed regardless of the SILENT setting
+    elapsed = datetime.datetime.now() - start_time
+    print(elapsed.total_seconds())
+    reactor.stop()
+
+
+def insert_docs(_):
+    """
+    Create one document for each of the first ``numdocs`` sample lines.
+
+    Every document holds its number, the line itself and the sha256 hex
+    digest of the line. The argument is ignored; it lets the function be
+    used as a callback. Returns a deferred that fires when all the
+    insertions are done.
+    """
+    def insert_one(i):
+        payload = SAMPLE[i]
+        chash = hashlib.sha256(payload).hexdigest()
+        pending = createDoc(
+            {"number": i, "payload": payload, 'chash': chash})
+        pending.addCallbacks(
+            partial(printResult, i=i, chash=chash, payload=payload),
+            lambda e: e.printTraceback())
+        return pending
+
+    deferreds = [insert_one(i) for i in range(numdocs)]
+    return defer.gatherResults(deferreds, consumeErrors=True)
+
+d = create_indexes(None)
+d.addCallback(insert_docs)
+d.addCallback(get_from_index)
+d.addCallback(countDocs)
+
+reactor.run()
diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py
new file mode 100644
index 00000000..c6d76e6b
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py
@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+# measure_index_times_custom_docid.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Measure u1db retrieval times for different u1db index situations.
+"""
+from __future__ import print_function
+from functools import partial
+import datetime
+import hashlib
+import os
+import sys
+
+import u1db
+from twisted.internet import defer, reactor
+
+from leap.soledad.client import adbapi
+from leap.soledad.client.sqlcipher import SQLCipherOptions
+
+
+folder = os.environ.get("TMPDIR", "tmp")
+numdocs = int(os.environ.get("DOCS", "1000"))
+silent = os.environ.get("SILENT", False)
+tmpdb = os.path.join(folder, "test.soledad")
+
+
+sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt")
+sample_path = os.path.join(os.curdir, sample_file)
+
+try:
+ with open(sample_file) as f:
+ SAMPLE = f.readlines()
+except Exception:
+ print("[!] Problem opening sample file. Did you download "
+ "the sample, or correctly set 'SAMPLE' env var?")
+ sys.exit(1)
+
+if numdocs > len(SAMPLE):
+ print("[!] Sorry! The requested DOCS number is larger than "
+ "the num of lines in our sample file")
+ sys.exit(1)
+
+
+def debug(*args):
+    """Print the given values, unless the SILENT setting is on."""
+    if silent:
+        return
+    print(*args)
+
+debug("[+] db path:", tmpdb)
+debug("[+] num docs", numdocs)
+
+if os.path.isfile(tmpdb):
+ debug("[+] Removing existing db file...")
+ os.remove(tmpdb)
+
+start_time = datetime.datetime.now()
+
+opts = SQLCipherOptions(tmpdb, "secret", create=True)
+dbpool = adbapi.getConnectionPool(opts)
+
+
+def createDoc(doc, doc_id):
+    """
+    Run the u1db ``create_doc`` operation for ``doc`` through the
+    connection pool, forcing ``doc_id`` as the id of the new document.
+    Returns what the pool returns (callers attach callbacks to it).
+    """
+    pending = dbpool.runU1DBQuery("create_doc", doc, doc_id=doc_id)
+    return pending
+
+db_indexes = {
+ 'by-chash': ['chash'],
+ 'by-number': ['number']}
+
+
+def create_indexes(_):
+    """
+    Create every index listed in ``db_indexes``.
+
+    The argument is ignored; it lets the function be used as a callback.
+    Returns a deferred that fires when all the indexes have been created.
+    """
+    pending = [
+        dbpool.runU1DBQuery("create_index", name, *fields)
+        for name, fields in db_indexes.items()]
+    return defer.gatherResults(pending)
+
+
+class TimeWitness(object):
+    """Remember a starting instant and report the time elapsed since it."""
+
+    def __init__(self, init_time):
+        # init_time: the datetime from which elapsed time is measured
+        self.init_time = init_time
+
+    def get_time_count(self):
+        """Return the timedelta between now and the starting instant."""
+        now = datetime.datetime.now()
+        return now - self.init_time
+
+
+def get_from_index(_):
+    """
+    Fetch one document directly by its id (``get_doc``) and report how long
+    the lookup took.
+
+    The argument is ignored; it lets the function be used as a callback.
+    Returns the deferred of the lookup, which fires with its result.
+    """
+    started = datetime.datetime.now()
+    debug("GETTING FROM INDEX...", started)
+
+    def report(res, witness):
+        print("RESULT->", res)
+        print("Index Query Took: ", witness.get_time_count())
+        return res
+
+    # XXX this is the hash of line 89 from the hacker crackdown...
+    # Should accept any other optional hash as an environment variable.
+    # (formerly tried hash:
+    # 1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5)
+    query = dbpool.runU1DBQuery(
+        "get_doc",
+        "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469")
+    query.addCallback(report, TimeWitness(started))
+    return query
+
+
+def getAllDocs():
+    """Run the u1db ``get_all_docs`` operation through the connection pool."""
+    pending = dbpool.runU1DBQuery("get_all_docs")
+    return pending
+
+
+def errBack(e):
+    """
+    Errback: announce the error, print the traceback of failure ``e`` and
+    stop the reactor.
+    """
+    failure = e
+    debug("[!] ERROR FOUND!!!")
+    failure.printTraceback()
+    reactor.stop()
+
+
+def countDocs(_):
+    """
+    Fetch all the documents, check their count and finish the run.
+
+    The argument is ignored; it lets the function be used as a callback.
+    Returns the deferred of the whole chain.
+    """
+    debug("counting docs...")
+    d = getAllDocs()
+    # A single errback at the end of the chain: a failure anywhere skips
+    # the remaining callbacks and reaches errBack exactly once. With
+    # addCallbacks(x, errBack) at every step, errBack's None result put the
+    # chain back on the callback path, so allDone ran too and called
+    # reactor.stop() a second time.
+    d.addCallback(printResult)
+    d.addCallback(allDone)
+    d.addErrback(errBack)
+    return d
+
+
+def printResult(r, **kwargs):
+    """
+    Report a query result.
+
+    With keyword arguments, only their values are printed. A single u1db
+    document is printed as its id and number. Anything else is treated as
+    a ``get_all_docs`` result, whose second item is the list of documents:
+    its length is checked against ``numdocs``.
+
+    :raise ValueError: if the number of documents is not ``numdocs``.
+    """
+    if kwargs:
+        debug(*kwargs.values())
+        return
+    if isinstance(r, u1db.Document):
+        debug(r.doc_id, r.content['number'])
+        return
+
+    len_results = len(r[1])
+    debug("GOT %s results" % len_results)
+    if len_results != numdocs:
+        debug("[!] MISSING DOCS!!!!!")
+        raise ValueError("We didn't expect this result len")
+    debug("ALL GOOD")
+
+
+def allDone(_):
+    """
+    Final callback: print the total run time in seconds and stop the
+    reactor. The argument is ignored.
+    """
+    debug("ALL DONE!")
+
+    # the elapsed time is printed regardless of the SILENT setting
+    elapsed = datetime.datetime.now() - start_time
+    print(elapsed.total_seconds())
+    reactor.stop()
+
+
+def insert_docs(_):
+    """
+    Create one document for each of the first ``numdocs`` sample lines,
+    using the sha256 hex digest of the line as the document id.
+
+    Every document holds its number, the line itself and that digest. The
+    argument is ignored; it lets the function be used as a callback.
+    Returns a deferred that fires when all the insertions are done.
+    """
+    def insert_one(i):
+        payload = SAMPLE[i]
+        chash = hashlib.sha256(payload).hexdigest()
+        pending = createDoc(
+            {"number": i, "payload": payload, 'chash': chash},
+            doc_id=chash)
+        pending.addCallbacks(
+            partial(printResult, i=i, chash=chash, payload=payload),
+            lambda e: e.printTraceback())
+        return pending
+
+    deferreds = [insert_one(i) for i in range(numdocs)]
+    return defer.gatherResults(deferreds, consumeErrors=True)
+
+d = create_indexes(None)
+d.addCallback(insert_docs)
+d.addCallback(get_from_index)
+d.addCallback(countDocs)
+
+reactor.run()
diff --git a/client/src/leap/soledad/client/examples/compare.txt b/client/src/leap/soledad/client/examples/compare.txt
new file mode 100644
index 00000000..19a1325a
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/compare.txt
@@ -0,0 +1,8 @@
+TIMES=100 TMPDIR=/media/sdb5/leap python use_adbapi.py 1.34s user 0.16s system 53% cpu 2.832 total
+TIMES=100 TMPDIR=/media/sdb5/leap python use_api.py 1.22s user 0.14s system 62% cpu 2.181 total
+
+TIMES=1000 TMPDIR=/media/sdb5/leap python use_api.py 2.18s user 0.34s system 27% cpu 9.213 total
+TIMES=1000 TMPDIR=/media/sdb5/leap python use_adbapi.py 2.40s user 0.34s system 39% cpu 7.004 total
+
+TIMES=5000 TMPDIR=/media/sdb5/leap python use_api.py 6.63s user 1.27s system 13% cpu 57.882 total
+TIMES=5000 TMPDIR=/media/sdb5/leap python use_adbapi.py 6.84s user 1.26s system 36% cpu 22.367 total
diff --git a/client/src/leap/soledad/client/examples/manifest.phk b/client/src/leap/soledad/client/examples/manifest.phk
new file mode 100644
index 00000000..2c86c07d
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/manifest.phk
@@ -0,0 +1,50 @@
+The Hacker's Manifesto
+
+The Hacker's Manifesto
+by: The Mentor
+
+Another one got caught today, it's all over the papers. "Teenager
+Arrested in Computer Crime Scandal", "Hacker Arrested after Bank
+Tampering." "Damn kids. They're all alike." But did you, in your
+three-piece psychology and 1950's technobrain, ever take a look behind
+the eyes of the hacker? Did you ever wonder what made him tick, what
+forces shaped him, what may have molded him? I am a hacker, enter my
+world. Mine is a world that begins with school. I'm smarter than most of
+the other kids, this crap they teach us bores me. "Damn underachiever.
+They're all alike." I'm in junior high or high school. I've listened to
+teachers explain for the fifteenth time how to reduce a fraction. I
+understand it. "No, Ms. Smith, I didn't show my work. I did it in
+my head." "Damn kid. Probably copied it. They're all alike." I made a
+discovery today. I found a computer. Wait a second, this is cool. It does
+what I want it to. If it makes a mistake, it's because I screwed it up.
+Not because it doesn't like me, or feels threatened by me, or thinks I'm
+a smart ass, or doesn't like teaching and shouldn't be here. Damn kid.
+All he does is play games. They're all alike. And then it happened... a
+door opened to a world... rushing through the phone line like heroin
+through an addict's veins, an electronic pulse is sent out, a refuge from
+the day-to-day incompetencies is sought... a board is found. "This is
+it... this is where I belong..." I know everyone here... even if I've
+never met them, never talked to them, may never hear from them again... I
+know you all... Damn kid. Tying up the phone line again. They're all
+alike... You bet your ass we're all alike... we've been spoon-fed baby
+food at school when we hungered for steak... the bits of meat that you
+did let slip through were pre-chewed and tasteless. We've been dominated
+by sadists, or ignored by the apathetic. The few that had something to
+teach found us willing pupils, but those few are like drops of water in
+the desert. This is our world now... the world of the electron and the
+switch, the beauty of the baud. We make use of a service already existing
+without paying for what could be dirt-cheap if it wasn't run by
+profiteering gluttons, and you call us criminals. We explore... and you
+call us criminals. We seek after knowledge... and you call us criminals.
+We exist without skin color, without nationality, without religious
+bias... and you call us criminals. You build atomic bombs, you wage wars,
+you murder, cheat, and lie to us and try to make us believe it's for our
+own good, yet we're the criminals. Yes, I am a criminal. My crime is that
+of curiosity. My crime is that of judging people by what they say and
+think, not what they look like. My crime is that of outsmarting you,
+something that you will never forgive me for. I am a hacker, and this is
+my manifesto. You may stop this individual, but you can't stop us all...
+after all, we're all alike.
+
+This was the last published file written by The Mentor. Shortly after
+releasing it, he was busted by the FBI. The Mentor, sadly missed.
diff --git a/client/src/leap/soledad/client/examples/plot-async-db.py b/client/src/leap/soledad/client/examples/plot-async-db.py
new file mode 100644
index 00000000..018a1a1d
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/plot-async-db.py
@@ -0,0 +1,45 @@
+import csv
+from matplotlib import pyplot as plt
+
+# CSV file to plot; run_benchmark.py writes a file with this same name.
+FILE = "bench.csv"
+
+# config the plot
+plt.xlabel('number of inserts')
+plt.ylabel('time (seconds)')
+plt.title('SQLCipher parallelization')
+
+# line style shared by every plotted series
+kwargs = {
+ 'linewidth': 1.0,
+ 'linestyle': '-',
+}
+
+# (column name, line color) for each series to draw
+series = (('sync', 'r'),
+ ('async', 'g'))
+
+# columns read from the CSV file; 'mark' is used as the x axis
+data = {'mark': [],
+ 'sync': [],
+ 'async': []}
+
+# every row is expected to hold exactly three fields:
+# mark (int), sync time (float), async time (float)
+with open(FILE, 'rb') as csvfile:
+ series_reader = csv.reader(csvfile, delimiter=',')
+ for m, s, a in series_reader:
+ data['mark'].append(int(m))
+ data['sync'].append(float(s))
+ data['async'].append(float(a))
+
+# extremes of the data, over both series for the y axis
+xmax = max(data['mark'])
+xmin = min(data['mark'])
+ymax = max(data['sync'] + data['async'])
+ymin = min(data['sync'] + data['async'])
+
+for run in series:
+ name = run[0]
+ color = run[1]
+ plt.plot(data['mark'], data[name], label=name, color=color, **kwargs)
+
+# empty annotations placed at the data extremes
+# NOTE(review): presumably meant to make the axes cover the whole data
+# range -- confirm.
+plt.axes().annotate("", xy=(xmax, ymax))
+plt.axes().annotate("", xy=(xmin, ymin))
+
+plt.grid()
+plt.legend()
+plt.show()
diff --git a/client/src/leap/soledad/client/examples/run_benchmark.py b/client/src/leap/soledad/client/examples/run_benchmark.py
new file mode 100644
index 00000000..a112cf45
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/run_benchmark.py
@@ -0,0 +1,28 @@
+"""
+Run a mini-benchmark between regular api and dbapi
+"""
+import commands
+import os
+import time
+
+# directory passed to the benchmarked scripts through their TMPDIR variable
+TMPDIR = os.environ.get("TMPDIR", "/tmp")
+# output file, one "times, sync_time, async_time" line per run
+CSVFILE = 'bench.csv'
+
+# command template: version="" runs use_api.py, version="adb" runs
+# use_adbapi.py
+cmd = "SILENT=1 TIMES={times} TMPDIR={tmpdir} python ./use_{version}api.py"
+
+# the value of interest is taken from the last line of the command output
+parse_time = lambda r: r.split('\n')[-1]
+
+
+with open(CSVFILE, 'w') as log:
+
+ # NOTE: this script is Python 2 only (`commands` module, print statement)
+ for times in range(0, 10000, 500):
+ cmd1 = cmd.format(times=times, tmpdir=TMPDIR, version="")
+ sync_time = parse_time(commands.getoutput(cmd1))
+
+ cmd2 = cmd.format(times=times, tmpdir=TMPDIR, version="adb")
+ async_time = parse_time(commands.getoutput(cmd2))
+
+ print times, sync_time, async_time
+ log.write("%s, %s, %s\n" % (times, sync_time, async_time))
+ # flush after each row, so partial results are on disk during the run
+ log.flush()
+ time.sleep(2)
diff --git a/client/src/leap/soledad/client/examples/soledad_sync.py b/client/src/leap/soledad/client/examples/soledad_sync.py
new file mode 100644
index 00000000..6d0f6595
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/soledad_sync.py
@@ -0,0 +1,65 @@
+from leap.bitmask.config.providerconfig import ProviderConfig
+from leap.bitmask.crypto.srpauth import SRPAuth
+from leap.soledad.client import Soledad
+
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+
+# EDIT THIS --------------------------------------------
+user = u"USERNAME"
+uuid = u"USERUUID"
+_pass = u"USERPASS"
+server_url = "https://soledad.server.example.org:2323"
+# EDIT THIS --------------------------------------------
+
+secrets_path = "/tmp/%s.secrets" % uuid
+local_db_path = "/tmp/%s.soledad" % uuid
+cert_file = "/tmp/cacert.pem"
+provider_config = '/tmp/cdev.json'
+
+
+provider = ProviderConfig()
+provider.load(provider_config)
+
+soledad = None
+
+
+def printStuff(r):
+    """Callback: print the result it is called with."""
+    print(r)
+
+
+def printErr(err):
+    """Errback: log the exception wrapped by failure ``err``."""
+    wrapped = err.value
+    logging.exception(wrapped)
+
+
+def init_soledad(_):
+    """
+    Callback run after authentication: create the global Soledad instance,
+    create a test document, list all documents and run a sync.
+
+    The argument is ignored. The reactor is stopped when the sync ends,
+    whether it succeeded or failed.
+    """
+    global soledad
+
+    auth_token = srpauth.get_token()
+    print "token", auth_token
+
+    soledad = Soledad(
+        uuid, _pass, secrets_path, local_db_path, server_url, cert_file,
+        auth_token=auth_token, defer_encryption=False)
+
+    def fetch_all(_ignored):
+        return soledad.get_all_docs()
+
+    # create one document, then print all the documents
+    created = soledad.create_doc({"test": 42})
+    created.addCallback(fetch_all)
+    created.addCallbacks(printStuff, printErr)
+
+    # sync with the server and stop the reactor afterwards
+    synced = soledad.sync()
+    synced.addCallbacks(printStuff, printErr)
+    synced.addBoth(lambda _result: reactor.stop())
+
+
+srpauth = SRPAuth(provider)
+
+d = srpauth.authenticate(user, _pass)
+d.addCallbacks(init_soledad, printErr)
+
+
+from twisted.internet import reactor
+reactor.run()
diff --git a/client/src/leap/soledad/client/examples/use_adbapi.py b/client/src/leap/soledad/client/examples/use_adbapi.py
new file mode 100644
index 00000000..d7bd21f2
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/use_adbapi.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+# use_adbapi.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Example of use of the asynchronous soledad api.
+"""
+from __future__ import print_function
+import datetime
+import os
+
+import u1db
+from twisted.internet import defer, reactor
+
+from leap.soledad.client import adbapi
+from leap.soledad.client.sqlcipher import SQLCipherOptions
+
+
+folder = os.environ.get("TMPDIR", "tmp")
+times = int(os.environ.get("TIMES", "1000"))
+silent = os.environ.get("SILENT", False)
+
+tmpdb = os.path.join(folder, "test.soledad")
+
+
+def debug(*args):
+    """Print the given values, unless the SILENT setting is on."""
+    if silent:
+        return
+    print(*args)
+
+debug("[+] db path:", tmpdb)
+debug("[+] times", times)
+
+if os.path.isfile(tmpdb):
+ debug("[+] Removing existing db file...")
+ os.remove(tmpdb)
+
+start_time = datetime.datetime.now()
+
+opts = SQLCipherOptions(tmpdb, "secret", create=True)
+dbpool = adbapi.getConnectionPool(opts)
+
+
+def createDoc(doc):
+    """
+    Run the u1db ``create_doc`` operation for ``doc`` through the
+    connection pool and return what the pool returns (callers attach
+    callbacks to it).
+    """
+    pending = dbpool.runU1DBQuery("create_doc", doc)
+    return pending
+
+
+def getAllDocs():
+    """Run the u1db ``get_all_docs`` operation through the connection pool."""
+    pending = dbpool.runU1DBQuery("get_all_docs")
+    return pending
+
+
+def countDocs(_):
+    """
+    Fetch all the documents, report on them and finish the run.
+
+    The argument is ignored; it lets the function be used as a callback.
+    ``allDone`` runs whether the previous step succeeded or failed.
+    """
+    debug("counting docs...")
+    pending = getAllDocs()
+    pending.addCallbacks(
+        printResult, lambda failure: failure.printTraceback())
+    pending.addBoth(allDone)
+
+
+def printResult(r):
+    """
+    Report a query result.
+
+    A single u1db document is printed as its id and number. Anything else
+    is treated as a ``get_all_docs`` result, whose second item is the list
+    of documents: its length is checked against ``times``.
+
+    :raise ValueError: if the number of documents is not ``times``.
+    """
+    if isinstance(r, u1db.Document):
+        debug(r.doc_id, r.content['number'])
+        return
+
+    len_results = len(r[1])
+    debug("GOT %s results" % len_results)
+    if len_results != times:
+        raise ValueError("We didn't expect this result len")
+    debug("ALL GOOD")
+
+
+# Final callback of the run; the argument is ignored.
+def allDone(_):
+ debug("ALL DONE!")
+ # in silent mode the only output is the total run time, in seconds
+ if silent:
+ end_time = datetime.datetime.now()
+ print((end_time - start_time).total_seconds())
+ # NOTE(review): presumably this runs whether or not `silent` is set (the
+ # indentation is lost in this view) -- confirm.
+ reactor.stop()
+
+deferreds = []
+payload = open('manifest.phk').read()
+
+for i in range(times):
+ doc = {"number": i, "payload": payload}
+ d = createDoc(doc)
+ d.addCallbacks(printResult, lambda e: e.printTraceback())
+ deferreds.append(d)
+
+
+all_done = defer.gatherResults(deferreds, consumeErrors=True)
+all_done.addCallback(countDocs)
+
+reactor.run()
diff --git a/client/src/leap/soledad/client/examples/use_api.py b/client/src/leap/soledad/client/examples/use_api.py
new file mode 100644
index 00000000..e2501c98
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/use_api.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+# use_api.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Example of use of the soledad api.
+"""
+from __future__ import print_function
+import datetime
+import os
+
+from leap.soledad.client import sqlcipher
+from leap.soledad.client.sqlcipher import SQLCipherOptions
+
+
+folder = os.environ.get("TMPDIR", "tmp")
+times = int(os.environ.get("TIMES", "1000"))
+silent = os.environ.get("SILENT", False)
+
+tmpdb = os.path.join(folder, "test.soledad")
+
+
+def debug(*args):
+ if not silent:
+ print(*args)
+
+debug("[+] db path:", tmpdb)
+debug("[+] times", times)
+
+if os.path.isfile(tmpdb):
+ debug("[+] Removing existing db file...")
+ os.remove(tmpdb)
+
+start_time = datetime.datetime.now()
+
+opts = SQLCipherOptions(tmpdb, "secret", create=True)
+db = sqlcipher.SQLCipherDatabase(opts)
+
+
+def allDone():
+ debug("ALL DONE!")
+
+payload = open('manifest.phk').read()
+
+for i in range(times):
+ doc = {"number": i, "payload": payload}
+ d = db.create_doc(doc)
+ debug(d.doc_id, d.content['number'])
+
+debug("Count", len(db.get_all_docs()[1]))
+if silent:
+ end_time = datetime.datetime.now()
+ print((end_time - start_time).total_seconds())
+
+allDone()
diff --git a/client/src/leap/soledad/client/http_target.py b/client/src/leap/soledad/client/http_target.py
new file mode 100644
index 00000000..30590ae1
--- /dev/null
+++ b/client/src/leap/soledad/client/http_target.py
@@ -0,0 +1,622 @@
+# -*- coding: utf-8 -*-
+# http_target.py
+# Copyright (C) 2015 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+"""
+A U1DB backend for encrypting data before sending to server and decrypting
+after receiving.
+"""
+
+
+import json
+import base64
+import logging
+
+from uuid import uuid4
+from functools import partial
+
+from twisted.internet import defer
+from twisted.internet import reactor
+from twisted.web.error import Error
+
+from u1db import errors
+from u1db import SyncTarget
+from u1db.remote import utils
+
+from leap.common.http import HTTPClient
+
+from leap.soledad.common.document import SoledadDocument
+from leap.soledad.common.errors import InvalidAuthTokenError
+
+from leap.soledad.client.crypto import is_symmetrically_encrypted
+from leap.soledad.client.crypto import encrypt_doc
+from leap.soledad.client.crypto import decrypt_doc
+from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS
+from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS
+from leap.soledad.client.events import emit
+from leap.soledad.client.encdecpool import SyncDecrypterPool
+
+
+logger = logging.getLogger(__name__)
+
+
+class SoledadHTTPSyncTarget(SyncTarget):
+ """
+ A SyncTarget that encrypts data before sending and decrypts data after
+ receiving.
+
+ Normally encryption will have been written to the sync database upon
+ document modification. The sync database is also used to write temporarily
+ the parsed documents that the remote sends us, before being decrypted and
+ written to the main database.
+ """
+
+ def __init__(self, url, source_replica_uid, creds, crypto, cert_file,
+ sync_db=None, sync_enc_pool=None):
+ """
+ Initialize the sync target.
+
+ :param url: The server sync url.
+ :type url: str
+ :param source_replica_uid: The source replica uid which we use when
+ deferring decryption.
+ :type source_replica_uid: str
+ :param url: The url of the target replica to sync with.
+ :type url: str
+ :param creds: A dictionary containing the uuid and token.
+ :type creds: dict
+ :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
+ document contents when syncing.
+ :type crypto: soledad.crypto.SoledadCrypto
+ :param cert_file: Path to the certificate of the ca used to validate
+ the SSL certificate used by the remote soledad
+ server.
+ :type cert_file: str
+ :param sync_db: Optional. handler for the db with the symmetric
+ encryption of the syncing documents. If
+ None, encryption will be done in-place,
+ instead of retrieving it from the dedicated
+ database.
+ :type sync_db: Sqlite handler
+ :param verify_ssl: Whether we should perform SSL server certificate
+ verification.
+ :type verify_ssl: bool
+ """
+ if url.endswith("/"):
+ url = url[:-1]
+ self._url = str(url) + "/sync-from/" + source_replica_uid
+ self.source_replica_uid = source_replica_uid
+ self._auth_header = None
+ self.set_creds(creds)
+ self._crypto = crypto
+ self._sync_db = sync_db
+ self._sync_enc_pool = sync_enc_pool
+ self._insert_doc_cb = None
+ # asynchronous encryption/decryption attributes
+ self._decryption_callback = None
+ self._sync_decr_pool = None
+ self._http = HTTPClient(cert_file)
+
+ def set_creds(self, creds):
+ """
+ Update credentials.
+
+ :param creds: A dictionary containing the uuid and token.
+ :type creds: dict
+ """
+ uuid = creds['token']['uuid']
+ token = creds['token']['token']
+ auth = '%s:%s' % (uuid, token)
+ b64_token = base64.b64encode(auth)
+ self._auth_header = {'Authorization': ['Token %s' % b64_token]}
+
+ @property
+ def _defer_encryption(self):
+ return self._sync_enc_pool is not None
+
+ #
+ # SyncTarget API
+ #
+
+ @defer.inlineCallbacks
+ def get_sync_info(self, source_replica_uid):
+ """
+ Return information about known state of remote database.
+
+ Return the replica_uid and the current database generation of the
+ remote database, and its last-seen database generation for the client
+ replica.
+
+ :param source_replica_uid: The client-side replica uid.
+ :type source_replica_uid: str
+
+ :return: A deferred which fires with (target_replica_uid,
+ target_replica_generation, target_trans_id,
+ source_replica_last_known_generation,
+ source_replica_last_known_transaction_id)
+ :rtype: twisted.internet.defer.Deferred
+ """
+ raw = yield self._http_request(self._url, headers=self._auth_header)
+ res = json.loads(raw)
+ defer.returnValue([
+ res['target_replica_uid'],
+ res['target_replica_generation'],
+ res['target_replica_transaction_id'],
+ res['source_replica_generation'],
+ res['source_transaction_id']
+ ])
+
+ def record_sync_info(
+ self, source_replica_uid, source_replica_generation,
+ source_replica_transaction_id):
+ """
+ Record tip information for another replica.
+
+ After sync_exchange has been processed, the caller will have
+ received new content from this replica. This call allows the
+ source replica instigating the sync to inform us what their
+ generation became after applying the documents we returned.
+
+ This is used to allow future sync operations to not need to repeat data
+ that we just talked about. It also means that if this is called at the
+ wrong time, there can be database records that will never be
+ synchronized.
+
+ :param source_replica_uid: The identifier for the source replica.
+ :type source_replica_uid: str
+ :param source_replica_generation: The database generation for the
+ source replica.
+ :type source_replica_generation: int
+ :param source_replica_transaction_id: The transaction id associated
+ with the source replica
+ generation.
+ :type source_replica_transaction_id: str
+
+ :return: A deferred which fires with the result of the query.
+ :rtype: twisted.internet.defer.Deferred
+ """
+ data = json.dumps({
+ 'generation': source_replica_generation,
+ 'transaction_id': source_replica_transaction_id
+ })
+ headers = self._auth_header.copy()
+ headers.update({'content-type': ['application/json']})
+ return self._http_request(
+ self._url,
+ method='PUT',
+ headers=headers,
+ body=data)
+
+ @defer.inlineCallbacks
+ def sync_exchange(self, docs_by_generation, source_replica_uid,
+ last_known_generation, last_known_trans_id,
+ insert_doc_cb, ensure_callback=None,
+ defer_decryption=True, sync_id=None):
+ """
+ Find out which documents the remote database does not know about,
+ encrypt and send them. After that, receive documents from the remote
+ database.
+
+ :param docs_by_generation: A list of (doc, generation, trans_id)
+ of local documents that were changed since
+ the last local generation the remote
+ replica knows about.
+ :type docs_by_generation: list of tuples
+
+ :param source_replica_uid: The uid of the source replica.
+ :type source_replica_uid: str
+
+ :param last_known_generation: Target's last known generation.
+ :type last_known_generation: int
+
+ :param last_known_trans_id: Target's last known transaction id.
+ :type last_known_trans_id: str
+
+ :param insert_doc_cb: A callback for inserting received documents from
+ target. If not overridden, this will call u1db
+ insert_doc_from_target in synchronizer, which
+ implements the TAKE OTHER semantics.
+ :type insert_doc_cb: function
+
+ :param ensure_callback: A callback that ensures we know the target
+ replica uid if the target replica was just
+ created.
+ :type ensure_callback: function
+
+ :param defer_decryption: Whether to defer the decryption process using
+ the intermediate database. If False,
+ decryption will be done inline.
+ :type defer_decryption: bool
+
+ :return: A deferred which fires with the new generation and
+ transaction id of the target replica.
+ :rtype: twisted.internet.defer.Deferred
+ """
+
+ self._ensure_callback = ensure_callback
+
+ if sync_id is None:
+ sync_id = str(uuid4())
+ self.source_replica_uid = source_replica_uid
+
+ # save a reference to the callback so we can use it after decrypting
+ self._insert_doc_cb = insert_doc_cb
+
+ gen_after_send, trans_id_after_send = yield self._send_docs(
+ docs_by_generation,
+ last_known_generation,
+ last_known_trans_id,
+ sync_id)
+
+ cur_target_gen, cur_target_trans_id = yield self._receive_docs(
+ last_known_generation, last_known_trans_id,
+ ensure_callback, sync_id,
+ defer_decryption=defer_decryption)
+
+ # update gen and trans id info in case we just sent and did not
+ # receive docs.
+ if gen_after_send is not None and gen_after_send > cur_target_gen:
+ cur_target_gen = gen_after_send
+ cur_target_trans_id = trans_id_after_send
+
+ defer.returnValue([cur_target_gen, cur_target_trans_id])
+
+ #
+ # methods to send docs
+ #
+
+ def _prepare(self, comma, entries, **dic):
+ entry = comma + '\r\n' + json.dumps(dic)
+ entries.append(entry)
+ return len(entry)
+
+ @defer.inlineCallbacks
+ def _send_docs(self, docs_by_generation, last_known_generation,
+ last_known_trans_id, sync_id):
+
+ if not docs_by_generation:
+ defer.returnValue([None, None])
+
+ headers = self._auth_header.copy()
+ headers.update({'content-type': ['application/x-soledad-sync-put']})
+ # add remote replica metadata to the request
+ first_entries = ['[']
+ self._prepare(
+ '', first_entries,
+ last_known_generation=last_known_generation,
+ last_known_trans_id=last_known_trans_id,
+ sync_id=sync_id,
+ ensure=self._ensure_callback is not None)
+ idx = 0
+ total = len(docs_by_generation)
+ for doc, gen, trans_id in docs_by_generation:
+ idx += 1
+ result = yield self._send_one_doc(
+ headers, first_entries, doc,
+ gen, trans_id, total, idx)
+ if self._defer_encryption:
+ self._sync_enc_pool.delete_encrypted_doc(
+ doc.doc_id, doc.rev)
+ emit(SOLEDAD_SYNC_SEND_STATUS,
+ "Soledad sync send status: %d/%d"
+ % (idx, total))
+ response_dict = json.loads(result)[0]
+ gen_after_send = response_dict['new_generation']
+ trans_id_after_send = response_dict['new_transaction_id']
+ defer.returnValue([gen_after_send, trans_id_after_send])
+
+ @defer.inlineCallbacks
+ def _send_one_doc(self, headers, first_entries, doc, gen, trans_id,
+ number_of_docs, doc_idx):
+ entries = first_entries[:]
+ # add the document to the request
+ content = yield self._encrypt_doc(doc)
+ self._prepare(
+ ',', entries,
+ id=doc.doc_id, rev=doc.rev, content=content, gen=gen,
+ trans_id=trans_id, number_of_docs=number_of_docs,
+ doc_idx=doc_idx)
+ entries.append('\r\n]')
+ data = ''.join(entries)
+ result = yield self._http_request(
+ self._url,
+ method='POST',
+ headers=headers,
+ body=data)
+ defer.returnValue(result)
+
+ def _encrypt_doc(self, doc):
+ d = None
+ if doc.is_tombstone():
+ d = defer.succeed(None)
+ elif not self._defer_encryption:
+ # fallback case, for tests
+ d = defer.succeed(encrypt_doc(self._crypto, doc))
+ else:
+
+ def _maybe_encrypt_doc_inline(doc_json):
+ if doc_json is None:
+ # the document is not marked as tombstone, but we got
+ # nothing from the sync db. As it is not encrypted
+ # yet, we force inline encryption.
+ return encrypt_doc(self._crypto, doc)
+ return doc_json
+
+ d = self._sync_enc_pool.get_encrypted_doc(doc.doc_id, doc.rev)
+ d.addCallback(_maybe_encrypt_doc_inline)
+ return d
+
+ #
+ # methods to receive doc
+ #
+
+ @defer.inlineCallbacks
+ def _receive_docs(self, last_known_generation, last_known_trans_id,
+ ensure_callback, sync_id, defer_decryption):
+
+ self._queue_for_decrypt = defer_decryption \
+ and self._sync_db is not None
+
+ new_generation = last_known_generation
+ new_transaction_id = last_known_trans_id
+
+ if self._queue_for_decrypt:
+ logger.debug(
+ "Soledad sync: will queue received docs for decrypting.")
+
+ if defer_decryption:
+ self._setup_sync_decr_pool()
+
+ headers = self._auth_header.copy()
+ headers.update({'content-type': ['application/x-soledad-sync-get']})
+
+ #---------------------------------------------------------------------
+ # maybe receive the first document
+ #---------------------------------------------------------------------
+
+ # we fetch the first document before fetching the rest because we need
+ # to know the total number of documents to be received, and this
+ # information comes as metadata to each request.
+
+ d = self._receive_one_doc(
+ headers, last_known_generation, last_known_trans_id,
+ sync_id, 0)
+ d.addCallback(partial(self._insert_received_doc, 1, 1))
+ number_of_changes, ngen, ntrans = yield d
+
+ if defer_decryption:
+ self._sync_decr_pool.start(number_of_changes)
+
+ #---------------------------------------------------------------------
+ # maybe receive the rest of the documents
+ #---------------------------------------------------------------------
+
+ # launch many asynchronous fetches and inserts of received documents
+ # in the temporary sync db. Will wait for all results before
+ # continuing.
+
+ received = 1
+ deferreds = []
+ while received < number_of_changes:
+ d = self._receive_one_doc(
+ headers, last_known_generation,
+ last_known_trans_id, sync_id, received)
+ d.addCallback(
+ partial(
+ self._insert_received_doc,
+ received + 1, # the index of the current received doc
+ number_of_changes))
+ deferreds.append(d)
+ received += 1
+ results = yield defer.gatherResults(deferreds)
+
+ # get generation and transaction id of target after insertions
+ if deferreds:
+ _, new_generation, new_transaction_id = results.pop()
+
+ #---------------------------------------------------------------------
+ # wait for async decryption to finish
+ #---------------------------------------------------------------------
+
+ # below we do a trick so we can wait for the SyncDecrypterPool to
+ # finish its work before finally returning the new generation and
+ # transaction id of the remote replica. To achieve that, we create a
+ # Deferred that will return the results of the sync and, if we are
+ # decrypting asynchronously, we use reactor.callLater() to
+ # periodically poll the decrypter and check if it has finished its
+ # work. When it has finished, we either call the callback or errback
+ # of that deferred. In case we are not asynchronously decrypting, we
+ # just fire the deferred.
+
+ def _shutdown_and_finish(res):
+ self._sync_decr_pool.close()
+ return new_generation, new_transaction_id
+
+ d = defer.Deferred()
+ d.addCallback(_shutdown_and_finish)
+
+ def _wait_or_finish():
+ if not self._sync_decr_pool.has_finished():
+ reactor.callLater(
+ SyncDecrypterPool.DECRYPT_LOOP_PERIOD,
+ _wait_or_finish)
+ else:
+ if not self._sync_decr_pool.failed():
+ d.callback(None)
+ else:
+ d.errback(self._sync_decr_pool.failure)
+
+ if defer_decryption:
+ _wait_or_finish()
+ else:
+ d.callback(None)
+
+ new_generation, new_transaction_id = yield d
+ defer.returnValue([new_generation, new_transaction_id])
+
+ def _receive_one_doc(self, headers, last_known_generation,
+ last_known_trans_id, sync_id, received):
+ entries = ['[']
+ # add remote replica metadata to the request
+ self._prepare(
+ '', entries,
+ last_known_generation=last_known_generation,
+ last_known_trans_id=last_known_trans_id,
+ sync_id=sync_id,
+ ensure=self._ensure_callback is not None)
+ # inform server of how many documents have already been received
+ self._prepare(
+ ',', entries, received=received)
+ entries.append('\r\n]')
+ # send headers
+ return self._http_request(
+ self._url,
+ method='POST',
+ headers=headers,
+ body=''.join(entries))
+
+ def _insert_received_doc(self, idx, total, response):
+ """
+ Insert a received document into the local replica.
+
+ :param idx: The index count of the current operation.
+ :type idx: int
+ :param total: The total number of operations.
+ :type total: int
+ :param response: The body of the response.
+ :type response: str
+ """
+ new_generation, new_transaction_id, number_of_changes, doc_id, \
+ rev, content, gen, trans_id = \
+ self._parse_received_doc_response(response)
+ if doc_id is not None:
+ # decrypt incoming document and insert into local database
+ # -------------------------------------------------------------
+ # symmetric decryption of document's contents
+ # -------------------------------------------------------------
+ # If arriving content was symmetrically encrypted, we decrypt it.
+ # We do it inline if defer_decryption flag is False or no sync_db
+ # was defined, otherwise we defer it writing it to the received
+ # docs table.
+ doc = SoledadDocument(doc_id, rev, content)
+ if is_symmetrically_encrypted(doc):
+ if self._queue_for_decrypt:
+ self._sync_decr_pool.insert_encrypted_received_doc(
+ doc.doc_id, doc.rev, doc.content, gen, trans_id,
+ idx)
+ else:
+ # defer_decryption is False or no-sync-db fallback
+ doc.set_json(decrypt_doc(self._crypto, doc))
+ self._insert_doc_cb(doc, gen, trans_id)
+ else:
+ # not symmetrically encrypted doc, insert it directly
+ # or save it in the decrypted stage.
+ if self._queue_for_decrypt:
+ self._sync_decr_pool.insert_received_doc(
+ doc.doc_id, doc.rev, doc.content, gen, trans_id,
+ idx)
+ else:
+ self._insert_doc_cb(doc, gen, trans_id)
+ # -------------------------------------------------------------
+ # end of symmetric decryption
+ # -------------------------------------------------------------
+ msg = "%d/%d" % (idx, total)
+ emit(SOLEDAD_SYNC_RECEIVE_STATUS, msg)
+ logger.debug("Soledad sync receive status: %s" % msg)
+ return number_of_changes, new_generation, new_transaction_id
+
+    def _parse_received_doc_response(self, response):
+        """
+        Parse the response from the server containing the received document.
+
+        :param response: The body of the response.
+        :type response: str
+
+        :return: (new_gen, new_trans_id, number_of_changes, doc_id, rev,
+                 content, gen, trans_id)
+        :rtype: tuple
+
+        :raise errors.BrokenSyncStream: Raised if the stream is not framed
+            by '[' and ']' lines or an entry is missing or malformed.
+        """
+        # decode incoming stream
+        parts = response.splitlines()
+        if not parts or parts[0] != '[' or parts[-1] != ']':
+            raise errors.BrokenSyncStream
+        data = parts[1:-1]
+        # decode metadata; json.loads() signals bad input with ValueError
+        # (json.JSONDecodeError does not exist in the python 2 stdlib), and
+        # an empty stream makes data[0] raise IndexError.
+        try:
+            line, comma = utils.check_and_strip_comma(data[0])
+            metadata = json.loads(line)
+            new_generation = metadata['new_generation']
+            new_transaction_id = metadata['new_transaction_id']
+            number_of_changes = metadata['number_of_changes']
+        except (IndexError, ValueError, KeyError):
+            raise errors.BrokenSyncStream
+        # make sure we have replica_uid from fresh new dbs
+        if self._ensure_callback and 'replica_uid' in metadata:
+            self._ensure_callback(metadata['replica_uid'])
+        # parse incoming document info, if the server sent any
+        doc_id = rev = content = gen = trans_id = None
+        if number_of_changes > 0:
+            try:
+                entry = json.loads(data[1])
+                doc_id = entry['id']
+                rev = entry['rev']
+                content = entry['content']
+                gen = entry['gen']
+                trans_id = entry['trans_id']
+            except (IndexError, ValueError, KeyError):
+                raise errors.BrokenSyncStream
+        return new_generation, new_transaction_id, number_of_changes, \
+            doc_id, rev, content, gen, trans_id
+
+ def _setup_sync_decr_pool(self):
+ """
+ Set up the SyncDecrypterPool for deferred decryption.
+ """
+ if self._sync_decr_pool is None and self._sync_db is not None:
+ # initialize syncing queue decryption pool
+ self._sync_decr_pool = SyncDecrypterPool(
+ self._crypto,
+ self._sync_db,
+ insert_doc_cb=self._insert_doc_cb,
+ source_replica_uid=self.source_replica_uid)
+
+ def _http_request(self, url, method='GET', body=None, headers={}):
+ d = self._http.request(url, method, body, headers)
+ d.addErrback(_unauth_to_invalid_token_error)
+ return d
+
+
+def _unauth_to_invalid_token_error(failure):
+ """
+ An errback to translate unauthorized errors to our own invalid token
+ class.
+
+ :param failure: The original failure.
+ :type failure: twisted.python.failure.Failure
+
+ :return: Either the original failure or an invalid auth token error.
+ :rtype: twisted.python.failure.Failure
+ """
+ failure.trap(Error)
+ if failure.getErrorMessage() == "401 Unauthorized":
+ raise InvalidAuthTokenError
+ return failure
diff --git a/client/src/leap/soledad/client/interfaces.py b/client/src/leap/soledad/client/interfaces.py
new file mode 100644
index 00000000..4f7b0779
--- /dev/null
+++ b/client/src/leap/soledad/client/interfaces.py
@@ -0,0 +1,362 @@
+# -*- coding: utf-8 -*-
+# interfaces.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Interfaces used by the Soledad Client.
+"""
+from zope.interface import Interface, Attribute
+
+
+class ILocalStorage(Interface):
+ """
+ I implement core methods for the u1db local storage of documents and
+ indexes.
+ """
+ local_db_path = Attribute(
+ "The path for the local database replica")
+ local_db_file_name = Attribute(
+ "The name of the local SQLCipher U1DB database file")
+ uuid = Attribute("The user uuid")
+ default_prefix = Attribute(
+ "Prefix for default values for path")
+
+ def put_doc(self, doc):
+ """
+ Update a document in the local encrypted database.
+
+ :param doc: the document to update
+ :type doc: SoledadDocument
+
+ :return:
+ a deferred that will fire with the new revision identifier for
+ the document
+ :rtype: Deferred
+ """
+
+ def delete_doc(self, doc):
+ """
+ Delete a document from the local encrypted database.
+
+ :param doc: the document to delete
+ :type doc: SoledadDocument
+
+ :return:
+ a deferred that will fire with ...
+ :rtype: Deferred
+ """
+
+ def get_doc(self, doc_id, include_deleted=False):
+ """
+ Retrieve a document from the local encrypted database.
+
+ :param doc_id: the unique document identifier
+ :type doc_id: str
+ :param include_deleted:
+ if True, deleted documents will be returned with empty content;
+ otherwise asking for a deleted document will return None
+ :type include_deleted: bool
+
+ :return:
+ A deferred that will fire with the document object, containing a
+ SoledadDocument, or None if it could not be found
+ :rtype: Deferred
+ """
+
+ def get_docs(self, doc_ids, check_for_conflicts=True,
+ include_deleted=False):
+ """
+ Get the content for many documents.
+
+ :param doc_ids: a list of document identifiers
+ :type doc_ids: list
+ :param check_for_conflicts: if set False, then the conflict check will
+ be skipped, and 'None' will be returned instead of True/False
+ :type check_for_conflicts: bool
+
+ :return:
+ A deferred that will fire with an iterable giving the Document
+ object for each document id in matching doc_ids order.
+ :rtype: Deferred
+ """
+
+ def get_all_docs(self, include_deleted=False):
+ """
+ Get the JSON content for all documents in the database.
+
+ :param include_deleted: If set to True, deleted documents will be
+ returned with empty content. Otherwise deleted
+ documents will not be included in the results.
+ :return:
+ A deferred that will fire with (generation, [Document]): that is,
+ the current generation of the database, followed by a list of all
+ the documents in the database.
+ :rtype: Deferred
+ """
+
+ def create_doc(self, content, doc_id=None):
+ """
+ Create a new document in the local encrypted database.
+
+ :param content: the contents of the new document
+ :type content: dict
+ :param doc_id: an optional identifier specifying the document id
+ :type doc_id: str
+
+ :return:
+ A deferred that will fire with the new document (SoledadDocument
+ instance).
+ :rtype: Deferred
+ """
+
+ def create_doc_from_json(self, json, doc_id=None):
+ """
+ Create a new document.
+
+ You can optionally specify the document identifier, but the document
+ must not already exist. See 'put_doc' if you want to override an
+ existing document.
+ If the database specifies a maximum document size and the document
+ exceeds it, create will fail and raise a DocumentTooBig exception.
+
+ :param json: The JSON document string
+ :type json: str
+ :param doc_id: An optional identifier specifying the document id.
+ :type doc_id:
+ :return:
+ A deferred that will fire with the new document (A SoledadDocument
+ instance)
+ :rtype: Deferred
+ """
+
+ def create_index(self, index_name, *index_expressions):
+ """
+ Create a named index, which can then be queried for future lookups.
+ Creating an index which already exists is not an error, and is cheap.
+ Creating an index which does not match the index_expressions of the
+ existing index is an error.
+ Creating an index will block until the expressions have been evaluated
+ and the index generated.
+
+ :param index_name: A unique name which can be used as a key prefix
+ :type index_name: str
+ :param index_expressions:
+ index expressions defining the index information.
+ :type index_expressions: tuple of str
+
+ Examples:
+
+ "fieldname", or "fieldname.subfieldname" to index alphabetically
+ sorted on the contents of a field.
+
+ "number(fieldname, width)", "lower(fieldname)"
+ """
+
+ def delete_index(self, index_name):
+ """
+ Remove a named index.
+
+ :param index_name: The name of the index we are removing
+ :type index_name: str
+ """
+
+ def list_indexes(self):
+ """
+ List the definitions of all known indexes.
+
+ :return: A list of [('index-name', ['field', 'field2'])] definitions.
+ :rtype: Deferred
+ """
+
+ def get_from_index(self, index_name, *key_values):
+ """
+ Return documents that match the keys supplied.
+
+ You must supply exactly the same number of values as have been defined
+ in the index. It is possible to do a prefix match by using '*' to
+ indicate a wildcard match. You can only supply '*' to trailing entries,
+ (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.)
+ It is also possible to append a '*' to the last supplied value (eg
+ 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*')
+
+ :param index_name: The index to query
+ :type index_name: str
+ :param key_values: values to match. eg, if you have
+ an index with 3 fields then you would have:
+ get_from_index(index_name, val1, val2, val3)
+ :type key_values: tuple
+ :return: List of [Document]
+ :rtype: list
+ """
+
+ def get_count_from_index(self, index_name, *key_values):
+ """
+ Return the count of the documents that match the keys and
+ values supplied.
+
+ :param index_name: The index to query
+ :type index_name: str
+ :param key_values: values to match. eg, if you have
+ an index with 3 fields then you would have:
+ get_from_index(index_name, val1, val2, val3)
+ :type key_values: tuple
+ :return: count.
+ :rtype: int
+ """
+
+ def get_range_from_index(self, index_name, start_value, end_value):
+ """
+ Return documents that fall within the specified range.
+
+ Both ends of the range are inclusive. For both start_value and
+ end_value, one must supply exactly the same number of values as have
+ been defined in the index, or pass None. In case of a single column
+ index, a string is accepted as an alternative for a tuple with a single
+ value. It is possible to do a prefix match by using '*' to indicate
+ a wildcard match. You can only supply '*' to trailing entries, (eg
+ 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also
+ possible to append a '*' to the last supplied value (eg 'val*', '*',
+ '*' or 'val', 'val*', '*', but not 'val*', 'val', '*')
+
+ :param index_name: The index to query
+ :type index_name: str
+ :param start_value: tuples of values that define the lower bound of
+ the range. eg, if you have an index with 3 fields then you would
+ have: (val1, val2, val3)
+ :type start_value: tuple
+ :param end_value: tuples of values that define the upper bound of the
+ range. eg, if you have an index with 3 fields then you would have:
+ (val1, val2, val3)
+ :type end_value: tuple
+ :return: A deferred that will fire with a list of [Document]
+ :rtype: Deferred
+ """
+
+ def get_index_keys(self, index_name):
+ """
+ Return all keys under which documents are indexed in this index.
+
+ :param index_name: The index to query
+ :type index_name: str
+ :return:
+ A deferred that will fire with a list of tuples of indexed keys.
+ :rtype: Deferred
+ """
+
+ def get_doc_conflicts(self, doc_id):
+ """
+ Get the list of conflicts for the given document.
+
+ :param doc_id: the document id
+ :type doc_id: str
+
+ :return:
+ A deferred that will fire with a list of the document entries that
+ are conflicted.
+ :rtype: Deferred
+ """
+
+ def resolve_doc(self, doc, conflicted_doc_revs):
+ """
+ Mark a document as no longer conflicted.
+
+ :param doc: a document with the new content to be inserted.
+ :type doc: SoledadDocument
+ :param conflicted_doc_revs:
+ A list of revisions that the new
+ content supersedes.
+ :type conflicted_doc_revs: list
+ """
+
+
+class ISyncableStorage(Interface):
+ """
+ I implement methods to synchronize with a remote replica.
+ """
+ replica_uid = Attribute("The uid of the local replica")
+ syncing = Attribute(
+ "Property, True if the syncer is syncing.")
+ token = Attribute("The authentication Token.")
+
+ def sync(self, defer_decryption=True):
+ """
+ Synchronize the local encrypted replica with a remote replica.
+
+ This method blocks until a syncing lock is acquired, so there are no
+ attempts of concurrent syncs from the same client replica.
+
+ :param url: the url of the target replica to sync with
+ :type url: str
+
+ :param defer_decryption:
+ Whether to defer the decryption process using the intermediate
+ database. If False, decryption will be done inline.
+ :type defer_decryption: bool
+
+ :return:
+ A deferred that will fire with the local generation before the
+ synchronisation was performed.
+ :rtype: Deferred
+ """
+
+ def stop_sync(self):
+ """
+ Stop the current syncing process.
+ """
+
+
+class ISecretsStorage(Interface):
+ """
+ I implement methods needed for initializing and accessing secrets, that are
+ synced against the Shared Recovery Database.
+ """
+ secrets_file_name = Attribute(
+ "The name of the file where the storage secrets will be stored")
+
+ storage_secret = Attribute("")
+ remote_storage_secret = Attribute("")
+ shared_db = Attribute("The shared db object")
+
+ # XXX this is used internally from secrets, so it might be good to preserve
+ # as a public boundary with other components.
+
+ # We should also probably document its interface.
+ secrets = Attribute("A SoledadSecrets object containing access to secrets")
+
+ def init_shared_db(self, server_url, uuid, creds):
+ """
+ Initialize the shared recovery database.
+
+ :param server_url:
+ :type server_url:
+ :param uuid:
+ :type uuid:
+ :param creds:
+ :type creds:
+ """
+
+ def change_passphrase(self, new_passphrase):
+ """
+ Change the passphrase that encrypts the storage secret.
+
+ :param new_passphrase: The new passphrase.
+ :type new_passphrase: unicode
+
+ :raise NoStorageSecret: Raised if there's no storage secret available.
+ """
+
+ # XXX not in use. Uncomment if we ever decide to allow
+ # multiple secrets.
+ # secret_id = Attribute("The id of the storage secret to be used")
diff --git a/client/src/leap/soledad/client/pragmas.py b/client/src/leap/soledad/client/pragmas.py
new file mode 100644
index 00000000..55397d10
--- /dev/null
+++ b/client/src/leap/soledad/client/pragmas.py
@@ -0,0 +1,379 @@
+# -*- coding: utf-8 -*-
+# pragmas.py
+# Copyright (C) 2013, 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Different pragmas used in the initialization of the SQLCipher database.
+"""
+import logging
+import string
+import threading
+import os
+
+from leap.soledad.common import soledad_assert
+
+
+logger = logging.getLogger(__name__)
+
+
+_db_init_lock = threading.Lock()
+
+
+def set_init_pragmas(conn, opts=None, extra_queries=None):
+ """
+ Set the initialization pragmas.
+
+ This includes the crypto pragmas, and any other options that must
+ be passed early to sqlcipher db.
+ """
+ soledad_assert(opts is not None)
+ extra_queries = [] if extra_queries is None else extra_queries
+ with _db_init_lock:
+ # only one execution path should initialize the db
+ _set_init_pragmas(conn, opts, extra_queries)
+
+
+def _set_init_pragmas(conn, opts, extra_queries):
+
+ sync_off = os.environ.get('LEAP_SQLITE_NOSYNC')
+ memstore = os.environ.get('LEAP_SQLITE_MEMSTORE')
+ nowal = os.environ.get('LEAP_SQLITE_NOWAL')
+
+ set_crypto_pragmas(conn, opts)
+
+ if not nowal:
+ set_write_ahead_logging(conn)
+ if sync_off:
+ set_synchronous_off(conn)
+ else:
+ set_synchronous_normal(conn)
+ if memstore:
+ set_mem_temp_store(conn)
+
+ for query in extra_queries:
+ conn.cursor().execute(query)
+
+
+def set_crypto_pragmas(db_handle, sqlcipher_opts):
+ """
+ Set cryptographic params (key, cipher, KDF number of iterations and
+ cipher page size).
+
+ :param db_handle:
+ :type db_handle:
+ :param sqlcipher_opts: options for the SQLCipherDatabase
+ :type sqlcipher_opts: SQLCipherOpts instance
+ """
+ # XXX assert CryptoOptions
+ opts = sqlcipher_opts
+ _set_key(db_handle, opts.key, opts.is_raw_key)
+ _set_cipher(db_handle, opts.cipher)
+ _set_kdf_iter(db_handle, opts.kdf_iter)
+ _set_cipher_page_size(db_handle, opts.cipher_page_size)
+
+
+def _set_key(db_handle, key, is_raw_key):
+ """
+ Set the ``key`` for use with the database.
+
+ The process of creating a new, encrypted database is called 'keying'
+ the database. SQLCipher uses just-in-time key derivation at the point
+ it is first needed for an operation. This means that the key (and any
+ options) must be set before the first operation on the database. As
+ soon as the database is touched (e.g. SELECT, CREATE TABLE, UPDATE,
+ etc.) and pages need to be read or written, the key is prepared for
+ use.
+
+ Implementation Notes:
+
+ * PRAGMA key should generally be called as the first operation on a
+ database.
+
+ :param key: The key for use with the database.
+ :type key: str
+ :param is_raw_key:
+ Whether C{key} is a raw 64-char hex string or a passphrase that should
+ be hashed to obtain the encryption key.
+ :type is_raw_key: bool
+ """
+ if is_raw_key:
+ _set_key_raw(db_handle, key)
+ else:
+ _set_key_passphrase(db_handle, key)
+
+
+def _set_key_passphrase(db_handle, passphrase):
+ """
+ Set a passphrase for encryption key derivation.
+
+ The key itself can be a passphrase, which is converted to a key using
+ PBKDF2 key derivation. The result is used as the encryption key for
+ the database. By using this method, there is no way to alter the KDF;
+ if you want to do so you should use a raw key instead and derive the
+ key using your own KDF.
+
+ :param db_handle: A handle to the SQLCipher database.
+ :type db_handle: pysqlcipher.Connection
+ :param passphrase: The passphrase used to derive the encryption key.
+ :type passphrase: str
+ """
+ db_handle.cursor().execute("PRAGMA key = '%s'" % passphrase)
+
+
+def _set_key_raw(db_handle, key):
+ """
+ Set a raw hexadecimal encryption key.
+
+ It is possible to specify an exact byte sequence using a blob literal.
+ With this method, it is the calling application's responsibility to
+ ensure that the data provided is a 64 character hex string, which will
+ be converted directly to 32 bytes (256 bits) of key data.
+
+ :param db_handle: A handle to the SQLCipher database.
+ :type db_handle: pysqlcipher.Connection
+ :param key: A 64 character hex string.
+ :type key: str
+ """
+ if not all(c in string.hexdigits for c in key):
+ raise NotAnHexString(key)
+ db_handle.cursor().execute('PRAGMA key = "x\'%s"' % key)
+
+
+def _set_cipher(db_handle, cipher='aes-256-cbc'):
+ """
+ Set the cipher and mode to use for symmetric encryption.
+
+ SQLCipher uses aes-256-cbc as the default cipher and mode of
+ operation. It is possible to change this, though not generally
+ recommended, using PRAGMA cipher.
+
+ SQLCipher makes direct use of libssl, so all cipher options available
+ to libssl are also available for use with SQLCipher. See `man enc` for
+ OpenSSL's supported ciphers.
+
+ Implementation Notes:
+
+ * PRAGMA cipher must be called after PRAGMA key and before the first
+ actual database operation or it will have no effect.
+
+ * If a non-default value is used with PRAGMA cipher to create a database,
+ it must also be called every time that database is opened.
+
+ * SQLCipher does not implement its own encryption. Instead it uses the
+ widely available and peer-reviewed OpenSSL libcrypto for all
+ cryptographic functions.
+
+ :param db_handle: A handle to the SQLCipher database.
+ :type db_handle: pysqlcipher.Connection
+ :param cipher: The cipher and mode to use.
+ :type cipher: str
+ """
+ db_handle.cursor().execute("PRAGMA cipher = '%s'" % cipher)
+
+
+def _set_kdf_iter(db_handle, kdf_iter=4000):
+ """
+ Set the number of iterations for the key derivation function.
+
+ SQLCipher uses PBKDF2 key derivation to strengthen the key and make it
+ resistant to brute force and dictionary attacks. The default
+ configuration uses 4000 PBKDF2 iterations (effectively 16,000 SHA1
+ operations). PRAGMA kdf_iter can be used to increase or decrease the
+ number of iterations used.
+
+ Implementation Notes:
+
+ * PRAGMA kdf_iter must be called after PRAGMA key and before the first
+ actual database operation or it will have no effect.
+
+ * If a non-default value is used with PRAGMA kdf_iter to create a database,
+ it must also be called every time that database is opened.
+
+ * It is not recommended to reduce the number of iterations if a
+ passphrase is in use.
+
+ :param db_handle: A handle to the SQLCipher database.
+ :type db_handle: pysqlcipher.Connection
+ :param kdf_iter: The number of iterations to use.
+ :type kdf_iter: int
+ """
+ db_handle.cursor().execute("PRAGMA kdf_iter = '%d'" % kdf_iter)
+
+
+def _set_cipher_page_size(db_handle, cipher_page_size=1024):
+ """
+ Set the page size of the encrypted database.
+
+ SQLCipher 2 introduced the new PRAGMA cipher_page_size that can be
+ used to adjust the page size for the encrypted database. The default
+ page size is 1024 bytes, but it can be desirable for some applications
+ to use a larger page size for increased performance. For instance,
+ some recent testing shows that increasing the page size can noticeably
+ improve performance (5-30%) for certain queries that manipulate a
+ large number of pages (e.g. selects without an index, large inserts in
+ a transaction, big deletes).
+
+ To adjust the page size, call the pragma immediately after setting the
+ key for the first time and each subsequent time that you open the
+ database.
+
+ Implementation Notes:
+
+ * PRAGMA cipher_page_size must be called after PRAGMA key and before
+ the first actual database operation or it will have no effect.
+
+ * If a non-default value is used with PRAGMA cipher_page_size to create a
+ database, it must also be called every time that database is opened.
+
+ :param db_handle: A handle to the SQLCipher database.
+ :type db_handle: pysqlcipher.Connection
+ :param cipher_page_size: The page size.
+ :type cipher_page_size: int
+ """
+ db_handle.cursor().execute(
+ "PRAGMA cipher_page_size = '%d'" % cipher_page_size)
+
+
+# XXX UNUSED ?
+def set_rekey(db_handle, new_key, is_raw_key):
+ """
+ Change the key of an existing encrypted database.
+
+ To change the key on an existing encrypted database, it must first be
+ unlocked with the current encryption key. Once the database is
+ readable and writeable, PRAGMA rekey can be used to re-encrypt every
+ page in the database with a new key.
+
+ * PRAGMA rekey must be called after PRAGMA key. It can be called at any
+ time once the database is readable.
+
+ * PRAGMA rekey cannot be used to encrypt a standard SQLite
+ database! It is only useful for changing the key on an existing
+ database.
+
+ * Previous versions of SQLCipher provided a PRAGMA rekey_cipher and
+ PRAGMA rekey_kdf_iter. These are deprecated and should not be
+ used. Instead, use sqlcipher_export().
+
+ :param db_handle: A handle to the SQLCipher database.
+ :type db_handle: pysqlcipher.Connection
+ :param new_key: The new key.
+ :type new_key: str
+ :param is_raw_key: Whether C{new_key} is a raw 64-char hex string or a
+ passphrase that should be hashed to obtain the encryption
+ key.
+ :type is_raw_key: bool
+ """
+ if is_raw_key:
+ _set_rekey_raw(db_handle, new_key)
+ else:
+ _set_rekey_passphrase(db_handle, new_key)
+
+
+def _set_rekey_passphrase(db_handle, passphrase):
+ """
+ Change the passphrase for encryption key derivation.
+
+ The key itself can be a passphrase, which is converted to a key using
+ PBKDF2 key derivation. The result is used as the encryption key for
+ the database.
+
+ :param db_handle: A handle to the SQLCipher database.
+ :type db_handle: pysqlcipher.Connection
+ :param passphrase: The passphrase used to derive the encryption key.
+ :type passphrase: str
+ """
+ db_handle.cursor().execute("PRAGMA rekey = '%s'" % passphrase)
+
+
+def _set_rekey_raw(db_handle, key):
+ """
+ Change the raw hexadecimal encryption key.
+
+ It is possible to specify an exact byte sequence using a blob literal.
+ With this method, it is the calling application's responsibility to
+ ensure that the data provided is a 64 character hex string, which will
+ be converted directly to 32 bytes (256 bits) of key data.
+
+ :param db_handle: A handle to the SQLCipher database.
+ :type db_handle: pysqlcipher.Connection
+ :param key: A 64 character hex string.
+ :type key: str
+ """
+ if not all(c in string.hexdigits for c in key):
+ raise NotAnHexString(key)
+ db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % key)
+
+
+def set_synchronous_off(db_handle):
+ """
+ Change the setting of the "synchronous" flag to OFF.
+ """
+ logger.debug("SQLCIPHER: SETTING SYNCHRONOUS OFF")
+ db_handle.cursor().execute('PRAGMA synchronous=OFF')
+
+
+def set_synchronous_normal(db_handle):
+ """
+ Change the setting of the "synchronous" flag to NORMAL.
+ """
+ logger.debug("SQLCIPHER: SETTING SYNCHRONOUS NORMAL")
+ db_handle.cursor().execute('PRAGMA synchronous=NORMAL')
+
+
+def set_mem_temp_store(db_handle):
+ """
+ Use an in-memory store for temporary tables.
+ """
+ logger.debug("SQLCIPHER: SETTING TEMP_STORE MEMORY")
+ db_handle.cursor().execute('PRAGMA temp_store=MEMORY')
+
+
+def set_write_ahead_logging(db_handle):
+ """
+ Enable write-ahead logging, and set the autocheckpoint to 50 pages.
+
+ Setting the autocheckpoint to a small value, we make the reads not
+ suffer too much performance degradation.
+
+ From the sqlite docs:
+
+ "There is a tradeoff between average read performance and average write
+ performance. To maximize the read performance, one wants to keep the
+ WAL as small as possible and hence run checkpoints frequently, perhaps
+ as often as every COMMIT. To maximize write performance, one wants to
+ amortize the cost of each checkpoint over as many writes as possible,
+ meaning that one wants to run checkpoints infrequently and let the WAL
+ grow as large as possible before each checkpoint. The decision of how
+ often to run checkpoints may therefore vary from one application to
+ another depending on the relative read and write performance
+ requirements of the application. The default strategy is to run a
+ checkpoint once the WAL reaches 1000 pages"
+ """
+ logger.debug("SQLCIPHER: SETTING WRITE-AHEAD LOGGING")
+ db_handle.cursor().execute('PRAGMA journal_mode=WAL')
+
+ # The optimum value can still use a little bit of tuning, but we favor
+ # small sizes of the WAL file to get fast reads, since we assume that
+ # the writes will be quick enough to not block too much.
+
+ db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50')
+
+
+class NotAnHexString(Exception):
+ """
+ Raised when trying to (raw) key the database with a non-hex string.
+ """
+ pass
diff --git a/client/src/leap/soledad/client/secrets.py b/client/src/leap/soledad/client/secrets.py
new file mode 100644
index 00000000..e89e21aa
--- /dev/null
+++ b/client/src/leap/soledad/client/secrets.py
@@ -0,0 +1,787 @@
+# -*- coding: utf-8 -*-
+# secrets.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+"""
+Soledad secrets handling.
+"""
+
+
+import os
+import scrypt
+import hmac
+import logging
+import binascii
+import errno
+
+
+from hashlib import sha256
+import simplejson as json
+
+
+from leap.soledad.common import soledad_assert
+from leap.soledad.common import soledad_assert_type
+from leap.soledad.common import document
+from leap.soledad.common import errors
+from leap.soledad.common import crypto
+from leap.soledad.client import events
+
+
+logger = logging.getLogger(name=__name__)
+
+
+#
+# Exceptions
+#
+
+
+class SecretsException(Exception):
+ """
+ Generic exception type raised by this module.
+ """
+
+
+class NoStorageSecret(SecretsException):
+ """
+ Raised when trying to use a storage secret but none is available.
+ """
+ pass
+
+
+class PassphraseTooShort(SecretsException):
+ """
+ Raised when trying to change the passphrase but the provided passphrase is
+ too short.
+ """
+
+
+class BootstrapSequenceError(SecretsException):
+ """
+ Raised when an attempt to generate a secret and store it in a recovery
+ document on server failed.
+ """
+
+
+#
+# Secrets handler
+#
+
+class SoledadSecrets(object):
+ """
+ Soledad secrets handler.
+
+ The first C{self.REMOTE_STORAGE_SECRET_LENGTH} bytes of the storage
+ secret are used for remote storage encryption. We use the next
+ C{self.LOCAL_STORAGE_SECRET_LENGTH} bytes to derive a key for local storage.
+ From these bytes, the first C{self.SALT_LENGTH} bytes are used as the
+ salt and the rest as the password for the scrypt hashing.
+ """
+
+ LOCAL_STORAGE_SECRET_LENGTH = 512
+ """
+ The length, in bytes, of the secret used to derive a passphrase for the
+ SQLCipher database.
+ """
+
+ REMOTE_STORAGE_SECRET_LENGTH = 512
+ """
+ The length, in bytes, of the secret used to derive an encryption key and a
+ MAC auth key for remote storage.
+ """
+
+ SALT_LENGTH = 64
+ """
+ The length, in bytes, of the salt used to derive the key for the storage
+ secret encryption.
+ """
+
+ GEN_SECRET_LENGTH = LOCAL_STORAGE_SECRET_LENGTH \
+ + REMOTE_STORAGE_SECRET_LENGTH \
+ + SALT_LENGTH # for sync db
+ """
+ The length, in bytes, of the secret to be generated. This includes local
+ and remote secrets, and the salt for deriving the sync db secret.
+ """
+
+ MINIMUM_PASSPHRASE_LENGTH = 6
+ """
+ The minimum length, in characters, for a passphrase. The passphrase length is
+ only checked when the user changes her passphrase, not when she
+ instantiates Soledad.
+ """
+
+ IV_SEPARATOR = ":"
+ """
+ A separator used for storing the encryption initial value prepended to the
+ ciphertext.
+ """
+
+ UUID_KEY = 'uuid'
+ STORAGE_SECRETS_KEY = 'storage_secrets'
+ ACTIVE_SECRET_KEY = 'active_secret'
+ SECRET_KEY = 'secret'
+ CIPHER_KEY = 'cipher'
+ LENGTH_KEY = 'length'
+ KDF_KEY = 'kdf'
+ KDF_SALT_KEY = 'kdf_salt'
+ KDF_LENGTH_KEY = 'kdf_length'
+ KDF_SCRYPT = 'scrypt'
+ CIPHER_AES256 = 'aes256'
+ """
+ Keys used to access storage secrets in recovery documents.
+ """
+
+ def __init__(self, uuid, passphrase, secrets_path, shared_db, crypto):
+ """
+ Initialize the secrets manager.
+
+ :param uuid: User's unique id.
+ :type uuid: str
+ :param passphrase: The passphrase for locking and unlocking encryption
+ secrets for local and remote storage.
+ :type passphrase: unicode
+ :param secrets_path: Path for storing encrypted key used for
+ symmetric encryption.
+ :type secrets_path: str
+ :param shared_db: The shared database that stores user secrets.
+ :type shared_db: leap.soledad.client.shared_db.SoledadSharedDatabase
+ :param crypto: A soledad crypto object.
+ :type crypto: SoledadCrypto
+ """
+ # XXX removed since not in use
+ # We will pick the first secret available.
+ # param secret_id: The id of the storage secret to be used.
+
+ self._uuid = uuid
+ self._passphrase = passphrase
+ self._secrets_path = secrets_path
+ self._shared_db = shared_db
+ self._crypto = crypto
+ self._secrets = {}
+
+ self._secret_id = None
+
+ def bootstrap(self):
+ """
+ Bootstrap secrets.
+
+ Soledad secrets bootstrap is the following sequence of stages:
+
+ * stage 1 - local secret loading:
+ - if secrets exist locally, load them.
+ * stage 2 - remote secret loading:
+ - else, if secrets exist in server, download them.
+ * stage 3 - secret generation:
+ - else, generate a new secret and store in server.
+
+ This method decides which bootstrap stages have already been performed
+ and performs the missing ones in order.
+
+ :raise BootstrapSequenceError: Raised when the secret generation and
+ storage on server sequence has failed for some reason.
+ """
+ # STAGE 1 - verify if secrets exist locally
+ if not self._has_secret(): # try to load from local storage.
+
+ # STAGE 2 - there are no secrets in local storage, so try to fetch
+ # encrypted secrets from server.
+ logger.info(
+ 'Trying to fetch cryptographic secrets from shared recovery '
+ 'database...')
+
+ # --- start of atomic operation in shared db ---
+
+ # obtain lock on shared db
+ token = timeout = None
+ try:
+ token, timeout = self._shared_db.lock()
+ except errors.AlreadyLockedError:
+ raise BootstrapSequenceError('Database is already locked.')
+ except errors.LockTimedOutError:
+ raise BootstrapSequenceError('Lock operation timed out.')
+
+ self._get_or_gen_crypto_secrets()
+
+ # release the lock on shared db
+ try:
+ self._shared_db.unlock(token)
+ self._shared_db.close()
+ except errors.NotLockedError:
+ # for some reason the lock expired. Despite that, secret
+ # loading or generation/storage must have been executed
+ # successfully, so we pass.
+ pass
+ except errors.InvalidTokenError:
+ # here, our lock has not only expired but also some other
+ # client application has obtained a new lock and is currently
+ # doing its thing in the shared database. Using the same
+ # reasoning as above, we assume everything went smoothly and
+ # pass.
+ pass
+ except Exception as e:
+ logger.error("Unhandled exception when unlocking shared "
+ "database.")
+ logger.exception(e)
+
+ # --- end of atomic operation in shared db ---
+
+ def _has_secret(self):
+ """
+ Return whether there is a storage secret available for use or not.
+
+ :return: Whether there's a storage secret for symmetric encryption.
+ :rtype: bool
+ """
+ logger.info("Checking if there's a secret in local storage...")
+ if (self._secret_id is None or self._secret_id not in self._secrets) \
+ and os.path.isfile(self._secrets_path):
+ try:
+ self._load_secrets() # try to load from disk
+ except IOError as e:
+ logger.warning(
+ 'IOError while loading secrets from disk: %s' % str(e))
+
+ if self.storage_secret is not None:
+ logger.info("Found a secret in local storage.")
+ return True
+
+ logger.info("Could not find a secret in local storage.")
+ return False
+
+ def _load_secrets(self):
+ """
+ Load storage secrets from local file.
+ """
+ # read storage secrets from file
+ content = None
+ with open(self._secrets_path, 'r') as f:
+ content = json.loads(f.read())
+ _, mac, active_secret = self._import_recovery_document(content)
+ # choose first secret if no secret_id was given
+ if self._secret_id is None:
+ if active_secret is None:
+ self.set_secret_id(self._secrets.items()[0][0])
+ else:
+ self.set_secret_id(active_secret)
+ # enlarge secret if needed
+ enlarged = False
+ if len(self._secrets[self._secret_id]) < self.GEN_SECRET_LENGTH:
+ gen_len = self.GEN_SECRET_LENGTH \
+ - len(self._secrets[self._secret_id])
+ new_piece = os.urandom(gen_len)
+ self._secrets[self._secret_id] += new_piece
+ enlarged = True
+ # store and save in shared db if needed
+ if not mac or enlarged:
+ self._store_secrets()
+ self._put_secrets_in_shared_db()
+
+ def _get_or_gen_crypto_secrets(self):
+ """
+ Retrieves or generates the crypto secrets.
+
+ :raises BootstrapSequenceError: Raised when unable to store secrets in
+ shared database.
+ """
+ if self._shared_db.syncable:
+ doc = self._get_secrets_from_shared_db()
+ else:
+ doc = None
+
+ if doc is not None:
+ logger.info(
+ 'Found cryptographic secrets in shared recovery '
+ 'database.')
+ _, mac, active_secret = self._import_recovery_document(doc.content)
+ if mac is False:
+ self.put_secrets_in_shared_db()
+ self._store_secrets() # save new secrets in local file
+ if self._secret_id is None:
+ if active_secret is None:
+ self.set_secret_id(self._secrets.items()[0][0])
+ else:
+ self.set_secret_id(active_secret)
+ else:
+ # STAGE 3 - there are no secrets in server also, so
+ # generate a secret and store it in remote db.
+ logger.info(
+ 'No cryptographic secrets found, creating new '
+ ' secrets...')
+ self.set_secret_id(self._gen_secret())
+
+ if self._shared_db.syncable:
+ try:
+ self._put_secrets_in_shared_db()
+ except Exception as ex:
+ # storing generated secret in shared db failed for
+ # some reason, so we erase the generated secret and
+ # raise.
+ try:
+ os.unlink(self._secrets_path)
+ except OSError as e:
+ if e.errno != errno.ENOENT:
+ # no such file or directory
+ logger.exception(e)
+ logger.exception(ex)
+ raise BootstrapSequenceError(
+ 'Could not store generated secret in the shared '
+ 'database, bailing out...')
+
+ #
+ # Shared DB related methods
+ #
+
+ def _shared_db_doc_id(self):
+ """
+ Calculate the doc_id of the document in the shared db that stores key
+ material.
+
+ :return: the hash
+ :rtype: str
+ """
+ return sha256(
+ '%s%s' %
+ (self._passphrase_as_string(), self._uuid)).hexdigest()
+
+ def _export_recovery_document(self):
+ """
+ Export the storage secrets.
+
+ A recovery document has the following structure:
+
+ {
+ 'storage_secrets': {
+ '<storage_secret id>': {
+ 'kdf': 'scrypt',
+ 'kdf_salt': '<b64 repr of salt>',
+ 'kdf_length': <key length>,
+ 'cipher': 'aes256',
+ 'length': <secret length>,
+ 'secret': '<encrypted storage_secret>',
+ },
+ },
+ 'active_secret': '<secret_id>',
+ 'kdf': 'scrypt',
+ 'kdf_salt': '<b64 repr of salt>',
+ 'kdf_length': <key length>,
+ '_mac_method': 'hmac',
+ '_mac': '<mac>'
+ }
+
+ Note that multiple storage secrets might be stored in one recovery
+ document. This method will also calculate a MAC of a string
+ representation of the secrets dictionary.
+
+ :return: The recovery document.
+ :rtype: dict
+ """
+ # create salt and key for calculating MAC
+ salt = os.urandom(self.SALT_LENGTH)
+ key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32)
+ # encrypt secrets
+ encrypted_secrets = {}
+ for secret_id in self._secrets:
+ encrypted_secrets[secret_id] = self._encrypt_storage_secret(
+ self._secrets[secret_id])
+ # create the recovery document
+ data = {
+ self.STORAGE_SECRETS_KEY: encrypted_secrets,
+ self.ACTIVE_SECRET_KEY: self._secret_id,
+ self.KDF_KEY: self.KDF_SCRYPT,
+ self.KDF_SALT_KEY: binascii.b2a_base64(salt),
+ self.KDF_LENGTH_KEY: len(key),
+ crypto.MAC_METHOD_KEY: crypto.MacMethods.HMAC,
+ crypto.MAC_KEY: hmac.new(
+ key,
+ json.dumps(encrypted_secrets, sort_keys=True),
+ sha256).hexdigest(),
+ }
+ return data
+
+ def _import_recovery_document(self, data):
+ """
+ Import storage secrets for symmetric encryption and uuid (if present)
+ from a recovery document.
+
+ Note that this method does not store the imported data on disk. For
+ that, use C{self._store_secrets()}.
+
+ :param data: The recovery document.
+ :type data: dict
+
+ :return: A tuple containing the number of imported secrets, the MAC
+ computed for the recovery document (C{None} if the document
+ carried no MAC information), and the secret_id of the last
+ active secret (C{None} if the document does not name one).
+ :rtype: (int, str or None, str or None)
+ """
+ soledad_assert(self.STORAGE_SECRETS_KEY in data)
+ # check mac of the recovery document
+ mac = None
+ if crypto.MAC_KEY in data:
+ soledad_assert(data[crypto.MAC_KEY] is not None)
+ soledad_assert(crypto.MAC_METHOD_KEY in data)
+ soledad_assert(self.KDF_KEY in data)
+ soledad_assert(self.KDF_SALT_KEY in data)
+ soledad_assert(self.KDF_LENGTH_KEY in data)
+ if data[crypto.MAC_METHOD_KEY] == crypto.MacMethods.HMAC:
+ key = scrypt.hash(
+ self._passphrase_as_string(),
+ binascii.a2b_base64(data[self.KDF_SALT_KEY]),
+ buflen=32)
+ mac = hmac.new(
+ key,
+ json.dumps(
+ data[self.STORAGE_SECRETS_KEY], sort_keys=True),
+ sha256).hexdigest()
+ else:
+ raise crypto.UnknownMacMethodError('Unknown MAC method: %s.' %
+ data[crypto.MAC_METHOD_KEY])
+ if mac != data[crypto.MAC_KEY]:
+ raise crypto.WrongMacError('Could not authenticate recovery document\'s '
+ 'contents.')
+ # include secrets in the secret pool.
+ secret_count = 0
+ secrets = data[self.STORAGE_SECRETS_KEY].items()
+ active_secret = None
+ # XXX remove check for existence of key (included for backwards
+ # compatibility)
+ if self.ACTIVE_SECRET_KEY in data:
+ active_secret = data[self.ACTIVE_SECRET_KEY]
+ for secret_id, encrypted_secret in secrets:
+ if secret_id not in self._secrets:
+ try:
+ self._secrets[secret_id] = \
+ self._decrypt_storage_secret(encrypted_secret)
+ secret_count += 1
+ except SecretsException as e:
+ logger.error("Failed to decrypt storage secret: %s"
+ % str(e))
+ return secret_count, mac, active_secret
+
+ def _get_secrets_from_shared_db(self):
+ """
+ Retrieve the document with encrypted key material from the shared
+ database.
+
+ :return: a document with encrypted key material in its contents
+ :rtype: document.SoledadDocument
+ """
+ events.emit(events.SOLEDAD_DOWNLOADING_KEYS, self._uuid)
+ db = self._shared_db
+ if not db:
+ logger.warning('No shared db found')
+ return
+ doc = db.get_doc(self._shared_db_doc_id())
+ events.emit(events.SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid)
+ return doc
+
+ def _put_secrets_in_shared_db(self):
+ """
+ Upload the local secrets to the shared recovery database.
+
+ Try to fetch the secrets document from the shared recovery database,
+ creating a new document if none exists there yet. Then replace its
+ content with the exported recovery document and upload it.
+ """
+ soledad_assert(
+ self._has_secret(),
+ 'Tried to send keys to server but they don\'t exist in local '
+ 'storage.')
+ # try to get secrets doc from server, otherwise create it
+ doc = self._get_secrets_from_shared_db()
+ if doc is None:
+ doc = document.SoledadDocument(
+ doc_id=self._shared_db_doc_id())
+ # fill doc with encrypted secrets
+ doc.content = self._export_recovery_document()
+ # upload secrets to server
+ events.emit(events.SOLEDAD_UPLOADING_KEYS, self._uuid)
+ db = self._shared_db
+ if not db:
+ logger.warning('No shared db found')
+ return
+ db.put_doc(doc)
+ events.emit(events.SOLEDAD_DONE_UPLOADING_KEYS, self._uuid)
+
+ #
+ # Management of secret for symmetric encryption.
+ #
+
+ def _decrypt_storage_secret(self, encrypted_secret_dict):
+ """
+ Decrypt the storage secret.
+
+ Storage secret is encrypted before being stored. This method decrypts
+ and returns the decrypted storage secret.
+
+ :param encrypted_secret_dict: The encrypted storage secret.
+ :type encrypted_secret_dict: dict
+
+ :return: The decrypted storage secret.
+ :rtype: str
+
+ :raise SecretsException: Raised in case the decryption of the storage
+ secret fails for some reason.
+ """
+ # calculate the encryption key
+ if encrypted_secret_dict[self.KDF_KEY] != self.KDF_SCRYPT:
+ raise SecretsException("Unknown KDF in stored secret.")
+ key = scrypt.hash(
+ self._passphrase_as_string(),
+ # the salt is stored base64 encoded
+ binascii.a2b_base64(
+ encrypted_secret_dict[self.KDF_SALT_KEY]),
+ buflen=32, # we need a key with 256 bits (32 bytes).
+ )
+ if encrypted_secret_dict[self.KDF_LENGTH_KEY] != len(key):
+ raise SecretsException("Wrong length of decryption key.")
+ if encrypted_secret_dict[self.CIPHER_KEY] != self.CIPHER_AES256:
+ raise SecretsException("Unknown cipher in stored secret.")
+ # recover the initial value and ciphertext
+ iv, ciphertext = encrypted_secret_dict[self.SECRET_KEY].split(
+ self.IV_SEPARATOR, 1)
+ ciphertext = binascii.a2b_base64(ciphertext)
+ decrypted_secret = self._crypto.decrypt_sym(ciphertext, key, iv=iv)
+ if encrypted_secret_dict[self.LENGTH_KEY] != len(decrypted_secret):
+ raise SecretsException("Wrong length of decrypted secret.")
+ return decrypted_secret
+
+ def _encrypt_storage_secret(self, decrypted_secret):
+ """
+ Encrypt the storage secret.
+
+ An encrypted secret has the following structure:
+
+ {
+ '<secret_id>': {
+ 'kdf': 'scrypt',
+ 'kdf_salt': '<b64 repr of salt>',
+ 'kdf_length': <key length>,
+ 'cipher': 'aes256',
+ 'length': <secret length>,
+ 'secret': '<encrypted b64 repr of storage_secret>',
+ }
+ }
+
+ :param decrypted_secret: The decrypted storage secret.
+ :type decrypted_secret: str
+
+ :return: The encrypted storage secret.
+ :rtype: dict
+ """
+ # generate random salt
+ salt = os.urandom(self.SALT_LENGTH)
+ # get a 256-bit key
+ key = scrypt.hash(self._passphrase_as_string(), salt, buflen=32)
+ iv, ciphertext = self._crypto.encrypt_sym(decrypted_secret, key)
+ encrypted_secret_dict = {
+ # leap.soledad.crypto submodule uses AES256 for symmetric
+ # encryption.
+ self.KDF_KEY: self.KDF_SCRYPT,
+ self.KDF_SALT_KEY: binascii.b2a_base64(salt),
+ self.KDF_LENGTH_KEY: len(key),
+ self.CIPHER_KEY: self.CIPHER_AES256,
+ self.LENGTH_KEY: len(decrypted_secret),
+ self.SECRET_KEY: '%s%s%s' % (
+ str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)),
+ }
+ return encrypted_secret_dict
+
+ @property
+ def storage_secret(self):
+ """
+ Return the storage secret.
+
+ :return: The decrypted storage secret.
+ :rtype: str
+ """
+ return self._secrets.get(self._secret_id)
+
+ def set_secret_id(self, secret_id):
+ """
+ Define the id of the storage secret to be used.
+
+ This method will also replace the secret in the crypto object.
+
+ :param secret_id: The id of the storage secret to be used.
+ :type secret_id: str
+ """
+ self._secret_id = secret_id
+
+ def _gen_secret(self):
+ """
+ Generate a secret for symmetric encryption and store in a local
+ encrypted file.
+
+ This method emits the following events.signals:
+
+ * SOLEDAD_CREATING_KEYS
+ * SOLEDAD_DONE_CREATING_KEYS
+
+ :return: The id of the generated secret.
+ :rtype: str
+ """
+ events.emit(events.SOLEDAD_CREATING_KEYS, self._uuid)
+ # generate random secret
+ secret = os.urandom(self.GEN_SECRET_LENGTH)
+ secret_id = sha256(secret).hexdigest()
+ self._secrets[secret_id] = secret
+ self._store_secrets()
+ events.emit(events.SOLEDAD_DONE_CREATING_KEYS, self._uuid)
+ return secret_id
+
+ def _store_secrets(self):
+ """
+ Store the JSON-encoded recovery document in the file at
+ C{self._secrets_path}.
+ """
+ with open(self._secrets_path, 'w') as f:
+ f.write(
+ json.dumps(
+ self._export_recovery_document()))
+
+ def change_passphrase(self, new_passphrase):
+ """
+ Change the passphrase that encrypts the storage secret.
+
+ :param new_passphrase: The new passphrase.
+ :type new_passphrase: unicode
+
+ :raise NoStorageSecret: Raised if there's no storage secret available.
+ """
+ # TODO: maybe we want to add more checks to guarantee passphrase is
+ # reasonable?
+ soledad_assert_type(new_passphrase, unicode)
+ if len(new_passphrase) < self.MINIMUM_PASSPHRASE_LENGTH:
+ raise PassphraseTooShort(
+ 'Passphrase must be at least %d characters long!' %
+ self.MINIMUM_PASSPHRASE_LENGTH)
+ # ensure there's a secret for which the passphrase will be changed.
+ if not self._has_secret():
+ raise NoStorageSecret()
+ self._passphrase = new_passphrase
+ self._store_secrets()
+ self._put_secrets_in_shared_db()
+
+ #
+ # Setters and getters
+ #
+
+ @property
+ def secret_id(self):
+ return self._secret_id
+
+ def _get_secrets_path(self):
+ return self._secrets_path
+
+ def _set_secrets_path(self, secrets_path):
+ self._secrets_path = secrets_path
+
+ secrets_path = property(
+ _get_secrets_path,
+ _set_secrets_path,
+ doc='The path for the file containing the encrypted symmetric secret.')
+
+ @property
+ def passphrase(self):
+ """
+ Return the passphrase for locking and unlocking encryption secrets for
+ local and remote storage.
+ """
+ return self._passphrase
+
+ def _passphrase_as_string(self):
+ return self._passphrase.encode('utf-8')
+
+ #
+ # remote storage secret
+ #
+
+ @property
+ def remote_storage_secret(self):
+ """
+ Return the secret for remote storage.
+ """
+ key_start = 0
+ key_end = self.REMOTE_STORAGE_SECRET_LENGTH
+ return self.storage_secret[key_start:key_end]
+
+ #
+ # local storage key
+ #
+
+ def _get_local_storage_secret(self):
+ """
+ Return the local storage secret.
+
+ :return: The local storage secret.
+ :rtype: str
+ """
+ secret_len = self.REMOTE_STORAGE_SECRET_LENGTH
+ lsecret_len = self.LOCAL_STORAGE_SECRET_LENGTH
+ pwd_start = secret_len + self.SALT_LENGTH
+ pwd_end = secret_len + lsecret_len
+ return self.storage_secret[pwd_start:pwd_end]
+
+ def _get_local_storage_salt(self):
+ """
+ Return the local storage salt.
+
+ :return: The local storage salt.
+ :rtype: str
+ """
+ salt_start = self.REMOTE_STORAGE_SECRET_LENGTH
+ salt_end = salt_start + self.SALT_LENGTH
+ return self.storage_secret[salt_start:salt_end]
+
+ def get_local_storage_key(self):
+ """
+ Return the local storage key derived from the local storage secret.
+
+ :return: The key for protecting the local database.
+ :rtype: str
+ """
+ return scrypt.hash(
+ password=self._get_local_storage_secret(),
+ salt=self._get_local_storage_salt(),
+ buflen=32, # we need a key with 256 bits (32 bytes)
+ )
+
+ #
+ # sync db key
+ #
+
+ def _get_sync_db_salt(self):
+ """
+ Return the salt for sync db.
+ """
+ salt_start = self.LOCAL_STORAGE_SECRET_LENGTH \
+ + self.REMOTE_STORAGE_SECRET_LENGTH
+ salt_end = salt_start + self.SALT_LENGTH
+ return self.storage_secret[salt_start:salt_end]
+
+ def get_sync_db_key(self):
+ """
+ Return the key for protecting the sync database.
+
+ :return: The key for protecting the sync database.
+ :rtype: str
+ """
+ return scrypt.hash(
+ password=self._get_local_storage_secret(),
+ salt=self._get_sync_db_salt(),
+ buflen=32, # we need a key with 256 bits (32 bytes)
+ )
diff --git a/client/src/leap/soledad/client/shared_db.py b/client/src/leap/soledad/client/shared_db.py
index 52e51c6f..f1a2642e 100644
--- a/client/src/leap/soledad/client/shared_db.py
+++ b/client/src/leap/soledad/client/shared_db.py
@@ -14,19 +14,11 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
"""
A shared database for storing/retrieving encrypted key material.
"""
-
-import simplejson as json
-
-
from u1db.remote import http_database
-
-from leap.soledad.common import SHARED_DB_LOCK_DOC_ID_PREFIX
from leap.soledad.client.auth import TokenBasedAuth
@@ -34,6 +26,9 @@ from leap.soledad.client.auth import TokenBasedAuth
# Soledad shared database
# ----------------------------------------------------------------------------
+# TODO could have a hierarchy of soledad exceptions.
+
+
class NoTokenForAuth(Exception):
"""
No token was found for token-based authentication.
@@ -46,6 +41,12 @@ class Unauthorized(Exception):
"""
+class ImproperlyConfiguredError(Exception):
+ """
+ Wrong parameters in the database configuration.
+ """
+
+
class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth):
"""
This is a shared recovery database that enables users to store their
@@ -54,6 +55,10 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth):
# TODO: prevent client from messing with the shared DB.
# TODO: define and document API.
+ # If syncable is False, the database will not attempt to sync against
+ # a remote replica. Default is True.
+ syncable = True
+
#
# Token auth methods.
#
@@ -90,9 +95,7 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth):
#
@staticmethod
- def open_database(url, uuid, create, creds=None):
- # TODO: users should not be able to create the shared database, so we
- # have to remove this from here in the future.
+ def open_database(url, uuid, creds=None, syncable=True):
"""
Open a Soledad shared database.
@@ -100,17 +103,23 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth):
:type url: str
:param uuid: The user's unique id.
:type uuid: str
- :param create: Should the database be created if it does not already
- exist?
- :type create: bool
- :param token: An authentication token for accessing the shared db.
- :type token: str
+ :param creds: A tuple containing the authentication method and
+ credentials.
+ :type creds: tuple
+ :param syncable:
+ If syncable is False, the database will not attempt to sync against
+ a remote replica.
+ :type syncable: bool
:return: The shared database in the given url.
:rtype: SoledadSharedDatabase
"""
+ # XXX fix below, doesn't work with tests.
+ #if syncable and not url.startswith('https://'):
+ # raise ImproperlyConfiguredError(
+ # "Remote soledad server must be an https URI")
db = SoledadSharedDatabase(url, uuid, creds=creds)
- db.open(create)
+ db.syncable = syncable
return db
@staticmethod
@@ -153,9 +162,12 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth):
:raise HTTPError: Raised if any HTTP error occurs.
"""
- res, headers = self._request_json('PUT', ['lock', self._uuid],
- body={})
- return res['token'], res['timeout']
+ if self.syncable:
+ res, headers = self._request_json(
+ 'PUT', ['lock', self._uuid], body={})
+ return res['token'], res['timeout']
+ else:
+ return None, None
def unlock(self, token):
"""
@@ -166,5 +178,6 @@ class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth):
:raise HTTPError:
"""
- res, headers = self._request_json('DELETE', ['lock', self._uuid],
- params={'token': token})
+ if self.syncable:
+ _, _ = self._request_json(
+ 'DELETE', ['lock', self._uuid], params={'token': token})
diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py
index fded2119..b2025130 100644
--- a/client/src/leap/soledad/client/sqlcipher.py
+++ b/client/src/leap/soledad/client/sqlcipher.py
@@ -42,261 +42,120 @@ SQLCipher 1.1 databases, we do not implement them as all SQLCipher databases
handled by Soledad should be created by SQLCipher >= 2.0.
"""
import logging
-import multiprocessing
import os
-import sqlite3
-import string
import threading
-import time
import json
+import u1db
+
+from u1db import errors as u1db_errors
+from u1db.backends import sqlite_backend
from hashlib import sha256
from contextlib import contextmanager
from collections import defaultdict
+from functools import partial
-from pysqlcipher import dbapi2
-from u1db.backends import sqlite_backend
-from u1db import errors as u1db_errors
-from taskthread import TimerTask
+from pysqlcipher import dbapi2 as sqlcipher_dbapi2
+
+from twisted.internet import reactor
+from twisted.internet.threads import deferToThreadPool
+from twisted.python.threadpool import ThreadPool
+from twisted.enterprise import adbapi
-from leap.soledad.client.crypto import SyncEncrypterPool, SyncDecrypterPool
-from leap.soledad.client.target import SoledadSyncTarget
-from leap.soledad.client.target import PendingReceivedDocsSyncError
+from leap.soledad.client import encdecpool
+from leap.soledad.client.http_target import SoledadHTTPSyncTarget
from leap.soledad.client.sync import SoledadSynchronizer
+
+from leap.soledad.client import pragmas
+from leap.soledad.common import soledad_assert
from leap.soledad.common.document import SoledadDocument
logger = logging.getLogger(__name__)
-# Monkey-patch u1db.backends.sqlite_backend with pysqlcipher.dbapi2
-sqlite_backend.dbapi2 = dbapi2
-
-# It seems that, as long as we are not using old sqlite versions, serialized
-# mode is enabled by default at compile time. So accessing db connections from
-# different threads should be safe, as long as no attempt is made to use them
-# from multiple threads with no locking.
-# See https://sqlite.org/threadsafe.html
-# and http://bugs.python.org/issue16509
-
-SQLITE_CHECK_SAME_THREAD = False
-
-# We set isolation_level to None to setup autocommit mode.
-# See: http://docs.python.org/2/library/sqlite3.html#controlling-transactions
-# This avoids problems with sequential operations using the same soledad object
-# trying to open new transactions
-# (The error was:
-# OperationalError:cannot start a transaction within a transaction.)
-SQLITE_ISOLATION_LEVEL = None
-
-
-def open(path, password, create=True, document_factory=None, crypto=None,
- raw_key=False, cipher='aes-256-cbc', kdf_iter=4000,
- cipher_page_size=1024, defer_encryption=False):
- """Open a database at the given location.
-
- Will raise u1db.errors.DatabaseDoesNotExist if create=False and the
- database does not already exist.
-
- :param path: The filesystem path for the database to open.
- :type path: str
- :param create: True/False, should the database be created if it doesn't
- already exist?
- :param create: bool
- :param document_factory: A function that will be called with the same
- parameters as Document.__init__.
- :type document_factory: callable
- :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
- document contents when syncing.
- :type crypto: soledad.crypto.SoledadCrypto
- :param raw_key: Whether C{password} is a raw 64-char hex string or a
- passphrase that should be hashed to obtain the encyrption key.
- :type raw_key: bool
- :param cipher: The cipher and mode to use.
- :type cipher: str
- :param kdf_iter: The number of iterations to use.
- :type kdf_iter: int
- :param cipher_page_size: The page size.
- :type cipher_page_size: int
- :param defer_encryption: Whether to defer encryption/decryption of
- documents, or do it inline while syncing.
- :type defer_encryption: bool
-
- :return: An instance of Database.
- :rtype SQLCipherDatabase
- """
- return SQLCipherDatabase.open_database(
- path, password, create=create, document_factory=document_factory,
- crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter,
- cipher_page_size=cipher_page_size, defer_encryption=defer_encryption)
+# Monkey-patch u1db.backends.sqlite_backend with pysqlcipher.dbapi2
+sqlite_backend.dbapi2 = sqlcipher_dbapi2
-#
-# Exceptions
-#
-class DatabaseIsNotEncrypted(Exception):
- """
- Exception raised when trying to open non-encrypted databases.
+def initialize_sqlcipher_db(opts, on_init=None, check_same_thread=True):
"""
- pass
-
+ Initialize a SQLCipher database.
-class NotAnHexString(Exception):
+ :param opts: options for initialization of the SQLCipher database.
+ :type opts: SQLCipherOptions
+ :param on_init: a tuple of queries to be executed on initialization
+ :type on_init: tuple
+ :return: pysqlcipher.dbapi2.Connection
"""
- Raised when trying to (raw) key the database with a non-hex string.
- """
- pass
+ # Note: There seemed to be a bug in sqlite 3.5.9 (with python2.6)
+ # where without re-opening the database on Windows, it
+ # doesn't see the transaction that was just committed.
+ # The re-opening workaround was removed from here; look at the
+ # pysqlite implementation if the bug shows up on Windows.
+ if not os.path.isfile(opts.path) and not opts.create:
+ raise u1db_errors.DatabaseDoesNotExist()
-#
-# The SQLCipher database
-#
+ conn = sqlcipher_dbapi2.connect(
+ opts.path, check_same_thread=check_same_thread)
+ pragmas.set_init_pragmas(conn, opts, extra_queries=on_init)
+ return conn
-class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
- """
- A U1DB implementation that uses SQLCipher as its persistence layer.
- """
- defer_encryption = False
- _index_storage_value = 'expand referenced encrypted'
- k_lock = threading.Lock()
- create_doc_lock = threading.Lock()
- update_indexes_lock = threading.Lock()
- _sync_watcher = None
- _sync_enc_pool = None
+def initialize_sqlcipher_adbapi_db(opts, extra_queries=None):
+ from leap.soledad.client import sqlcipher_adbapi
+ return sqlcipher_adbapi.getConnectionPool(
+ opts, extra_queries=extra_queries)
- """
- The name of the local symmetrically encrypted documents to
- sync database file.
- """
- LOCAL_SYMMETRIC_SYNC_FILE_NAME = 'sync.u1db'
+class SQLCipherOptions(object):
"""
- A dictionary that hold locks which avoid multiple sync attempts from the
- same database replica.
- """
- encrypting_lock = threading.Lock()
-
- """
- Period or recurrence of the periodic encrypting task, in seconds.
- """
- ENCRYPT_TASK_PERIOD = 1
-
- syncing_lock = defaultdict(threading.Lock)
- """
- A dictionary that hold locks which avoid multiple sync attempts from the
- same database replica.
+ A container with options for the initialization of an SQLCipher database.
"""
- def __init__(self, sqlcipher_file, password, document_factory=None,
- crypto=None, raw_key=False, cipher='aes-256-cbc',
- kdf_iter=4000, cipher_page_size=1024):
+ @classmethod
+ def copy(cls, source, path=None, key=None, create=None,
+ is_raw_key=None, cipher=None, kdf_iter=None,
+ cipher_page_size=None, defer_encryption=None, sync_db_key=None):
"""
- Connect to an existing SQLCipher database, creating a new sqlcipher
- database file if needed.
-
- :param sqlcipher_file: The path for the SQLCipher file.
- :type sqlcipher_file: str
- :param password: The password that protects the SQLCipher db.
- :type password: str
- :param document_factory: A function that will be called with the same
- parameters as Document.__init__.
- :type document_factory: callable
- :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
- document contents when syncing.
- :type crypto: soledad.crypto.SoledadCrypto
- :param raw_key: Whether password is a raw 64-char hex string or a
- passphrase that should be hashed to obtain the
- encyrption key.
- :type raw_key: bool
- :param cipher: The cipher and mode to use.
- :type cipher: str
- :param kdf_iter: The number of iterations to use.
- :type kdf_iter: int
- :param cipher_page_size: The page size.
- :type cipher_page_size: int
+ Return a copy of C{source} in which every parameter that is not
+ None replaces the corresponding value of C{source}.
"""
- # ensure the db is encrypted if the file already exists
- if os.path.exists(sqlcipher_file):
- self.assert_db_is_encrypted(
- sqlcipher_file, password, raw_key, cipher, kdf_iter,
- cipher_page_size)
+ local_vars = locals()
+ args = []
+ kwargs = {}
- # connect to the sqlcipher database
- with self.k_lock:
- self._db_handle = dbapi2.connect(
- sqlcipher_file,
- isolation_level=SQLITE_ISOLATION_LEVEL,
- check_same_thread=SQLITE_CHECK_SAME_THREAD)
- # set SQLCipher cryptographic parameters
- self._set_crypto_pragmas(
- self._db_handle, password, raw_key, cipher, kdf_iter,
- cipher_page_size)
- if os.environ.get('LEAP_SQLITE_NOSYNC'):
- self._pragma_synchronous_off(self._db_handle)
+ for name in ["path", "key"]:
+ val = local_vars[name]
+ if val is not None:
+ args.append(val)
else:
- self._pragma_synchronous_normal(self._db_handle)
- if os.environ.get('LEAP_SQLITE_MEMSTORE'):
- self._pragma_mem_temp_store(self._db_handle)
- self._pragma_write_ahead_logging(self._db_handle)
- self._real_replica_uid = None
- self._ensure_schema()
- self._crypto = crypto
-
- self._sync_db = None
- self._sync_db_write_lock = None
- self._sync_enc_pool = None
+ args.append(getattr(source, name))
- if self.defer_encryption:
- if sqlcipher_file != ":memory:":
- self._sync_db_path = "%s-sync" % sqlcipher_file
+ for name in ["create", "is_raw_key", "cipher", "kdf_iter",
+ "cipher_page_size", "defer_encryption", "sync_db_key"]:
+ val = local_vars[name]
+ if val is not None:
+ kwargs[name] = val
else:
- self._sync_db_path = ":memory:"
-
- # initialize sync db
- self._init_sync_db()
-
- # initialize syncing queue encryption pool
- self._sync_enc_pool = SyncEncrypterPool(
- self._crypto, self._sync_db, self._sync_db_write_lock)
- self._sync_watcher = TimerTask(self._encrypt_syncing_docs,
- self.ENCRYPT_TASK_PERIOD)
- self._sync_watcher.start()
-
- def factory(doc_id=None, rev=None, json='{}', has_conflicts=False,
- syncable=True):
- return SoledadDocument(doc_id=doc_id, rev=rev, json=json,
- has_conflicts=has_conflicts,
- syncable=syncable)
- self.set_document_factory(factory)
- # we store syncers in a dictionary indexed by the target URL. We also
- # store a hash of the auth info in case auth info expires and we need
- # to rebuild the syncer for that target. The final self._syncers
- # format is the following:
- #
- # self._syncers = {'<url>': ('<auth_hash>', syncer), ...}
- self._syncers = {}
-
- @classmethod
- def _open_database(cls, sqlcipher_file, password, document_factory=None,
- crypto=None, raw_key=False, cipher='aes-256-cbc',
- kdf_iter=4000, cipher_page_size=1024,
- defer_encryption=False):
- """
- Open a SQLCipher database.
-
- :param sqlcipher_file: The path for the SQLCipher file.
- :type sqlcipher_file: str
- :param password: The password that protects the SQLCipher db.
- :type password: str
- :param document_factory: A function that will be called with the same
- parameters as Document.__init__.
- :type document_factory: callable
- :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
- document contents when syncing.
- :type crypto: soledad.crypto.SoledadCrypto
- :param raw_key: Whether C{password} is a raw 64-char hex string or a
- passphrase that should be hashed to obtain the encyrption key.
+ kwargs[name] = getattr(source, name)
+
+ return SQLCipherOptions(*args, **kwargs)
+
+ def __init__(self, path, key, create=True, is_raw_key=False,
+ cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024,
+ defer_encryption=False, sync_db_key=None):
+ """
+ :param path: The filesystem path for the database to open.
+ :type path: str
+ :param create:
+ True/False, should the database be created if it doesn't
+ already exist?
+ :type create: bool
+ :param is_raw_key:
+ Whether ``key`` is a raw 64-char hex string or a passphrase
+ that should be hashed to obtain the encryption key.
:type raw_key: bool
:param cipher: The cipher and mode to use.
:type cipher: str
@@ -304,233 +163,93 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
:type kdf_iter: int
:param cipher_page_size: The page size.
:type cipher_page_size: int
- :param defer_encryption: Whether to defer encryption/decryption of
- documents, or do it inline while syncing.
+ :param defer_encryption:
+ Whether to defer encryption/decryption of documents, or do it
+ inline while syncing.
:type defer_encryption: bool
-
- :return: The database object.
- :rtype: SQLCipherDatabase
"""
- cls.defer_encryption = defer_encryption
- if not os.path.isfile(sqlcipher_file):
- raise u1db_errors.DatabaseDoesNotExist()
-
- tries = 2
- # Note: There seems to be a bug in sqlite 3.5.9 (with python2.6)
- # where without re-opening the database on Windows, it
- # doesn't see the transaction that was just committed
- while True:
-
- with cls.k_lock:
- db_handle = dbapi2.connect(
- sqlcipher_file,
- check_same_thread=SQLITE_CHECK_SAME_THREAD)
-
- try:
- # set cryptographic params
- cls._set_crypto_pragmas(
- db_handle, password, raw_key, cipher, kdf_iter,
- cipher_page_size)
- c = db_handle.cursor()
- # XXX if we use it here, it should be public
- v, err = cls._which_index_storage(c)
- except Exception as exc:
- logger.warning("ERROR OPENING DATABASE!")
- logger.debug("error was: %r" % exc)
- v, err = None, exc
- finally:
- db_handle.close()
- if v is not None:
- break
- # possibly another process is initializing it, wait for it to be
- # done
- if tries == 0:
- raise err # go for the richest error?
- tries -= 1
- time.sleep(cls.WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL)
- return SQLCipherDatabase._sqlite_registry[v](
- sqlcipher_file, password, document_factory=document_factory,
- crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter,
- cipher_page_size=cipher_page_size)
+ self.path = path
+ self.key = key
+ self.is_raw_key = is_raw_key
+ self.create = create
+ self.cipher = cipher
+ self.kdf_iter = kdf_iter
+ self.cipher_page_size = cipher_page_size
+ self.defer_encryption = defer_encryption
+ self.sync_db_key = sync_db_key
- @classmethod
- def open_database(cls, sqlcipher_file, password, create, backend_cls=None,
- document_factory=None, crypto=None, raw_key=False,
- cipher='aes-256-cbc', kdf_iter=4000,
- cipher_page_size=1024, defer_encryption=False):
+ def __str__(self):
"""
- Open a SQLCipher database.
-
- :param sqlcipher_file: The path for the SQLCipher file.
- :type sqlcipher_file: str
-
- :param password: The password that protects the SQLCipher db.
- :type password: str
-
- :param create: Should the datbase be created if it does not already
- exist?
- :type create: bool
-
- :param backend_cls: A class to use as backend.
- :type backend_cls: type
+ Return string representation of options, for easy debugging.
- :param document_factory: A function that will be called with the same
- parameters as Document.__init__.
- :type document_factory: callable
-
- :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
- document contents when syncing.
- :type crypto: soledad.crypto.SoledadCrypto
-
- :param raw_key: Whether C{password} is a raw 64-char hex string or a
- passphrase that should be hashed to obtain the
- encyrption key.
- :type raw_key: bool
+ :return: String representation of options.
+ :rtype: str
+ """
+ attr_names = filter(lambda a: not a.startswith('_'), dir(self))
+ attr_str = []
+ for a in attr_names:
+ attr_str.append(a + "=" + str(getattr(self, a)))
+ name = self.__class__.__name__
+ return "%s(%s)" % (name, ', '.join(attr_str))
- :param cipher: The cipher and mode to use.
- :type cipher: str
- :param kdf_iter: The number of iterations to use.
- :type kdf_iter: int
+#
+# The SQLCipher database
+#
- :param cipher_page_size: The page size.
- :type cipher_page_size: int
+class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
+ """
+ A U1DB implementation that uses SQLCipher as its persistence layer.
+ """
+ defer_encryption = False
- :param defer_encryption: Whether to defer encryption/decryption of
- documents, or do it inline while syncing.
- :type defer_encryption: bool
+ # The attribute _index_storage_value will be used as the lookup key for the
+ # implementation of the SQLCipher storage backend.
+ _index_storage_value = 'expand referenced encrypted'
- :return: The database object.
- :rtype: SQLCipherDatabase
- """
- cls.defer_encryption = defer_encryption
- try:
- return cls._open_database(
- sqlcipher_file, password, document_factory=document_factory,
- crypto=crypto, raw_key=raw_key, cipher=cipher,
- kdf_iter=kdf_iter, cipher_page_size=cipher_page_size,
- defer_encryption=defer_encryption)
- except u1db_errors.DatabaseDoesNotExist:
- if not create:
- raise
- # TODO: remove backend class from here.
- if backend_cls is None:
- # default is SQLCipherPartialExpandDatabase
- backend_cls = SQLCipherDatabase
- return backend_cls(
- sqlcipher_file, password, document_factory=document_factory,
- crypto=crypto, raw_key=raw_key, cipher=cipher,
- kdf_iter=kdf_iter, cipher_page_size=cipher_page_size)
-
- def sync(self, url, creds=None, autocreate=True, defer_decryption=True):
+ def __init__(self, opts):
"""
- Synchronize documents with remote replica exposed at url.
+ Connect to an existing SQLCipher database, creating a new sqlcipher
+ database file if needed.
- There can be at most one instance syncing the same database replica at
- the same time, so this method will block until the syncing lock can be
- acquired.
+ *** IMPORTANT ***
- :param url: The url of the target replica to sync with.
- :type url: str
- :param creds: optional dictionary giving credentials.
- to authorize the operation with the server.
- :type creds: dict
- :param autocreate: Ask the target to create the db if non-existent.
- :type autocreate: bool
- :param defer_decryption: Whether to defer the decryption process using
- the intermediate database. If False,
- decryption will be done inline.
- :type defer_decryption: bool
+ Don't forget to close the database after use by calling the close()
+ method; otherwise some resources might not be freed and you may
+ experience several kinds of leaks.
- :return: The local generation before the synchronisation was performed.
- :rtype: int
- """
- res = None
- # the following context manager blocks until the syncing lock can be
- # acquired.
- with self.syncer(url, creds=creds) as syncer:
+ *** IMPORTANT ***
- # XXX could mark the critical section here...
- try:
- if defer_decryption and not self.defer_encryption:
- logger.warning("Can't defer decryption without first having "
- "created a sync db. Falling back to normal "
- "syncing mode.")
- defer_decryption = False
- res = syncer.sync(autocreate=autocreate,
- defer_decryption=defer_decryption)
-
- except PendingReceivedDocsSyncError:
- logger.warning("Local sync db is not clear, skipping sync...")
- return
-
- return res
-
- def stop_sync(self):
+ :param opts: options for initialization of the SQLCipher database.
+ :type opts: SQLCipherOptions
"""
- Interrupt all ongoing syncs.
- """
- for url in self._syncers:
- _, syncer = self._syncers[url]
- syncer.stop()
+ # ensure the db is encrypted if the file already exists
+ if os.path.isfile(opts.path):
+ _assert_db_is_encrypted(opts)
- @contextmanager
- def syncer(self, url, creds=None):
- """
- Accesor for synchronizer.
+ # connect to the sqlcipher database
+ self._db_handle = initialize_sqlcipher_db(opts)
- As we reuse the same synchronizer for every sync, there can be only
- one instance synchronizing the same database replica at the same time.
- Because of that, this method blocks until the syncing lock can be
- acquired.
- """
- with SQLCipherDatabase.syncing_lock[self._get_replica_uid()]:
- syncer = self._get_syncer(url, creds=creds)
- yield syncer
+ # TODO ---------------------------------------------------
+ # Everything else in this initialization has to be factored
+ # out, so it can be used from SoledadSQLCipherWrapper.__init__
+ # too.
+ # ---------------------------------------------------------
- @property
- def syncing(self):
- lock = SQLCipherDatabase.syncing_lock[self._get_replica_uid()]
- acquired_lock = lock.acquire(False)
- if acquired_lock is False:
- return True
- lock.release()
- return False
+ self._ensure_schema()
+ self.set_document_factory(soledad_doc_factory)
+ self._prime_replica_uid()
- def _get_syncer(self, url, creds=None):
+ def _prime_replica_uid(self):
"""
- Get a synchronizer for C{url} using C{creds}.
-
- :param url: The url of the target replica to sync with.
- :type url: str
- :param creds: optional dictionary giving credentials.
- to authorize the operation with the server.
- :type creds: dict
-
- :return: A synchronizer.
- :rtype: Synchronizer
+ In the u1db implementation, _replica_uid is a property
+ that returns the value in _real_replica_uid, and does
+ a db query if no value found.
+ Here we prime the replica uid during initialization so
+ that we don't have to wait for the query afterwards.
"""
- # we want to store at most one syncer for each url, so we also store a
- # hash of the connection credentials and replace the stored syncer for
- # a certain url if credentials have changed.
- h = sha256(json.dumps([url, creds])).hexdigest()
- cur_h, syncer = self._syncers.get(url, (None, None))
- if syncer is None or h != cur_h:
- wlock = self._sync_db_write_lock
- syncer = SoledadSynchronizer(
- self,
- SoledadSyncTarget(url,
- self._replica_uid,
- creds=creds,
- crypto=self._crypto,
- sync_db=self._sync_db,
- sync_db_write_lock=wlock))
- self._syncers[url] = (h, syncer)
- # in order to reuse the same synchronizer multiple times we have to
- # reset its state (i.e. the number of documents received from target
- # and inserted in the local replica).
- syncer.num_inserted = 0
- return syncer
+ self._real_replica_uid = None
+ self._get_replica_uid()
def _extra_schema_init(self, c):
"""
@@ -547,63 +266,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
'ALTER TABLE document '
'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE')
- def _init_sync_db(self):
- """
- Initialize the Symmetrically-Encrypted document to be synced database,
- and the queue to communicate with subprocess workers.
- """
- self._sync_db = sqlite3.connect(self._sync_db_path,
- check_same_thread=False)
-
- self._sync_db_write_lock = threading.Lock()
- self._create_sync_db_tables()
- self.sync_queue = multiprocessing.Queue()
-
- def _create_sync_db_tables(self):
- """
- Create tables for the local sync documents db if needed.
- """
- encr = SyncEncrypterPool
- decr = SyncDecrypterPool
- sql_encr = ("CREATE TABLE IF NOT EXISTS %s (%s)" % (
- encr.TABLE_NAME, encr.FIELD_NAMES))
- sql_decr = ("CREATE TABLE IF NOT EXISTS %s (%s)" % (
- decr.TABLE_NAME, decr.FIELD_NAMES))
-
- with self._sync_db_write_lock:
- with self._sync_db:
- self._sync_db.execute(sql_encr)
- self._sync_db.execute(sql_decr)
-
- #
- # Symmetric encryption of syncing docs
- #
-
- def _encrypt_syncing_docs(self):
- """
- Process the syncing queue and send the documents there
- to be encrypted in the sync db. They will be read by the
- SoledadSyncTarget during the sync_exchange.
-
- Called periodical from the TimerTask self._sync_watcher.
- """
- lock = self.encrypting_lock
- # optional wait flag used to avoid blocking
- if not lock.acquire(False):
- return
- else:
- queue = self.sync_queue
- try:
- while not queue.empty():
- doc = queue.get_nowait()
- self._sync_enc_pool.encrypt_doc(doc)
-
- except Exception as exc:
- logger.error("Error while encrypting docs to sync")
- logger.exception(exc)
- finally:
- lock.release()
-
#
# Document operations
#
@@ -619,12 +281,82 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
:return: The new document revision.
:rtype: str
"""
- doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(
- self, doc)
+ doc_rev = sqlite_backend.SQLitePartialExpandDatabase.put_doc(self, doc)
if self.defer_encryption:
- self.sync_queue.put_nowait(doc)
+ # TODO move to api?
+ self._sync_enc_pool.enqueue_doc_for_encryption(doc)
return doc_rev
+ #
+ # SQLCipher API methods
+ #
+
+ # Extra query methods: extensions to the base u1db sqlite implementation.
+
+ def get_count_from_index(self, index_name, *key_values):
+ """
+ Return the count for a given combination of index_name
+ and key values.
+
+ Extension method made from similar methods in u1db version 13.09
+
+ :param index_name: The index to query
+ :type index_name: str
+ :param key_values: values to match, e.g. if you have
+ an index with 3 fields then you would have:
+ get_count_from_index(index_name, val1, val2, val3)
+ :type key_values: tuple
+ :return: count.
+ :rtype: int
+ """
+ c = self._db_handle.cursor()
+ definition = self._get_index_definition(index_name)
+
+ if len(key_values) != len(definition):
+ raise u1db_errors.InvalidValueForIndex()
+ tables = ["document_fields d%d" % i for i in range(len(definition))]
+ novalue_where = ["d.doc_id = d%d.doc_id"
+ " AND d%d.field_name = ?"
+ % (i, i) for i in range(len(definition))]
+ exact_where = [novalue_where[i]
+ + (" AND d%d.value = ?" % (i,))
+ for i in range(len(definition))]
+ args = []
+ where = []
+ for idx, (field, value) in enumerate(zip(definition, key_values)):
+ args.append(field)
+ where.append(exact_where[idx])
+ args.append(value)
+
+ tables = ["document_fields d%d" % i for i in range(len(definition))]
+ statement = (
+ "SELECT COUNT(*) FROM document d, %s WHERE %s " % (
+ ', '.join(tables),
+ ' AND '.join(where),
+ ))
+ try:
+ c.execute(statement, tuple(args))
+ except sqlcipher_dbapi2.OperationalError, e:
+ raise sqlcipher_dbapi2.OperationalError(
+ str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args))
+ res = c.fetchall()
+ return res[0][0]
+
+ def close(self):
+ """
+ Close db connections.
+ """
+ # TODO should be handled by adbapi instead
+ # TODO syncdb should be stopped first
+
+ if logger is not None: # logger might be none if called from __del__
+ logger.debug("SQLCipher backend: closing")
+
+ # close the actual database
+ if getattr(self, '_db_handle', False):
+ self._db_handle.close()
+ self._db_handle = None
+
# indexes
def _put_and_update_indexes(self, old_doc, doc):
@@ -636,13 +368,11 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
:param doc: The new version of the document.
:type doc: u1db.Document
"""
- with self.update_indexes_lock:
- sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes(
- self, old_doc, doc)
- c = self._db_handle.cursor()
- c.execute('UPDATE document SET syncable=? '
- 'WHERE doc_id=?',
- (doc.syncable, doc.doc_id))
+ sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes(
+ self, old_doc, doc)
+ c = self._db_handle.cursor()
+ c.execute('UPDATE document SET syncable=? WHERE doc_id=?',
+ (doc.syncable, doc.doc_id))
def _get_doc(self, doc_id, check_for_conflicts=False):
"""
@@ -662,438 +392,426 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
self, doc_id, check_for_conflicts)
if doc:
c = self._db_handle.cursor()
- c.execute('SELECT syncable FROM document '
- 'WHERE doc_id=?',
+ c.execute('SELECT syncable FROM document WHERE doc_id=?',
(doc.doc_id,))
result = c.fetchone()
doc.syncable = bool(result[0])
return doc
- #
- # SQLCipher API methods
- #
-
- @classmethod
- def assert_db_is_encrypted(cls, sqlcipher_file, key, raw_key, cipher,
- kdf_iter, cipher_page_size):
+ def __del__(self):
"""
- Assert that C{sqlcipher_file} contains an encrypted database.
-
- When opening an existing database, PRAGMA key will not immediately
- throw an error if the key provided is incorrect. To test that the
- database can be successfully opened with the provided key, it is
- necessary to perform some operation on the database (i.e. read from
- it) and confirm it is success.
-
- The easiest way to do this is select off the sqlite_master table,
- which will attempt to read the first page of the database and will
- parse the schema.
-
- :param sqlcipher_file: The path for the SQLCipher file.
- :type sqlcipher_file: str
- :param key: The key that protects the SQLCipher db.
- :type key: str
- :param raw_key: Whether C{key} is a raw 64-char hex string or a
- passphrase that should be hashed to obtain the encyrption key.
- :type raw_key: bool
- :param cipher: The cipher and mode to use.
- :type cipher: str
- :param kdf_iter: The number of iterations to use.
- :type kdf_iter: int
- :param cipher_page_size: The page size.
- :type cipher_page_size: int
- """
- try:
- # try to open an encrypted database with the regular u1db
- # backend should raise a DatabaseError exception.
- sqlite_backend.SQLitePartialExpandDatabase(sqlcipher_file)
- raise DatabaseIsNotEncrypted()
- except dbapi2.DatabaseError:
- # assert that we can access it using SQLCipher with the given
- # key
- with cls.k_lock:
- db_handle = dbapi2.connect(
- sqlcipher_file,
- isolation_level=SQLITE_ISOLATION_LEVEL,
- check_same_thread=SQLITE_CHECK_SAME_THREAD)
- cls._set_crypto_pragmas(
- db_handle, key, raw_key, cipher,
- kdf_iter, cipher_page_size)
- db_handle.cursor().execute(
- 'SELECT count(*) FROM sqlite_master')
+ Free resources when deleting or garbage collecting the database.
- @classmethod
- def _set_crypto_pragmas(cls, db_handle, key, raw_key, cipher, kdf_iter,
- cipher_page_size):
- """
- Set cryptographic params (key, cipher, KDF number of iterations and
- cipher page size).
+ This is only here to minimize problems if someone ever forgets to call
+ the close() method after using the database; you should not rely on
+ garbage collecting to free up the database resources.
"""
- cls._pragma_key(db_handle, key, raw_key)
- cls._pragma_cipher(db_handle, cipher)
- cls._pragma_kdf_iter(db_handle, kdf_iter)
- cls._pragma_cipher_page_size(db_handle, cipher_page_size)
+ self.close()
- @classmethod
- def _pragma_key(cls, db_handle, key, raw_key):
- """
- Set the C{key} for use with the database.
- The process of creating a new, encrypted database is called 'keying'
- the database. SQLCipher uses just-in-time key derivation at the point
- it is first needed for an operation. This means that the key (and any
- options) must be set before the first operation on the database. As
- soon as the database is touched (e.g. SELECT, CREATE TABLE, UPDATE,
- etc.) and pages need to be read or written, the key is prepared for
- use.
+class SQLCipherU1DBSync(SQLCipherDatabase):
+ """
+ Soledad syncer implementation.
+ """
- Implementation Notes:
+ _sync_enc_pool = None
- * PRAGMA key should generally be called as the first operation on a
- database.
+ """
+ The name of the local symmetrically encrypted documents to
+ sync database file.
+ """
+ LOCAL_SYMMETRIC_SYNC_FILE_NAME = 'sync.u1db'
- :param key: The key for use with the database.
- :type key: str
- :param raw_key: Whether C{key} is a raw 64-char hex string or a
- passphrase that should be hashed to obtain the encyrption key.
- :type raw_key: bool
- """
- if raw_key:
- cls._pragma_key_raw(db_handle, key)
- else:
- cls._pragma_key_passphrase(db_handle, key)
+ """
+ Period or recurrence of the Looping Call that will do the encryption to the
+ syncdb (in seconds).
+ """
+ ENCRYPT_LOOP_PERIOD = 1
- @classmethod
- def _pragma_key_passphrase(cls, db_handle, passphrase):
- """
- Set a passphrase for encryption key derivation.
-
- The key itself can be a passphrase, which is converted to a key using
- PBKDF2 key derivation. The result is used as the encryption key for
- the database. By using this method, there is no way to alter the KDF;
- if you want to do so you should use a raw key instead and derive the
- key using your own KDF.
-
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param passphrase: The passphrase used to derive the encryption key.
- :type passphrase: str
- """
- db_handle.cursor().execute("PRAGMA key = '%s'" % passphrase)
+ """
+ A dictionary that holds locks which avoid multiple sync attempts from the
+ same database replica.
+ """
+ syncing_lock = defaultdict(threading.Lock)
- @classmethod
- def _pragma_key_raw(cls, db_handle, key):
- """
- Set a raw hexadecimal encryption key.
+ def __init__(self, opts, soledad_crypto, replica_uid, cert_file,
+ defer_encryption=False):
- It is possible to specify an exact byte sequence using a blob literal.
- With this method, it is the calling application's responsibility to
- ensure that the data provided is a 64 character hex string, which will
- be converted directly to 32 bytes (256 bits) of key data.
+ self._opts = opts
+ self._path = opts.path
+ self._crypto = soledad_crypto
+ self.__replica_uid = replica_uid
+ self._cert_file = cert_file
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param key: A 64 character hex string.
- :type key: str
- """
- if not all(c in string.hexdigits for c in key):
- raise NotAnHexString(key)
- db_handle.cursor().execute('PRAGMA key = "x\'%s"' % key)
+ self._sync_db_key = opts.sync_db_key
+ self._sync_db = None
+ self._sync_enc_pool = None
- @classmethod
- def _pragma_cipher(cls, db_handle, cipher='aes-256-cbc'):
- """
- Set the cipher and mode to use for symmetric encryption.
+ # we store syncers in a dictionary indexed by the target URL. We also
+ # store a hash of the auth info in case auth info expires and we need
+ # to rebuild the syncer for that target. The final self._syncers
+ # format is the following:
+ #
+ # self._syncers = {'<url>': ('<auth_hash>', syncer), ...}
- SQLCipher uses aes-256-cbc as the default cipher and mode of
- operation. It is possible to change this, though not generally
- recommended, using PRAGMA cipher.
+ self._syncers = {}
- SQLCipher makes direct use of libssl, so all cipher options available
- to libssl are also available for use with SQLCipher. See `man enc` for
- OpenSSL's supported ciphers.
+ self.running = False
+ self._sync_threadpool = None
+ self._initialize_sync_threadpool()
- Implementation Notes:
+ self._reactor = reactor
+ self._reactor.callWhenRunning(self._start)
- * PRAGMA cipher must be called after PRAGMA key and before the first
- actual database operation or it will have no effect.
+ self._db_handle = None
+ self._initialize_main_db()
- * If a non-default value is used PRAGMA cipher to create a database,
- it must also be called every time that database is opened.
+ # the sync_db is used both for deferred encryption and decryption, so
+ # we want to initialize it anyway to allow for all combinations of
+ # deferred encryption and decryption configurations.
+ self._initialize_sync_db(opts)
- * SQLCipher does not implement its own encryption. Instead it uses the
- widely available and peer-reviewed OpenSSL libcrypto for all
- cryptographic functions.
+ if defer_encryption:
+ # initialize syncing queue encryption pool
+ self._sync_enc_pool = encdecpool.SyncEncrypterPool(
+ self._crypto, self._sync_db)
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param cipher: The cipher and mode to use.
- :type cipher: str
- """
- db_handle.cursor().execute("PRAGMA cipher = '%s'" % cipher)
+ self.shutdownID = None
- @classmethod
- def _pragma_kdf_iter(cls, db_handle, kdf_iter=4000):
- """
- Set the number of iterations for the key derivation function.
+ @property
+ def _replica_uid(self):
+ return str(self.__replica_uid)
- SQLCipher uses PBKDF2 key derivation to strengthen the key and make it
- resistent to brute force and dictionary attacks. The default
- configuration uses 4000 PBKDF2 iterations (effectively 16,000 SHA1
- operations). PRAGMA kdf_iter can be used to increase or decrease the
- number of iterations used.
+ def _start(self):
+ if not self.running:
+ self._sync_threadpool.start()
+ self.shutdownID = self._reactor.addSystemEventTrigger(
+ 'during', 'shutdown', self.finalClose)
+ self.running = True
- Implementation Notes:
+ def _defer_to_sync_threadpool(self, meth, *args, **kwargs):
+ return deferToThreadPool(
+ self._reactor, self._sync_threadpool, meth, *args, **kwargs)
- * PRAGMA kdf_iter must be called after PRAGMA key and before the first
- actual database operation or it will have no effect.
+ def _initialize_main_db(self):
- * If a non-default value is used PRAGMA kdf_iter to create a database,
- it must also be called every time that database is opened.
+ def _init_db():
+ self._db_handle = initialize_sqlcipher_db(
+ self._opts, check_same_thread=False)
+ self._real_replica_uid = None
+ self._ensure_schema()
+ self.set_document_factory(soledad_doc_factory)
- * It is not recommended to reduce the number of iterations if a
- passphrase is in use.
+ return self._defer_to_sync_threadpool(_init_db)
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param kdf_iter: The number of iterations to use.
- :type kdf_iter: int
+ def _initialize_sync_threadpool(self):
"""
- db_handle.cursor().execute("PRAGMA kdf_iter = '%d'" % kdf_iter)
+ Initialize a ThreadPool with exactly one thread, that will be used to
+ run all the network blocking calls for syncing on a separate thread.
- @classmethod
- def _pragma_cipher_page_size(cls, db_handle, cipher_page_size=1024):
+ TODO this needs to be ported away from urllib and into twisted async
+ calls, and then we can ditch this syncing thread and reintegrate into
+ the main reactor.
"""
- Set the page size of the encrypted database.
-
- SQLCipher 2 introduced the new PRAGMA cipher_page_size that can be
- used to adjust the page size for the encrypted database. The default
- page size is 1024 bytes, but it can be desirable for some applications
- to use a larger page size for increased performance. For instance,
- some recent testing shows that increasing the page size can noticeably
- improve performance (5-30%) for certain queries that manipulate a
- large number of pages (e.g. selects without an index, large inserts in
- a transaction, big deletes).
+ # XXX if the number of threads in this thread pool is ever changed, we
+ # should make sure that no operations on the database occur
+ # before the database has been initialized.
+ self._sync_threadpool = ThreadPool(0, 1)
- To adjust the page size, call the pragma immediately after setting the
- key for the first time and each subsequent time that you open the
- database.
-
- Implementation Notes:
+ def _initialize_sync_db(self, opts):
+ """
+ Initialize the Symmetrically-Encrypted document to be synced database,
+ and the queue to communicate with subprocess workers.
- * PRAGMA cipher_page_size must be called after PRAGMA key and before
- the first actual database operation or it will have no effect.
+ :param opts:
+ :type opts: SQLCipherOptions
+ """
+ soledad_assert(opts.sync_db_key is not None)
+ sync_db_path = None
+ if opts.path != ":memory:":
+ sync_db_path = "%s-sync" % opts.path
+ else:
+ sync_db_path = ":memory:"
- * If a non-default value is used PRAGMA cipher_page_size to create a
- database, it must also be called every time that database is opened.
+ # we copy incoming options because the opts object might be used
+ # somewhere else
+ sync_opts = SQLCipherOptions.copy(
+ opts, path=sync_db_path, create=True)
+ self._sync_db = getConnectionPool(
+ sync_opts, extra_queries=self._sync_db_extra_init)
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param cipher_page_size: The page size.
- :type cipher_page_size: int
+ @property
+ def _sync_db_extra_init(self):
"""
- db_handle.cursor().execute(
- "PRAGMA cipher_page_size = '%d'" % cipher_page_size)
+ Queries for creating tables for the local sync documents db if needed.
+ They are passed as extra initialization to initialize_sqlcipher_db.
- @classmethod
- def _pragma_rekey(cls, db_handle, new_key, raw_key):
- """
- Change the key of an existing encrypted database.
-
- To change the key on an existing encrypted database, it must first be
- unlocked with the current encryption key. Once the database is
- readable and writeable, PRAGMA rekey can be used to re-encrypt every
- page in the database with a new key.
-
- * PRAGMA rekey must be called after PRAGMA key. It can be called at any
- time once the database is readable.
-
- * PRAGMA rekey can not be used to encrypted a standard SQLite
- database! It is only useful for changing the key on an existing
- database.
-
- * Previous versions of SQLCipher provided a PRAGMA rekey_cipher and
- code>PRAGMA rekey_kdf_iter. These are deprecated and should not be
- used. Instead, use sqlcipher_export().
-
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param new_key: The new key.
- :type new_key: str
- :param raw_key: Whether C{password} is a raw 64-char hex string or a
- passphrase that should be hashed to obtain the encyrption key.
- :type raw_key: bool
+ :rtype: tuple of strings
"""
- # XXX change key param!
- if raw_key:
- cls._pragma_rekey_raw(db_handle, key)
- else:
- cls._pragma_rekey_passphrase(db_handle, key)
+ maybe_create = "CREATE TABLE IF NOT EXISTS %s (%s)"
+ encr = encdecpool.SyncEncrypterPool
+ decr = encdecpool.SyncDecrypterPool
+ sql_encr_table_query = (maybe_create % (
+ encr.TABLE_NAME, encr.FIELD_NAMES))
+ sql_decr_table_query = (maybe_create % (
+ decr.TABLE_NAME, decr.FIELD_NAMES))
+ return (sql_encr_table_query, sql_decr_table_query)
- @classmethod
- def _pragma_rekey_passphrase(cls, db_handle, passphrase):
+ def sync(self, url, creds=None, defer_decryption=True):
"""
- Change the passphrase for encryption key derivation.
+ Synchronize documents with remote replica exposed at url.
- The key itself can be a passphrase, which is converted to a key using
- PBKDF2 key derivation. The result is used as the encryption key for
- the database.
+ This method defers a sync to a 1-threaded threadpool. The main
+ database initialization was deferred to that thread during this
+ object's initialization. As there's currently only one thread in that
+ threadpool, the db init was queued before this method was called, so
+ we don't need to actually wait for the db to be ready. If this ever
+ changes, we should add a thread-safe condition to ensure the db is
+ ready before using it.
+
+ :param url: The url of the target replica to sync with.
+ :type url: str
+ :param creds: optional dictionary giving credentials to authorize the
+ operation with the server.
+ :type creds: dict
+ :param defer_decryption:
+ Whether to defer the decryption process using the intermediate
+ database. If False, decryption will be done inline.
+ :type defer_decryption: bool
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param passphrase: The passphrase used to derive the encryption key.
- :type passphrase: str
+ :return:
+ A Deferred, that will fire with the local generation (type `int`)
+ before the synchronisation was performed.
+ :rtype: Deferred
"""
- db_handle.cursor().execute("PRAGMA rekey = '%s'" % passphrase)
+ # the following context manager blocks until the syncing lock can be
+ # acquired.
+ with self._syncer(url, creds=creds) as syncer:
+ # XXX could mark the critical section here...
+ return syncer.sync(defer_decryption=defer_decryption)
- @classmethod
- def _pragma_rekey_raw(cls, db_handle, key):
+ @contextmanager
+ def _syncer(self, url, creds=None):
"""
- Change the raw hexadecimal encryption key.
+ Accessor for synchronizer.
- It is possible to specify an exact byte sequence using a blob literal.
- With this method, it is the calling application's responsibility to
- ensure that the data provided is a 64 character hex string, which will
- be converted directly to 32 bytes (256 bits) of key data.
+ As we reuse the same synchronizer for every sync, there can be only
+ one instance synchronizing the same database replica at the same time.
+ Because of that, this method blocks until the syncing lock can be
+ acquired.
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param key: A 64 character hex string.
- :type key: str
+ :param creds: optional dictionary giving credentials to authorize the
+ operation with the server.
+ :type creds: dict
"""
- if not all(c in string.hexdigits for c in key):
- raise NotAnHexString(key)
- # XXX change passphrase param!
- db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % passphrase)
+ with self.syncing_lock[self._path]:
+ syncer = self._get_syncer(url, creds=creds)
+ yield syncer
- @classmethod
- def _pragma_synchronous_off(cls, db_handle):
- """
- Change the setting of the "synchronous" flag to OFF.
- """
- logger.debug("SQLCIPHER: SETTING SYNCHRONOUS OFF")
- db_handle.cursor().execute('PRAGMA synchronous=OFF')
+ @property
+ def syncing(self):
+ lock = self.syncing_lock[self._path]
+ acquired_lock = lock.acquire(False)
+ if acquired_lock is False:
+ return True
+ lock.release()
+ return False
- @classmethod
- def _pragma_synchronous_normal(cls, db_handle):
+ def _get_syncer(self, url, creds=None):
"""
- Change the setting of the "synchronous" flag to NORMAL.
+ Get a synchronizer for ``url`` using ``creds``.
+
+ :param url: The url of the target replica to sync with.
+ :type url: str
+ :param creds: optional dictionary giving credentials
+ to authorize the operation with the server.
+ :type creds: dict
+
+ :return: A synchronizer.
+ :rtype: Synchronizer
"""
- logger.debug("SQLCIPHER: SETTING SYNCHRONOUS NORMAL")
- db_handle.cursor().execute('PRAGMA synchronous=NORMAL')
+ # we want to store at most one syncer for each url, so we also store a
+ # hash of the connection credentials and replace the stored syncer for
+ # a certain url if credentials have changed.
+ h = sha256(json.dumps([url, creds])).hexdigest()
+ cur_h, syncer = self._syncers.get(url, (None, None))
+ if syncer is None or h != cur_h:
+ syncer = SoledadSynchronizer(
+ self,
+ SoledadHTTPSyncTarget(
+ url,
+ # XXX is the replica_uid ready?
+ self._replica_uid,
+ creds=creds,
+ crypto=self._crypto,
+ cert_file=self._cert_file,
+ sync_db=self._sync_db,
+ sync_enc_pool=self._sync_enc_pool))
+ self._syncers[url] = (h, syncer)
+ # in order to reuse the same synchronizer multiple times we have to
+ # reset its state (i.e. the number of documents received from target
+ # and inserted in the local replica).
+ syncer.num_inserted = 0
+ return syncer
- @classmethod
- def _pragma_mem_temp_store(cls, db_handle):
+ #
+ # Symmetric encryption of syncing docs
+ #
+
+ def get_generation(self):
+ # FIXME
+ # XXX this SHOULD BE a callback
+ return self._get_generation()
+
+ def finalClose(self):
"""
- Use a in-memory store for temporary tables.
+ This should only be called by the shutdown trigger.
"""
- logger.debug("SQLCIPHER: SETTING TEMP_STORE MEMORY")
- db_handle.cursor().execute('PRAGMA temp_store=MEMORY')
+ self.shutdownID = None
+ self._sync_threadpool.stop()
+ self.running = False
- @classmethod
- def _pragma_write_ahead_logging(cls, db_handle):
+ def close(self):
"""
- Enable write-ahead logging, and set the autocheckpoint to 50 pages.
-
- Setting the autocheckpoint to a small value, we make the reads not
- suffer too much performance degradation.
-
- From the sqlite docs:
-
- "There is a tradeoff between average read performance and average write
- performance. To maximize the read performance, one wants to keep the
- WAL as small as possible and hence run checkpoints frequently, perhaps
- as often as every COMMIT. To maximize write performance, one wants to
- amortize the cost of each checkpoint over as many writes as possible,
- meaning that one wants to run checkpoints infrequently and let the WAL
- grow as large as possible before each checkpoint. The decision of how
- often to run checkpoints may therefore vary from one application to
- another depending on the relative read and write performance
- requirements of the application. The default strategy is to run a
- checkpoint once the WAL reaches 1000 pages"
+ Close the syncer and syncdb orderly
"""
- logger.debug("SQLCIPHER: SETTING WRITE-AHEAD LOGGING")
- db_handle.cursor().execute('PRAGMA journal_mode=WAL')
- # The optimum value can still use a little bit of tuning, but we favor
- # small sizes of the WAL file to get fast reads, since we assume that
- # the writes will be quick enough to not block too much.
+ # close all open syncers
+ for url in self._syncers.keys():
+ del self._syncers[url]
- # TODO
- # As a further improvement, we might want to set autocheckpoint to 0
- # here and do the checkpoints manually in a separate thread, to avoid
- # any blocks in the main thread (we should run a loopingcall from here)
- db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50')
+ # stop the encryption pool
+ if self._sync_enc_pool is not None:
+ self._sync_enc_pool.close()
+ self._sync_enc_pool = None
- # Extra query methods: extensions to the base sqlite implmentation.
+ # close the sync database
+ if self._sync_db is not None:
+ self._sync_db.close()
+ self._sync_db = None
- def get_count_from_index(self, index_name, *key_values):
- """
- Returns the count for a given combination of index_name
- and key values.
- Extension method made from similar methods in u1db version 13.09
+class U1DBSQLiteBackend(sqlite_backend.SQLitePartialExpandDatabase):
+ """
+ A very simple wrapper for u1db around sqlcipher backend.
- :param index_name: The index to query
- :type index_name: str
- :param key_values: values to match. eg, if you have
- an index with 3 fields then you would have:
- get_from_index(index_name, val1, val2, val3)
- :type key_values: tuple
- :return: count.
- :rtype: int
- """
- c = self._db_handle.cursor()
- definition = self._get_index_definition(index_name)
+ Instead of initializing the database on the fly, it just uses an existing
+ connection that is passed to it in the initializer.
- if len(key_values) != len(definition):
- raise u1db_errors.InvalidValueForIndex()
- tables = ["document_fields d%d" % i for i in range(len(definition))]
- novalue_where = ["d.doc_id = d%d.doc_id"
- " AND d%d.field_name = ?"
- % (i, i) for i in range(len(definition))]
- exact_where = [novalue_where[i]
- + (" AND d%d.value = ?" % (i,))
- for i in range(len(definition))]
- args = []
- where = []
- for idx, (field, value) in enumerate(zip(definition, key_values)):
- args.append(field)
- where.append(exact_where[idx])
- args.append(value)
+ It can be used in tests and debug runs to initialize the adbapi with plain
+ sqlite connections, decoupled from the sqlcipher layer.
+ """
- tables = ["document_fields d%d" % i for i in range(len(definition))]
- statement = (
- "SELECT COUNT(*) FROM document d, %s WHERE %s " % (
- ', '.join(tables),
- ' AND '.join(where),
- ))
- try:
- c.execute(statement, tuple(args))
- except dbapi2.OperationalError, e:
- raise dbapi2.OperationalError(
- str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args))
- res = c.fetchall()
- return res[0][0]
+ def __init__(self, conn):
+ self._db_handle = conn
+ self._real_replica_uid = None
+ self._ensure_schema()
+ self._factory = u1db.Document
- def close(self):
- """
- Close db_handle and close syncer.
- """
- logger.debug("Sqlcipher backend: closing")
- if self._sync_watcher is not None:
- self._sync_watcher.stop()
- self._sync_watcher.shutdown()
- for url in self._syncers:
- _, syncer = self._syncers[url]
- syncer.close()
- if self._sync_enc_pool is not None:
- self._sync_enc_pool.close()
- if self._db_handle is not None:
- self._db_handle.close()
- @property
- def replica_uid(self):
- return self._get_replica_uid()
+class SoledadSQLCipherWrapper(SQLCipherDatabase):
+ """
+ A wrapper for u1db that uses the Soledad-extended sqlcipher backend.
+
+ Instead of initializing the database on the fly, it just uses an existing
+ connection that is passed to it in the initializer.
+
+ It can be used from adbapi to initialize a soledad database after
+ getting a regular connection to a sqlcipher database.
+ """
+ def __init__(self, conn):
+ self._db_handle = conn
+ self._real_replica_uid = None
+ self._ensure_schema()
+ self.set_document_factory(soledad_doc_factory)
+ self._prime_replica_uid()
+
+
+def _assert_db_is_encrypted(opts):
+ """
+ Assert that the sqlcipher file contains an encrypted database.
+
+ When opening an existing database, PRAGMA key will not immediately
+ throw an error if the key provided is incorrect. To test that the
+ database can be successfully opened with the provided key, it is
+ necessary to perform some operation on the database (i.e. read from
+ it) and confirm it succeeds.
+
+ The easiest way to do this is select off the sqlite_master table,
+ which will attempt to read the first page of the database and will
+ parse the schema.
+
+ :param opts:
+ """
+ # Trying to open an encrypted database with the regular u1db
+ # backend should raise a DatabaseError exception.
+ # If the regular backend succeeds, then we need to stop because
+ # the database was not properly initialized.
+ try:
+ sqlite_backend.SQLitePartialExpandDatabase(opts.path)
+ except sqlcipher_dbapi2.DatabaseError:
+ # assert that we can access it using SQLCipher with the given
+ # key
+ dummy_query = ('SELECT count(*) FROM sqlite_master',)
+ initialize_sqlcipher_db(opts, on_init=dummy_query)
+ else:
+ raise DatabaseIsNotEncrypted()
+
+#
+# Exceptions
+#
+class DatabaseIsNotEncrypted(Exception):
+ """
+ Exception raised when trying to open non-encrypted databases.
+ """
+ pass
+
+
+def soledad_doc_factory(doc_id=None, rev=None, json='{}', has_conflicts=False,
+ syncable=True):
+ """
+ Return a default Soledad Document.
+ Used in the initialization for SQLCipherDatabase
+ """
+ return SoledadDocument(doc_id=doc_id, rev=rev, json=json,
+ has_conflicts=has_conflicts, syncable=syncable)
+
sqlite_backend.SQLiteDatabase.register_implementation(SQLCipherDatabase)
+
+
+#
+# twisted.enterprise.adbapi SQLCipher implementation
+#
+
+SQLCIPHER_CONNECTION_TIMEOUT = 10
+
+
+def getConnectionPool(opts, extra_queries=None):
+ openfun = partial(
+ pragmas.set_init_pragmas,
+ opts=opts,
+ extra_queries=extra_queries)
+ return SQLCipherConnectionPool(
+ database=opts.path,
+ check_same_thread=False,
+ cp_openfun=openfun,
+ timeout=SQLCIPHER_CONNECTION_TIMEOUT)
+
+
+class SQLCipherConnection(adbapi.Connection):
+ pass
+
+
+class SQLCipherTransaction(adbapi.Transaction):
+ pass
+
+
+class SQLCipherConnectionPool(adbapi.ConnectionPool):
+
+ connectionFactory = SQLCipherConnection
+ transactionFactory = SQLCipherTransaction
+
+ def __init__(self, *args, **kwargs):
+ adbapi.ConnectionPool.__init__(
+ self, "pysqlcipher.dbapi2", *args, **kwargs)
diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py
index 5d545a77..53172f31 100644
--- a/client/src/leap/soledad/client/sync.py
+++ b/client/src/leap/soledad/client/sync.py
@@ -14,26 +14,12 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
"""
Soledad synchronization utilities.
-
-
-Extend u1db Synchronizer with the ability to:
-
- * Defer the update of the known replica uid until all the decryption of
- the incoming messages has been processed.
-
- * Be interrupted and recovered.
"""
-
-
-import json
-
import logging
-import traceback
-from threading import Lock
+
+from twisted.internet import defer
from u1db import errors
from u1db.sync import Synchronizer
@@ -54,15 +40,8 @@ class SoledadSynchronizer(Synchronizer):
Also modified to allow for interrupting the synchronization process.
"""
- syncing_lock = Lock()
-
- def stop(self):
- """
- Stop the current sync in progress.
- """
- self.sync_target.stop()
-
- def sync(self, autocreate=False, defer_decryption=True):
+ @defer.inlineCallbacks
+ def sync(self, defer_decryption=True):
"""
Synchronize documents between source and target.
@@ -74,45 +53,22 @@ class SoledadSynchronizer(Synchronizer):
This is done to allow the ongoing parallel decryption of the incoming
docs to proceed without `InvalidGeneration` conflicts.
- :param autocreate: Whether the target replica should be created or not.
- :type autocreate: bool
:param defer_decryption: Whether to defer the decryption process using
the intermediate database. If False,
decryption will be done inline.
:type defer_decryption: bool
- """
- self.syncing_lock.acquire()
- try:
- return self._sync(autocreate=autocreate,
- defer_decryption=defer_decryption)
- except Exception:
- # re-raising the exceptions to let syqlcipher.sync catch them
- # (and re-create the syncer instance if needed)
- raise
- finally:
- self.release_syncing_lock()
-
- def _sync(self, autocreate=False, defer_decryption=True):
- """
- Helper function, called from the main `sync` method.
- See `sync` docstring.
+
+ :return: A deferred which will fire after the sync has finished.
+ :rtype: twisted.internet.defer.Deferred
"""
sync_target = self.sync_target
# get target identifier, its current generation,
# and its last-seen database generation for this source
ensure_callback = None
- try:
- (self.target_replica_uid, target_gen, target_trans_id,
- target_my_gen, target_my_trans_id) = \
- sync_target.get_sync_info(self.source._replica_uid)
- except errors.DatabaseDoesNotExist:
- if not autocreate:
- raise
- # will try to ask sync_exchange() to create the db
- self.target_replica_uid = None
- target_gen, target_trans_id = (0, '')
- target_my_gen, target_my_trans_id = (0, '')
+ (self.target_replica_uid, target_gen, target_trans_id,
+ target_my_gen, target_my_trans_id) = yield \
+ sync_target.get_sync_info(self.source._replica_uid)
logger.debug(
"Soledad target sync info:\n"
@@ -120,9 +76,10 @@ class SoledadSynchronizer(Synchronizer):
" target generation: %d\n"
" target trans id: %s\n"
" target my gen: %d\n"
- " target my trans_id: %s"
+ " target my trans_id: %s\n"
+ " source replica_uid: %s\n"
% (self.target_replica_uid, target_gen, target_trans_id,
- target_my_gen, target_my_trans_id))
+ target_my_gen, target_my_trans_id, self.source._replica_uid))
# make sure we'll have access to target replica uid once it exists
if self.target_replica_uid is None:
@@ -140,7 +97,7 @@ class SoledadSynchronizer(Synchronizer):
# what's changed since that generation and this current gen
my_gen, _, changes = self.source.whats_changed(target_my_gen)
- logger.debug("Soledad sync: there are %d documents to send." \
+ logger.debug("Soledad sync: there are %d documents to send."
% len(changes))
# get source last-seen database generation for the target
@@ -152,15 +109,15 @@ class SoledadSynchronizer(Synchronizer):
self.target_replica_uid)
logger.debug(
"Soledad source sync info:\n"
- " source target gen: %d\n"
- " source target trans_id: %s"
+ " last target gen known to source: %d\n"
+ " last target trans_id known to source: %s"
% (target_last_known_gen, target_last_known_trans_id))
# validate transaction ids
if not changes and target_last_known_gen == target_gen:
if target_trans_id != target_last_known_trans_id:
raise errors.InvalidTransactionId
- return my_gen
+ defer.returnValue(my_gen)
# prepare to send all the changed docs
changed_doc_ids = [doc_id for doc_id, _, _ in changes]
@@ -175,40 +132,26 @@ class SoledadSynchronizer(Synchronizer):
# exchange documents and try to insert the returned ones with
# the target, return target synced-up-to gen.
- #
- # The sync_exchange method may be interrupted, in which case it will
- # return a tuple of Nones.
- try:
- new_gen, new_trans_id = sync_target.sync_exchange(
- docs_by_generation, self.source._replica_uid,
- target_last_known_gen, target_last_known_trans_id,
- self._insert_doc_from_target, ensure_callback=ensure_callback,
- defer_decryption=defer_decryption)
- logger.debug(
- "Soledad source sync info after sync exchange:\n"
- " source target gen: %d\n"
- " source target trans_id: %s"
- % (new_gen, new_trans_id))
- info = {
- "target_replica_uid": self.target_replica_uid,
- "new_gen": new_gen,
- "new_trans_id": new_trans_id,
- "my_gen": my_gen
- }
- self._syncing_info = info
- if defer_decryption and not sync_target.has_syncdb():
- logger.debug("Sync target has no valid sync db, "
- "aborting defer_decryption")
- defer_decryption = False
- self.complete_sync()
- except Exception as e:
- logger.error("Soledad sync error: %s" % str(e))
- logger.error(traceback.format_exc())
- sync_target.stop()
- finally:
- sync_target.close()
-
- return my_gen
+ new_gen, new_trans_id = yield sync_target.sync_exchange(
+ docs_by_generation, self.source._replica_uid,
+ target_last_known_gen, target_last_known_trans_id,
+ self._insert_doc_from_target, ensure_callback=ensure_callback,
+ defer_decryption=defer_decryption)
+ logger.debug(
+ "Soledad source sync info after sync exchange:\n"
+ " source known target gen: %d\n"
+ " source known target trans_id: %s"
+ % (new_gen, new_trans_id))
+ info = {
+ "target_replica_uid": self.target_replica_uid,
+ "new_gen": new_gen,
+ "new_trans_id": new_trans_id,
+ "my_gen": my_gen
+ }
+ self._syncing_info = info
+ yield self.complete_sync()
+
+ defer.returnValue(my_gen)
def complete_sync(self):
"""
@@ -216,6 +159,9 @@ class SoledadSynchronizer(Synchronizer):
(a) record last known generation and transaction uid for the remote
replica, and
(b) make target aware of our current reached generation.
+
+ :return: A deferred which will fire when the sync has been completed.
+ :rtype: twisted.internet.defer.Deferred
"""
logger.debug("Completing deferred last step in SYNC...")
@@ -226,37 +172,23 @@ class SoledadSynchronizer(Synchronizer):
info["target_replica_uid"], info["new_gen"], info["new_trans_id"])
# if gapless record current reached generation with target
- self._record_sync_info_with_the_target(info["my_gen"])
+ return self._record_sync_info_with_the_target(info["my_gen"])
- @property
- def syncing(self):
+ def _record_sync_info_with_the_target(self, start_generation):
"""
- Return True if a sync is ongoing, False otherwise.
- :rtype: bool
- """
- # XXX FIXME we need some mechanism for timeout: should cleanup and
- # release if something in the syncdb-decrypt goes wrong. we could keep
- # track of the release date and cleanup unrealistic sync entries after
- # some time.
- locked = self.syncing_lock.locked()
- return locked
-
- def release_syncing_lock(self):
- """
- Release syncing lock if it's locked.
- """
- if self.syncing_lock.locked():
- self.syncing_lock.release()
+ Store local replica metadata in server.
- def close(self):
- """
- Close sync target pool of workers.
- """
- self.release_syncing_lock()
- self.sync_target.close()
+ :param start_generation: The local generation when the sync was
+ started.
+ :type start_generation: int
- def __del__(self):
- """
- Cleanup: release lock.
+ :return: A deferred which will fire when the operation has been
+ completed.
+ :rtype: twisted.internet.defer.Deferred
"""
- self.release_syncing_lock()
+ cur_gen, trans_id = self.source._get_generation_info()
+ if (cur_gen == start_generation + self.num_inserted
+ and self.num_inserted > 0):
+ return self.sync_target.record_sync_info(
+ self.source._replica_uid, cur_gen, trans_id)
+ return defer.succeed(None)
diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py
deleted file mode 100644
index 1eb84e64..00000000
--- a/client/src/leap/soledad/client/target.py
+++ /dev/null
@@ -1,1469 +0,0 @@
-# -*- coding: utf-8 -*-
-# target.py
-# Copyright (C) 2013, 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-"""
-A U1DB backend for encrypting data before sending to server and decrypting
-after receiving.
-"""
-
-
-import cStringIO
-import gzip
-import logging
-import re
-import urllib
-import threading
-
-from collections import defaultdict
-from time import sleep
-from uuid import uuid4
-
-import simplejson as json
-from taskthread import TimerTask
-from u1db import errors
-from u1db.remote import utils, http_errors
-from u1db.remote.http_target import HTTPSyncTarget
-from u1db.remote.http_client import _encode_query_parameter, HTTPClientBase
-from zope.proxy import ProxyBase
-from zope.proxy import sameProxiedObjects, setProxiedObject
-
-from leap.soledad.common.document import SoledadDocument
-from leap.soledad.client.auth import TokenBasedAuth
-from leap.soledad.client.crypto import is_symmetrically_encrypted
-from leap.soledad.client.crypto import encrypt_doc, decrypt_doc
-from leap.soledad.client.crypto import SyncEncrypterPool, SyncDecrypterPool
-from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS
-from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS
-from leap.soledad.client.events import signal
-
-
-logger = logging.getLogger(__name__)
-
-
-def _gunzip(data):
- """
- Uncompress data that is gzipped.
-
- :param data: gzipped data
- :type data: basestring
- """
- buffer = cStringIO.StringIO()
- buffer.write(data)
- buffer.seek(0)
- try:
- data = gzip.GzipFile(mode='r', fileobj=buffer).read()
- except Exception:
- logger.warning("Error while decrypting gzipped data")
- buffer.close()
- return data
-
-
-class PendingReceivedDocsSyncError(Exception):
- pass
-
-
-class DocumentSyncerThread(threading.Thread):
- """
- A thread that knowns how to either send or receive a document during the
- sync process.
- """
-
- def __init__(self, doc_syncer, release_method, failed_method,
- idx, total, last_request_lock=None, last_callback_lock=None):
- """
- Initialize a new syncer thread.
-
- :param doc_syncer: A document syncer.
- :type doc_syncer: HTTPDocumentSyncer
- :param release_method: A method to be called when finished running.
- :type release_method: callable(DocumentSyncerThread)
- :param failed_method: A method to be called when we failed.
- :type failed_method: callable(DocumentSyncerThread)
- :param idx: The index count of the current operation.
- :type idx: int
- :param total: The total number of operations.
- :type total: int
- :param last_request_lock: A lock to wait for before actually performing
- the request.
- :type last_request_lock: threading.Lock
- :param last_callback_lock: A lock to wait for before actually running
- the success callback.
- :type last_callback_lock: threading.Lock
- """
- threading.Thread.__init__(self)
- self._doc_syncer = doc_syncer
- self._release_method = release_method
- self._failed_method = failed_method
- self._idx = idx
- self._total = total
- self._last_request_lock = last_request_lock
- self._last_callback_lock = last_callback_lock
- self._response = None
- self._exception = None
- self._result = None
- self._success = False
- # a lock so we can signal when we're finished
- self._request_lock = threading.Lock()
- self._request_lock.acquire()
- self._callback_lock = threading.Lock()
- self._callback_lock.acquire()
- # make thread interruptable
- self._stopped = None
- self._stop_lock = threading.Lock()
-
- def run(self):
- """
- Run the HTTP request and store results.
-
- This method will block and wait for an eventual previous operation to
- finish before actually performing the request. It also traps any
- exception and register any failure with the request.
- """
- with self._stop_lock:
- if self._stopped is None:
- self._stopped = False
- else:
- return
-
- # eventually wait for the previous thread to finish
- if self._last_request_lock is not None:
- self._last_request_lock.acquire()
-
- # bail out in case we've been interrupted
- if self.stopped is True:
- return
-
- try:
- self._response = self._doc_syncer.do_request()
- self._request_lock.release()
-
- # run success callback
- if self._doc_syncer.success_callback is not None:
-
- # eventually wait for callback lock release
- if self._last_callback_lock is not None:
- self._last_callback_lock.acquire()
-
- # bail out in case we've been interrupted
- if self._stopped is True:
- return
-
- self._result = self._doc_syncer.success_callback(
- self._idx, self._total, self._response)
- self._success = True
- doc_syncer = self._doc_syncer
- self._release_method(self, doc_syncer)
- self._doc_syncer = None
- # let next thread executed its callback
- self._callback_lock.release()
-
- # trap any exception and signal failure
- except Exception as e:
- self._exception = e
- self._success = False
- # run failure callback
- if self._doc_syncer.failure_callback is not None:
-
- # eventually wait for callback lock release
- if self._last_callback_lock is not None:
- self._last_callback_lock.acquire()
-
- # bail out in case we've been interrupted
- if self.stopped is True:
- return
-
- self._doc_syncer.failure_callback(
- self._idx, self._total, self._exception)
-
- self._failed_method(self)
- # we do not release the callback lock here because we
- # failed and so we don't want other threads to succeed.
-
- @property
- def doc_syncer(self):
- return self._doc_syncer
-
- @property
- def response(self):
- return self._response
-
- @property
- def exception(self):
- return self._exception
-
- @property
- def callback_lock(self):
- return self._callback_lock
-
- @property
- def request_lock(self):
- return self._request_lock
-
- @property
- def success(self):
- return self._success
-
- def stop(self):
- with self._stop_lock:
- self._stopped = True
-
- @property
- def stopped(self):
- with self._stop_lock:
- return self._stopped
-
- @property
- def result(self):
- return self._result
-
-
-class DocumentSyncerPool(object):
- """
- A pool of reusable document syncers.
- """
-
- POOL_SIZE = 10
- """
- The maximum amount of syncer threads running at the same time.
- """
-
- def __init__(self, raw_url, raw_creds, query_string, headers,
- ensure_callback, stop_method):
- """
- Initialize the document syncer pool.
-
- :param raw_url: The complete raw URL for the HTTP request.
- :type raw_url: str
- :param raw_creds: The credentials for the HTTP request.
- :type raw_creds: dict
- :param query_string: The query string for the HTTP request.
- :type query_string: str
- :param headers: The headers for the HTTP request.
- :type headers: dict
- :param ensure_callback: A callback to ensure we have the correct
- target_replica_uid, if it was just created.
- :type ensure_callback: callable
-
- """
- # save syncer params
- self._raw_url = raw_url
- self._raw_creds = raw_creds
- self._query_string = query_string
- self._headers = headers
- self._ensure_callback = ensure_callback
- self._stop_method = stop_method
- # pool attributes
- self._failures = False
- self._semaphore_pool = threading.BoundedSemaphore(
- DocumentSyncerPool.POOL_SIZE)
- self._pool_access_lock = threading.Lock()
- self._doc_syncers = []
- self._threads = []
-
- def new_syncer_thread(self, idx, total, last_request_lock=None,
- last_callback_lock=None):
- """
- Yield a new document syncer thread.
-
- :param idx: The index count of the current operation.
- :type idx: int
- :param total: The total number of operations.
- :type total: int
- :param last_request_lock: A lock to wait for before actually performing
- the request.
- :type last_request_lock: threading.Lock
- :param last_callback_lock: A lock to wait for before actually running
- the success callback.
- :type last_callback_lock: threading.Lock
- """
- t = None
- # wait for available threads
- self._semaphore_pool.acquire()
- with self._pool_access_lock:
- if self._failures is True:
- return None
- # get a syncer
- doc_syncer = self._get_syncer()
- # we rely on DocumentSyncerThread.run() to release the lock using
- # self.release_syncer so we can launch a new thread.
- t = DocumentSyncerThread(
- doc_syncer, self.release_syncer, self.cancel_threads,
- idx, total,
- last_request_lock=last_request_lock,
- last_callback_lock=last_callback_lock)
- self._threads.append(t)
- return t
-
- def _failed(self):
- with self._pool_access_lock:
- self._failures = True
-
- @property
- def failures(self):
- return self._failures
-
- def _get_syncer(self):
- """
- Get a document syncer from the pool.
-
- This method will create a new syncer whenever there is no syncer
- available in the pool.
-
- :return: A syncer.
- :rtype: HTTPDocumentSyncer
- """
- syncer = None
- # get an available syncer or create a new one
- try:
- syncer = self._doc_syncers.pop()
- except IndexError:
- syncer = HTTPDocumentSyncer(
- self._raw_url, self._raw_creds, self._query_string,
- self._headers, self._ensure_callback)
- return syncer
-
- def release_syncer(self, syncer_thread, doc_syncer):
- """
- Return a syncer to the pool after use and check for any failures.
-
- :param syncer: The syncer to be returned to the pool.
- :type syncer: HTTPDocumentSyncer
- """
- with self._pool_access_lock:
- self._doc_syncers.append(doc_syncer)
- if syncer_thread.success is True:
- self._threads.remove(syncer_thread)
- self._semaphore_pool.release()
-
- def cancel_threads(self, calling_thread):
- """
- Stop all threads in the pool.
- """
- # stop sync
- self._stop_method()
- stopped = []
- # stop all threads
- logger.warning("Soledad sync: cancelling sync threads...")
- with self._pool_access_lock:
- self._failures = True
- while self._threads:
- t = self._threads.pop(0)
- t.stop()
- self._doc_syncers.append(t.doc_syncer)
- stopped.append(t)
- # release locks and join
- while stopped:
- t = stopped.pop(0)
- t.request_lock.acquire(False) # just in case
- t.request_lock.release()
- t.callback_lock.acquire(False) # just in case
- t.callback_lock.release()
- logger.warning("Soledad sync: cancelled sync threads.")
-
- def cleanup(self):
- """
- Close and remove any syncers from the pool.
- """
- with self._pool_access_lock:
- while self._doc_syncers:
- syncer = self._doc_syncers.pop()
- syncer.close()
- del syncer
-
-
-class HTTPDocumentSyncer(HTTPClientBase, TokenBasedAuth):
-
- def __init__(self, raw_url, creds, query_string, headers, ensure_callback):
- """
- Initialize the client.
-
- :param raw_url: The raw URL of the target HTTP server.
- :type raw_url: str
- :param creds: Authentication credentials.
- :type creds: dict
- :param query_string: The query string for the HTTP request.
- :type query_string: str
- :param headers: The headers for the HTTP request.
- :type headers: dict
- :param ensure_callback: A callback to ensure we have the correct
- target_replica_uid, if it was just created.
- :type ensure_callback: callable
- """
- HTTPClientBase.__init__(self, raw_url, creds=creds)
- # info needed to perform the request
- self._query_string = query_string
- self._headers = headers
- self._ensure_callback = ensure_callback
- # the actual request method
- self._request_method = None
- self._success_callback = None
- self._failure_callback = None
-
- def _reset(self):
- """
- Reset this document syncer so we can reuse it.
- """
- self._request_method = None
- self._success_callback = None
- self._failure_callback = None
- self._request_method = None
-
- def set_request_method(self, method, *args, **kwargs):
- """
- Set the actual method to perform the request.
-
- :param method: Either 'get' or 'put'.
- :type method: str
- :param args: Arguments for the request method.
- :type args: list
- :param kwargs: Keyworded arguments for the request method.
- :type kwargs: dict
- """
- self._reset()
- # resolve request method
- if method is 'get':
- self._request_method = self._get_doc
- elif method is 'put':
- self._request_method = self._put_doc
- else:
- raise Exception
- # store request method args
- self._args = args
- self._kwargs = kwargs
-
- def set_success_callback(self, callback):
- self._success_callback = callback
-
- def set_failure_callback(self, callback):
- self._failure_callback = callback
-
- @property
- def success_callback(self):
- return self._success_callback
-
- @property
- def failure_callback(self):
- return self._failure_callback
-
- def do_request(self):
- """
- Actually perform the request.
-
- :return: The body and headers of the response.
- :rtype: tuple
- """
- self._ensure_connection()
- args = self._args
- kwargs = self._kwargs
- return self._request_method(*args, **kwargs)
-
- def _request(self, method, url_parts, params=None, body=None,
- content_type=None):
- """
- Perform an HTTP request.
-
- :param method: The HTTP request method.
- :type method: str
- :param url_parts: A list representing the request path.
- :type url_parts: list
- :param params: Parameters for the URL query string.
- :type params: dict
- :param body: The body of the request.
- :type body: str
- :param content-type: The content-type of the request.
- :type content-type: str
-
- :return: The body and headers of the response.
- :rtype: tuple
-
- :raise errors.Unavailable: Raised after a number of unsuccesful
- request attempts.
- :raise Exception: Raised for any other exception ocurring during the
- request.
- """
-
- self._ensure_connection()
- unquoted_url = url_query = self._url.path
- if url_parts:
- if not url_query.endswith('/'):
- url_query += '/'
- unquoted_url = url_query
- url_query += '/'.join(urllib.quote(part, safe='')
- for part in url_parts)
- # oauth performs its own quoting
- unquoted_url += '/'.join(url_parts)
- encoded_params = {}
- if params:
- for key, value in params.items():
- key = unicode(key).encode('utf-8')
- encoded_params[key] = _encode_query_parameter(value)
- url_query += ('?' + urllib.urlencode(encoded_params))
- if body is not None and not isinstance(body, basestring):
- body = json.dumps(body)
- content_type = 'application/json'
- headers = {}
- if content_type:
- headers['content-type'] = content_type
-
- # Patched: We would like to receive gzip pretty please
- # ----------------------------------------------------
- headers['accept-encoding'] = "gzip"
- # ----------------------------------------------------
-
- headers.update(
- self._sign_request(method, unquoted_url, encoded_params))
-
- for delay in self._delays:
- try:
- self._conn.request(method, url_query, body, headers)
- return self._response()
- except errors.Unavailable, e:
- sleep(delay)
- raise e
-
- def _response(self):
- """
- Return the response of the (possibly gzipped) HTTP request.
-
- :return: The body and headers of the response.
- :rtype: tuple
- """
- resp = self._conn.getresponse()
- body = resp.read()
- headers = dict(resp.getheaders())
-
- # Patched: We would like to decode gzip
- # ----------------------------------------------------
- encoding = headers.get('content-encoding', '')
- if "gzip" in encoding:
- body = _gunzip(body)
- # ----------------------------------------------------
-
- if resp.status in (200, 201):
- return body, headers
- elif resp.status in http_errors.ERROR_STATUSES:
- try:
- respdic = json.loads(body)
- except ValueError:
- pass
- else:
- self._error(respdic)
- # special case
- if resp.status == 503:
- raise errors.Unavailable(body, headers)
- raise errors.HTTPError(resp.status, body, headers)
-
- def _prepare(self, comma, entries, **dic):
- """
- Prepare an entry to be sent through a syncing POST request.
-
- :param comma: A string to be prepended to the current entry.
- :type comma: str
- :param entries: A list of entries accumulated to be sent on the
- request.
- :type entries: list
- :param dic: The data to be included in this entry.
- :type dic: dict
-
- :return: The size of the prepared entry.
- :rtype: int
- """
- entry = comma + '\r\n' + json.dumps(dic)
- entries.append(entry)
- return len(entry)
-
- def _init_post_request(self, action, content_length):
- """
- Initiate a syncing POST request.
-
- :param url: The syncing URL.
- :type url: str
- :param action: The syncing action, either 'get' or 'receive'.
- :type action: str
- :param headers: The initial headers to be sent on this request.
- :type headers: dict
- :param content_length: The content-length of the request.
- :type content_length: int
- """
- self._conn.putrequest('POST', self._query_string)
- self._conn.putheader(
- 'content-type', 'application/x-soledad-sync-%s' % action)
- for header_name, header_value in self._headers:
- self._conn.putheader(header_name, header_value)
- self._conn.putheader('accept-encoding', 'gzip')
- self._conn.putheader('content-length', str(content_length))
- self._conn.endheaders()
-
- def _get_doc(self, received, sync_id, last_known_generation,
- last_known_trans_id):
- """
- Get a sync document from server by means of a POST request.
-
- :param received: The number of documents already received in the
- current sync session.
- :type received: int
- :param sync_id: The id for the current sync session.
- :type sync_id: str
- :param last_known_generation: Target's last known generation.
- :type last_known_generation: int
- :param last_known_trans_id: Target's last known transaction id.
- :type last_known_trans_id: str
-
- :return: The body and headers of the response.
- :rtype: tuple
- """
- entries = ['[']
- size = 1
- # add remote replica metadata to the request
- size += self._prepare(
- '', entries,
- last_known_generation=last_known_generation,
- last_known_trans_id=last_known_trans_id,
- sync_id=sync_id,
- ensure=self._ensure_callback is not None)
- # inform server of how many documents have already been received
- size += self._prepare(
- ',', entries, received=received)
- entries.append('\r\n]')
- size += len(entries[-1])
- # send headers
- self._init_post_request('get', size)
- # get document
- for entry in entries:
- self._conn.send(entry)
- return self._response()
-
- def _put_doc(self, sync_id, last_known_generation, last_known_trans_id,
- id, rev, content, gen, trans_id, number_of_docs, doc_idx):
- """
- Put a sync document on server by means of a POST request.
-
- :param sync_id: The id for the current sync session.
- :type sync_id: str
- :param last_known_generation: Target's last known generation.
- :type last_known_generation: int
- :param last_known_trans_id: Target's last known transaction id.
- :type last_known_trans_id: str
- :param id: The document id.
- :type id: str
- :param rev: The document revision.
- :type rev: str
- :param content: The serialized document content.
- :type content: str
- :param gen: The generation of the modification of the document.
- :type gen: int
- :param trans_id: The transaction id of the modification of the
- document.
- :type trans_id: str
- :param number_of_docs: The total amount of documents sent on this sync
- session.
- :type number_of_docs: int
- :param doc_idx: The index of the current document being sent.
- :type doc_idx: int
-
- :return: The body and headers of the response.
- :rtype: tuple
- """
- # prepare to send the document
- entries = ['[']
- size = 1
- # add remote replica metadata to the request
- size += self._prepare(
- '', entries,
- last_known_generation=last_known_generation,
- last_known_trans_id=last_known_trans_id,
- sync_id=sync_id,
- ensure=self._ensure_callback is not None)
- # add the document to the request
- size += self._prepare(
- ',', entries,
- id=id, rev=rev, content=content, gen=gen, trans_id=trans_id,
- number_of_docs=number_of_docs, doc_idx=doc_idx)
- entries.append('\r\n]')
- size += len(entries[-1])
- # send headers
- self._init_post_request('put', size)
- # send document
- for entry in entries:
- self._conn.send(entry)
- return self._response()
-
- def _sign_request(self, method, url_query, params):
- """
- Return an authorization header to be included in the HTTP request.
-
- :param method: The HTTP method.
- :type method: str
- :param url_query: The URL query string.
- :type url_query: str
- :param params: A list with encoded query parameters.
- :type param: list
-
- :return: The Authorization header.
- :rtype: list of tuple
- """
- return TokenBasedAuth._sign_request(self, method, url_query, params)
-
- def set_token_credentials(self, uuid, token):
- """
- Store given credentials so we can sign the request later.
-
- :param uuid: The user's uuid.
- :type uuid: str
- :param token: The authentication token.
- :type token: str
- """
- TokenBasedAuth.set_token_credentials(self, uuid, token)
-
-
-class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth):
- """
- A SyncTarget that encrypts data before sending and decrypts data after
- receiving.
-
- Normally encryption will have been written to the sync database upon
- document modification. The sync database is also used to write temporarily
- the parsed documents that the remote send us, before being decrypted and
- written to the main database.
- """
-
- # will later keep a reference to the insert-doc callback
- # passed to sync_exchange
- _insert_doc_cb = defaultdict(lambda: ProxyBase(None))
-
- """
- Period of recurrence of the periodic decrypting task, in seconds.
- """
- DECRYPT_TASK_PERIOD = 0.5
-
- #
- # Modified HTTPSyncTarget methods.
- #
-
- def __init__(self, url, source_replica_uid=None, creds=None, crypto=None,
- sync_db=None, sync_db_write_lock=None):
- """
- Initialize the SoledadSyncTarget.
-
- :param source_replica_uid: The source replica uid which we use when
- deferring decryption.
- :type source_replica_uid: str
- :param url: The url of the target replica to sync with.
- :type url: str
- :param creds: Optional dictionary giving credentials.
- to authorize the operation with the server.
- :type creds: dict
- :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
- document contents when syncing.
- :type crypto: soledad.crypto.SoledadCrypto
- :param sync_db: Optional. handler for the db with the symmetric
- encryption of the syncing documents. If
- None, encryption will be done in-place,
- instead of retreiving it from the dedicated
- database.
- :type sync_db: Sqlite handler
- :param sync_db_write_lock: a write lock for controlling concurrent
- access to the sync_db
- :type sync_db_write_lock: threading.Lock
- """
- HTTPSyncTarget.__init__(self, url, creds)
- self._raw_url = url
- self._raw_creds = creds
- self._crypto = crypto
- self._stopped = True
- self._stop_lock = threading.Lock()
- self._sync_exchange_lock = threading.Lock()
- self.source_replica_uid = source_replica_uid
- self._defer_decryption = False
-
- # deferred decryption attributes
- self._sync_db = None
- self._sync_db_write_lock = None
- self._decryption_callback = None
- self._sync_decr_pool = None
- self._sync_watcher = None
- if sync_db and sync_db_write_lock is not None:
- self._sync_db = sync_db
- self._sync_db_write_lock = sync_db_write_lock
-
- def _setup_sync_decr_pool(self):
- """
- Set up the SyncDecrypterPool for deferred decryption.
- """
- if self._sync_decr_pool is None:
- # initialize syncing queue decryption pool
- self._sync_decr_pool = SyncDecrypterPool(
- self._crypto, self._sync_db,
- self._sync_db_write_lock,
- insert_doc_cb=self._insert_doc_cb)
- self._sync_decr_pool.set_source_replica_uid(
- self.source_replica_uid)
-
- def _teardown_sync_decr_pool(self):
- """
- Tear down the SyncDecrypterPool.
- """
- if self._sync_decr_pool is not None:
- self._sync_decr_pool.close()
- self._sync_decr_pool = None
-
- def _setup_sync_watcher(self):
- """
- Set up the sync watcher for deferred decryption.
- """
- if self._sync_watcher is None:
- self._sync_watcher = TimerTask(
- self._decrypt_syncing_received_docs,
- delay=self.DECRYPT_TASK_PERIOD)
-
- def _teardown_sync_watcher(self):
- """
- Tear down the sync watcher.
- """
- if self._sync_watcher is not None:
- self._sync_watcher.stop()
- self._sync_watcher.shutdown()
- self._sync_watcher = None
-
- def _get_replica_uid(self, url):
- """
- Return replica uid from the url, or None.
-
- :param url: the replica url
- :type url: str
- """
- replica_uid_match = re.findall("user-([0-9a-fA-F]+)", url)
- return replica_uid_match[0] if len(replica_uid_match) > 0 else None
-
- @staticmethod
- def connect(url, source_replica_uid=None, crypto=None):
- return SoledadSyncTarget(
- url, source_replica_uid=source_replica_uid, crypto=crypto)
-
- def _parse_received_doc_response(self, response):
- """
- Parse the response from the server containing the received document.
-
- :param response: The body and headers of the response.
- :type response: tuple(str, dict)
- """
- data, _ = response
- # decode incoming stream
- parts = data.splitlines()
- if not parts or parts[0] != '[' or parts[-1] != ']':
- raise errors.BrokenSyncStream
- data = parts[1:-1]
- # decode metadata
- line, comma = utils.check_and_strip_comma(data[0])
- metadata = None
- try:
- metadata = json.loads(line)
- new_generation = metadata['new_generation']
- new_transaction_id = metadata['new_transaction_id']
- number_of_changes = metadata['number_of_changes']
- except (json.JSONDecodeError, KeyError):
- raise errors.BrokenSyncStream
- # make sure we have replica_uid from fresh new dbs
- if self._ensure_callback and 'replica_uid' in metadata:
- self._ensure_callback(metadata['replica_uid'])
- # parse incoming document info
- doc_id = None
- rev = None
- content = None
- gen = None
- trans_id = None
- if number_of_changes > 0:
- try:
- entry = json.loads(data[1])
- doc_id = entry['id']
- rev = entry['rev']
- content = entry['content']
- gen = entry['gen']
- trans_id = entry['trans_id']
- except (IndexError, KeyError):
- raise errors.BrokenSyncStream
- return new_generation, new_transaction_id, number_of_changes, \
- doc_id, rev, content, gen, trans_id
-
- def _insert_received_doc(self, idx, total, response):
- """
- Insert a received document into the local replica.
-
- The response is parsed with _parse_received_doc_response(). When it
- carries a document, that document is either handed to
- self._return_doc_cb right away or stored in the sync db so it can be
- processed later, depending on self._queue_for_decrypt. A
- SOLEDAD_SYNC_RECEIVE_STATUS signal carrying the "idx + 1/total"
- progress message is also emitted.
-
- :param idx: The index count of the current operation.
- :type idx: int
- :param total: The total number of operations.
- :type total: int
- :param response: The body and headers of the response.
- :type response: tuple(str, dict)
-
- :return: The number of changes, the new generation and the new
- transaction id, as parsed from the response.
- :rtype: tuple
- """
- new_generation, new_transaction_id, number_of_changes, doc_id, \
- rev, content, gen, trans_id = \
- self._parse_received_doc_response(response)
- # a missing doc_id means there is no document to insert
- if doc_id is not None:
- # decrypt incoming document and insert into local database
- # -------------------------------------------------------------
- # symmetric decryption of document's contents
- # -------------------------------------------------------------
- # If arriving content was symmetrically encrypted, we decrypt it.
- # We do it inline if defer_decryption flag is False or no sync_db
- # was defined, otherwise we defer it writing it to the received
- # docs table.
- doc = SoledadDocument(doc_id, rev, content)
- if is_symmetrically_encrypted(doc):
- if self._queue_for_decrypt:
- self._save_encrypted_received_doc(
- doc, gen, trans_id, idx, total)
- else:
- # defer_decryption is False or no-sync-db fallback
- doc.set_json(decrypt_doc(self._crypto, doc))
- self._return_doc_cb(doc, gen, trans_id)
- else:
- # not symmetrically encrypted doc, insert it directly
- # or save it in the decrypted stage.
- if self._queue_for_decrypt:
- self._save_received_doc(doc, gen, trans_id, idx, total)
- else:
- self._return_doc_cb(doc, gen, trans_id)
- # -------------------------------------------------------------
- # end of symmetric decryption
- # -------------------------------------------------------------
- # idx is zero-based, so the progress message reports idx + 1
- msg = "%d/%d" % (idx + 1, total)
- signal(SOLEDAD_SYNC_RECEIVE_STATUS, msg)
- logger.debug("Soledad sync receive status: %s" % msg)
- return number_of_changes, new_generation, new_transaction_id
-
- def _get_remote_docs(self, url, last_known_generation, last_known_trans_id,
- headers, return_doc_cb, ensure_callback, sync_id,
- syncer_pool, defer_decryption=False):
- """
- Fetch sync documents from the remote database and insert them in the
- local database.
-
- If an incoming document's encryption scheme is equal to
- EncryptionSchemes.SYMKEY, then this method will decrypt it with
- Soledad's symmetric key.
-
- One syncer thread is launched per document. The first thread is joined
- before any other is launched, because its result tells how many
- changes have to be fetched in total.
-
- :param url: The syncing URL. Not referenced in this method's body.
- :type url: str
- :param last_known_generation: Target's last known generation.
- :type last_known_generation: int
- :param last_known_trans_id: Target's last known transaction id.
- :type last_known_trans_id: str
- :param headers: The headers of the HTTP request. Not referenced in this
- method's body.
- :type headers: dict
- :param return_doc_cb: A callback to insert docs from target.
- :type return_doc_cb: callable
- :param ensure_callback: A callback to ensure we have the correct
- target_replica_uid, if it was just created.
- Not referenced in this method's body.
- :type ensure_callback: callable
- :param sync_id: The id for the current sync session.
- :type sync_id: str
- :param syncer_pool: The pool used to create one syncer thread per
- document to be fetched (sync_exchange() passes a
- DocumentSyncerPool).
- :type syncer_pool: DocumentSyncerPool
- :param defer_decryption: Whether to defer the decryption process using
- the intermediate database. If False,
- decryption will be done inline.
- :type defer_decryption: bool
-
- :raise BrokenSyncStream: If `data` is malformed.
-
- :return: The new generation and the new transaction id of the target
- replica. These are the last known values received as
- arguments when no thread succeeded.
- :rtype: tuple
- """
- # we keep a reference to the callback in case we defer the decryption
- self._return_doc_cb = return_doc_cb
- # deferring is only possible when a sync db is available
- self._queue_for_decrypt = defer_decryption \
- and self._sync_db is not None
-
- new_generation = last_known_generation
- new_transaction_id = last_known_trans_id
-
- if self._queue_for_decrypt:
- logger.debug(
- "Soledad sync: will queue received docs for decrypting.")
-
- idx = 0
- # start with 1 so at least one request is made; the real value is
- # taken from the result of the first request
- number_of_changes = 1
-
- first_request = True
- last_callback_lock = None
- threads = []
-
- # get incoming documents
- while idx < number_of_changes:
- # bail out if sync process was interrupted
- if self.stopped is True:
- break
-
- # launch a thread to fetch one document from target
- t = syncer_pool.new_syncer_thread(
- idx, number_of_changes,
- last_callback_lock=last_callback_lock)
-
- # bail out if any thread failed
- if t is None:
- self.stop()
- break
-
- t.doc_syncer.set_request_method(
- 'get', idx, sync_id, last_known_generation,
- last_known_trans_id)
- t.doc_syncer.set_success_callback(self._insert_received_doc)
-
- # failures are only logged here; nothing is raised from the callback
- def _failure_callback(idx, total, exception):
- _failure_msg = "Soledad sync: error while getting document " \
- "%d/%d: %s" \
- % (idx + 1, total, exception)
- logger.warning("%s" % _failure_msg)
- logger.warning("Soledad sync: failing gracefully, will "
- "recover on next sync.")
-
- t.doc_syncer.set_failure_callback(_failure_callback)
- threads.append(t)
- t.start()
- # the callback lock of this thread is handed to the next one
- last_callback_lock = t.callback_lock
- idx += 1
-
- # if this is the first request, wait to update the number of
- # changes
- if first_request is True:
- t.join()
- if t.success:
- number_of_changes, _, _ = t.result
- first_request = False
-
- # make sure all threads finished and we have up-to-date info
- last_successful_thread = None
- while threads:
- # check if there are failures
- t = threads.pop(0)
- t.join()
- if t.success:
- last_successful_thread = t
-
- # get information about last successful thread
- if last_successful_thread is not None:
- body, _ = last_successful_thread.response
- parsed_body = json.loads(body)
- # get current target gen and trans id in case no documents were
- # transferred
- if len(parsed_body) == 1:
- metadata = parsed_body[0]
- new_generation = metadata['new_generation']
- new_transaction_id = metadata['new_transaction_id']
- # get current target gen and trans id from last transferred
- # document
- else:
- doc_data = parsed_body[1]
- new_generation = doc_data['gen']
- new_transaction_id = doc_data['trans_id']
-
- return new_generation, new_transaction_id
-
- def sync_exchange(self, docs_by_generations,
- source_replica_uid, last_known_generation,
- last_known_trans_id, return_doc_cb,
- ensure_callback=None, defer_decryption=True,
- sync_id=None):
- """
- Find out which documents the remote database does not know about,
- encrypt and send them.
-
- This does the same as the parent's method but encrypts content before
- syncing.
-
- After sending, and unless the session was stopped, documents are
- fetched from the target with _get_remote_docs().
-
- :param docs_by_generations: A list of (doc_id, generation, trans_id)
- of local documents that were changed since
- the last local generation the remote
- replica knows about.
- :type docs_by_generations: list of tuples
-
- :param source_replica_uid: The uid of the source replica.
- :type source_replica_uid: str
-
- :param last_known_generation: Target's last known generation.
- :type last_known_generation: int
-
- :param last_known_trans_id: Target's last known transaction id.
- :type last_known_trans_id: str
-
- :param return_doc_cb: A callback for inserting received documents from
- target. If not overridden, this will call u1db
- insert_doc_from_target in synchronizer, which
- implements the TAKE OTHER semantics.
- :type return_doc_cb: function
-
- :param ensure_callback: A callback that ensures we know the target
- replica uid if the target replica was just
- created.
- :type ensure_callback: function
-
- :param defer_decryption: Whether to defer the decryption process using
- the intermediate database. If False,
- decryption will be done inline.
- :type defer_decryption: bool
-
- :param sync_id: The id for the current sync session. When None, a new
- one is generated from uuid4().
- :type sync_id: str
-
- :raise PendingReceivedDocsSyncError: If clear_to_sync() reports that
- the sync can not proceed yet.
-
- :return: The new generation and transaction id of the target replica.
- :rtype: tuple
- """
- self._ensure_callback = ensure_callback
-
- if defer_decryption:
- # NOTE(review): this lock is only released at the very end of this
- # method and no try/finally is visible, so an exception raised in
- # between (e.g. PendingReceivedDocsSyncError below) would seem to
- # leave it held -- confirm.
- self._sync_exchange_lock.acquire()
- self._setup_sync_decr_pool()
- self._setup_sync_watcher()
- self._defer_decryption = True
-
- self.start()
-
- if sync_id is None:
- sync_id = str(uuid4())
- self.source_replica_uid = source_replica_uid
- # let the decrypter pool access the passed callback to insert docs
- setProxiedObject(self._insert_doc_cb[source_replica_uid],
- return_doc_cb)
-
- if not self.clear_to_sync():
- raise PendingReceivedDocsSyncError
-
- self._ensure_connection()
- if self._trace_hook: # for tests
- self._trace_hook('sync_exchange')
- url = '%s/sync-from/%s' % (self._url.path, source_replica_uid)
- headers = self._sign_request('POST', url, {})
-
- cur_target_gen = last_known_generation
- cur_target_trans_id = last_known_trans_id
-
- # send docs
- msg = "%d/%d" % (0, len(docs_by_generations))
- signal(SOLEDAD_SYNC_SEND_STATUS, msg)
- logger.debug("Soledad sync send status: %s" % msg)
-
- # encrypted versions of the docs are read from the sync db when there
- # is one; otherwise docs are encrypted inline below
- defer_encryption = self._sync_db is not None
- syncer_pool = DocumentSyncerPool(
- self._raw_url, self._raw_creds, url, headers, ensure_callback,
- self.stop)
- threads = []
- last_request_lock = None
- last_callback_lock = None
- sent = 0
- total = len(docs_by_generations)
-
- # (doc_id, rev) pairs of the documents that were successfully sent
- synced = []
- number_of_docs = len(docs_by_generations)
-
- for doc, gen, trans_id in docs_by_generations:
- # allow for interrupting the sync process
- if self.stopped is True:
- break
-
- # skip non-syncable docs
- if isinstance(doc, SoledadDocument) and not doc.syncable:
- continue
-
- # -------------------------------------------------------------
- # symmetric encryption of document's contents
- # -------------------------------------------------------------
- doc_json = doc.get_json()
- if not doc.is_tombstone():
- if not defer_encryption:
- # fallback case, for tests
- doc_json = encrypt_doc(self._crypto, doc)
- else:
- try:
- doc_json = self.get_encrypted_doc_from_db(
- doc.doc_id, doc.rev)
- except Exception as exc:
- logger.error("Error while getting "
- "encrypted doc from db")
- logger.exception(exc)
- continue
- if doc_json is None:
- # Not marked as tombstone, but we got nothing
- # from the sync db. As it is not encrypted yet, we
- # force inline encryption.
- # TODO: implement a queue to deal with these cases.
- doc_json = encrypt_doc(self._crypto, doc)
- # -------------------------------------------------------------
- # end of symmetric encryption
- # -------------------------------------------------------------
- # NOTE(review): last_request_lock is updated after each thread is
- # started, yet None is always passed here -- confirm this is intended.
- t = syncer_pool.new_syncer_thread(
- sent + 1, total, last_request_lock=None,
- last_callback_lock=last_callback_lock)
-
- # bail out if any thread failed
- if t is None:
- self.stop()
- break
-
- # set the request method
- t.doc_syncer.set_request_method(
- 'put', sync_id, cur_target_gen, cur_target_trans_id,
- id=doc.doc_id, rev=doc.rev, content=doc_json, gen=gen,
- trans_id=trans_id, number_of_docs=number_of_docs, doc_idx=sent + 1)
- # set the success callback
-
- def _success_callback(idx, total, response):
- _success_msg = "Soledad sync send status: %d/%d" \
- % (idx, total)
- signal(SOLEDAD_SYNC_SEND_STATUS, _success_msg)
- logger.debug(_success_msg)
-
- t.doc_syncer.set_success_callback(_success_callback)
-
- # set the failure callback
- def _failure_callback(idx, total, exception):
- _failure_msg = "Soledad sync: error while sending document " \
- "%d/%d: %s" % (idx, total, exception)
- logger.warning("%s" % _failure_msg)
- logger.warning("Soledad sync: failing gracefully, will "
- "recover on next sync.")
-
- t.doc_syncer.set_failure_callback(_failure_callback)
-
- # save thread and append
- t.start()
- threads.append((t, doc))
- last_request_lock = t.request_lock
- last_callback_lock = t.callback_lock
- sent += 1
-
- # make sure all threads finished and we have up-to-date info
- while threads:
- # check if there are failures
- t, doc = threads.pop(0)
- t.join()
- if t.success:
- synced.append((doc.doc_id, doc.rev))
-
- if defer_decryption:
- self._sync_watcher.start()
-
- # get docs from target
- if self.stopped is False:
- cur_target_gen, cur_target_trans_id = self._get_remote_docs(
- url,
- last_known_generation, last_known_trans_id, headers,
- return_doc_cb, ensure_callback, sync_id, syncer_pool,
- defer_decryption=defer_decryption)
- syncer_pool.cleanup()
-
- # delete documents from the sync database
- if defer_encryption:
- self.delete_encrypted_docs_from_db(synced)
-
- # wait for deferred decryption to finish
- if defer_decryption:
- # poll until clear_to_sync() stops returning False
- while self.clear_to_sync() is False:
- sleep(self.DECRYPT_TASK_PERIOD)
- self._teardown_sync_watcher()
- self._teardown_sync_decr_pool()
- self._sync_exchange_lock.release()
-
- self.stop()
- return cur_target_gen, cur_target_trans_id
-
- def start(self):
- """
- Mark current sync session as running.
- """
- with self._stop_lock:
- self._stopped = False
-
- def stop(self):
- """
- Mark current sync session as stopped.
-
- This will eventually interrupt the sync_exchange() method and return
- enough information to the synchronizer so the sync session can be
- recovered afterwards.
- """
- with self._stop_lock:
- self._stopped = True
-
- @property
- def stopped(self):
- """
- Return whether this sync session is stopped.
-
- :return: Whether this sync session is stopped.
- :rtype: bool
- """
- with self._stop_lock:
- return self._stopped is True
-
- def get_encrypted_doc_from_db(self, doc_id, doc_rev):
- """
- Retrieve encrypted document from the database of encrypted docs for
- sync.
-
- :param doc_id: The Document id.
- :type doc_id: str
-
- :param doc_rev: The document revision
- :type doc_rev: str
- """
- encr = SyncEncrypterPool
- c = self._sync_db.cursor()
- sql = ("SELECT content FROM %s WHERE doc_id=? and rev=?" % (
- encr.TABLE_NAME,))
- c.execute(sql, (doc_id, doc_rev))
- res = c.fetchall()
- if len(res) != 0:
- return res[0][0]
-
- def delete_encrypted_docs_from_db(self, docs_ids):
- """
- Delete several encrypted documents from the database of symmetrically
- encrypted docs to sync.
-
- :param docs_ids: an iterable with (doc_id, doc_rev) for all documents
- to be deleted.
- :type docs_ids: any iterable of tuples of str
- """
- if docs_ids:
- encr = SyncEncrypterPool
- c = self._sync_db.cursor()
- for doc_id, doc_rev in docs_ids:
- sql = ("DELETE FROM %s WHERE doc_id=? and rev=?" % (
- encr.TABLE_NAME,))
- c.execute(sql, (doc_id, doc_rev))
- self._sync_db.commit()
-
- def _save_encrypted_received_doc(self, doc, gen, trans_id, idx, total):
- """
- Save a symmetrically encrypted incoming document into the received
- docs table in the sync db. A decryption task will pick it up
- from here in turn.
-
- :param doc: The document to save.
- :type doc: SoledadDocument
- :param gen: The generation.
- :type gen: str
- :param trans_id: Transacion id.
- :type gen: str
- :param idx: The index count of the current operation.
- :type idx: int
- :param total: The total number of operations.
- :type total: int
- """
- logger.debug(
- "Enqueueing doc for decryption: %d/%d."
- % (idx + 1, total))
- self._sync_decr_pool.insert_encrypted_received_doc(
- doc.doc_id, doc.rev, doc.content, gen, trans_id)
-
- def _save_received_doc(self, doc, gen, trans_id, idx, total):
- """
- Save any incoming document into the received docs table in the sync db.
-
- :param doc: The document to save.
- :type doc: SoledadDocument
- :param gen: The generation.
- :type gen: str
- :param trans_id: Transacion id.
- :type gen: str
- :param idx: The index count of the current operation.
- :type idx: int
- :param total: The total number of operations.
- :type total: int
- """
- logger.debug(
- "Enqueueing doc, no decryption needed: %d/%d."
- % (idx + 1, total))
- self._sync_decr_pool.insert_received_doc(
- doc.doc_id, doc.rev, doc.content, gen, trans_id)
-
- #
- # Symmetric decryption of syncing docs
- #
-
- def clear_to_sync(self):
- """
- Return True if sync can proceed (ie, the received db table is empty).
- :rtype: bool
- """
- if self._sync_decr_pool is not None:
- return self._sync_decr_pool.count_received_encrypted_docs() == 0
- else:
- return True
-
- def set_decryption_callback(self, cb):
- """
- Set callback to be called when the decryption finishes.
-
- :param cb: The callback to be set.
- :type cb: callable
- """
- self._decryption_callback = cb
-
- def has_decryption_callback(self):
- """
- Return True if there is a decryption callback set.
- :rtype: bool
- """
- return self._decryption_callback is not None
-
- def has_syncdb(self):
- """
- Return True if we have an initialized syncdb.
- """
- return self._sync_db is not None
-
- def _decrypt_syncing_received_docs(self):
- """
- Decrypt the documents received from remote replica and insert them
- into the local one.
-
- Called periodically from TimerTask self._sync_watcher.
- """
- if sameProxiedObjects(
- self._insert_doc_cb.get(self.source_replica_uid),
- None):
- return
-
- decrypter = self._sync_decr_pool
- decrypter.decrypt_received_docs()
- done = decrypter.process_decrypted()
-
- def _sign_request(self, method, url_query, params):
- """
- Return an authorization header to be included in the HTTP request.
-
- :param method: The HTTP method.
- :type method: str
- :param url_query: The URL query string.
- :type url_query: str
- :param params: A list with encoded query parameters.
- :type param: list
-
- :return: The Authorization header.
- :rtype: list of tuple
- """
- return TokenBasedAuth._sign_request(self, method, url_query, params)
-
- def set_token_credentials(self, uuid, token):
- """
- Store given credentials so we can sign the request later.
-
- :param uuid: The user's uuid.
- :type uuid: str
- :param token: The authentication token.
- :type token: str
- """
- TokenBasedAuth.set_token_credentials(self, uuid, token)