From 11ac38cb021e97cca77df2e9cf15f9136b24fc43 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 19 Aug 2013 10:13:51 -0300 Subject: Split soledad into common, client and server. --- client/src/leap/__init__.py | 6 + client/src/leap/soledad/__init__.py | 6 + client/src/leap/soledad/client/__init__.py | 1129 +++++++++++++++++++++++++++ client/src/leap/soledad/client/auth.py | 71 ++ client/src/leap/soledad/client/crypto.py | 213 +++++ client/src/leap/soledad/client/dbwrapper.py | 183 +++++ client/src/leap/soledad/client/shared_db.py | 138 ++++ client/src/leap/soledad/client/sqlcipher.py | 696 +++++++++++++++++ client/src/leap/soledad/client/target.py | 452 +++++++++++ 9 files changed, 2894 insertions(+) create mode 100644 client/src/leap/__init__.py create mode 100644 client/src/leap/soledad/__init__.py create mode 100644 client/src/leap/soledad/client/__init__.py create mode 100644 client/src/leap/soledad/client/auth.py create mode 100644 client/src/leap/soledad/client/crypto.py create mode 100644 client/src/leap/soledad/client/dbwrapper.py create mode 100644 client/src/leap/soledad/client/shared_db.py create mode 100644 client/src/leap/soledad/client/sqlcipher.py create mode 100644 client/src/leap/soledad/client/target.py (limited to 'client/src/leap') diff --git a/client/src/leap/__init__.py b/client/src/leap/__init__.py new file mode 100644 index 00000000..f48ad105 --- /dev/null +++ b/client/src/leap/__init__.py @@ -0,0 +1,6 @@ +# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages +try: + __import__('pkg_resources').declare_namespace(__name__) +except ImportError: + from pkgutil import extend_path + __path__ = extend_path(__path__, __name__) diff --git a/client/src/leap/soledad/__init__.py b/client/src/leap/soledad/__init__.py new file mode 100644 index 00000000..f48ad105 --- /dev/null +++ b/client/src/leap/soledad/__init__.py @@ -0,0 +1,6 @@ +# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages +try: + __import__('pkg_resources').declare_namespace(__name__) +except ImportError: + from pkgutil import extend_path + __path__ = extend_path(__path__, __name__) diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py new file mode 100644 index 00000000..fc8219fa --- /dev/null +++ b/client/src/leap/soledad/client/__init__.py @@ -0,0 +1,1129 @@ +# -*- coding: utf-8 -*- +# __init__.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +Soledad - Synchronization Of Locally Encrypted Data Among Devices. + +Soledad is the part of LEAP that manages storage and synchronization of +application data. It is built on top of U1DB reference Python API and +implements (1) a SQLCipher backend for local storage in the client, (2) a +SyncTarget that encrypts data before syncing, and (3) a CouchDB backend for +remote storage in the server side. +""" + +import os +import binascii +import logging +import urlparse +import simplejson as json +import scrypt +import httplib +import socket +import ssl +import errno + + +from xdg import BaseDirectory +from hashlib import sha256 +from u1db.remote import http_client +from u1db.remote.ssl_match_hostname import match_hostname + + +# +# Signaling function +# + +SOLEDAD_CREATING_KEYS = 'Creating keys...' +SOLEDAD_DONE_CREATING_KEYS = 'Done creating keys.' +SOLEDAD_DOWNLOADING_KEYS = 'Downloading keys...' +SOLEDAD_DONE_DOWNLOADING_KEYS = 'Done downloading keys.' +SOLEDAD_UPLOADING_KEYS = 'Uploading keys...' +SOLEDAD_DONE_UPLOADING_KEYS = 'Done uploading keys.' +SOLEDAD_NEW_DATA_TO_SYNC = 'New data available.' +SOLEDAD_DONE_DATA_SYNC = 'Done data sync.' + +# we want to use leap.common.events to emits signals, if it is available. +try: + from leap.common import events + from leap.common.events import signal + SOLEDAD_CREATING_KEYS = events.events_pb2.SOLEDAD_CREATING_KEYS + SOLEDAD_DONE_CREATING_KEYS = events.events_pb2.SOLEDAD_DONE_CREATING_KEYS + SOLEDAD_DOWNLOADING_KEYS = events.events_pb2.SOLEDAD_DOWNLOADING_KEYS + SOLEDAD_DONE_DOWNLOADING_KEYS = \ + events.events_pb2.SOLEDAD_DONE_DOWNLOADING_KEYS + SOLEDAD_UPLOADING_KEYS = events.events_pb2.SOLEDAD_UPLOADING_KEYS + SOLEDAD_DONE_UPLOADING_KEYS = \ + events.events_pb2.SOLEDAD_DONE_UPLOADING_KEYS + SOLEDAD_NEW_DATA_TO_SYNC = events.events_pb2.SOLEDAD_NEW_DATA_TO_SYNC + SOLEDAD_DONE_DATA_SYNC = events.events_pb2.SOLEDAD_DONE_DATA_SYNC + +except ImportError: + # we define a fake signaling function and fake signal constants that will + # allow for logging signaling attempts in case leap.common.events is not + # available. + + def signal(signal, content=""): + logger.info("Would signal: %s - %s." % (str(signal), content)) + + +from leap.soledad.common import soledad_assert, soledad_assert_type +from leap.soledad.common.document import SoledadDocument +from leap.soledad.client.crypto import SoledadCrypto +from leap.soledad.client.dbwrapper import SQLCipherWrapper +from leap.soledad.client.shared_db import SoledadSharedDatabase +from leap.soledad.client.sqlcipher import open as sqlcipher_open +from leap.soledad.client.sqlcipher import SQLCipherDatabase +from leap.soledad.client.target import SoledadSyncTarget + + +logger = logging.getLogger(name=__name__) + + +# +# Constants +# + +SOLEDAD_CERT = None +""" +Path to the certificate file used to certify the SSL connection between +Soledad client and server. +""" + +SECRETS_DOC_ID_HASH_PREFIX = 'uuid-' + + +# +# Soledad: local encrypted storage and remote encrypted sync. +# + +class NoStorageSecret(Exception): + """ + Raised when trying to use a storage secret but none is available. + """ + pass + + +class PassphraseTooShort(Exception): + """ + Raised when trying to change the passphrase but the provided passphrase is + too short. + """ + + +class Soledad(object): + """ + Soledad provides encrypted data storage and sync. + + A Soledad instance is used to store and retrieve data in a local encrypted + database and synchronize this database with Soledad server. + + This class is also responsible for bootstrapping users' account by + creating cryptographic secrets and/or storing/fetching them on Soledad + server. + + Soledad uses C{leap.common.events} to signal events. The possible events + to be signaled are: + + SOLEDAD_CREATING_KEYS: emitted during bootstrap sequence when key + generation starts. + SOLEDAD_DONE_CREATING_KEYS: emitted during bootstrap sequence when key + generation finishes. + SOLEDAD_UPLOADING_KEYS: emitted during bootstrap sequence when soledad + starts sending keys to server. + SOLEDAD_DONE_UPLOADING_KEYS: emitted during bootstrap sequence when + soledad finishes sending keys to server. + SOLEDAD_DOWNLOADING_KEYS: emitted during bootstrap sequence when + soledad starts to retrieve keys from server. + SOLEDAD_DONE_DOWNLOADING_KEYS: emitted during bootstrap sequence when + soledad finishes downloading keys from server. + SOLEDAD_NEW_DATA_TO_SYNC: emitted upon call to C{need_sync()} when + there's indeed new data to be synchronized between local database + replica and server's replica. + SOLEDAD_DONE_DATA_SYNC: emitted inside C{sync()} method when it has + finished synchronizing with remote replica. + """ + + LOCAL_DATABASE_FILE_NAME = 'soledad.u1db' + """ + The name of the local SQLCipher U1DB database file. + """ + + STORAGE_SECRETS_FILE_NAME = "soledad.json" + """ + The name of the file where the storage secrets will be stored. + """ + + GENERATED_SECRET_LENGTH = 1024 + """ + The length of the generated secret used to derive keys for symmetric + encryption for local and remote storage. + """ + + LOCAL_STORAGE_SECRET_LENGTH = 512 + """ + The length of the secret used to derive a passphrase for the SQLCipher + database. + """ + + REMOTE_STORAGE_SECRET_LENGTH = \ + GENERATED_SECRET_LENGTH - LOCAL_STORAGE_SECRET_LENGTH + """ + The length of the secret used to derive an encryption key and a MAC auth + key for remote storage. + """ + + SALT_LENGTH = 64 + """ + The length of the salt used to derive the key for the storage secret + encryption. + """ + + MINIMUM_PASSPHRASE_LENGTH = 6 + """ + The minimum length for a passphrase. The passphrase length is only checked + when the user changes her passphrase, not when she instantiates Soledad. + """ + + IV_SEPARATOR = ":" + """ + A separator used for storing the encryption initial value prepended to the + ciphertext. + """ + + UUID_KEY = 'uuid' + STORAGE_SECRETS_KEY = 'storage_secrets' + SECRET_KEY = 'secret' + CIPHER_KEY = 'cipher' + LENGTH_KEY = 'length' + KDF_KEY = 'kdf' + KDF_SALT_KEY = 'kdf_salt' + KDF_LENGTH_KEY = 'kdf_length' + KDF_SCRYPT = 'scrypt' + CIPHER_AES256 = 'aes256' + """ + Keys used to access storage secrets in recovery documents. + """ + + DEFAULT_PREFIX = os.path.join( + BaseDirectory.xdg_config_home, + 'leap', 'soledad') + """ + Prefix for default values for path. + """ + + def __init__(self, uuid, passphrase, secrets_path, local_db_path, + server_url, cert_file, auth_token=None, secret_id=None): + """ + Initialize configuration, cryptographic keys and dbs. + + @param uuid: User's uuid. + @type uuid: str + @param passphrase: The passphrase for locking and unlocking encryption + secrets for local and remote storage. + @type passphrase: str + @param secrets_path: Path for storing encrypted key used for + symmetric encryption. + @type secrets_path: str + @param local_db_path: Path for local encrypted storage db. + @type local_db_path: str + @param server_url: URL for Soledad server. This is used either to sync + with the user's remote db and to interact with the shared recovery + database. + @type server_url: str + @param cert_file: Path to the SSL certificate to use in the + connection to the server_url. + @type cert_file: str + @param auth_token: Authorization token for accessing remote databases. + @type auth_token: str + """ + # get config params + self._uuid = uuid + self._passphrase = passphrase + # init crypto variables + self._secrets = {} + self._secret_id = secret_id + # init config (possibly with default values) + self._init_config(secrets_path, local_db_path, server_url) + self._set_token(auth_token) + # configure SSL certificate + global SOLEDAD_CERT + SOLEDAD_CERT = cert_file + # initiate bootstrap sequence + self._bootstrap() + + def _init_config(self, secrets_path, local_db_path, server_url): + """ + Initialize configuration using default values for missing params. + """ + # initialize secrets_path + self._secrets_path = secrets_path + if self._secrets_path is None: + self._secrets_path = os.path.join( + self.DEFAULT_PREFIX, self.STORAGE_SECRETS_FILE_NAME) + # initialize local_db_path + self._local_db_path = local_db_path + if self._local_db_path is None: + self._local_db_path = os.path.join( + self.DEFAULT_PREFIX, self.LOCAL_DATABASE_FILE_NAME) + # initialize server_url + self._server_url = server_url + soledad_assert( + self._server_url is not None, + 'Missing URL for Soledad server.') + + # + # initialization/destruction methods + # + + def _bootstrap(self): + """ + Bootstrap local Soledad instance. + + Soledad Client bootstrap is the following sequence of stages: + + * stage 0 - local environment setup. + - directory initialization. + - crypto submodule initialization + * stage 1 - secret generation/loading: + - if secrets exist locally, load them. + - else, if secrets exist in server, download them. + - else, generate a new secret. + * stage 2 - store secrets in server. + * stage 3 - database initialization. + + This method decides which bootstrap stages have already been performed + and performs the missing ones in order. + """ + # TODO: make sure key storage always happens (even if this method is + # interrupted). + # TODO: write tests for bootstrap stages. + # TODO: log each bootstrap step. + # stage 0 - socal environment setup + self._init_dirs() + self._crypto = SoledadCrypto(self) + # stage 1 - secret generation/loading + if not self._has_secret(): # try to load from local storage. + logger.info( + 'Trying to fetch cryptographic secrets from shared recovery ' + 'database...') + # there are no secrets in local storage, so try to fetch encrypted + # secrets from server. + doc = self._get_secrets_from_shared_db() + if doc: + # found secrets in server, so import them. + logger.info( + 'Found cryptographic secrets in shared recovery ' + 'database.') + self.import_recovery_document(doc.content) + else: + # there are no secrets in server also, so generate a secret. + logger.info( + 'No cryptographic secrets found, creating new secrets...') + self._set_secret_id(self._gen_secret()) + # Stage 2 - storage of encrypted secrets in the server. + self._put_secrets_in_shared_db() + # Stage 3 - Local database initialization + self._init_db() + + def _init_dirs(self): + """ + Create work directories. + + @raise OSError: in case file exists and is not a dir. + """ + paths = map( + lambda x: os.path.dirname(x), + [self._local_db_path, self._secrets_path]) + for path in paths: + logger.info('Creating directory: %s.' % path) + try: + os.makedirs(path) + except OSError as exc: + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + def _init_db(self): + """ + Initialize the U1DB SQLCipher database for local storage. + + Currently, Soledad uses the default SQLCipher cipher, i.e. + 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key and + uses the 'raw PRAGMA key' format to handle the key to SQLCipher. + + The first C{self.REMOTE_STORAGE_SECRET_LENGTH} bytes of the storage + secret are used for remote storage encryption. We use the next + C{self.LOCAL_STORAGE_SECRET} bytes to derive a key for local storage. + From these bytes, the first C{self.SALT_LENGTH} are used as the salt + and the rest as the password for the scrypt hashing. + """ + # salt indexes + salt_start = self.REMOTE_STORAGE_SECRET_LENGTH + salt_end = salt_start + self.SALT_LENGTH + # password indexes + pwd_start = salt_end + pwd_end = salt_start + self.LOCAL_STORAGE_SECRET_LENGTH + # calculate the key for local encryption + secret = self._get_storage_secret() + key = scrypt.hash( + secret[pwd_start:pwd_end], # the password + secret[salt_start:salt_end], # the salt + buflen=32, # we need a key with 256 bits (32 bytes) + ) + + self._db = sqlcipher_open( + self._local_db_path, + binascii.b2a_hex(key), # sqlcipher only accepts the hex version + create=True, + document_factory=SoledadDocument, + crypto=self._crypto, + raw_key=True) + + def close(self): + """ + Close underlying U1DB database. + """ + if hasattr(self, '_db') and isinstance( + self._db, + SQLCipherDatabase): + self._db.close() + + def __del__(self): + """ + Make sure local database is closed when object is destroyed. + """ + # Watch out! We have no guarantees that this is properly called. + self.close() + + # + # Management of secret for symmetric encryption. + # + + def _get_storage_secret(self): + """ + Return the storage secret. + + Storage secret is encrypted before being stored. This method decrypts + and returns the stored secret. + + @return: The storage secret. + @rtype: str + """ + # calculate the encryption key + key = scrypt.hash( + self._passphrase, + # the salt is stored base64 encoded + binascii.a2b_base64( + self._secrets[self._secret_id][self.KDF_SALT_KEY]), + buflen=32, # we need a key with 256 bits (32 bytes). + ) + # recover the initial value and ciphertext + iv, ciphertext = self._secrets[self._secret_id][self.SECRET_KEY].split( + self.IV_SEPARATOR, 1) + ciphertext = binascii.a2b_base64(ciphertext) + return self._crypto.decrypt_sym(ciphertext, key, iv=iv) + + def _set_secret_id(self, secret_id): + """ + Define the id of the storage secret to be used. + + This method will also replace the secret in the crypto object. + """ + self._secret_id = secret_id + + def _load_secrets(self): + """ + Load storage secrets from local file. + + The content of the file has the following format: + + { + "storage_secrets": { + "": { + 'kdf': 'scrypt', + 'kdf_salt': '' + 'kdf_length': + "cipher": "aes256", + "length": , + "secret": "", + } + } + } + """ + # does the file exist in disk? + if not os.path.isfile(self._secrets_path): + raise IOError('File does not exist: %s' % self._secrets_path) + # read storage secrets from file + content = None + with open(self._secrets_path, 'r') as f: + content = json.loads(f.read()) + self._secrets = content[self.STORAGE_SECRETS_KEY] + # choose first secret if no secret_id was given + if self._secret_id is None: + self._set_secret_id(self._secrets.items()[0][0]) + + def _has_secret(self): + """ + Return whether there is a storage secret available for use or not. + + @return: Whether there's a storage secret for symmetric encryption. + @rtype: bool + """ + if self._secret_id is None or self._secret_id not in self._secrets: + try: + self._load_secrets() # try to load from disk + except IOError, e: + logger.error('IOError: %s' % str(e)) + try: + self._get_storage_secret() + return True + except: + return False + + def _gen_secret(self): + """ + Generate a secret for symmetric encryption and store in a local + encrypted file. + + This method emits the following signals: + + * SOLEDAD_CREATING_KEYS + * SOLEDAD_DONE_CREATING_KEYS + + A secret has the following structure: + + { + '': { + 'kdf': 'scrypt', + 'kdf_salt': '' + 'kdf_length': + 'cipher': 'aes256', + 'length': , + 'secret': '', + } + } + + @return: The id of the generated secret. + @rtype: str + """ + signal(SOLEDAD_CREATING_KEYS, self._uuid) + # generate random secret + secret = os.urandom(self.GENERATED_SECRET_LENGTH) + secret_id = sha256(secret).hexdigest() + # generate random salt + salt = os.urandom(self.SALT_LENGTH) + # get a 256-bit key + key = scrypt.hash(self._passphrase, salt, buflen=32) + iv, ciphertext = self._crypto.encrypt_sym(secret, key) + self._secrets[secret_id] = { + # leap.soledad.crypto submodule uses AES256 for symmetric + # encryption. + self.KDF_KEY: self.KDF_SCRYPT, + self.KDF_SALT_KEY: binascii.b2a_base64(salt), + self.KDF_LENGTH_KEY: len(key), + self.CIPHER_KEY: self.CIPHER_AES256, + self.LENGTH_KEY: len(secret), + self.SECRET_KEY: '%s%s%s' % ( + str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), + } + self._store_secrets() + signal(SOLEDAD_DONE_CREATING_KEYS, self._uuid) + return secret_id + + def _store_secrets(self): + """ + Store a secret in C{Soledad.STORAGE_SECRETS_FILE_PATH}. + + The contents of the stored file have the following format: + + { + 'storage_secrets': { + '': { + 'kdf': 'scrypt', + 'kdf_salt': '' + 'kdf_length': + 'cipher': 'aes256', + 'length': 1024, + 'secret': '', + } + } + } + """ + data = { + self.STORAGE_SECRETS_KEY: self._secrets, + } + with open(self._secrets_path, 'w') as f: + f.write(json.dumps(data)) + + def change_passphrase(self, new_passphrase): + """ + Change the passphrase that encrypts the storage secret. + + @param new_passphrase: The new passphrase. + @type new_passphrase: str + + @raise NoStorageSecret: Raised if there's no storage secret available. + """ + # maybe we want to add more checks to guarantee passphrase is + # reasonable? + soledad_assert_type(new_passphrase, str) + if len(new_passphrase) < self.MINIMUM_PASSPHRASE_LENGTH: + raise PassphraseTooShort( + 'Passphrase must be at least %d characters long!' % + self.MINIMUM_PASSPHRASE_LENGTH) + # ensure there's a secret for which the passphrase will be changed. + if not self._has_secret(): + raise NoStorageSecret() + secret = self._get_storage_secret() + # generate random salt + new_salt = os.urandom(self.SALT_LENGTH) + # get a 256-bit key + key = scrypt.hash(new_passphrase, new_salt, buflen=32) + iv, ciphertext = self._crypto.encrypt_sym(secret, key) + self._secrets[self._secret_id] = { + # leap.soledad.crypto submodule uses AES256 for symmetric + # encryption. + self.KDF_KEY: self.KDF_SCRYPT, # TODO: remove hard coded kdf + self.KDF_SALT_KEY: binascii.b2a_base64(new_salt), + self.KDF_LENGTH_KEY: len(key), + self.CIPHER_KEY: self.CIPHER_AES256, + self.LENGTH_KEY: len(secret), + self.SECRET_KEY: '%s%s%s' % ( + str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), + } + + self._store_secrets() + self._passphrase = new_passphrase + + # + # General crypto utility methods. + # + + def _uuid_hash(self): + """ + Calculate a hash for storing/retrieving key material on shared + database, based on user's uuid. + + @return: the hash + @rtype: str + """ + return sha256( + '%s%s' % ( + SECRETS_DOC_ID_HASH_PREFIX, + self._uuid)).hexdigest() + + def _shared_db(self): + """ + Return an instance of the shared recovery database object. + """ + if self.server_url: + return SoledadSharedDatabase.open_database( + urlparse.urljoin(self.server_url, 'shared'), + False, # TODO: eliminate need to create db here. + creds=self._creds) + + def _get_secrets_from_shared_db(self): + """ + Retrieve the document with encrypted key material from the shared + database. + + @return: a document with encrypted key material in its contents + @rtype: SoledadDocument + """ + signal(SOLEDAD_DOWNLOADING_KEYS, self._uuid) + db = self._shared_db() + if not db: + logger.warning('No shared db found') + return + doc = db.get_doc(self._uuid_hash()) + signal(SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid) + return doc + + def _put_secrets_in_shared_db(self): + """ + Assert local keys are the same as shared db's ones. + + Try to fetch keys from shared recovery database. If they already exist + in the remote db, assert that that data is the same as local data. + Otherwise, upload keys to shared recovery database. + + """ + soledad_assert( + self._has_secret(), + 'Tried to send keys to server but they don\'t exist in local ' + 'storage.') + # try to get secrets doc from server, otherwise create it + doc = self._get_secrets_from_shared_db() + if doc is None: + doc = SoledadDocument(doc_id=self._uuid_hash()) + # fill doc with encrypted secrets + doc.content = self.export_recovery_document(include_uuid=False) + # upload secrets to server + signal(SOLEDAD_UPLOADING_KEYS, self._uuid) + db = self._shared_db() + if not db: + logger.warning('No shared db found') + return + db.put_doc(doc) + signal(SOLEDAD_DONE_UPLOADING_KEYS, self._uuid) + + # + # Document storage, retrieval and sync. + # + + def put_doc(self, doc): + """ + Update a document in the local encrypted database. + + @param doc: the document to update + @type doc: SoledadDocument + + @return: the new revision identifier for the document + @rtype: str + """ + return self._db.put_doc(doc) + + def delete_doc(self, doc): + """ + Delete a document from the local encrypted database. + + @param doc: the document to delete + @type doc: SoledadDocument + + @return: the new revision identifier for the document + @rtype: str + """ + return self._db.delete_doc(doc) + + def get_doc(self, doc_id, include_deleted=False): + """ + Retrieve a document from the local encrypted database. + + @param doc_id: the unique document identifier + @type doc_id: str + @param include_deleted: if True, deleted documents will be + returned with empty content; otherwise asking for a deleted + document will return None + @type include_deleted: bool + + @return: the document object or None + @rtype: SoledadDocument + """ + return self._db.get_doc(doc_id, include_deleted=include_deleted) + + def get_docs(self, doc_ids, check_for_conflicts=True, + include_deleted=False): + """ + Get the content for many documents. + + @param doc_ids: a list of document identifiers + @type doc_ids: list + @param check_for_conflicts: if set False, then the conflict check will + be skipped, and 'None' will be returned instead of True/False + @type check_for_conflicts: bool + + @return: iterable giving the Document object for each document id + in matching doc_ids order. + @rtype: generator + """ + return self._db.get_docs(doc_ids, + check_for_conflicts=check_for_conflicts, + include_deleted=include_deleted) + + def get_all_docs(self, include_deleted=False): + """Get the JSON content for all documents in the database. + + @param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted documents will not + be included in the results. + @return: (generation, [Document]) + The current generation of the database, followed by a list of all + the documents in the database. + """ + return self._db.get_all_docs(include_deleted) + + def create_doc(self, content, doc_id=None): + """ + Create a new document in the local encrypted database. + + @param content: the contents of the new document + @type content: dict + @param doc_id: an optional identifier specifying the document id + @type doc_id: str + + @return: the new document + @rtype: SoledadDocument + """ + return self._db.create_doc(content, doc_id=doc_id) + + def create_doc_from_json(self, json, doc_id=None): + """ + Create a new document. + + You can optionally specify the document identifier, but the document + must not already exist. See 'put_doc' if you want to override an + existing document. + If the database specifies a maximum document size and the document + exceeds it, create will fail and raise a DocumentTooBig exception. + + @param json: The JSON document string + @type json: str + @param doc_id: An optional identifier specifying the document id. + @type doc_id: + @return: The new cocument + @rtype: SoledadDocument + """ + return self._db.create_doc_from_json(json, doc_id=doc_id) + + def create_index(self, index_name, *index_expressions): + """ + Create an named index, which can then be queried for future lookups. + Creating an index which already exists is not an error, and is cheap. + Creating an index which does not match the index_expressions of the + existing index is an error. + Creating an index will block until the expressions have been evaluated + and the index generated. + + @param index_name: A unique name which can be used as a key prefix + @type index_name: str + @param index_expressions: index expressions defining the index + information. + @type index_expressions: dict + + Examples: + + "fieldname", or "fieldname.subfieldname" to index alphabetically + sorted on the contents of a field. + + "number(fieldname, width)", "lower(fieldname)" + """ + if self._db: + return self._db.create_index(index_name, *index_expressions) + + def delete_index(self, index_name): + """ + Remove a named index. + + @param index_name: The name of the index we are removing + @type index_name: str + """ + if self._db: + return self._db.delete_index(index_name) + + def list_indexes(self): + """ + List the definitions of all known indexes. + + @return: A list of [('index-name', ['field', 'field2'])] definitions. + @rtype: list + """ + if self._db: + return self._db.list_indexes() + + def get_from_index(self, index_name, *key_values): + """ + Return documents that match the keys supplied. + + You must supply exactly the same number of values as have been defined + in the index. It is possible to do a prefix match by using '*' to + indicate a wildcard match. You can only supply '*' to trailing entries, + (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) + It is also possible to append a '*' to the last supplied value (eg + 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + @param index_name: The index to query + @type index_name: str + @param key_values: values to match. eg, if you have + an index with 3 fields then you would have: + get_from_index(index_name, val1, val2, val3) + @type key_values: tuple + @return: List of [Document] + @rtype: list + """ + if self._db: + return self._db.get_from_index(index_name, *key_values) + + def get_range_from_index(self, index_name, start_value, end_value): + """ + Return documents that fall within the specified range. + + Both ends of the range are inclusive. For both start_value and + end_value, one must supply exactly the same number of values as have + been defined in the index, or pass None. In case of a single column + index, a string is accepted as an alternative for a tuple with a single + value. It is possible to do a prefix match by using '*' to indicate + a wildcard match. You can only supply '*' to trailing entries, (eg + 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also + possible to append a '*' to the last supplied value (eg 'val*', '*', + '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + + @param index_name: The index to query + @type index_name: str + @param start_values: tuples of values that define the lower bound of + the range. eg, if you have an index with 3 fields then you would + have: (val1, val2, val3) + @type start_values: tuple + @param end_values: tuples of values that define the upper bound of the + range. eg, if you have an index with 3 fields then you would have: + (val1, val2, val3) + @type end_values: tuple + @return: List of [Document] + @rtype: list + """ + if self._db: + return self._db.get_range_from_index( + index_name, start_value, end_value) + + def get_index_keys(self, index_name): + """ + Return all keys under which documents are indexed in this index. + + @param index_name: The index to query + @type index_name: str + @return: [] A list of tuples of indexed keys. + @rtype: list + """ + if self._db: + return self._db.get_index_keys(index_name) + + def get_doc_conflicts(self, doc_id): + """ + Get the list of conflicts for the given document. + + @param doc_id: the document id + @type doc_id: str + + @return: a list of the document entries that are conflicted + @rtype: list + """ + if self._db: + return self._db.get_doc_conflicts(doc_id) + + def resolve_doc(self, doc, conflicted_doc_revs): + """ + Mark a document as no longer conflicted. + + @param doc: a document with the new content to be inserted. + @type doc: SoledadDocument + @param conflicted_doc_revs: a list of revisions that the new content + supersedes. + @type conflicted_doc_revs: list + """ + if self._db: + return self._db.resolve_doc(doc, conflicted_doc_revs) + + def sync(self): + """ + Synchronize the local encrypted replica with a remote replica. + + @param url: the url of the target replica to sync with + @type url: str + + @return: the local generation before the synchronisation was + performed. + @rtype: str + """ + if self._db: + local_gen = self._db.sync( + urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), + creds=self._creds, autocreate=True) + signal(SOLEDAD_DONE_DATA_SYNC, self._uuid) + return local_gen + + def need_sync(self, url): + """ + Return if local db replica differs from remote url's replica. + + @param url: The remote replica to compare with local replica. + @type url: str + + @return: Whether remote replica and local replica differ. + @rtype: bool + """ + target = SoledadSyncTarget(url, creds=self._creds, crypto=self._crypto) + info = target.get_sync_info(self._db._get_replica_uid()) + # compare source generation with target's last known source generation + if self._db._get_generation() != info[4]: + signal(SOLEDAD_NEW_DATA_TO_SYNC, self._uuid) + return True + return False + + def _set_token(self, token): + """ + Set the authentication token for remote database access. + + Build the credentials dictionary with the following format: + + self._{ + 'token': { + 'uuid': '' + 'token': '' + } + + @param token: The authentication token. + @type token: str + """ + self._creds = { + 'token': { + 'uuid': self._uuid, + 'token': token, + } + } + + def _get_token(self): + """ + Return current token from credentials dictionary. + """ + return self._creds['token']['token'] + + token = property(_get_token, _set_token, doc='The authentication Token.') + + # + # Recovery document export and import methods + # + def export_recovery_document(self, include_uuid=True): + """ + Export the storage secrets and (optionally) the uuid. + + A recovery document has the following structure: + + { + self.STORAGE_SECRET_KEY: , + self.UUID_KEY: '', # (optional) + } + + @param include_uuid: Should the uuid be included? + @type include_uuid: bool + + @return: The recovery document. + @rtype: dict + """ + data = {self.STORAGE_SECRETS_KEY: self._secrets} + if include_uuid: + data[self.UUID_KEY] = self._uuid + return data + + def import_recovery_document(self, data): + """ + Import storage secrets for symmetric encryption and uuid (if present) + from a recovery document. + + A recovery document has the following structure: + + { + self.STORAGE_SECRET_KEY: , + self.UUID_KEY: '', # (optional) + } + + @param data: The recovery document. + @type data: dict + """ + # include new secrets in our secret pool. + for secret_id, secret_data in data[self.STORAGE_SECRETS_KEY].items(): + if secret_id not in self._secrets: + self._secrets[secret_id] = secret_data + self._store_secrets() # save new secrets in local file + # set uuid if present + if self.UUID_KEY in data: + self._uuid = data[self.UUID_KEY] + # choose first secret to use is none is assigned + if self._secret_id is None: + self._set_secret_id(data[self.STORAGE_SECRETS_KEY].items()[0][0]) + + # + # Setters/getters + # + + def _get_uuid(self): + return self._uuid + + uuid = property(_get_uuid, doc='The user uuid.') + + def _get_secret_id(self): + return self._secret_id + + secret_id = property( + _get_secret_id, + doc='The active secret id.') + + def _get_secrets_path(self): + return self._secrets_path + + secrets_path = property( + _get_secrets_path, + doc='The path for the file containing the encrypted symmetric secret.') + + def _get_local_db_path(self): + return self._local_db_path + + local_db_path = property( + _get_local_db_path, + doc='The path for the local database replica.') + + def _get_server_url(self): + return self._server_url + + server_url = property( + _get_server_url, + doc='The URL of the Soledad server.') + + storage_secret = property( + _get_storage_secret, + doc='The secret used for symmetric encryption.') + + def _get_passphrase(self): + return self._passphrase + + passphrase = property( + _get_passphrase, + doc='The passphrase for locking and unlocking encryption secrets for ' + 'local and remote storage.') + + +#----------------------------------------------------------------------------- +# Monkey patching u1db to be able to provide a custom SSL cert +#----------------------------------------------------------------------------- + +# We need a more reasonable timeout (in seconds) +SOLEDAD_TIMEOUT = 10 + + +class VerifiedHTTPSConnection(httplib.HTTPSConnection): + """HTTPSConnection verifying server side certificates.""" + # derived from httplib.py + + def connect(self): + "Connect to a host on a given (SSL) port." + sock = socket.create_connection((self.host, self.port), + SOLEDAD_TIMEOUT, self.source_address) + if self._tunnel_host: + self.sock = sock + self._tunnel() + + self.sock = ssl.wrap_socket(sock, + ca_certs=SOLEDAD_CERT, + cert_reqs=ssl.CERT_REQUIRED) + match_hostname(self.sock.getpeercert(), self.host) + + +old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection +http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection + + +__all__ = ['soledad_assert', 'Soledad'] diff --git a/client/src/leap/soledad/client/auth.py b/client/src/leap/soledad/client/auth.py new file mode 100644 index 00000000..3cd6dabe --- /dev/null +++ b/client/src/leap/soledad/client/auth.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +# auth.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +Methods for token-based authentication. + +These methods have to be included in all classes that extend HTTPClient so +they can do token-based auth requests to the Soledad server. +""" + + +from u1db import errors + + +class TokenBasedAuth(object): + """ + Encapsulate token-auth methods for classes that inherit from + u1db.remote.http_client.HTTPClient. + """ + + def set_token_credentials(self, uuid, token): + """ + Store given credentials so we can sign the request later. + + @param uuid: The user's uuid. + @type uuid: str + @param token: The authentication token. + @type token: str + """ + self._creds = {'token': (uuid, token)} + + def _sign_request(self, method, url_query, params): + """ + Return an authorization header to be included in the HTTP request, in + the form: + + [('Authorization', 'Token . + + +""" +Cryptographic utilities for Soledad. +""" + + +import os +import binascii +import hmac +import hashlib + + +from pycryptopp.cipher.aes import AES +from pycryptopp.cipher.xsalsa20 import XSalsa20 + + +from leap.soledad.common import ( + soledad_assert, + soledad_assert_type, +) + + +class EncryptionMethods(object): + """ + Representation of encryption methods that can be used. + """ + + AES_256_CTR = 'aes-256-ctr' + XSALSA20 = 'xsalsa20' + + +class UnknownEncryptionMethod(Exception): + """ + Raised when trying to encrypt/decrypt with unknown method. + """ + pass + + +class NoSymmetricSecret(Exception): + """ + Raised when trying to get a hashed passphrase. + """ + + +class SoledadCrypto(object): + """ + General cryptographic functionality. + """ + + MAC_KEY_LENGTH = 64 + + def __init__(self, soledad): + """ + Initialize the crypto object. + + @param soledad: A Soledad instance for key lookup. + @type soledad: leap.soledad.Soledad + """ + self._soledad = soledad + + def encrypt_sym(self, data, key, + method=EncryptionMethods.AES_256_CTR): + """ + Encrypt C{data} using a {password}. + + Currently, the only encryption method supported is AES-256 CTR mode. + + @param data: The data to be encrypted. + @type data: str + @param key: The key used to encrypt C{data} (must be 256 bits long). + @type key: str + @param method: The encryption method to use. + @type method: str + + @return: A tuple with the initial value and the encrypted data. + @rtype: (long, str) + """ + soledad_assert_type(key, str) + + soledad_assert( + len(key) == 32, # 32 x 8 = 256 bits. + 'Wrong key size: %s bits (must be 256 bits long).' % + (len(key) * 8)) + iv = None + # AES-256 in CTR mode + if method == EncryptionMethods.AES_256_CTR: + iv = os.urandom(16) + ciphertext = AES(key=key, iv=iv).process(data) + # XSalsa20 + elif method == EncryptionMethods.XSALSA20: + iv = os.urandom(24) + ciphertext = XSalsa20(key=key, iv=iv).process(data) + else: + # raise if method is unknown + raise UnknownEncryptionMethod('Unkwnown method: %s' % method) + return binascii.b2a_base64(iv), ciphertext + + def decrypt_sym(self, data, key, + method=EncryptionMethods.AES_256_CTR, **kwargs): + """ + Decrypt data using symmetric secret. + + Currently, the only encryption method supported is AES-256 CTR mode. + + @param data: The data to be decrypted. + @type data: str + @param key: The key used to decrypt C{data} (must be 256 bits long). + @type key: str + @param method: The encryption method to use. + @type method: str + @param kwargs: Other parameters specific to each encryption method. + @type kwargs: dict + + @return: The decrypted data. + @rtype: str + """ + soledad_assert_type(key, str) + # assert params + soledad_assert( + len(key) == 32, # 32 x 8 = 256 bits. + 'Wrong key size: %s (must be 256 bits long).' % len(key)) + soledad_assert( + 'iv' in kwargs, + '%s needs an initial value.' % method) + # AES-256 in CTR mode + if method == EncryptionMethods.AES_256_CTR: + return AES( + key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) + elif method == EncryptionMethods.XSALSA20: + return XSalsa20( + key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) + + # raise if method is unknown + raise UnknownEncryptionMethod('Unkwnown method: %s' % method) + + def doc_passphrase(self, doc_id): + """ + Generate a passphrase for symmetric encryption of document's contents. + + The password is derived using HMAC having sha256 as underlying hash + function. The key used for HMAC are the first + C{soledad.REMOTE_STORAGE_SECRET_KENGTH} bytes of Soledad's storage + secret stripped from the first MAC_KEY_LENGTH characters. The HMAC + message is C{doc_id}. + + @param doc_id: The id of the document that will be encrypted using + this passphrase. + @type doc_id: str + + @return: The passphrase. + @rtype: str + + @raise NoSymmetricSecret: if no symmetric secret was supplied. + """ + if self.secret is None: + raise NoSymmetricSecret() + return hmac.new( + self.secret[ + self.MAC_KEY_LENGTH: + self._soledad.REMOTE_STORAGE_SECRET_LENGTH], + doc_id, + hashlib.sha256).digest() + + def doc_mac_key(self, doc_id): + """ + Generate a key for calculating a MAC for a document whose id is + C{doc_id}. + + The key is derived using HMAC having sha256 as underlying hash + function. The key used for HMAC is the first MAC_KEY_LENGTH characters + of Soledad's storage secret. The HMAC message is C{doc_id}. + + @param doc_id: The id of the document. + @type doc_id: str + + @return: The key. + @rtype: str + + @raise NoSymmetricSecret: if no symmetric secret was supplied. + """ + if self.secret is None: + raise NoSymmetricSecret() + return hmac.new( + self.secret[:self.MAC_KEY_LENGTH], + doc_id, + hashlib.sha256).digest() + + # + # secret setters/getters + # + + def _get_secret(self): + return self._soledad.storage_secret + + secret = property( + _get_secret, doc='The secret used for symmetric encryption') diff --git a/client/src/leap/soledad/client/dbwrapper.py b/client/src/leap/soledad/client/dbwrapper.py new file mode 100644 index 00000000..a27a933e --- /dev/null +++ b/client/src/leap/soledad/client/dbwrapper.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +# dbwrapper.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Thread-safe wrapper for sqlite/pysqlcipher. + +*TODO* +At some point we surely will want to switch to a twisted way of dealing +with this, using defers and proper callbacks. But I had this tested for +some time so postponing that refactor. +""" +import logging +import threading +import Queue +import time + +import exceptions + +from functools import partial + +from leap.soledad.client import sqlcipher + +logger = logging.getLogger(__name__) + + +class SQLCipherWrapper(threading.Thread): + + def __init__(self, *args, **kwargs): + """ + Initializes a wrapper that proxies method and attribute + access to an underlying SQLCipher instance. We instantiate sqlcipher + in a thread, and all method accesses communicate with it using a + Queue. + + :param *args: position arguments to pass to pysqlcipher initialization + :type args: tuple + + :param **kwargs: keyword arguments to pass to pysqlcipher + initialization + :type kwargs: dict + """ + threading.Thread.__init__(self) + self._db = None + self._wrargs = args, kwargs + + self._queue = Queue.Queue() + self._stopped = threading.Event() + + self.start() + + def _init_db(self): + """ + Initializes sqlcipher database. + + This is called on a separate thread. + """ + # instantiate u1db + args, kwargs = self._wrargs + self._db = sqlcipher.open(*args, **kwargs) + + def run(self): + """ + Main loop for the sqlcipher thread. + """ + logger.debug("SQLCipherWrapper thread started.") + logger.debug("Initializing sqlcipher") + end_mths = ("__end_thread", "_SQLCipherWrapper__end_thread") + + self._init_db() + self._lock = threading.Lock() + + ct = 0 + started = False + + while True: + if self._db is None: + if started: + break + if ct > 10: + break # XXX DEBUG + logger.debug('db not ready yet, waiting...') + time.sleep(1) + ct += 1 + + started = True + + with self._lock: + try: + mth, q, wrargs = self._queue.get() + except: + logger.error("exception getting args from queue") + + res = None + attr = getattr(self._db, mth, None) + if not attr: + if mth not in end_mths: + logger.error('method %s does not exist' % (mth,)) + res = AttributeError( + "_db instance has no attribute %s" % mth) + + elif callable(attr): + # invoke the method with the passed args + args = wrargs.get('args', []) + kwargs = wrargs.get('kwargs', {}) + try: + res = attr(*args, **kwargs) + except Exception as e: + logger.error( + "Error on proxied method %s: '%r'." % ( + attr, e)) + res = e + else: + # non-callable attribute + res = attr + logger.debug('returning proxied db call...') + q.put(res) + + if mth in end_mths: + logger.debug('ending thread') + break + + logger.debug("SQLCipherWrapper thread terminated.") + self._stopped.set() + + def close(self): + """ + Closes the sqlcipher database and finishes the thread. This method + should always be called explicitely. + """ + self.__getattr__('close')() + self.__end_thread() + + def __getattr__(self, attr): + """ + Returns _db proxied attributes. + """ + + def __proxied_mth(method, *args, **kwargs): + if not self._stopped.isSet(): + wrargs = {'args': args, 'kwargs': kwargs} + q = Queue.Queue() + self._queue.put((method, q, wrargs)) + res = q.get() + q.task_done() + + if isinstance(res, exceptions.BaseException): + # XXX should get the original bt + raise res + return res + else: + logger.warning("tried to call proxied meth " + "but stopped is set: %s" % + (method,)) + + rgetattr = object.__getattribute__ + + if attr != "_db": + proxied = partial(__proxied_mth, attr) + return proxied + + # fallback to regular behavior + return rgetattr(self, attr) + + def __del__(self): + """ + Do not trust this get called. No guarantees given. Because of a funny + dance with the refs and the way the gc works, we should be calling the + close method explicitely. + """ + self.close() diff --git a/client/src/leap/soledad/client/shared_db.py b/client/src/leap/soledad/client/shared_db.py new file mode 100644 index 00000000..a6ca504d --- /dev/null +++ b/client/src/leap/soledad/client/shared_db.py @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- +# shared_db.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +A shared database for storing/retrieving encrypted key material. +""" + +import simplejson as json + + +from u1db.remote import http_database + + +from leap.soledad.client.auth import TokenBasedAuth + + +#----------------------------------------------------------------------------- +# Soledad shared database +#----------------------------------------------------------------------------- + +class NoTokenForAuth(Exception): + """ + No token was found for token-based authentication. + """ + + +class Unauthorized(Exception): + """ + User does not have authorization to perform task. + """ + + +class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth): + """ + This is a shared recovery database that enables users to store their + encryption secrets in the server and retrieve them afterwards. + """ + # TODO: prevent client from messing with the shared DB. + # TODO: define and document API. + + # + # Token auth methods. + # + + def set_token_credentials(self, uuid, token): + """ + Store given credentials so we can sign the request later. + + @param uuid: The user's uuid. + @type uuid: str + @param token: The authentication token. + @type token: str + """ + TokenBasedAuth.set_token_credentials(self, uuid, token) + + def _sign_request(self, method, url_query, params): + """ + Return an authorization header to be included in the HTTP request. + + @param method: The HTTP method. + @type method: str + @param url_query: The URL query string. + @type url_query: str + @param params: A list with encoded query parameters. + @type param: list + + @return: The Authorization header. + @rtype: list of tuple + """ + return TokenBasedAuth._sign_request(self, method, url_query, params) + + # + # Modified HTTPDatabase methods. + # + + @staticmethod + def open_database(url, create, creds=None): + # TODO: users should not be able to create the shared database, so we + # have to remove this from here in the future. + """ + Open a Soledad shared database. + + @param url: URL of the remote database. + @type url: str + @param create: Should the database be created if it does not already + exist? + @type create: bool + @param token: An authentication token for accessing the shared db. + @type token: str + + @return: The shared database in the given url. + @rtype: SoledadSharedDatabase + """ + db = SoledadSharedDatabase(url, creds=creds) + db.open(create) + return db + + @staticmethod + def delete_database(url): + """ + Dummy method that prevents from deleting shared database. + + @raise: This will always raise an Unauthorized exception. + + @param url: The database URL. + @type url: str + """ + raise Unauthorized("Can't delete shared database.") + + def __init__(self, url, document_factory=None, creds=None): + """ + Initialize database with auth token and encryption powers. + + @param url: URL of the remote database. + @type url: str + @param document_factory: A factory for U1BD documents. + @type document_factory: u1db.Document + @param creds: A tuple containing the authentication method and + credentials. + @type creds: tuple + """ + http_database.HTTPDatabase.__init__(self, url, document_factory, + creds) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py new file mode 100644 index 00000000..c605c28c --- /dev/null +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -0,0 +1,696 @@ +# -*- coding: utf-8 -*- +# sqlcipher.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +A U1DB backend that uses SQLCipher as its persistence layer. + +The SQLCipher API (http://sqlcipher.net/sqlcipher-api/) is fully implemented, +with the exception of the following statements: + + * PRAGMA cipher_use_hmac + * PRAGMA cipher_default_use_mac + +SQLCipher 2.0 introduced a per-page HMAC to validate that the page data has +not be tampered with. By default, when creating or opening a database using +SQLCipher 2, SQLCipher will attempt to use an HMAC check. This change in +database format means that SQLCipher 2 can't operate on version 1.1.x +databases by default. Thus, in order to provide backward compatibility with +SQLCipher 1.1.x, PRAGMA cipher_use_hmac can be used to disable the HMAC +functionality on specific databases. + +In some very specific cases, it is not possible to call PRAGMA cipher_use_hmac +as one of the first operations on a database. An example of this is when +trying to ATTACH a 1.1.x database to the main database. In these cases PRAGMA +cipher_default_use_hmac can be used to globally alter the default use of HMAC +when opening a database. + +So, as the statements above were introduced for backwards compatibility with +SLCipher 1.1 databases, we do not implement them as all SQLCipher databases +handled by Soledad should be created by SQLCipher >= 2.0. +""" +import logging +import os +import time +import string +import threading + + +from u1db.backends import sqlite_backend +from pysqlcipher import dbapi2 +from u1db import errors as u1db_errors +from leap.soledad.common.document import SoledadDocument + +logger = logging.getLogger(__name__) + + +# Monkey-patch u1db.backends.sqlite_backend with pysqlcipher.dbapi2 +sqlite_backend.dbapi2 = dbapi2 + +# It seems that, as long as we are not using old sqlite versions, serialized +# mode is enabled by default at compile time. So accessing db connections from +# different threads should be safe, as long as no attempt is made to use them +# from multiple threads with no locking. +# See https://sqlite.org/threadsafe.html +# and http://bugs.python.org/issue16509 + +SQLITE_CHECK_SAME_THREAD = False + + +def open(path, password, create=True, document_factory=None, crypto=None, + raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, + cipher_page_size=1024): + """Open a database at the given location. + + Will raise u1db.errors.DatabaseDoesNotExist if create=False and the + database does not already exist. + + @param path: The filesystem path for the database to open. + @param type: str + @param create: True/False, should the database be created if it doesn't + already exist? + @param type: bool + @param document_factory: A function that will be called with the same + parameters as Document.__init__. + @type document_factory: callable + @param crypto: An instance of SoledadCrypto so we can encrypt/decrypt + document contents when syncing. + @type crypto: soledad.crypto.SoledadCrypto + @param raw_key: Whether C{password} is a raw 64-char hex string or a + passphrase that should be hashed to obtain the encyrption key. + @type raw_key: bool + @param cipher: The cipher and mode to use. + @type cipher: str + @param kdf_iter: The number of iterations to use. + @type kdf_iter: int + @param cipher_page_size: The page size. + @type cipher_page_size: int + + @return: An instance of Database. + @rtype SQLCipherDatabase + """ + return SQLCipherDatabase.open_database( + path, password, create=create, document_factory=document_factory, + crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, + cipher_page_size=cipher_page_size) + + +# +# Exceptions +# + +class DatabaseIsNotEncrypted(Exception): + """ + Exception raised when trying to open non-encrypted databases. + """ + pass + + +class NotAnHexString(Exception): + """ + Raised when trying to (raw) key the database with a non-hex string. + """ + pass + + +# +# The SQLCipher database +# + +class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): + """A U1DB implementation that uses SQLCipher as its persistence layer.""" + + _index_storage_value = 'expand referenced encrypted' + k_lock = threading.Lock() + + def __init__(self, sqlcipher_file, password, document_factory=None, + crypto=None, raw_key=False, cipher='aes-256-cbc', + kdf_iter=4000, cipher_page_size=1024): + """ + Create a new sqlcipher file. + + @param sqlcipher_file: The path for the SQLCipher file. + @type sqlcipher_file: str + @param password: The password that protects the SQLCipher db. + @type password: str + @param document_factory: A function that will be called with the same + parameters as Document.__init__. + @type document_factory: callable + @param crypto: An instance of SoledadCrypto so we can encrypt/decrypt + document contents when syncing. + @type crypto: soledad.crypto.SoledadCrypto + @param raw_key: Whether C{password} is a raw 64-char hex string or a + passphrase that should be hashed to obtain the encyrption key. + @type raw_key: bool + @param cipher: The cipher and mode to use. + @type cipher: str + @param kdf_iter: The number of iterations to use. + @type kdf_iter: int + @param cipher_page_size: The page size. + @type cipher_page_size: int + """ + # ensure the db is encrypted if the file already exists + if os.path.exists(sqlcipher_file): + self.assert_db_is_encrypted( + sqlcipher_file, password, raw_key, cipher, kdf_iter, + cipher_page_size) + # connect to the database + with self.k_lock: + self._db_handle = dbapi2.connect( + sqlcipher_file, + check_same_thread=SQLITE_CHECK_SAME_THREAD) + # set SQLCipher cryptographic parameters + self._set_crypto_pragmas( + self._db_handle, password, raw_key, cipher, kdf_iter, + cipher_page_size) + self._real_replica_uid = None + self._ensure_schema() + self._crypto = crypto + + def factory(doc_id=None, rev=None, json='{}', has_conflicts=False, + syncable=True): + return SoledadDocument(doc_id=doc_id, rev=rev, json=json, + has_conflicts=has_conflicts, + syncable=syncable) + self.set_document_factory(factory) + + @classmethod + def _open_database(cls, sqlcipher_file, password, document_factory=None, + crypto=None, raw_key=False, cipher='aes-256-cbc', + kdf_iter=4000, cipher_page_size=1024): + """ + Open a SQLCipher database. + + @param sqlcipher_file: The path for the SQLCipher file. + @type sqlcipher_file: str + @param password: The password that protects the SQLCipher db. + @type password: str + @param document_factory: A function that will be called with the same + parameters as Document.__init__. + @type document_factory: callable + @param crypto: An instance of SoledadCrypto so we can encrypt/decrypt + document contents when syncing. + @type crypto: soledad.crypto.SoledadCrypto + @param raw_key: Whether C{password} is a raw 64-char hex string or a + passphrase that should be hashed to obtain the encyrption key. + @type raw_key: bool + @param cipher: The cipher and mode to use. + @type cipher: str + @param kdf_iter: The number of iterations to use. + @type kdf_iter: int + @param cipher_page_size: The page size. + @type cipher_page_size: int + + @return: The database object. + @rtype: SQLCipherDatabase + """ + if not os.path.isfile(sqlcipher_file): + raise u1db_errors.DatabaseDoesNotExist() + + tries = 2 + # Note: There seems to be a bug in sqlite 3.5.9 (with python2.6) + # where without re-opening the database on Windows, it + # doesn't see the transaction that was just committed + while True: + + with cls.k_lock: + db_handle = dbapi2.connect( + sqlcipher_file, + check_same_thread=SQLITE_CHECK_SAME_THREAD) + + try: + # set cryptographic params + cls._set_crypto_pragmas( + db_handle, password, raw_key, cipher, kdf_iter, + cipher_page_size) + c = db_handle.cursor() + # XXX if we use it here, it should be public + v, err = cls._which_index_storage(c) + except Exception as exc: + logger.warning("ERROR OPENING DATABASE!") + logger.debug("error was: %r" % exc) + v, err = None, exc + finally: + db_handle.close() + if v is not None: + break + # possibly another process is initializing it, wait for it to be + # done + if tries == 0: + raise err # go for the richest error? + tries -= 1 + time.sleep(cls.WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL) + return SQLCipherDatabase._sqlite_registry[v]( + sqlcipher_file, password, document_factory=document_factory, + crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter, + cipher_page_size=cipher_page_size) + + @classmethod + def open_database(cls, sqlcipher_file, password, create, backend_cls=None, + document_factory=None, crypto=None, raw_key=False, + cipher='aes-256-cbc', kdf_iter=4000, + cipher_page_size=1024): + """ + Open a SQLCipher database. + + @param sqlcipher_file: The path for the SQLCipher file. + @type sqlcipher_file: str + @param password: The password that protects the SQLCipher db. + @type password: str + @param create: Should the datbase be created if it does not already + exist? + @type: bool + @param backend_cls: A class to use as backend. + @type backend_cls: type + @param document_factory: A function that will be called with the same + parameters as Document.__init__. + @type document_factory: callable + @param crypto: An instance of SoledadCrypto so we can encrypt/decrypt + document contents when syncing. + @type crypto: soledad.crypto.SoledadCrypto + @param raw_key: Whether C{password} is a raw 64-char hex string or a + passphrase that should be hashed to obtain the encyrption key. + @type raw_key: bool + @param cipher: The cipher and mode to use. + @type cipher: str + @param kdf_iter: The number of iterations to use. + @type kdf_iter: int + @param cipher_page_size: The page size. + @type cipher_page_size: int + + @return: The database object. + @rtype: SQLCipherDatabase + """ + try: + return cls._open_database( + sqlcipher_file, password, document_factory=document_factory, + crypto=crypto, raw_key=raw_key, cipher=cipher, + kdf_iter=kdf_iter, cipher_page_size=cipher_page_size) + except u1db_errors.DatabaseDoesNotExist: + if not create: + raise + # TODO: remove backend class from here. + if backend_cls is None: + # default is SQLCipherPartialExpandDatabase + backend_cls = SQLCipherDatabase + return backend_cls( + sqlcipher_file, password, document_factory=document_factory, + crypto=crypto, raw_key=raw_key, cipher=cipher, + kdf_iter=kdf_iter, cipher_page_size=cipher_page_size) + + def sync(self, url, creds=None, autocreate=True): + """ + Synchronize documents with remote replica exposed at url. + + @param url: The url of the target replica to sync with. + @type url: str + @param creds: optional dictionary giving credentials. + to authorize the operation with the server. + @type creds: dict + @param autocreate: Ask the target to create the db if non-existent. + @type autocreate: bool + + @return: The local generation before the synchronisation was performed. + @rtype: int + """ + from u1db.sync import Synchronizer + from leap.soledad.client.target import SoledadSyncTarget + return Synchronizer( + self, + SoledadSyncTarget(url, + creds=creds, + crypto=self._crypto)).sync(autocreate=autocreate) + + def _extra_schema_init(self, c): + """ + Add any extra fields, etc to the basic table definitions. + + This method is called by u1db.backends.sqlite_backend._initialize() + method, which is executed when the database schema is created. Here, + we use it to include the "syncable" property for LeapDocuments. + + @param c: The cursor for querying the database. + @type c: dbapi2.cursor + """ + c.execute( + 'ALTER TABLE document ' + 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE') + + def _put_and_update_indexes(self, old_doc, doc): + """ + Update a document and all indexes related to it. + + @param old_doc: The old version of the document. + @type old_doc: u1db.Document + @param doc: The new version of the document. + @type doc: u1db.Document + """ + sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes( + self, old_doc, doc) + c = self._db_handle.cursor() + c.execute('UPDATE document SET syncable=? ' + 'WHERE doc_id=?', + (doc.syncable, doc.doc_id)) + + def _get_doc(self, doc_id, check_for_conflicts=False): + """ + Get just the document content, without fancy handling. + + @param doc_id: The unique document identifier + @type doc_id: str + @param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise asking for a deleted + document will return None. + @type include_deleted: bool + + @return: a Document object. + @type: u1db.Document + """ + doc = sqlite_backend.SQLitePartialExpandDatabase._get_doc( + self, doc_id, check_for_conflicts) + if doc: + c = self._db_handle.cursor() + c.execute('SELECT syncable FROM document ' + 'WHERE doc_id=?', + (doc.doc_id,)) + result = c.fetchone() + doc.syncable = bool(result[0]) + return doc + + # + # SQLCipher API methods + # + + @classmethod + def assert_db_is_encrypted(cls, sqlcipher_file, key, raw_key, cipher, + kdf_iter, cipher_page_size): + """ + Assert that C{sqlcipher_file} contains an encrypted database. + + When opening an existing database, PRAGMA key will not immediately + throw an error if the key provided is incorrect. To test that the + database can be successfully opened with the provided key, it is + necessary to perform some operation on the database (i.e. read from + it) and confirm it is success. + + The easiest way to do this is select off the sqlite_master table, + which will attempt to read the first page of the database and will + parse the schema. + + @param sqlcipher_file: The path for the SQLCipher file. + @type sqlcipher_file: str + @param key: The key that protects the SQLCipher db. + @type key: str + @param raw_key: Whether C{key} is a raw 64-char hex string or a + passphrase that should be hashed to obtain the encyrption key. + @type raw_key: bool + @param cipher: The cipher and mode to use. + @type cipher: str + @param kdf_iter: The number of iterations to use. + @type kdf_iter: int + @param cipher_page_size: The page size. + @type cipher_page_size: int + """ + try: + # try to open an encrypted database with the regular u1db + # backend should raise a DatabaseError exception. + sqlite_backend.SQLitePartialExpandDatabase(sqlcipher_file) + raise DatabaseIsNotEncrypted() + except dbapi2.DatabaseError: + # assert that we can access it using SQLCipher with the given + # key + with cls.k_lock: + db_handle = dbapi2.connect( + sqlcipher_file, + check_same_thread=SQLITE_CHECK_SAME_THREAD) + cls._set_crypto_pragmas( + db_handle, key, raw_key, cipher, + kdf_iter, cipher_page_size) + db_handle.cursor().execute( + 'SELECT count(*) FROM sqlite_master') + + @classmethod + def _set_crypto_pragmas(cls, db_handle, key, raw_key, cipher, kdf_iter, + cipher_page_size): + """ + Set cryptographic params (key, cipher, KDF number of iterations and + cipher page size). + """ + cls._pragma_key(db_handle, key, raw_key) + cls._pragma_cipher(db_handle, cipher) + cls._pragma_kdf_iter(db_handle, kdf_iter) + cls._pragma_cipher_page_size(db_handle, cipher_page_size) + + @classmethod + def _pragma_key(cls, db_handle, key, raw_key): + """ + Set the C{key} for use with the database. + + The process of creating a new, encrypted database is called 'keying' + the database. SQLCipher uses just-in-time key derivation at the point + it is first needed for an operation. This means that the key (and any + options) must be set before the first operation on the database. As + soon as the database is touched (e.g. SELECT, CREATE TABLE, UPDATE, + etc.) and pages need to be read or written, the key is prepared for + use. + + Implementation Notes: + + * PRAGMA key should generally be called as the first operation on a + database. + + @param key: The key for use with the database. + @type key: str + @param raw_key: Whether C{key} is a raw 64-char hex string or a + passphrase that should be hashed to obtain the encyrption key. + @type raw_key: bool + """ + if raw_key: + cls._pragma_key_raw(db_handle, key) + else: + cls._pragma_key_passphrase(db_handle, key) + + @classmethod + def _pragma_key_passphrase(cls, db_handle, passphrase): + """ + Set a passphrase for encryption key derivation. + + The key itself can be a passphrase, which is converted to a key using + PBKDF2 key derivation. The result is used as the encryption key for + the database. By using this method, there is no way to alter the KDF; + if you want to do so you should use a raw key instead and derive the + key using your own KDF. + + @param db_handle: A handle to the SQLCipher database. + @type db_handle: pysqlcipher.Connection + @param passphrase: The passphrase used to derive the encryption key. + @type passphrase: str + """ + db_handle.cursor().execute("PRAGMA key = '%s'" % passphrase) + + @classmethod + def _pragma_key_raw(cls, db_handle, key): + """ + Set a raw hexadecimal encryption key. + + It is possible to specify an exact byte sequence using a blob literal. + With this method, it is the calling application's responsibility to + ensure that the data provided is a 64 character hex string, which will + be converted directly to 32 bytes (256 bits) of key data. + + @param db_handle: A handle to the SQLCipher database. + @type db_handle: pysqlcipher.Connection + @param key: A 64 character hex string. + @type key: str + """ + if not all(c in string.hexdigits for c in key): + raise NotAnHexString(key) + db_handle.cursor().execute('PRAGMA key = "x\'%s"' % key) + + @classmethod + def _pragma_cipher(cls, db_handle, cipher='aes-256-cbc'): + """ + Set the cipher and mode to use for symmetric encryption. + + SQLCipher uses aes-256-cbc as the default cipher and mode of + operation. It is possible to change this, though not generally + recommended, using PRAGMA cipher. + + SQLCipher makes direct use of libssl, so all cipher options available + to libssl are also available for use with SQLCipher. See `man enc` for + OpenSSL's supported ciphers. + + Implementation Notes: + + * PRAGMA cipher must be called after PRAGMA key and before the first + actual database operation or it will have no effect. + + * If a non-default value is used PRAGMA cipher to create a database, + it must also be called every time that database is opened. + + * SQLCipher does not implement its own encryption. Instead it uses the + widely available and peer-reviewed OpenSSL libcrypto for all + cryptographic functions. + + @param db_handle: A handle to the SQLCipher database. + @type db_handle: pysqlcipher.Connection + @param cipher: The cipher and mode to use. + @type cipher: str + """ + db_handle.cursor().execute("PRAGMA cipher = '%s'" % cipher) + + @classmethod + def _pragma_kdf_iter(cls, db_handle, kdf_iter=4000): + """ + Set the number of iterations for the key derivation function. + + SQLCipher uses PBKDF2 key derivation to strengthen the key and make it + resistent to brute force and dictionary attacks. The default + configuration uses 4000 PBKDF2 iterations (effectively 16,000 SHA1 + operations). PRAGMA kdf_iter can be used to increase or decrease the + number of iterations used. + + Implementation Notes: + + * PRAGMA kdf_iter must be called after PRAGMA key and before the first + actual database operation or it will have no effect. + + * If a non-default value is used PRAGMA kdf_iter to create a database, + it must also be called every time that database is opened. + + * It is not recommended to reduce the number of iterations if a + passphrase is in use. + + @param db_handle: A handle to the SQLCipher database. + @type db_handle: pysqlcipher.Connection + @param kdf_iter: The number of iterations to use. + @type kdf_iter: int + """ + db_handle.cursor().execute("PRAGMA kdf_iter = '%d'" % kdf_iter) + + @classmethod + def _pragma_cipher_page_size(cls, db_handle, cipher_page_size=1024): + """ + Set the page size of the encrypted database. + + SQLCipher 2 introduced the new PRAGMA cipher_page_size that can be + used to adjust the page size for the encrypted database. The default + page size is 1024 bytes, but it can be desirable for some applications + to use a larger page size for increased performance. For instance, + some recent testing shows that increasing the page size can noticeably + improve performance (5-30%) for certain queries that manipulate a + large number of pages (e.g. selects without an index, large inserts in + a transaction, big deletes). + + To adjust the page size, call the pragma immediately after setting the + key for the first time and each subsequent time that you open the + database. + + Implementation Notes: + + * PRAGMA cipher_page_size must be called after PRAGMA key and before + the first actual database operation or it will have no effect. + + * If a non-default value is used PRAGMA cipher_page_size to create a + database, it must also be called every time that database is opened. + + @param db_handle: A handle to the SQLCipher database. + @type db_handle: pysqlcipher.Connection + @param cipher_page_size: The page size. + @type cipher_page_size: int + """ + db_handle.cursor().execute( + "PRAGMA cipher_page_size = '%d'" % cipher_page_size) + + @classmethod + def _pragma_rekey(cls, db_handle, new_key, raw_key): + """ + Change the key of an existing encrypted database. + + To change the key on an existing encrypted database, it must first be + unlocked with the current encryption key. Once the database is + readable and writeable, PRAGMA rekey can be used to re-encrypt every + page in the database with a new key. + + * PRAGMA rekey must be called after PRAGMA key. It can be called at any + time once the database is readable. + + * PRAGMA rekey can not be used to encrypted a standard SQLite + database! It is only useful for changing the key on an existing + database. + + * Previous versions of SQLCipher provided a PRAGMA rekey_cipher and + code>PRAGMA rekey_kdf_iter. These are deprecated and should not be + used. Instead, use sqlcipher_export(). + + @param db_handle: A handle to the SQLCipher database. + @type db_handle: pysqlcipher.Connection + @param new_key: The new key. + @type new_key: str + @param raw_key: Whether C{password} is a raw 64-char hex string or a + passphrase that should be hashed to obtain the encyrption key. + @type raw_key: bool + """ + if raw_key: + cls._pragma_rekey_raw(db_handle, key) + else: + cls._pragma_rekey_passphrase(db_handle, key) + + @classmethod + def _pragma_rekey_passphrase(cls, db_handle, passphrase): + """ + Change the passphrase for encryption key derivation. + + The key itself can be a passphrase, which is converted to a key using + PBKDF2 key derivation. The result is used as the encryption key for + the database. + + @param db_handle: A handle to the SQLCipher database. + @type db_handle: pysqlcipher.Connection + @param passphrase: The passphrase used to derive the encryption key. + @type passphrase: str + """ + db_handle.cursor().execute("PRAGMA rekey = '%s'" % passphrase) + + @classmethod + def _pragma_rekey_raw(cls, db_handle, key): + """ + Change the raw hexadecimal encryption key. + + It is possible to specify an exact byte sequence using a blob literal. + With this method, it is the calling application's responsibility to + ensure that the data provided is a 64 character hex string, which will + be converted directly to 32 bytes (256 bits) of key data. + + @param db_handle: A handle to the SQLCipher database. + @type db_handle: pysqlcipher.Connection + @param key: A 64 character hex string. + @type key: str + """ + if not all(c in string.hexdigits for c in key): + raise NotAnHexString(key) + db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % passphrase) + + def __del__(self): + """ + Closes db_handle upon object destruction. + """ + if self._db_handle is not None: + self._db_handle.close() + + +sqlite_backend.SQLiteDatabase.register_implementation(SQLCipherDatabase) diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py new file mode 100644 index 00000000..d0bc3706 --- /dev/null +++ b/client/src/leap/soledad/client/target.py @@ -0,0 +1,452 @@ +# -*- coding: utf-8 -*- +# target.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +A U1DB backend for encrypting data before sending to server and decrypting +after receiving. +""" + +import simplejson as json +import hashlib +import hmac +import binascii + + +from u1db.remote import utils +from u1db.errors import BrokenSyncStream +from u1db.remote.http_target import HTTPSyncTarget + + +from leap.soledad.common import soledad_assert +from leap.soledad.common.crypto import ( + EncryptionSchemes, + MacMethods, + ENC_JSON_KEY, + ENC_SCHEME_KEY, + ENC_METHOD_KEY, + ENC_IV_KEY, + MAC_KEY, + MAC_METHOD_KEY, +) +from leap.soledad.common.document import SoledadDocument +from leap.soledad.client.auth import TokenBasedAuth +from leap.soledad.client.crypto import ( + EncryptionMethods, + UnknownEncryptionMethod, +) + + +# +# Exceptions +# + +class DocumentNotEncrypted(Exception): + """ + Raised for failures in document encryption. + """ + pass + + +class UnknownEncryptionScheme(Exception): + """ + Raised when trying to decrypt from unknown encryption schemes. + """ + pass + + +class UnknownMacMethod(Exception): + """ + Raised when trying to authenticate document's content with unknown MAC + mehtod. + """ + pass + + +class WrongMac(Exception): + """ + Raised when failing to authenticate document's contents based on MAC. + """ + + +# +# Crypto utilities for a SoledadDocument. +# + + +def mac_doc(crypto, doc_id, doc_rev, ciphertext, mac_method): + """ + Calculate a MAC for C{doc} using C{ciphertext}. + + Current MAC method used is HMAC, with the following parameters: + + * key: sha256(storage_secret, doc_id) + * msg: doc_id + doc_rev + ciphertext + * digestmod: sha256 + + @param crypto: A SoledadCryto instance used to perform the encryption. + @type crypto: leap.soledad.crypto.SoledadCrypto + @param doc_id: The id of the document. + @type doc_id: str + @param doc_rev: The revision of the document. + @type doc_rev: str + @param ciphertext: The content of the document. + @type ciphertext: str + @param mac_method: The MAC method to use. + @type mac_method: str + + @return: The calculated MAC. + @rtype: str + """ + if mac_method == MacMethods.HMAC: + return hmac.new( + crypto.doc_mac_key(doc_id), + str(doc_id) + str(doc_rev) + ciphertext, + hashlib.sha256).digest() + # raise if we do not know how to handle this MAC method + raise UnknownMacMethod('Unknown MAC method: %s.' % mac_method) + + +def encrypt_doc(crypto, doc): + """ + Encrypt C{doc}'s content. + + Encrypt doc's contents using AES-256 CTR mode and return a valid JSON + string representing the following: + + { + ENC_JSON_KEY: '', + ENC_SCHEME_KEY: 'symkey', + ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR, + ENC_IV_KEY: '', + MAC_KEY: '' + MAC_METHOD_KEY: 'hmac' + } + + @param crypto: A SoledadCryto instance used to perform the encryption. + @type crypto: leap.soledad.crypto.SoledadCrypto + @param doc: The document with contents to be encrypted. + @type doc: SoledadDocument + + @return: The JSON serialization of the dict representing the encrypted + content. + @rtype: str + """ + soledad_assert(doc.is_tombstone() is False) + # encrypt content using AES-256 CTR mode + iv, ciphertext = crypto.encrypt_sym( + str(doc.get_json()), # encryption/decryption routines expect str + crypto.doc_passphrase(doc.doc_id), + method=EncryptionMethods.AES_256_CTR) + # Return a representation for the encrypted content. In the following, we + # convert binary data to hexadecimal representation so the JSON + # serialization does not complain about what it tries to serialize. + hex_ciphertext = binascii.b2a_hex(ciphertext) + return json.dumps({ + ENC_JSON_KEY: hex_ciphertext, + ENC_SCHEME_KEY: EncryptionSchemes.SYMKEY, + ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR, + ENC_IV_KEY: iv, + MAC_KEY: binascii.b2a_hex(mac_doc( # store the mac as hex. + crypto, doc.doc_id, doc.rev, + ciphertext, + MacMethods.HMAC)), + MAC_METHOD_KEY: MacMethods.HMAC, + }) + + +def decrypt_doc(crypto, doc): + """ + Decrypt C{doc}'s content. + + Return the JSON string representation of the document's decrypted content. + + The content of the document should have the following structure: + + { + ENC_JSON_KEY: '', + ENC_SCHEME_KEY: '', + ENC_METHOD_KEY: '', + ENC_IV_KEY: '', # (optional) + MAC_KEY: '' + MAC_METHOD_KEY: 'hmac' + } + + C{enc_blob} is the encryption of the JSON serialization of the document's + content. For now Soledad just deals with documents whose C{enc_scheme} is + EncryptionSchemes.SYMKEY and C{enc_method} is + EncryptionMethods.AES_256_CTR. + + @param crypto: A SoledadCryto instance to perform the encryption. + @type crypto: leap.soledad.crypto.SoledadCrypto + @param doc: The document to be decrypted. + @type doc: SoledadDocument + + @return: The JSON serialization of the decrypted content. + @rtype: str + """ + soledad_assert(doc.is_tombstone() is False) + soledad_assert(ENC_JSON_KEY in doc.content) + soledad_assert(ENC_SCHEME_KEY in doc.content) + soledad_assert(ENC_METHOD_KEY in doc.content) + soledad_assert(MAC_KEY in doc.content) + soledad_assert(MAC_METHOD_KEY in doc.content) + # verify MAC + ciphertext = binascii.a2b_hex( # content is stored as hex. + doc.content[ENC_JSON_KEY]) + mac = mac_doc( + crypto, doc.doc_id, doc.rev, + ciphertext, + doc.content[MAC_METHOD_KEY]) + # we compare mac's hashes to avoid possible timing attacks that might + # exploit python's builtin comparison operator behaviour, which fails + # immediatelly when non-matching bytes are found. + doc_mac_hash = hashlib.sha256( + binascii.a2b_hex( # the mac is stored as hex + doc.content[MAC_KEY])).digest() + calculated_mac_hash = hashlib.sha256(mac).digest() + if doc_mac_hash != calculated_mac_hash: + raise WrongMac('Could not authenticate document\'s contents.') + # decrypt doc's content + enc_scheme = doc.content[ENC_SCHEME_KEY] + plainjson = None + if enc_scheme == EncryptionSchemes.SYMKEY: + enc_method = doc.content[ENC_METHOD_KEY] + if enc_method == EncryptionMethods.AES_256_CTR: + soledad_assert(ENC_IV_KEY in doc.content) + plainjson = crypto.decrypt_sym( + ciphertext, + crypto.doc_passphrase(doc.doc_id), + method=enc_method, + iv=doc.content[ENC_IV_KEY]) + else: + raise UnknownEncryptionMethod(enc_method) + else: + raise UnknownEncryptionScheme(enc_scheme) + return plainjson + + +# +# SoledadSyncTarget +# + +class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): + """ + A SyncTarget that encrypts data before sending and decrypts data after + receiving. + """ + + # + # Token auth methods. + # + + def set_token_credentials(self, uuid, token): + """ + Store given credentials so we can sign the request later. + + @param uuid: The user's uuid. + @type uuid: str + @param token: The authentication token. + @type token: str + """ + TokenBasedAuth.set_token_credentials(self, uuid, token) + + def _sign_request(self, method, url_query, params): + """ + Return an authorization header to be included in the HTTP request. + + @param method: The HTTP method. + @type method: str + @param url_query: The URL query string. + @type url_query: str + @param params: A list with encoded query parameters. + @type param: list + + @return: The Authorization header. + @rtype: list of tuple + """ + return TokenBasedAuth._sign_request(self, method, url_query, params) + + # + # Modified HTTPSyncTarget methods. + # + + @staticmethod + def connect(url, crypto=None): + return SoledadSyncTarget(url, crypto=crypto) + + def __init__(self, url, creds=None, crypto=None): + """ + Initialize the SoledadSyncTarget. + + @param url: The url of the target replica to sync with. + @type url: str + @param creds: optional dictionary giving credentials. + to authorize the operation with the server. + @type creds: dict + @param soledad: An instance of Soledad so we can encrypt/decrypt + document contents when syncing. + @type soledad: soledad.Soledad + """ + HTTPSyncTarget.__init__(self, url, creds) + self._crypto = crypto + + def _parse_sync_stream(self, data, return_doc_cb, ensure_callback=None): + """ + Parse incoming synchronization stream and insert documents in the + local database. + + If an incoming document's encryption scheme is equal to + EncryptionSchemes.SYMKEY, then this method will decrypt it with + Soledad's symmetric key. + + @param data: The body of the HTTP response. + @type data: str + @param return_doc_cb: A callback to insert docs from target. + @type return_doc_cb: function + @param ensure_callback: A callback to ensure we have the correct + target_replica_uid, if it was just created. + @type ensure_callback: function + + @raise BrokenSyncStream: If C{data} is malformed. + + @return: A dictionary representing the first line of the response got + from remote replica. + @rtype: list of str + """ + parts = data.splitlines() # one at a time + if not parts or parts[0] != '[': + raise BrokenSyncStream + data = parts[1:-1] + comma = False + if data: + line, comma = utils.check_and_strip_comma(data[0]) + res = json.loads(line) + if ensure_callback and 'replica_uid' in res: + ensure_callback(res['replica_uid']) + for entry in data[1:]: + if not comma: # missing in between comma + raise BrokenSyncStream + line, comma = utils.check_and_strip_comma(entry) + entry = json.loads(line) + #------------------------------------------------------------- + # symmetric decryption of document's contents + #------------------------------------------------------------- + # if arriving content was symmetrically encrypted, we decrypt + # it. + doc = SoledadDocument( + entry['id'], entry['rev'], entry['content']) + if doc.content and ENC_SCHEME_KEY in doc.content: + if doc.content[ENC_SCHEME_KEY] == \ + EncryptionSchemes.SYMKEY: + doc.set_json(decrypt_doc(self._crypto, doc)) + #------------------------------------------------------------- + # end of symmetric decryption + #------------------------------------------------------------- + return_doc_cb(doc, entry['gen'], entry['trans_id']) + if parts[-1] != ']': + try: + partdic = json.loads(parts[-1]) + except ValueError: + pass + else: + if isinstance(partdic, dict): + self._error(partdic) + raise BrokenSyncStream + if not data or comma: # no entries or bad extra comma + raise BrokenSyncStream + return res + + def sync_exchange(self, docs_by_generations, source_replica_uid, + last_known_generation, last_known_trans_id, + return_doc_cb, ensure_callback=None): + """ + Find out which documents the remote database does not know about, + encrypt and send them. + + This does the same as the parent's method but encrypts content before + syncing. + + @param docs_by_generations: A list of (doc_id, generation, trans_id) + of local documents that were changed since the last local + generation the remote replica knows about. + @type docs_by_generations: list of tuples + @param source_replica_uid: The uid of the source replica. + @type source_replica_uid: str + @param last_known_generation: Target's last known generation. + @type last_known_generation: int + @param last_known_trans_id: Target's last known transaction id. + @type last_known_trans_id: str + @param return_doc_cb: A callback for inserting received documents from + target. + @type return_doc_cb: function + @param ensure_callback: A callback that ensures we know the target + replica uid if the target replica was just created. + @type ensure_callback: function + + @return: The new generation and transaction id of the target replica. + @rtype: tuple + """ + self._ensure_connection() + if self._trace_hook: # for tests + self._trace_hook('sync_exchange') + url = '%s/sync-from/%s' % (self._url.path, source_replica_uid) + self._conn.putrequest('POST', url) + self._conn.putheader('content-type', 'application/x-u1db-sync-stream') + for header_name, header_value in self._sign_request('POST', url, {}): + self._conn.putheader(header_name, header_value) + entries = ['['] + size = 1 + + def prepare(**dic): + entry = comma + '\r\n' + json.dumps(dic) + entries.append(entry) + return len(entry) + + comma = '' + size += prepare( + last_known_generation=last_known_generation, + last_known_trans_id=last_known_trans_id, + ensure=ensure_callback is not None) + comma = ',' + for doc, gen, trans_id in docs_by_generations: + # skip non-syncable docs + if isinstance(doc, SoledadDocument) and not doc.syncable: + continue + #------------------------------------------------------------- + # symmetric encryption of document's contents + #------------------------------------------------------------- + doc_json = doc.get_json() + if not doc.is_tombstone(): + doc_json = encrypt_doc(self._crypto, doc) + #------------------------------------------------------------- + # end of symmetric encryption + #------------------------------------------------------------- + size += prepare(id=doc.doc_id, rev=doc.rev, + content=doc_json, + gen=gen, trans_id=trans_id) + entries.append('\r\n]') + size += len(entries[-1]) + self._conn.putheader('content-length', str(size)) + self._conn.endheaders() + for entry in entries: + self._conn.send(entry) + entries = None + data, _ = self._response() + res = self._parse_sync_stream(data, return_doc_cb, ensure_callback) + data = None + return res['new_generation'], res['new_transaction_id'] -- cgit v1.2.3