diff options
Diffstat (limited to 'client')
| -rw-r--r-- | client/changes/feature_3118-provide-a-way-to-access-the-saved-password | 1 | ||||
| -rw-r--r-- | client/changes/feature_3487-split-soledad-into-common-client-and-server | 1 | ||||
| -rw-r--r-- | client/setup.py | 73 | ||||
| -rw-r--r-- | client/src/leap/__init__.py | 6 | ||||
| -rw-r--r-- | client/src/leap/soledad/__init__.py | 6 | ||||
| -rw-r--r-- | client/src/leap/soledad/client/__init__.py | 1129 | ||||
| -rw-r--r-- | client/src/leap/soledad/client/auth.py | 71 | ||||
| -rw-r--r-- | client/src/leap/soledad/client/crypto.py | 213 | ||||
| -rw-r--r-- | client/src/leap/soledad/client/dbwrapper.py | 183 | ||||
| -rw-r--r-- | client/src/leap/soledad/client/shared_db.py | 138 | ||||
| -rw-r--r-- | client/src/leap/soledad/client/sqlcipher.py | 696 | ||||
| -rw-r--r-- | client/src/leap/soledad/client/target.py | 452 | 
12 files changed, 2969 insertions, 0 deletions
diff --git a/client/changes/feature_3118-provide-a-way-to-access-the-saved-password b/client/changes/feature_3118-provide-a-way-to-access-the-saved-password new file mode 100644 index 00000000..69cb0b1d --- /dev/null +++ b/client/changes/feature_3118-provide-a-way-to-access-the-saved-password @@ -0,0 +1 @@ +  o Add public method to access the saved password. Closes #3118. diff --git a/client/changes/feature_3487-split-soledad-into-common-client-and-server b/client/changes/feature_3487-split-soledad-into-common-client-and-server new file mode 100644 index 00000000..2eab6b56 --- /dev/null +++ b/client/changes/feature_3487-split-soledad-into-common-client-and-server @@ -0,0 +1 @@ +  o Split soledad package into common, client and server. Closes #3487. diff --git a/client/setup.py b/client/setup.py new file mode 100644 index 00000000..291c95fe --- /dev/null +++ b/client/setup.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +# setup.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +from setuptools import ( +    setup, +    find_packages +) + + +install_requirements = [ +    'pysqlcipher', +    'pysqlite',  # TODO: this should not be a dep, see #2945 +    'simplejson', +    'oauth',  # this is not strictly needed by us, but we need it +              # until u1db adds it to its release as a dep. 
+    'u1db', +    'scrypt', +    'pyxdg', +    'pycryptopp', +    'leap.soledad.common>=0.3.0', +] + + +trove_classifiers = ( +    "Development Status :: 3 - Alpha", +    "Intended Audience :: Developers", +    "License :: OSI Approved :: " +    "GNU General Public License v3 or later (GPLv3+)", +    "Environment :: Console", +    "Operating System :: OS Independent", +    "Operating System :: POSIX", +    "Programming Language :: Python :: 2.6", +    "Programming Language :: Python :: 2.7", +    "Topic :: Database :: Front-Ends", +    "Topic :: Software Development :: Libraries :: Python Modules" +) + + +setup( +    name='leap.soledad.client', +    version='0.3.0', +    url='https://leap.se/', +    license='GPLv3+', +    description='Synchronization of locally encrypted data among devices.', +    author='The LEAP Encryption Access Project', +    author_email='info@leap.se', +    long_description=( +        "Soledad is the part of LEAP that allows application data to be " +        "securely shared among devices. It provides, to other parts of the " +        "LEAP client, an API for data storage and sync." 
+    ), +    namespace_packages=["leap", "leap.soledad"], +    packages=find_packages('src'), +    package_dir={'': 'src'}, +    install_requires=install_requirements, +    classifiers=trove_classifiers, +    extras_require={'signaling': ['leap.common']}, +) diff --git a/client/src/leap/__init__.py b/client/src/leap/__init__.py new file mode 100644 index 00000000..f48ad105 --- /dev/null +++ b/client/src/leap/__init__.py @@ -0,0 +1,6 @@ +# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages +try: +    __import__('pkg_resources').declare_namespace(__name__) +except ImportError: +    from pkgutil import extend_path +    __path__ = extend_path(__path__, __name__) diff --git a/client/src/leap/soledad/__init__.py b/client/src/leap/soledad/__init__.py new file mode 100644 index 00000000..f48ad105 --- /dev/null +++ b/client/src/leap/soledad/__init__.py @@ -0,0 +1,6 @@ +# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages +try: +    __import__('pkg_resources').declare_namespace(__name__) +except ImportError: +    from pkgutil import extend_path +    __path__ = extend_path(__path__, __name__) diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py new file mode 100644 index 00000000..fc8219fa --- /dev/null +++ b/client/src/leap/soledad/client/__init__.py @@ -0,0 +1,1129 @@ +# -*- coding: utf-8 -*- +# __init__.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +""" +Soledad - Synchronization Of Locally Encrypted Data Among Devices. + +Soledad is the part of LEAP that manages storage and synchronization of +application data. It is built on top of U1DB reference Python API and +implements (1) a SQLCipher backend for local storage in the client, (2) a +SyncTarget that encrypts data before syncing, and (3) a CouchDB backend for +remote storage in the server side. +""" + +import os +import binascii +import logging +import urlparse +import simplejson as json +import scrypt +import httplib +import socket +import ssl +import errno + + +from xdg import BaseDirectory +from hashlib import sha256 +from u1db.remote import http_client +from u1db.remote.ssl_match_hostname import match_hostname + + +# +# Signaling function +# + +SOLEDAD_CREATING_KEYS = 'Creating keys...' +SOLEDAD_DONE_CREATING_KEYS = 'Done creating keys.' +SOLEDAD_DOWNLOADING_KEYS = 'Downloading keys...' +SOLEDAD_DONE_DOWNLOADING_KEYS = 'Done downloading keys.' +SOLEDAD_UPLOADING_KEYS = 'Uploading keys...' +SOLEDAD_DONE_UPLOADING_KEYS = 'Done uploading keys.' +SOLEDAD_NEW_DATA_TO_SYNC = 'New data available.' +SOLEDAD_DONE_DATA_SYNC = 'Done data sync.' + +# we want to use leap.common.events to emits signals, if it is available. 
+try: +    from leap.common import events +    from leap.common.events import signal +    SOLEDAD_CREATING_KEYS = events.events_pb2.SOLEDAD_CREATING_KEYS +    SOLEDAD_DONE_CREATING_KEYS = events.events_pb2.SOLEDAD_DONE_CREATING_KEYS +    SOLEDAD_DOWNLOADING_KEYS = events.events_pb2.SOLEDAD_DOWNLOADING_KEYS +    SOLEDAD_DONE_DOWNLOADING_KEYS = \ +        events.events_pb2.SOLEDAD_DONE_DOWNLOADING_KEYS +    SOLEDAD_UPLOADING_KEYS = events.events_pb2.SOLEDAD_UPLOADING_KEYS +    SOLEDAD_DONE_UPLOADING_KEYS = \ +        events.events_pb2.SOLEDAD_DONE_UPLOADING_KEYS +    SOLEDAD_NEW_DATA_TO_SYNC = events.events_pb2.SOLEDAD_NEW_DATA_TO_SYNC +    SOLEDAD_DONE_DATA_SYNC = events.events_pb2.SOLEDAD_DONE_DATA_SYNC + +except ImportError: +    # we define a fake signaling function and fake signal constants that will +    # allow for logging signaling attempts in case leap.common.events is not +    # available. + +    def signal(signal, content=""): +        logger.info("Would signal: %s - %s." % (str(signal), content)) + + +from leap.soledad.common import soledad_assert, soledad_assert_type +from leap.soledad.common.document import SoledadDocument +from leap.soledad.client.crypto import SoledadCrypto +from leap.soledad.client.dbwrapper import SQLCipherWrapper +from leap.soledad.client.shared_db import SoledadSharedDatabase +from leap.soledad.client.sqlcipher import open as sqlcipher_open +from leap.soledad.client.sqlcipher import SQLCipherDatabase +from leap.soledad.client.target import SoledadSyncTarget + + +logger = logging.getLogger(name=__name__) + + +# +# Constants +# + +SOLEDAD_CERT = None +""" +Path to the certificate file used to certify the SSL connection between +Soledad client and server. +""" + +SECRETS_DOC_ID_HASH_PREFIX = 'uuid-' + + +# +# Soledad: local encrypted storage and remote encrypted sync. +# + +class NoStorageSecret(Exception): +    """ +    Raised when trying to use a storage secret but none is available. 
+    """ +    pass + + +class PassphraseTooShort(Exception): +    """ +    Raised when trying to change the passphrase but the provided passphrase is +    too short. +    """ + + +class Soledad(object): +    """ +    Soledad provides encrypted data storage and sync. + +    A Soledad instance is used to store and retrieve data in a local encrypted +    database and synchronize this database with Soledad server. + +    This class is also responsible for bootstrapping users' account by +    creating cryptographic secrets and/or storing/fetching them on Soledad +    server. + +    Soledad uses C{leap.common.events} to signal events. The possible events +    to be signaled are: + +        SOLEDAD_CREATING_KEYS: emitted during bootstrap sequence when key +            generation starts. +        SOLEDAD_DONE_CREATING_KEYS: emitted during bootstrap sequence when key +            generation finishes. +        SOLEDAD_UPLOADING_KEYS: emitted during bootstrap sequence when soledad +            starts sending keys to server. +        SOLEDAD_DONE_UPLOADING_KEYS: emitted during bootstrap sequence when +            soledad finishes sending keys to server. +        SOLEDAD_DOWNLOADING_KEYS: emitted during bootstrap sequence when +            soledad starts to retrieve keys from server. +        SOLEDAD_DONE_DOWNLOADING_KEYS: emitted during bootstrap sequence when +            soledad finishes downloading keys from server. +        SOLEDAD_NEW_DATA_TO_SYNC: emitted upon call to C{need_sync()} when +          there's indeed new data to be synchronized between local database +          replica and server's replica. +        SOLEDAD_DONE_DATA_SYNC: emitted inside C{sync()} method when it has +            finished synchronizing with remote replica. +    """ + +    LOCAL_DATABASE_FILE_NAME = 'soledad.u1db' +    """ +    The name of the local SQLCipher U1DB database file. 
+    """ + +    STORAGE_SECRETS_FILE_NAME = "soledad.json" +    """ +    The name of the file where the storage secrets will be stored. +    """ + +    GENERATED_SECRET_LENGTH = 1024 +    """ +    The length of the generated secret used to derive keys for symmetric +    encryption for local and remote storage. +    """ + +    LOCAL_STORAGE_SECRET_LENGTH = 512 +    """ +    The length of the secret used to derive a passphrase for the SQLCipher +    database. +    """ + +    REMOTE_STORAGE_SECRET_LENGTH = \ +        GENERATED_SECRET_LENGTH - LOCAL_STORAGE_SECRET_LENGTH +    """ +    The length of the secret used to derive an encryption key and a MAC auth +    key for remote storage. +    """ + +    SALT_LENGTH = 64 +    """ +    The length of the salt used to derive the key for the storage secret +    encryption. +    """ + +    MINIMUM_PASSPHRASE_LENGTH = 6 +    """ +    The minimum length for a passphrase. The passphrase length is only checked +    when the user changes her passphrase, not when she instantiates Soledad. +    """ + +    IV_SEPARATOR = ":" +    """ +    A separator used for storing the encryption initial value prepended to the +    ciphertext. +    """ + +    UUID_KEY = 'uuid' +    STORAGE_SECRETS_KEY = 'storage_secrets' +    SECRET_KEY = 'secret' +    CIPHER_KEY = 'cipher' +    LENGTH_KEY = 'length' +    KDF_KEY = 'kdf' +    KDF_SALT_KEY = 'kdf_salt' +    KDF_LENGTH_KEY = 'kdf_length' +    KDF_SCRYPT = 'scrypt' +    CIPHER_AES256 = 'aes256' +    """ +    Keys used to access storage secrets in recovery documents. +    """ + +    DEFAULT_PREFIX = os.path.join( +        BaseDirectory.xdg_config_home, +        'leap', 'soledad') +    """ +    Prefix for default values for path. +    """ + +    def __init__(self, uuid, passphrase, secrets_path, local_db_path, +                 server_url, cert_file, auth_token=None, secret_id=None): +        """ +        Initialize configuration, cryptographic keys and dbs. + +        @param uuid: User's uuid. 
+        @type uuid: str +        @param passphrase: The passphrase for locking and unlocking encryption +            secrets for local and remote storage. +        @type passphrase: str +        @param secrets_path: Path for storing encrypted key used for +            symmetric encryption. +        @type secrets_path: str +        @param local_db_path: Path for local encrypted storage db. +        @type local_db_path: str +        @param server_url: URL for Soledad server. This is used either to sync +            with the user's remote db and to interact with the shared recovery +            database. +        @type server_url: str +        @param cert_file: Path to the SSL certificate to use in the +            connection to the server_url. +        @type cert_file: str +        @param auth_token: Authorization token for accessing remote databases. +        @type auth_token: str +        """ +        # get config params +        self._uuid = uuid +        self._passphrase = passphrase +        # init crypto variables +        self._secrets = {} +        self._secret_id = secret_id +        # init config (possibly with default values) +        self._init_config(secrets_path, local_db_path, server_url) +        self._set_token(auth_token) +        # configure SSL certificate +        global SOLEDAD_CERT +        SOLEDAD_CERT = cert_file +        # initiate bootstrap sequence +        self._bootstrap() + +    def _init_config(self, secrets_path, local_db_path, server_url): +        """ +        Initialize configuration using default values for missing params. 
+        """ +        # initialize secrets_path +        self._secrets_path = secrets_path +        if self._secrets_path is None: +            self._secrets_path = os.path.join( +                self.DEFAULT_PREFIX, self.STORAGE_SECRETS_FILE_NAME) +        # initialize local_db_path +        self._local_db_path = local_db_path +        if self._local_db_path is None: +            self._local_db_path = os.path.join( +                self.DEFAULT_PREFIX, self.LOCAL_DATABASE_FILE_NAME) +        # initialize server_url +        self._server_url = server_url +        soledad_assert( +            self._server_url is not None, +            'Missing URL for Soledad server.') + +    # +    # initialization/destruction methods +    # + +    def _bootstrap(self): +        """ +        Bootstrap local Soledad instance. + +        Soledad Client bootstrap is the following sequence of stages: + +        * stage 0 - local environment setup. +            - directory initialization. +            - crypto submodule initialization +        * stage 1 - secret generation/loading: +            - if secrets exist locally, load them. +            - else, if secrets exist in server, download them. +            - else, generate a new secret. +        * stage 2 - store secrets in server. +        * stage 3 - database initialization. + +        This method decides which bootstrap stages have already been performed +        and performs the missing ones in order. +        """ +        # TODO: make sure key storage always happens (even if this method is +        #       interrupted). +        # TODO: write tests for bootstrap stages. +        # TODO: log each bootstrap step. +        # stage 0  - socal environment setup +        self._init_dirs() +        self._crypto = SoledadCrypto(self) +        # stage 1 - secret generation/loading +        if not self._has_secret():  # try to load from local storage. 
+            logger.info( +                'Trying to fetch cryptographic secrets from shared recovery ' +                'database...') +            # there are no secrets in local storage, so try to fetch encrypted +            # secrets from server. +            doc = self._get_secrets_from_shared_db() +            if doc: +                # found secrets in server, so import them. +                logger.info( +                    'Found cryptographic secrets in shared recovery ' +                    'database.') +                self.import_recovery_document(doc.content) +            else: +                # there are no secrets in server also, so generate a secret. +                logger.info( +                    'No cryptographic secrets found, creating new secrets...') +                self._set_secret_id(self._gen_secret()) +        # Stage 2 - storage of encrypted secrets in the server. +        self._put_secrets_in_shared_db() +        # Stage 3 - Local database initialization +        self._init_db() + +    def _init_dirs(self): +        """ +        Create work directories. + +        @raise OSError: in case file exists and is not a dir. +        """ +        paths = map( +            lambda x: os.path.dirname(x), +            [self._local_db_path, self._secrets_path]) +        for path in paths: +            logger.info('Creating directory: %s.' % path) +            try: +                os.makedirs(path) +            except OSError as exc: +                if exc.errno == errno.EEXIST and os.path.isdir(path): +                    pass +                else: +                    raise + +    def _init_db(self): +        """ +        Initialize the U1DB SQLCipher database for local storage. + +        Currently, Soledad uses the default SQLCipher cipher, i.e. +        'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key and +        uses the 'raw PRAGMA key' format to handle the key to SQLCipher. 
+ +        The first C{self.REMOTE_STORAGE_SECRET_LENGTH} bytes of the storage +        secret are used for remote storage encryption. We use the next +        C{self.LOCAL_STORAGE_SECRET} bytes to derive a key for local storage. +        From these bytes, the first C{self.SALT_LENGTH} are used as the salt +        and the rest as the password for the scrypt hashing. +        """ +        # salt indexes +        salt_start = self.REMOTE_STORAGE_SECRET_LENGTH +        salt_end = salt_start + self.SALT_LENGTH +        # password indexes +        pwd_start = salt_end +        pwd_end = salt_start + self.LOCAL_STORAGE_SECRET_LENGTH +        # calculate the key for local encryption +        secret = self._get_storage_secret() +        key = scrypt.hash( +            secret[pwd_start:pwd_end],  # the password +            secret[salt_start:salt_end],  # the salt +            buflen=32,  # we need a key with 256 bits (32 bytes) +        ) + +        self._db = sqlcipher_open( +            self._local_db_path, +            binascii.b2a_hex(key),  # sqlcipher only accepts the hex version +            create=True, +            document_factory=SoledadDocument, +            crypto=self._crypto, +            raw_key=True) + +    def close(self): +        """ +        Close underlying U1DB database. +        """ +        if hasattr(self, '_db') and isinstance( +                self._db, +                SQLCipherDatabase): +            self._db.close() + +    def __del__(self): +        """ +        Make sure local database is closed when object is destroyed. +        """ +        # Watch out! We have no guarantees  that this is properly called. +        self.close() + +    # +    # Management of secret for symmetric encryption. +    # + +    def _get_storage_secret(self): +        """ +        Return the storage secret. + +        Storage secret is encrypted before being stored. This method decrypts +        and returns the stored secret. 
+ +        @return: The storage secret. +        @rtype: str +        """ +        # calculate the encryption key +        key = scrypt.hash( +            self._passphrase, +            # the salt is stored base64 encoded +            binascii.a2b_base64( +                self._secrets[self._secret_id][self.KDF_SALT_KEY]), +            buflen=32,  # we need a key with 256 bits (32 bytes). +        ) +        # recover the initial value and ciphertext +        iv, ciphertext = self._secrets[self._secret_id][self.SECRET_KEY].split( +            self.IV_SEPARATOR, 1) +        ciphertext = binascii.a2b_base64(ciphertext) +        return self._crypto.decrypt_sym(ciphertext, key, iv=iv) + +    def _set_secret_id(self, secret_id): +        """ +        Define the id of the storage secret to be used. + +        This method will also replace the secret in the crypto object. +        """ +        self._secret_id = secret_id + +    def _load_secrets(self): +        """ +        Load storage secrets from local file. + +        The content of the file has the following format: + +            { +                "storage_secrets": { +                    "<secret_id>": { +                        'kdf': 'scrypt', +                        'kdf_salt': '<b64 repr of salt>' +                        'kdf_length': <key length> +                        "cipher": "aes256", +                        "length": <secret length>, +                        "secret": "<encrypted storage_secret 1>", +                    } +                } +            } +        """ +        # does the file exist in disk? 
+        if not os.path.isfile(self._secrets_path): +            raise IOError('File does not exist: %s' % self._secrets_path) +        # read storage secrets from file +        content = None +        with open(self._secrets_path, 'r') as f: +            content = json.loads(f.read()) +        self._secrets = content[self.STORAGE_SECRETS_KEY] +        # choose first secret if no secret_id was given +        if self._secret_id is None: +            self._set_secret_id(self._secrets.items()[0][0]) + +    def _has_secret(self): +        """ +        Return whether there is a storage secret available for use or not. + +        @return: Whether there's a storage secret for symmetric encryption. +        @rtype: bool +        """ +        if self._secret_id is None or self._secret_id not in self._secrets: +            try: +                self._load_secrets()  # try to load from disk +            except IOError, e: +                logger.error('IOError: %s' % str(e)) +        try: +            self._get_storage_secret() +            return True +        except: +            return False + +    def _gen_secret(self): +        """ +        Generate a secret for symmetric encryption and store in a local +        encrypted file. + +        This method emits the following signals: + +            * SOLEDAD_CREATING_KEYS +            * SOLEDAD_DONE_CREATING_KEYS + +        A secret has the following structure: + +            { +                '<secret_id>': { +                        'kdf': 'scrypt', +                        'kdf_salt': '<b64 repr of salt>' +                        'kdf_length': <key length> +                        'cipher': 'aes256', +                        'length': <secret length>, +                        'secret': '<encrypted b64 repr of storage_secret>', +                } +            } + +        @return: The id of the generated secret. 
+        @rtype: str +        """ +        signal(SOLEDAD_CREATING_KEYS, self._uuid) +        # generate random secret +        secret = os.urandom(self.GENERATED_SECRET_LENGTH) +        secret_id = sha256(secret).hexdigest() +        # generate random salt +        salt = os.urandom(self.SALT_LENGTH) +        # get a 256-bit key +        key = scrypt.hash(self._passphrase, salt, buflen=32) +        iv, ciphertext = self._crypto.encrypt_sym(secret, key) +        self._secrets[secret_id] = { +            # leap.soledad.crypto submodule uses AES256 for symmetric +            # encryption. +            self.KDF_KEY: self.KDF_SCRYPT, +            self.KDF_SALT_KEY: binascii.b2a_base64(salt), +            self.KDF_LENGTH_KEY: len(key), +            self.CIPHER_KEY: self.CIPHER_AES256, +            self.LENGTH_KEY: len(secret), +            self.SECRET_KEY: '%s%s%s' % ( +                str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), +        } +        self._store_secrets() +        signal(SOLEDAD_DONE_CREATING_KEYS, self._uuid) +        return secret_id + +    def _store_secrets(self): +        """ +        Store a secret in C{Soledad.STORAGE_SECRETS_FILE_PATH}. 
+ +        The contents of the stored file have the following format: + +            { +                'storage_secrets': { +                    '<secret_id>': { +                        'kdf': 'scrypt', +                        'kdf_salt': '<salt>' +                        'kdf_length': <len> +                        'cipher': 'aes256', +                        'length': 1024, +                        'secret': '<encrypted storage_secret 1>', +                    } +                } +            } +        """ +        data = { +            self.STORAGE_SECRETS_KEY: self._secrets, +        } +        with open(self._secrets_path, 'w') as f: +            f.write(json.dumps(data)) + +    def change_passphrase(self, new_passphrase): +        """ +        Change the passphrase that encrypts the storage secret. + +        @param new_passphrase: The new passphrase. +        @type new_passphrase: str + +        @raise NoStorageSecret: Raised if there's no storage secret available. +        """ +        # maybe we want to add more checks to guarantee passphrase is +        # reasonable? +        soledad_assert_type(new_passphrase, str) +        if len(new_passphrase) < self.MINIMUM_PASSPHRASE_LENGTH: +            raise PassphraseTooShort( +                'Passphrase must be at least %d characters long!' % +                self.MINIMUM_PASSPHRASE_LENGTH) +        # ensure there's a secret for which the passphrase will be changed. +        if not self._has_secret(): +            raise NoStorageSecret() +        secret = self._get_storage_secret() +        # generate random salt +        new_salt = os.urandom(self.SALT_LENGTH) +        # get a 256-bit key +        key = scrypt.hash(new_passphrase, new_salt, buflen=32) +        iv, ciphertext = self._crypto.encrypt_sym(secret, key) +        self._secrets[self._secret_id] = { +            # leap.soledad.crypto submodule uses AES256 for symmetric +            # encryption. 
+            self.KDF_KEY: self.KDF_SCRYPT,  # TODO: remove hard coded kdf +            self.KDF_SALT_KEY: binascii.b2a_base64(new_salt), +            self.KDF_LENGTH_KEY: len(key), +            self.CIPHER_KEY: self.CIPHER_AES256, +            self.LENGTH_KEY: len(secret), +            self.SECRET_KEY: '%s%s%s' % ( +                str(iv), self.IV_SEPARATOR, binascii.b2a_base64(ciphertext)), +        } + +        self._store_secrets() +        self._passphrase = new_passphrase + +    # +    # General crypto utility methods. +    # + +    def _uuid_hash(self): +        """ +        Calculate a hash for storing/retrieving key material on shared +        database, based on user's uuid. + +        @return: the hash +        @rtype: str +        """ +        return sha256( +            '%s%s' % ( +                SECRETS_DOC_ID_HASH_PREFIX, +                self._uuid)).hexdigest() + +    def _shared_db(self): +        """ +        Return an instance of the shared recovery database object. +        """ +        if self.server_url: +            return SoledadSharedDatabase.open_database( +                urlparse.urljoin(self.server_url, 'shared'), +                False,  # TODO: eliminate need to create db here. +                creds=self._creds) + +    def _get_secrets_from_shared_db(self): +        """ +        Retrieve the document with encrypted key material from the shared +        database. + +        @return: a document with encrypted key material in its contents +        @rtype: SoledadDocument +        """ +        signal(SOLEDAD_DOWNLOADING_KEYS, self._uuid) +        db = self._shared_db() +        if not db: +            logger.warning('No shared db found') +            return +        doc = db.get_doc(self._uuid_hash()) +        signal(SOLEDAD_DONE_DOWNLOADING_KEYS, self._uuid) +        return doc + +    def _put_secrets_in_shared_db(self): +        """ +        Assert local keys are the same as shared db's ones. 
+ +        Try to fetch keys from shared recovery database. If they already exist +        in the remote db, assert that that data is the same as local data. +        Otherwise, upload keys to shared recovery database. + +        """ +        soledad_assert( +            self._has_secret(), +            'Tried to send keys to server but they don\'t exist in local ' +            'storage.') +        # try to get secrets doc from server, otherwise create it +        doc = self._get_secrets_from_shared_db() +        if doc is None: +            doc = SoledadDocument(doc_id=self._uuid_hash()) +        # fill doc with encrypted secrets +        doc.content = self.export_recovery_document(include_uuid=False) +        # upload secrets to server +        signal(SOLEDAD_UPLOADING_KEYS, self._uuid) +        db = self._shared_db() +        if not db: +            logger.warning('No shared db found') +            return +        db.put_doc(doc) +        signal(SOLEDAD_DONE_UPLOADING_KEYS, self._uuid) + +    # +    # Document storage, retrieval and sync. +    # + +    def put_doc(self, doc): +        """ +        Update a document in the local encrypted database. + +        @param doc: the document to update +        @type doc: SoledadDocument + +        @return: the new revision identifier for the document +        @rtype: str +        """ +        return self._db.put_doc(doc) + +    def delete_doc(self, doc): +        """ +        Delete a document from the local encrypted database. + +        @param doc: the document to delete +        @type doc: SoledadDocument + +        @return: the new revision identifier for the document +        @rtype: str +        """ +        return self._db.delete_doc(doc) + +    def get_doc(self, doc_id, include_deleted=False): +        """ +        Retrieve a document from the local encrypted database. 
+ +        @param doc_id: the unique document identifier +        @type doc_id: str +        @param include_deleted: if True, deleted documents will be +            returned with empty content; otherwise asking for a deleted +            document will return None +        @type include_deleted: bool + +        @return: the document object or None +        @rtype: SoledadDocument +        """ +        return self._db.get_doc(doc_id, include_deleted=include_deleted) + +    def get_docs(self, doc_ids, check_for_conflicts=True, +                 include_deleted=False): +        """ +        Get the content for many documents. + +        @param doc_ids: a list of document identifiers +        @type doc_ids: list +        @param check_for_conflicts: if set False, then the conflict check will +            be skipped, and 'None' will be returned instead of True/False +        @type check_for_conflicts: bool + +        @return: iterable giving the Document object for each document id +            in matching doc_ids order. +        @rtype: generator +        """ +        return self._db.get_docs(doc_ids, +                                 check_for_conflicts=check_for_conflicts, +                                 include_deleted=include_deleted) + +    def get_all_docs(self, include_deleted=False): +        """Get the JSON content for all documents in the database. + +        @param include_deleted: If set to True, deleted documents will be +            returned with empty content. Otherwise deleted documents will not +            be included in the results. +        @return: (generation, [Document]) +            The current generation of the database, followed by a list of all +            the documents in the database. +        """ +        return self._db.get_all_docs(include_deleted) + +    def create_doc(self, content, doc_id=None): +        """ +        Create a new document in the local encrypted database. 
+ +        @param content: the contents of the new document +        @type content: dict +        @param doc_id: an optional identifier specifying the document id +        @type doc_id: str + +        @return: the new document +        @rtype: SoledadDocument +        """ +        return self._db.create_doc(content, doc_id=doc_id) + +    def create_doc_from_json(self, json, doc_id=None): +        """ +        Create a new document. + +        You can optionally specify the document identifier, but the document +        must not already exist. See 'put_doc' if you want to override an +        existing document. +        If the database specifies a maximum document size and the document +        exceeds it, create will fail and raise a DocumentTooBig exception. + +        @param json: The JSON document string +        @type json: str +        @param doc_id: An optional identifier specifying the document id. +        @type doc_id: str +        @return: The new document +        @rtype: SoledadDocument +        """ +        return self._db.create_doc_from_json(json, doc_id=doc_id) + +    def create_index(self, index_name, *index_expressions): +        """ +        Create a named index, which can then be queried for future lookups. +        Creating an index which already exists is not an error, and is cheap. +        Creating an index which does not match the index_expressions of the +        existing index is an error. +        Creating an index will block until the expressions have been evaluated +        and the index generated. + +        @param index_name: A unique name which can be used as a key prefix +        @type index_name: str +        @param index_expressions: index expressions defining the index +            information. +        @type index_expressions: tuple + +            Examples: + +            "fieldname", or "fieldname.subfieldname" to index alphabetically +            sorted on the contents of a field. 
+ +            "number(fieldname, width)", "lower(fieldname)" +        """ +        if self._db: +            return self._db.create_index(index_name, *index_expressions) + +    def delete_index(self, index_name): +        """ +        Remove a named index. + +        @param index_name: The name of the index we are removing +        @type index_name: str +        """ +        if self._db: +            return self._db.delete_index(index_name) + +    def list_indexes(self): +        """ +        List the definitions of all known indexes. + +        @return: A list of [('index-name', ['field', 'field2'])] definitions. +        @rtype: list +        """ +        if self._db: +            return self._db.list_indexes() + +    def get_from_index(self, index_name, *key_values): +        """ +        Return documents that match the keys supplied. + +        You must supply exactly the same number of values as have been defined +        in the index. It is possible to do a prefix match by using '*' to +        indicate a wildcard match. You can only supply '*' to trailing entries, +        (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) +        It is also possible to append a '*' to the last supplied value (eg +        'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + +        @param index_name: The index to query +        @type index_name: str +        @param key_values: values to match. eg, if you have +            an index with 3 fields then you would have: +            get_from_index(index_name, val1, val2, val3) +        @type key_values: tuple +        @return: List of [Document] +        @rtype: list +        """ +        if self._db: +            return self._db.get_from_index(index_name, *key_values) + +    def get_range_from_index(self, index_name, start_value, end_value): +        """ +        Return documents that fall within the specified range. + +        Both ends of the range are inclusive. 
For both start_value and +        end_value, one must supply exactly the same number of values as have +        been defined in the index, or pass None. In case of a single column +        index, a string is accepted as an alternative for a tuple with a single +        value. It is possible to do a prefix match by using '*' to indicate +        a wildcard match. You can only supply '*' to trailing entries, (eg +        'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also +        possible to append a '*' to the last supplied value (eg 'val*', '*', +        '*' or 'val', 'val*', '*', but not 'val*', 'val', '*') + +        @param index_name: The index to query +        @type index_name: str +        @param start_value: tuple of values that define the lower bound of +            the range. eg, if you have an index with 3 fields then you would +            have: (val1, val2, val3) +        @type start_value: tuple +        @param end_value: tuple of values that define the upper bound of the +            range. eg, if you have an index with 3 fields then you would have: +            (val1, val2, val3) +        @type end_value: tuple +        @return: List of [Document] +        @rtype: list +        """ +        if self._db: +            return self._db.get_range_from_index( +                index_name, start_value, end_value) + +    def get_index_keys(self, index_name): +        """ +        Return all keys under which documents are indexed in this index. + +        @param index_name: The index to query +        @type index_name: str +        @return: [] A list of tuples of indexed keys. +        @rtype: list +        """ +        if self._db: +            return self._db.get_index_keys(index_name) + +    def get_doc_conflicts(self, doc_id): +        """ +        Get the list of conflicts for the given document. 
+ +        @param doc_id: the document id +        @type doc_id: str + +        @return: a list of the document entries that are conflicted +        @rtype: list +        """ +        if self._db: +            return self._db.get_doc_conflicts(doc_id) + +    def resolve_doc(self, doc, conflicted_doc_revs): +        """ +        Mark a document as no longer conflicted. + +        @param doc: a document with the new content to be inserted. +        @type doc: SoledadDocument +        @param conflicted_doc_revs: a list of revisions that the new content +            supersedes. +        @type conflicted_doc_revs: list +        """ +        if self._db: +            return self._db.resolve_doc(doc, conflicted_doc_revs) + +    def sync(self): +        """ +        Synchronize the local encrypted replica with a remote replica. + +        @param url: the url of the target replica to sync with +        @type url: str + +        @return: the local generation before the synchronisation was +            performed. +        @rtype: str +        """ +        if self._db: +            local_gen = self._db.sync( +                urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), +                creds=self._creds, autocreate=True) +            signal(SOLEDAD_DONE_DATA_SYNC, self._uuid) +            return local_gen + +    def need_sync(self, url): +        """ +        Return if local db replica differs from remote url's replica. + +        @param url: The remote replica to compare with local replica. +        @type url: str + +        @return: Whether remote replica and local replica differ. 
+        @rtype: bool +        """ +        target = SoledadSyncTarget(url, creds=self._creds, crypto=self._crypto) +        info = target.get_sync_info(self._db._get_replica_uid()) +        # compare source generation with target's last known source generation +        if self._db._get_generation() != info[4]: +            signal(SOLEDAD_NEW_DATA_TO_SYNC, self._uuid) +            return True +        return False + +    def _set_token(self, token): +        """ +        Set the authentication token for remote database access. + +        Build the credentials dictionary with the following format: + +            self._creds = { +                'token': { +                    'uuid': '<uuid>', +                    'token': '<token>'} +            } + +        @param token: The authentication token. +        @type token: str +        """ +        self._creds = { +            'token': { +                'uuid': self._uuid, +                'token': token, +            } +        } + +    def _get_token(self): +        """ +        Return current token from credentials dictionary. +        """ +        return self._creds['token']['token'] + +    token = property(_get_token, _set_token, doc='The authentication Token.') + +    # +    # Recovery document export and import methods +    # +    def export_recovery_document(self, include_uuid=True): +        """ +        Export the storage secrets and (optionally) the uuid. + +        A recovery document has the following structure: + +            { +                self.STORAGE_SECRETS_KEY: <secrets dict>, +                self.UUID_KEY: '<uuid>',  # (optional) +            } + +        @param include_uuid: Should the uuid be included? +        @type include_uuid: bool + +        @return: The recovery document. 
+        @rtype: dict +        """ +        data = {self.STORAGE_SECRETS_KEY: self._secrets} +        if include_uuid: +            data[self.UUID_KEY] = self._uuid +        return data + +    def import_recovery_document(self, data): +        """ +        Import storage secrets for symmetric encryption and uuid (if present) +        from a recovery document. + +        A recovery document has the following structure: + +            { +                self.STORAGE_SECRETS_KEY: <secrets dict>, +                self.UUID_KEY: '<uuid>',  # (optional) +            } + +        @param data: The recovery document. +        @type data: dict +        """ +        # include new secrets in our secret pool. +        for secret_id, secret_data in data[self.STORAGE_SECRETS_KEY].items(): +            if secret_id not in self._secrets: +                self._secrets[secret_id] = secret_data +        self._store_secrets()  # save new secrets in local file +        # set uuid if present +        if self.UUID_KEY in data: +            self._uuid = data[self.UUID_KEY] +        # choose first secret to use if none is assigned +        if self._secret_id is None: +            self._set_secret_id(data[self.STORAGE_SECRETS_KEY].items()[0][0]) + +    # +    # Setters/getters +    # + +    def _get_uuid(self): +        return self._uuid + +    uuid = property(_get_uuid, doc='The user uuid.') + +    def _get_secret_id(self): +        return self._secret_id + +    secret_id = property( +        _get_secret_id, +        doc='The active secret id.') + +    def _get_secrets_path(self): +        return self._secrets_path + +    secrets_path = property( +        _get_secrets_path, +        doc='The path for the file containing the encrypted symmetric secret.') + +    def _get_local_db_path(self): +        return self._local_db_path + +    local_db_path = property( +        _get_local_db_path, +        doc='The path for the local database replica.') + +    def _get_server_url(self): +        return 
self._server_url + +    server_url = property( +        _get_server_url, +        doc='The URL of the Soledad server.') + +    storage_secret = property( +        _get_storage_secret, +        doc='The secret used for symmetric encryption.') + +    def _get_passphrase(self): +        return self._passphrase + +    passphrase = property( +        _get_passphrase, +        doc='The passphrase for locking and unlocking encryption secrets for ' +            'local and remote storage.') + + +#----------------------------------------------------------------------------- +# Monkey patching u1db to be able to provide a custom SSL cert +#----------------------------------------------------------------------------- + +# We need a more reasonable timeout (in seconds) +SOLEDAD_TIMEOUT = 10 + + +class VerifiedHTTPSConnection(httplib.HTTPSConnection): +    """HTTPSConnection verifying server side certificates.""" +    # derived from httplib.py + +    def connect(self): +        "Connect to a host on a given (SSL) port." 
+        sock = socket.create_connection((self.host, self.port), +                                        SOLEDAD_TIMEOUT, self.source_address) +        if self._tunnel_host: +            self.sock = sock +            self._tunnel() + +        self.sock = ssl.wrap_socket(sock, +                                    ca_certs=SOLEDAD_CERT, +                                    cert_reqs=ssl.CERT_REQUIRED) +        match_hostname(self.sock.getpeercert(), self.host) + + +old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection +http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection + + +__all__ = ['soledad_assert', 'Soledad'] diff --git a/client/src/leap/soledad/client/auth.py b/client/src/leap/soledad/client/auth.py new file mode 100644 index 00000000..3cd6dabe --- /dev/null +++ b/client/src/leap/soledad/client/auth.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +# auth.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +""" +Methods for token-based authentication. + +These methods have to be included in all classes that extend HTTPClient so +they can do token-based auth requests to the Soledad server. +""" + + +from u1db import errors + + +class TokenBasedAuth(object): +    """ +    Encapsulate token-auth methods for classes that inherit from +    u1db.remote.http_client.HTTPClient. 
+    """ + +    def set_token_credentials(self, uuid, token): +        """ +        Store given credentials so we can sign the request later. + +        @param uuid: The user's uuid. +        @type uuid: str +        @param token: The authentication token. +        @type token: str +        """ +        self._creds = {'token': (uuid, token)} + +    def _sign_request(self, method, url_query, params): +        """ +        Return an authorization header to be included in the HTTP request, in +        the form: + +            [('Authorization', 'Token <base64 encoded creds')] + +        @param method: The HTTP method. +        @type method: str +        @param url_query: The URL query string. +        @type url_query: str +        @param params: A list with encoded query parameters. +        @type param: list + +        @return: The Authorization header. +        @rtype: list of tuple +        """ +        if 'token' in self._creds: +            uuid, token = self._creds['token'] +            auth = '%s:%s' % (uuid, token) +            return [('Authorization', 'Token %s' % auth.encode('base64')[:-1])] +        else: +            raise errors.UnknownAuthMethod( +                'Wrong credentials: %s' % self._creds) + diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py new file mode 100644 index 00000000..9fcff8e9 --- /dev/null +++ b/client/src/leap/soledad/client/crypto.py @@ -0,0 +1,213 @@ +# -*- coding: utf-8 -*- +# crypto.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +""" +Cryptographic utilities for Soledad. +""" + + +import os +import binascii +import hmac +import hashlib + + +from pycryptopp.cipher.aes import AES +from pycryptopp.cipher.xsalsa20 import XSalsa20 + + +from leap.soledad.common import ( +    soledad_assert, +    soledad_assert_type, +) + + +class EncryptionMethods(object): +    """ +    Representation of encryption methods that can be used. +    """ + +    AES_256_CTR = 'aes-256-ctr' +    XSALSA20 = 'xsalsa20' + + +class UnknownEncryptionMethod(Exception): +    """ +    Raised when trying to encrypt/decrypt with unknown method. +    """ +    pass + + +class NoSymmetricSecret(Exception): +    """ +    Raised when trying to get a hashed passphrase. +    """ + + +class SoledadCrypto(object): +    """ +    General cryptographic functionality. +    """ + +    MAC_KEY_LENGTH = 64 + +    def __init__(self, soledad): +        """ +        Initialize the crypto object. + +        @param soledad: A Soledad instance for key lookup. +        @type soledad: leap.soledad.Soledad +        """ +        self._soledad = soledad + +    def encrypt_sym(self, data, key, +                    method=EncryptionMethods.AES_256_CTR): +        """ +        Encrypt C{data} using a {password}. + +        Currently, the only  encryption method supported is AES-256 CTR mode. + +        @param data: The data to be encrypted. +        @type data: str +        @param key: The key used to encrypt C{data} (must be 256 bits long). +        @type key: str +        @param method: The encryption method to use. +        @type method: str + +        @return: A tuple with the initial value and the encrypted data. 
+        @rtype: (long, str) +        """ +        soledad_assert_type(key, str) + +        soledad_assert( +            len(key) == 32,  # 32 x 8 = 256 bits. +            'Wrong key size: %s bits (must be 256 bits long).' % +            (len(key) * 8)) +        iv = None +        # AES-256 in CTR mode +        if method == EncryptionMethods.AES_256_CTR: +            iv = os.urandom(16) +            ciphertext = AES(key=key, iv=iv).process(data) +        # XSalsa20 +        elif method == EncryptionMethods.XSALSA20: +            iv = os.urandom(24) +            ciphertext = XSalsa20(key=key, iv=iv).process(data) +        else: +            # raise if method is unknown +            raise UnknownEncryptionMethod('Unkwnown method: %s' % method) +        return binascii.b2a_base64(iv), ciphertext + +    def decrypt_sym(self, data, key, +                    method=EncryptionMethods.AES_256_CTR, **kwargs): +        """ +        Decrypt data using symmetric secret. + +        Currently, the only encryption method supported is AES-256 CTR mode. + +        @param data: The data to be decrypted. +        @type data: str +        @param key: The key used to decrypt C{data} (must be 256 bits long). +        @type key: str +        @param method: The encryption method to use. +        @type method: str +        @param kwargs: Other parameters specific to each encryption method. +        @type kwargs: dict + +        @return: The decrypted data. +        @rtype: str +        """ +        soledad_assert_type(key, str) +        # assert params +        soledad_assert( +            len(key) == 32,  # 32 x 8 = 256 bits. +            'Wrong key size: %s (must be 256 bits long).' % len(key)) +        soledad_assert( +            'iv' in kwargs, +            '%s needs an initial value.' 
% method) +        # AES-256 in CTR mode +        if method == EncryptionMethods.AES_256_CTR: +            return AES( +                key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) +        elif method == EncryptionMethods.XSALSA20: +            return XSalsa20( +                key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) + +        # raise if method is unknown +        raise UnknownEncryptionMethod('Unkwnown method: %s' % method) + +    def doc_passphrase(self, doc_id): +        """ +        Generate a passphrase for symmetric encryption of document's contents. + +        The password is derived using HMAC having sha256 as underlying hash +        function. The key used for HMAC is the first +        C{soledad.REMOTE_STORAGE_SECRET_LENGTH} bytes of Soledad's storage +        secret stripped from the first MAC_KEY_LENGTH characters. The HMAC +        message is C{doc_id}. + +        @param doc_id: The id of the document that will be encrypted using +            this passphrase. +        @type doc_id: str + +        @return: The passphrase. +        @rtype: str + +        @raise NoSymmetricSecret: if no symmetric secret was supplied. +        """ +        if self.secret is None: +            raise NoSymmetricSecret() +        return hmac.new( +            self.secret[ +                self.MAC_KEY_LENGTH: +                self._soledad.REMOTE_STORAGE_SECRET_LENGTH], +            doc_id, +            hashlib.sha256).digest() + +    def doc_mac_key(self, doc_id): +        """ +        Generate a key for calculating a MAC for a document whose id is +        C{doc_id}. + +        The key is derived using HMAC having sha256 as underlying hash +        function. The key used for HMAC is the first MAC_KEY_LENGTH characters +        of Soledad's storage secret. The HMAC message is C{doc_id}. + +        @param doc_id: The id of the document. +        @type doc_id: str + +        @return: The key. 
+        @rtype: str + +        @raise NoSymmetricSecret: if no symmetric secret was supplied. +        """ +        if self.secret is None: +            raise NoSymmetricSecret() +        return hmac.new( +            self.secret[:self.MAC_KEY_LENGTH], +            doc_id, +            hashlib.sha256).digest() + +    # +    # secret setters/getters +    # + +    def _get_secret(self): +        return self._soledad.storage_secret + +    secret = property( +        _get_secret, doc='The secret used for symmetric encryption') diff --git a/client/src/leap/soledad/client/dbwrapper.py b/client/src/leap/soledad/client/dbwrapper.py new file mode 100644 index 00000000..a27a933e --- /dev/null +++ b/client/src/leap/soledad/client/dbwrapper.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +# dbwrapper.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. +""" +Thread-safe wrapper for sqlite/pysqlcipher. + +*TODO* +At some point we surely will want to switch to a twisted way of dealing +with this, using defers and proper callbacks. But I had this tested for +some time so postponing that refactor. 
+""" +import logging +import threading +import Queue +import time + +import exceptions + +from functools import partial + +from leap.soledad.client import sqlcipher + +logger = logging.getLogger(__name__) + + +class SQLCipherWrapper(threading.Thread): + +    def __init__(self, *args, **kwargs): +        """ +        Initializes a wrapper that proxies method and attribute +        access to an underlying SQLCipher instance. We instantiate sqlcipher +        in a thread, and all method accesses communicate with it using a +        Queue. + +        :param *args: position arguments to pass to pysqlcipher initialization +        :type args: tuple + +        :param **kwargs: keyword arguments to pass to pysqlcipher +                         initialization +        :type kwargs: dict +        """ +        threading.Thread.__init__(self) +        self._db = None +        self._wrargs = args, kwargs + +        self._queue = Queue.Queue() +        self._stopped = threading.Event() + +        self.start() + +    def _init_db(self): +        """ +        Initializes sqlcipher database. + +        This is called on a separate thread. +        """ +        # instantiate u1db +        args, kwargs = self._wrargs +        self._db = sqlcipher.open(*args, **kwargs) + +    def run(self): +        """ +        Main loop for the sqlcipher thread. 
+        """ +        logger.debug("SQLCipherWrapper thread started.") +        logger.debug("Initializing sqlcipher") +        end_mths = ("__end_thread", "_SQLCipherWrapper__end_thread") + +        self._init_db() +        self._lock = threading.Lock() + +        ct = 0 +        started = False + +        while True: +            if self._db is None: +                if started: +                    break +                if ct > 10: +                    break  # XXX DEBUG +                logger.debug('db not ready yet, waiting...') +                time.sleep(1) +                ct += 1 + +            started = True + +            with self._lock: +                try: +                    mth, q, wrargs = self._queue.get() +                except: +                    logger.error("exception getting args from queue") + +                res = None +                attr = getattr(self._db, mth, None) +                if not attr: +                    if mth not in end_mths: +                        logger.error('method %s does not exist' % (mth,)) +                        res = AttributeError( +                            "_db instance has no attribute %s" % mth) + +                elif callable(attr): +                    # invoke the method with the passed args +                    args = wrargs.get('args', []) +                    kwargs = wrargs.get('kwargs', {}) +                    try: +                        res = attr(*args, **kwargs) +                    except Exception as e: +                        logger.error( +                            "Error on proxied method %s: '%r'." 
% ( +                            attr, e)) +                        res = e +                else: +                    # non-callable attribute +                    res = attr +                logger.debug('returning proxied db call...') +                q.put(res) + +            if mth in end_mths: +                logger.debug('ending thread') +                break + +        logger.debug("SQLCipherWrapper thread terminated.") +        self._stopped.set() + +    def close(self): +        """ +        Closes the sqlcipher database and finishes the thread. This method +        should always be called explicitly. +        """ +        self.__getattr__('close')() +        self.__end_thread() + +    def __getattr__(self, attr): +        """ +        Returns _db proxied attributes. +        """ + +        def __proxied_mth(method, *args, **kwargs): +            if not self._stopped.isSet(): +                wrargs = {'args': args, 'kwargs': kwargs} +                q = Queue.Queue() +                self._queue.put((method, q, wrargs)) +                res = q.get() +                q.task_done() + +                if isinstance(res, exceptions.BaseException): +                    # XXX should get the original bt +                    raise res +                return res +            else: +                logger.warning("tried to call proxied meth " +                               "but stopped is set: %s" % +                               (method,)) + +        rgetattr = object.__getattribute__ + +        if attr != "_db": +            proxied = partial(__proxied_mth, attr) +            return proxied + +        # fallback to regular behavior +        return rgetattr(self, attr) + +    def __del__(self): +        """ +        Do not trust this to get called. No guarantees given. Because of a funny +        dance with the refs and the way the gc works, we should be calling the +        close method explicitly. 
class NoTokenForAuth(Exception):
    """
    Raised when token-based authentication is attempted without a token.
    """


class Unauthorized(Exception):
    """
    Raised when the user is not allowed to perform the requested task.
    """


class SoledadSharedDatabase(http_database.HTTPDatabase, TokenBasedAuth):
    """
    A shared recovery database in which users store their encryption secrets
    on the server so they can be retrieved later.
    """
    # TODO: prevent client from messing with the shared DB.
    # TODO: define and document API.

    #
    # Token auth methods.
    #
    # Both methods delegate explicitly to TokenBasedAuth rather than relying
    # on attribute lookup through the base classes.

    def set_token_credentials(self, uuid, token):
        """
        Remember the given credentials so later requests can be signed.

        @param uuid: The user's uuid.
        @type uuid: str
        @param token: The authentication token.
        @type token: str
        """
        TokenBasedAuth.set_token_credentials(self, uuid, token)

    def _sign_request(self, method, url_query, params):
        """
        Build the authorization header to be sent with an HTTP request.

        @param method: The HTTP method.
        @type method: str
        @param url_query: The URL query string.
        @type url_query: str
        @param params: A list with encoded query parameters.
        @type params: list

        @return: The Authorization header.
        @rtype: list of tuple
        """
        return TokenBasedAuth._sign_request(self, method, url_query, params)

    #
    # Modified HTTPDatabase methods.
    #

    @staticmethod
    def open_database(url, create, creds=None):
        # TODO: users should not be able to create the shared database, so we
        # have to remove this from here in the future.
        """
        Open a Soledad shared database.

        @param url: URL of the remote database.
        @type url: str
        @param create: Should the database be created if it does not already
            exist?
        @type create: bool
        @param creds: The authentication method and credentials used to
            access the shared db; handed unchanged to the constructor.
        @type creds: tuple

        @return: The shared database in the given url.
        @rtype: SoledadSharedDatabase
        """
        shared_db = SoledadSharedDatabase(url, creds=creds)
        shared_db.open(create)
        return shared_db

    @staticmethod
    def delete_database(url):
        """
        Refuse to delete the shared database.

        @param url: The database URL.
        @type url: str

        @raise Unauthorized: Always.
        """
        raise Unauthorized("Can't delete shared database.")

    def __init__(self, url, document_factory=None, creds=None):
        """
        Initialize the database by delegating to HTTPDatabase.

        @param url: URL of the remote database.
        @type url: str
        @param document_factory: A factory for U1DB documents.
        @type document_factory: u1db.Document
        @param creds: A tuple containing the authentication method and
            credentials.
        @type creds: tuple
        """
        http_database.HTTPDatabase.__init__(
            self, url, document_factory, creds)
+ +The SQLCipher API (http://sqlcipher.net/sqlcipher-api/) is fully implemented, +with the exception of the following statements: + +  * PRAGMA cipher_use_hmac +  * PRAGMA cipher_default_use_hmac + +SQLCipher 2.0 introduced a per-page HMAC to validate that the page data has +not been tampered with. By default, when creating or opening a database using +SQLCipher 2, SQLCipher will attempt to use an HMAC check. This change in +database format means that SQLCipher 2 can't operate on version 1.1.x +databases by default. Thus, in order to provide backward compatibility with +SQLCipher 1.1.x, PRAGMA cipher_use_hmac can be used to disable the HMAC +functionality on specific databases. + +In some very specific cases, it is not possible to call PRAGMA cipher_use_hmac +as one of the first operations on a database. An example of this is when +trying to ATTACH a 1.1.x database to the main database. In these cases PRAGMA +cipher_default_use_hmac can be used to globally alter the default use of HMAC +when opening a database. + +So, as the statements above were introduced for backwards compatibility with +SQLCipher 1.1 databases, we do not implement them as all SQLCipher databases +handled by Soledad should be created by SQLCipher >= 2.0. +""" +import logging +import os +import time +import string +import threading + + +from u1db.backends import sqlite_backend +from pysqlcipher import dbapi2 +from u1db import errors as u1db_errors +from leap.soledad.common.document import SoledadDocument + +logger = logging.getLogger(__name__) + + +# Monkey-patch u1db.backends.sqlite_backend with pysqlcipher.dbapi2 +sqlite_backend.dbapi2 = dbapi2 + +# It seems that, as long as we are not using old sqlite versions, serialized +# mode is enabled by default at compile time. So accessing db connections from +# different threads should be safe, as long as no attempt is made to use them +# from multiple threads with no locking. 
# See https://sqlite.org/threadsafe.html
# and http://bugs.python.org/issue16509

# Value handed to dbapi2.connect() as ``check_same_thread``.
SQLITE_CHECK_SAME_THREAD = False


def open(path, password, create=True, document_factory=None, crypto=None,
         raw_key=False, cipher='aes-256-cbc', kdf_iter=4000,
         cipher_page_size=1024):
    """
    Open the SQLCipher database stored at C{path}.

    This is a module-level shortcut for
    C{SQLCipherDatabase.open_database}; every argument is forwarded to it
    unchanged.

    Will raise u1db.errors.DatabaseDoesNotExist if create=False and the
    database does not already exist.

    @param path: The filesystem path for the database to open.
    @type path: str
    @param password: The password that protects the SQLCipher db.
    @type password: str
    @param create: True/False, should the database be created if it doesn't
        already exist?
    @type create: bool
    @param document_factory: A function that will be called with the same
        parameters as Document.__init__.
    @type document_factory: callable
    @param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
        document contents when syncing.
    @type crypto: soledad.crypto.SoledadCrypto
    @param raw_key: Whether C{password} is a raw 64-char hex string or a
        passphrase that should be hashed to obtain the encryption key.
    @type raw_key: bool
    @param cipher: The cipher and mode to use.
    @type cipher: str
    @param kdf_iter: The number of iterations to use.
    @type kdf_iter: int
    @param cipher_page_size: The page size.
    @type cipher_page_size: int

    @return: An instance of Database.
    @rtype: SQLCipherDatabase
    """
    options = dict(
        create=create,
        document_factory=document_factory,
        crypto=crypto,
        raw_key=raw_key,
        cipher=cipher,
        kdf_iter=kdf_iter,
        cipher_page_size=cipher_page_size,
    )
    return SQLCipherDatabase.open_database(path, password, **options)


#
# Exceptions
#

class DatabaseIsNotEncrypted(Exception):
    """
    Raised when the database being opened turns out not to be encrypted.
    """


class NotAnHexString(Exception):
    """
    Raised when a raw key given to the database is not a hexadecimal string.
    """
#
# The SQLCipher database
#

class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
    """A U1DB implementation that uses SQLCipher as its persistence layer."""

    _index_storage_value = 'expand referenced encrypted'
    # Held around every "connect + set crypto PRAGMAs" sequence in this class.
    k_lock = threading.Lock()

    def __init__(self, sqlcipher_file, password, document_factory=None,
                 crypto=None, raw_key=False, cipher='aes-256-cbc',
                 kdf_iter=4000, cipher_page_size=1024):
        """
        Create a new sqlcipher file, or connect to an existing encrypted one.

        @param sqlcipher_file: The path for the SQLCipher file.
        @type sqlcipher_file: str
        @param password: The password that protects the SQLCipher db.
        @type password: str
        @param document_factory: A function that will be called with the same
            parameters as Document.__init__.
        @type document_factory: callable
        @param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
            document contents when syncing.
        @type crypto: soledad.crypto.SoledadCrypto
        @param raw_key: Whether C{password} is a raw 64-char hex string or a
            passphrase that should be hashed to obtain the encryption key.
        @type raw_key: bool
        @param cipher: The cipher and mode to use.
        @type cipher: str
        @param kdf_iter: The number of iterations to use.
        @type kdf_iter: int
        @param cipher_page_size: The page size.
        @type cipher_page_size: int

        @raise DatabaseIsNotEncrypted: If C{sqlcipher_file} exists and can be
            opened without a key.
        """
        # ensure the db is encrypted if the file already exists
        if os.path.exists(sqlcipher_file):
            self.assert_db_is_encrypted(
                sqlcipher_file, password, raw_key, cipher, kdf_iter,
                cipher_page_size)
        # connect to the database
        with self.k_lock:
            self._db_handle = dbapi2.connect(
                sqlcipher_file,
                check_same_thread=SQLITE_CHECK_SAME_THREAD)
            # set SQLCipher cryptographic parameters
            self._set_crypto_pragmas(
                self._db_handle, password, raw_key, cipher, kdf_iter,
                cipher_page_size)
            self._real_replica_uid = None
            self._ensure_schema()
            self._crypto = crypto

        # NOTE(review): C{document_factory} is accepted but not used here; a
        # factory producing SoledadDocument is always installed.
        def factory(doc_id=None, rev=None, json='{}', has_conflicts=False,
                    syncable=True):
            return SoledadDocument(doc_id=doc_id, rev=rev, json=json,
                                   has_conflicts=has_conflicts,
                                   syncable=syncable)
        self.set_document_factory(factory)

    @classmethod
    def _open_database(cls, sqlcipher_file, password, document_factory=None,
                       crypto=None, raw_key=False, cipher='aes-256-cbc',
                       kdf_iter=4000, cipher_page_size=1024):
        """
        Open an existing SQLCipher database.

        The file is first probed with a throw-away connection to find out
        which index storage it uses; the matching registered implementation
        is then instantiated. The probe is retried up to two more times,
        sleeping in between, in case another process is still initializing
        the file.

        @param sqlcipher_file: The path for the SQLCipher file.
        @type sqlcipher_file: str
        @param password: The password that protects the SQLCipher db.
        @type password: str
        @param document_factory: A function that will be called with the same
            parameters as Document.__init__.
        @type document_factory: callable
        @param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
            document contents when syncing.
        @type crypto: soledad.crypto.SoledadCrypto
        @param raw_key: Whether C{password} is a raw 64-char hex string or a
            passphrase that should be hashed to obtain the encryption key.
        @type raw_key: bool
        @param cipher: The cipher and mode to use.
        @type cipher: str
        @param kdf_iter: The number of iterations to use.
        @type kdf_iter: int
        @param cipher_page_size: The page size.
        @type cipher_page_size: int

        @return: The database object.
        @rtype: SQLCipherDatabase

        @raise u1db.errors.DatabaseDoesNotExist: If C{sqlcipher_file} is not
            an existing file.
        """
        if not os.path.isfile(sqlcipher_file):
            raise u1db_errors.DatabaseDoesNotExist()

        tries = 2
        # Note: There seems to be a bug in sqlite 3.5.9 (with python2.6)
        #       where without re-opening the database on Windows, it
        #       doesn't see the transaction that was just committed
        while True:

            with cls.k_lock:
                db_handle = dbapi2.connect(
                    sqlcipher_file,
                    check_same_thread=SQLITE_CHECK_SAME_THREAD)

                try:
                    # set cryptographic params
                    cls._set_crypto_pragmas(
                        db_handle, password, raw_key, cipher, kdf_iter,
                        cipher_page_size)
                    c = db_handle.cursor()
                    # XXX if we use it here, it should be public
                    v, err = cls._which_index_storage(c)
                except Exception as exc:
                    logger.warning("ERROR OPENING DATABASE!")
                    logger.debug("error was: %r", exc)
                    v, err = None, exc
                finally:
                    # the probe connection is never reused
                    db_handle.close()
                if v is not None:
                    break
            # possibly another process is initializing it, wait for it to be
            # done
            if tries == 0:
                raise err  # go for the richest error?
            tries -= 1
            time.sleep(cls.WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL)
        return SQLCipherDatabase._sqlite_registry[v](
            sqlcipher_file, password, document_factory=document_factory,
            crypto=crypto, raw_key=raw_key, cipher=cipher, kdf_iter=kdf_iter,
            cipher_page_size=cipher_page_size)

    @classmethod
    def open_database(cls, sqlcipher_file, password, create, backend_cls=None,
                      document_factory=None, crypto=None, raw_key=False,
                      cipher='aes-256-cbc', kdf_iter=4000,
                      cipher_page_size=1024):
        """
        Open a SQLCipher database, optionally creating it.

        @param sqlcipher_file: The path for the SQLCipher file.
        @type sqlcipher_file: str
        @param password: The password that protects the SQLCipher db.
        @type password: str
        @param create: Should the database be created if it does not already
            exist?
        @type create: bool
        @param backend_cls: A class to use as backend when the database has
            to be created. Defaults to SQLCipherDatabase.
        @type backend_cls: type
        @param document_factory: A function that will be called with the same
            parameters as Document.__init__.
        @type document_factory: callable
        @param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
            document contents when syncing.
        @type crypto: soledad.crypto.SoledadCrypto
        @param raw_key: Whether C{password} is a raw 64-char hex string or a
            passphrase that should be hashed to obtain the encryption key.
        @type raw_key: bool
        @param cipher: The cipher and mode to use.
        @type cipher: str
        @param kdf_iter: The number of iterations to use.
        @type kdf_iter: int
        @param cipher_page_size: The page size.
        @type cipher_page_size: int

        @return: The database object.
        @rtype: SQLCipherDatabase

        @raise u1db.errors.DatabaseDoesNotExist: If the file does not exist
            and C{create} is false.
        """
        try:
            return cls._open_database(
                sqlcipher_file, password, document_factory=document_factory,
                crypto=crypto, raw_key=raw_key, cipher=cipher,
                kdf_iter=kdf_iter, cipher_page_size=cipher_page_size)
        except u1db_errors.DatabaseDoesNotExist:
            if not create:
                raise
            # TODO: remove backend class from here.
            if backend_cls is None:
                # default is SQLCipherPartialExpandDatabase
                backend_cls = SQLCipherDatabase
            return backend_cls(
                sqlcipher_file, password, document_factory=document_factory,
                crypto=crypto, raw_key=raw_key, cipher=cipher,
                kdf_iter=kdf_iter, cipher_page_size=cipher_page_size)

    def sync(self, url, creds=None, autocreate=True):
        """
        Synchronize documents with remote replica exposed at url.

        @param url: The url of the target replica to sync with.
        @type url: str
        @param creds: optional dictionary giving credentials.
            to authorize the operation with the server.
        @type creds: dict
        @param autocreate: Ask the target to create the db if non-existent.
        @type autocreate: bool

        @return: The local generation before the synchronisation was performed.
        @rtype: int
        """
        # imported here rather than at module level, as in the original code
        from u1db.sync import Synchronizer
        from leap.soledad.client.target import SoledadSyncTarget
        return Synchronizer(
            self,
            SoledadSyncTarget(url,
                              creds=creds,
                              crypto=self._crypto)).sync(autocreate=autocreate)

    def _extra_schema_init(self, c):
        """
        Add any extra fields, etc to the basic table definitions.

        This method is called by u1db.backends.sqlite_backend._initialize()
        method, which is executed when the database schema is created. Here,
        we use it to include the "syncable" property for LeapDocuments.

        @param c: The cursor for querying the database.
        @type c: dbapi2.cursor
        """
        c.execute(
            'ALTER TABLE document '
            'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE')

    def _put_and_update_indexes(self, old_doc, doc):
        """
        Update a document and all indexes related to it, then persist its
        C{syncable} flag.

        @param old_doc: The old version of the document.
        @type old_doc: u1db.Document
        @param doc: The new version of the document.
        @type doc: u1db.Document
        """
        sqlite_backend.SQLitePartialExpandDatabase._put_and_update_indexes(
            self, old_doc, doc)
        c = self._db_handle.cursor()
        c.execute('UPDATE document SET syncable=? '
                  'WHERE doc_id=?',
                  (doc.syncable, doc.doc_id))

    def _get_doc(self, doc_id, check_for_conflicts=False):
        """
        Get just the document content, without fancy handling, and restore
        its C{syncable} flag from the database.

        @param doc_id: The unique document identifier
        @type doc_id: str
        @param check_for_conflicts: Passed through unchanged to the parent
            class' C{_get_doc}.
        @type check_for_conflicts: bool

        @return: a Document object, or whatever falsy value the parent
            implementation returned.
        @rtype: u1db.Document
        """
        doc = sqlite_backend.SQLitePartialExpandDatabase._get_doc(
            self, doc_id, check_for_conflicts)
        if doc:
            c = self._db_handle.cursor()
            c.execute('SELECT syncable FROM document '
                      'WHERE doc_id=?',
                      (doc.doc_id,))
            result = c.fetchone()
            doc.syncable = bool(result[0])
        return doc

    #
    # SQLCipher API methods
    #

    @classmethod
    def assert_db_is_encrypted(cls, sqlcipher_file, key, raw_key, cipher,
                               kdf_iter, cipher_page_size):
        """
        Assert that C{sqlcipher_file} contains an encrypted database.

        When opening an existing database, PRAGMA key will not immediately
        throw an error if the key provided is incorrect. To test that the
        database can be successfully opened with the provided key, it is
        necessary to perform some operation on the database (i.e. read from
        it) and confirm it is success.

        The easiest way to do this is select off the sqlite_master table,
        which will attempt to read the first page of the database and will
        parse the schema.

        @param sqlcipher_file: The path for the SQLCipher file.
        @type sqlcipher_file: str
        @param key: The key that protects the SQLCipher db.
        @type key: str
        @param raw_key: Whether C{key} is a raw 64-char hex string or a
            passphrase that should be hashed to obtain the encryption key.
        @type raw_key: bool
        @param cipher: The cipher and mode to use.
        @type cipher: str
        @param kdf_iter: The number of iterations to use.
        @type kdf_iter: int
        @param cipher_page_size: The page size.
        @type cipher_page_size: int

        @raise DatabaseIsNotEncrypted: If the file can be opened by the
            regular u1db backend, i.e. without any key.
        """
        try:
            # try to open an encrypted database with the regular u1db
            # backend should raise a DatabaseError exception.
            plain_db = sqlite_backend.SQLitePartialExpandDatabase(
                sqlcipher_file)
        except dbapi2.DatabaseError:
            # assert that we can access it using SQLCipher with the given
            # key
            with cls.k_lock:
                db_handle = dbapi2.connect(
                    sqlcipher_file,
                    check_same_thread=SQLITE_CHECK_SAME_THREAD)
                try:
                    cls._set_crypto_pragmas(
                        db_handle, key, raw_key, cipher,
                        kdf_iter, cipher_page_size)
                    db_handle.cursor().execute(
                        'SELECT count(*) FROM sqlite_master')
                finally:
                    # this connection only exists for the check above
                    db_handle.close()
        else:
            # the file opened without a key: release it before reporting
            plain_db.close()
            raise DatabaseIsNotEncrypted()

    @staticmethod
    def _sql_quote(value):
        """
        Prepare C{value} for use inside a single-quoted SQL string literal.

        The PRAGMA helpers below interpolate their argument into the
        statement text, so an embedded single quote would otherwise end the
        literal early.

        @param value: The value to embed.
        @type value: str

        @return: C{value} with every single quote doubled.
        @rtype: str
        """
        return ('%s' % value).replace("'", "''")

    @classmethod
    def _set_crypto_pragmas(cls, db_handle, key, raw_key, cipher, kdf_iter,
                            cipher_page_size):
        """
        Set cryptographic params (key, cipher, KDF number of iterations and
        cipher page size), in that order.

        @param db_handle: A handle to the SQLCipher database.
        @type db_handle: pysqlcipher.Connection
        @param key: The key for use with the database.
        @type key: str
        @param raw_key: Whether C{key} is a raw 64-char hex string or a
            passphrase that should be hashed to obtain the encryption key.
        @type raw_key: bool
        @param cipher: The cipher and mode to use.
        @type cipher: str
        @param kdf_iter: The number of iterations to use.
        @type kdf_iter: int
        @param cipher_page_size: The page size.
        @type cipher_page_size: int
        """
        cls._pragma_key(db_handle, key, raw_key)
        cls._pragma_cipher(db_handle, cipher)
        cls._pragma_kdf_iter(db_handle, kdf_iter)
        cls._pragma_cipher_page_size(db_handle, cipher_page_size)

    @classmethod
    def _pragma_key(cls, db_handle, key, raw_key):
        """
        Set the C{key} for use with the database.

        The process of creating a new, encrypted database is called 'keying'
        the database. SQLCipher uses just-in-time key derivation at the point
        it is first needed for an operation. This means that the key (and any
        options) must be set before the first operation on the database. As
        soon as the database is touched (e.g. SELECT, CREATE TABLE, UPDATE,
        etc.) and pages need to be read or written, the key is prepared for
        use.

        Implementation Notes:

        * PRAGMA key should generally be called as the first operation on a
          database.

        @param db_handle: A handle to the SQLCipher database.
        @type db_handle: pysqlcipher.Connection
        @param key: The key for use with the database.
        @type key: str
        @param raw_key: Whether C{key} is a raw 64-char hex string or a
            passphrase that should be hashed to obtain the encryption key.
        @type raw_key: bool
        """
        if raw_key:
            cls._pragma_key_raw(db_handle, key)
        else:
            cls._pragma_key_passphrase(db_handle, key)

    @classmethod
    def _pragma_key_passphrase(cls, db_handle, passphrase):
        """
        Set a passphrase for encryption key derivation.

        The key itself can be a passphrase, which is converted to a key using
        PBKDF2 key derivation. The result is used as the encryption key for
        the database. By using this method, there is no way to alter the KDF;
        if you want to do so you should use a raw key instead and derive the
        key using your own KDF.

        @param db_handle: A handle to the SQLCipher database.
        @type db_handle: pysqlcipher.Connection
        @param passphrase: The passphrase used to derive the encryption key.
        @type passphrase: str
        """
        db_handle.cursor().execute(
            "PRAGMA key = '%s'" % cls._sql_quote(passphrase))

    @classmethod
    def _pragma_key_raw(cls, db_handle, key):
        """
        Set a raw hexadecimal encryption key.

        It is possible to specify an exact byte sequence using a blob literal.
        With this method, it is the calling application's responsibility to
        ensure that the data provided is a 64 character hex string, which will
        be converted directly to 32 bytes (256 bits) of key data.

        @param db_handle: A handle to the SQLCipher database.
        @type db_handle: pysqlcipher.Connection
        @param key: A 64 character hex string.
        @type key: str

        @raise NotAnHexString: If C{key} contains non-hexadecimal characters.
        """
        if not all(c in string.hexdigits for c in key):
            raise NotAnHexString(key)
        # NOTE(review): the SQLCipher documentation writes raw keys as
        # "x'<hex>'", with a closing single quote that is missing here. The
        # statement is kept as it was because changing it may change the key
        # derived for databases created by this code -- confirm before
        # touching.
        db_handle.cursor().execute('PRAGMA key = "x\'%s"' % key)

    @classmethod
    def _pragma_cipher(cls, db_handle, cipher='aes-256-cbc'):
        """
        Set the cipher and mode to use for symmetric encryption.

        SQLCipher uses aes-256-cbc as the default cipher and mode of
        operation. It is possible to change this, though not generally
        recommended, using PRAGMA cipher.

        SQLCipher makes direct use of libssl, so all cipher options available
        to libssl are also available for use with SQLCipher. See `man enc` for
        OpenSSL's supported ciphers.

        Implementation Notes:

        * PRAGMA cipher must be called after PRAGMA key and before the first
          actual database operation or it will have no effect.

        * If a non-default value is used PRAGMA cipher to create a database,
          it must also be called every time that database is opened.

        * SQLCipher does not implement its own encryption. Instead it uses the
          widely available and peer-reviewed OpenSSL libcrypto for all
          cryptographic functions.

        @param db_handle: A handle to the SQLCipher database.
        @type db_handle: pysqlcipher.Connection
        @param cipher: The cipher and mode to use.
        @type cipher: str
        """
        db_handle.cursor().execute(
            "PRAGMA cipher = '%s'" % cls._sql_quote(cipher))

    @classmethod
    def _pragma_kdf_iter(cls, db_handle, kdf_iter=4000):
        """
        Set the number of iterations for the key derivation function.

        SQLCipher uses PBKDF2 key derivation to strengthen the key and make it
        resistant to brute force and dictionary attacks. The default
        configuration uses 4000 PBKDF2 iterations (effectively 16,000 SHA1
        operations). PRAGMA kdf_iter can be used to increase or decrease the
        number of iterations used.

        Implementation Notes:

        * PRAGMA kdf_iter must be called after PRAGMA key and before the first
          actual database operation or it will have no effect.

        * If a non-default value is used PRAGMA kdf_iter to create a database,
          it must also be called every time that database is opened.

        * It is not recommended to reduce the number of iterations if a
          passphrase is in use.

        @param db_handle: A handle to the SQLCipher database.
        @type db_handle: pysqlcipher.Connection
        @param kdf_iter: The number of iterations to use.
        @type kdf_iter: int
        """
        db_handle.cursor().execute("PRAGMA kdf_iter = '%d'" % kdf_iter)

    @classmethod
    def _pragma_cipher_page_size(cls, db_handle, cipher_page_size=1024):
        """
        Set the page size of the encrypted database.

        SQLCipher 2 introduced the new PRAGMA cipher_page_size that can be
        used to adjust the page size for the encrypted database. The default
        page size is 1024 bytes, but it can be desirable for some applications
        to use a larger page size for increased performance. For instance,
        some recent testing shows that increasing the page size can noticeably
        improve performance (5-30%) for certain queries that manipulate a
        large number of pages (e.g. selects without an index, large inserts in
        a transaction, big deletes).

        To adjust the page size, call the pragma immediately after setting the
        key for the first time and each subsequent time that you open the
        database.

        Implementation Notes:

        * PRAGMA cipher_page_size must be called after PRAGMA key and before
          the first actual database operation or it will have no effect.

        * If a non-default value is used PRAGMA cipher_page_size to create a
          database, it must also be called every time that database is opened.

        @param db_handle: A handle to the SQLCipher database.
        @type db_handle: pysqlcipher.Connection
        @param cipher_page_size: The page size.
        @type cipher_page_size: int
        """
        db_handle.cursor().execute(
            "PRAGMA cipher_page_size = '%d'" % cipher_page_size)

    @classmethod
    def _pragma_rekey(cls, db_handle, new_key, raw_key):
        """
        Change the key of an existing encrypted database.

        To change the key on an existing encrypted database, it must first be
        unlocked with the current encryption key. Once the database is
        readable and writeable, PRAGMA rekey can be used to re-encrypt every
        page in the database with a new key.

        * PRAGMA rekey must be called after PRAGMA key. It can be called at any
          time once the database is readable.

        * PRAGMA rekey can not be used to encrypt a standard SQLite
          database! It is only useful for changing the key on an existing
          database.

        * Previous versions of SQLCipher provided a PRAGMA rekey_cipher and
          PRAGMA rekey_kdf_iter. These are deprecated and should not be
          used. Instead, use sqlcipher_export().

        @param db_handle: A handle to the SQLCipher database.
        @type db_handle: pysqlcipher.Connection
        @param new_key: The new key.
        @type new_key: str
        @param raw_key: Whether C{new_key} is a raw 64-char hex string or a
            passphrase that should be hashed to obtain the encryption key.
        @type raw_key: bool
        """
        # The original body referenced an undefined name ``key`` here.
        if raw_key:
            cls._pragma_rekey_raw(db_handle, new_key)
        else:
            cls._pragma_rekey_passphrase(db_handle, new_key)

    @classmethod
    def _pragma_rekey_passphrase(cls, db_handle, passphrase):
        """
        Change the passphrase for encryption key derivation.

        The key itself can be a passphrase, which is converted to a key using
        PBKDF2 key derivation. The result is used as the encryption key for
        the database.

        @param db_handle: A handle to the SQLCipher database.
        @type db_handle: pysqlcipher.Connection
        @param passphrase: The passphrase used to derive the encryption key.
        @type passphrase: str
        """
        db_handle.cursor().execute(
            "PRAGMA rekey = '%s'" % cls._sql_quote(passphrase))

    @classmethod
    def _pragma_rekey_raw(cls, db_handle, key):
        """
        Change the raw hexadecimal encryption key.

        It is possible to specify an exact byte sequence using a blob literal.
        With this method, it is the calling application's responsibility to
        ensure that the data provided is a 64 character hex string, which will
        be converted directly to 32 bytes (256 bits) of key data.

        @param db_handle: A handle to the SQLCipher database.
        @type db_handle: pysqlcipher.Connection
        @param key: A 64 character hex string.
        @type key: str

        @raise NotAnHexString: If C{key} contains non-hexadecimal characters.
        """
        if not all(c in string.hexdigits for c in key):
            raise NotAnHexString(key)
        # The original body interpolated an undefined name ``passphrase``.
        # The literal deliberately has the same shape as the one used by
        # _pragma_key_raw, so a rekeyed database is later opened with an
        # identically formatted key.
        db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % key)

    def __del__(self):
        """
        Closes db_handle upon object destruction.
        """
        # __init__ may raise before _db_handle is assigned (for instance when
        # assert_db_is_encrypted fails), so do not assume the attribute
        # exists.
        db_handle = getattr(self, '_db_handle', None)
        if db_handle is not None:
            db_handle.close()


sqlite_backend.SQLiteDatabase.register_implementation(SQLCipherDatabase)
class DocumentNotEncrypted(Exception):
    """
    Raised for failures in document encryption.
    """


class UnknownEncryptionScheme(Exception):
    """
    Raised when trying to decrypt from unknown encryption schemes.
    """


class UnknownMacMethod(Exception):
    """
    Raised when trying to authenticate document's content with unknown MAC
    method.
    """


class WrongMac(Exception):
    """
    Raised when failing to authenticate document's contents based on MAC.
    """


#
# Crypto utilities for a SoledadDocument.
#


def mac_doc(crypto, doc_id, doc_rev, ciphertext, mac_method):
    """
    Compute the MAC that authenticates a document's encrypted content.

    The only MAC method handled is HMAC, with the following parameters:

        * key: the value returned by C{crypto.doc_mac_key(doc_id)}
        * msg: doc_id + doc_rev + ciphertext
        * digestmod: sha256

    @param crypto: A SoledadCrypto instance that provides the MAC key.
    @type crypto: leap.soledad.crypto.SoledadCrypto
    @param doc_id: The id of the document.
    @type doc_id: str
    @param doc_rev: The revision of the document.
    @type doc_rev: str
    @param ciphertext: The content of the document.
    @type ciphertext: str
    @param mac_method: The MAC method to use.
    @type mac_method: str

    @return: The calculated MAC, as a raw (binary) digest.
    @rtype: str

    @raise UnknownMacMethod: If C{mac_method} is not MacMethods.HMAC.
    """
    if mac_method == MacMethods.HMAC:
        mac_key = crypto.doc_mac_key(doc_id)
        message = str(doc_id) + str(doc_rev) + ciphertext
        authenticator = hmac.new(mac_key, message, hashlib.sha256)
        return authenticator.digest()
    # raise if we do not know how to handle this MAC method
    raise UnknownMacMethod('Unknown MAC method: %s.' % mac_method)


def encrypt_doc(crypto, doc):
    """
    Encrypt C{doc}'s content.

    The document's JSON is encrypted using AES-256 CTR mode, and the result
    is a JSON string representing the following:

        {
            ENC_JSON_KEY: '<encrypted doc JSON string>',
            ENC_SCHEME_KEY: 'symkey',
            ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR,
            ENC_IV_KEY: '<the initial value used to encrypt>',
            MAC_KEY: '<mac>',
            MAC_METHOD_KEY: 'hmac'
        }

    Both the ciphertext and the MAC are stored hex-encoded.

    @param crypto: A SoledadCrypto instance used to perform the encryption.
    @type crypto: leap.soledad.crypto.SoledadCrypto
    @param doc: The document with contents to be encrypted.
    @type doc: SoledadDocument

    @return: The JSON serialization of the dict representing the encrypted
        content.
    @rtype: str
    """
    soledad_assert(doc.is_tombstone() is False)
    # encrypt content using AES-256 CTR mode
    plaintext = str(doc.get_json())  # crypto routines expect str
    iv, ciphertext = crypto.encrypt_sym(
        plaintext,
        crypto.doc_passphrase(doc.doc_id),
        method=EncryptionMethods.AES_256_CTR)
    # Binary values are converted to their hexadecimal representation so the
    # JSON serialization does not complain about what it tries to serialize.
    hex_ciphertext = binascii.b2a_hex(ciphertext)
    raw_mac = mac_doc(
        crypto, doc.doc_id, doc.rev, ciphertext, MacMethods.HMAC)
    hex_mac = binascii.b2a_hex(raw_mac)  # store the mac as hex.
    envelope = {
        ENC_JSON_KEY: hex_ciphertext,
        ENC_SCHEME_KEY: EncryptionSchemes.SYMKEY,
        ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR,
        ENC_IV_KEY: iv,
        MAC_KEY: hex_mac,
        MAC_METHOD_KEY: MacMethods.HMAC,
    }
    return json.dumps(envelope)
+ +    The content of the document should have the following structure: + +        { +            ENC_JSON_KEY: '<enc_blob>', +            ENC_SCHEME_KEY: '<enc_scheme>', +            ENC_METHOD_KEY: '<enc_method>', +            ENC_IV_KEY: '<initial value used to encrypt>',  # (optional) +            MAC_KEY: '<mac>' +            MAC_METHOD_KEY: 'hmac' +        } + +    C{enc_blob} is the encryption of the JSON serialization of the document's +    content. For now Soledad just deals with documents whose C{enc_scheme} is +    EncryptionSchemes.SYMKEY and C{enc_method} is +    EncryptionMethods.AES_256_CTR. + +    @param crypto: A SoledadCryto instance to perform the encryption. +    @type crypto: leap.soledad.crypto.SoledadCrypto +    @param doc: The document to be decrypted. +    @type doc: SoledadDocument + +    @return: The JSON serialization of the decrypted content. +    @rtype: str +    """ +    soledad_assert(doc.is_tombstone() is False) +    soledad_assert(ENC_JSON_KEY in doc.content) +    soledad_assert(ENC_SCHEME_KEY in doc.content) +    soledad_assert(ENC_METHOD_KEY in doc.content) +    soledad_assert(MAC_KEY in doc.content) +    soledad_assert(MAC_METHOD_KEY in doc.content) +    # verify MAC +    ciphertext = binascii.a2b_hex(  # content is stored as hex. +        doc.content[ENC_JSON_KEY]) +    mac = mac_doc( +        crypto, doc.doc_id, doc.rev, +        ciphertext, +        doc.content[MAC_METHOD_KEY]) +    # we compare mac's hashes to avoid possible timing attacks that might +    # exploit python's builtin comparison operator behaviour, which fails +    # immediatelly when non-matching bytes are found. 
+    doc_mac_hash = hashlib.sha256( +        binascii.a2b_hex(  # the mac is stored as hex +            doc.content[MAC_KEY])).digest() +    calculated_mac_hash = hashlib.sha256(mac).digest() +    if doc_mac_hash != calculated_mac_hash: +        raise WrongMac('Could not authenticate document\'s contents.') +    # decrypt doc's content +    enc_scheme = doc.content[ENC_SCHEME_KEY] +    plainjson = None +    if enc_scheme == EncryptionSchemes.SYMKEY: +        enc_method = doc.content[ENC_METHOD_KEY] +        if enc_method == EncryptionMethods.AES_256_CTR: +            soledad_assert(ENC_IV_KEY in doc.content) +            plainjson = crypto.decrypt_sym( +                ciphertext, +                crypto.doc_passphrase(doc.doc_id), +                method=enc_method, +                iv=doc.content[ENC_IV_KEY]) +        else: +            raise UnknownEncryptionMethod(enc_method) +    else: +        raise UnknownEncryptionScheme(enc_scheme) +    return plainjson + + +# +# SoledadSyncTarget +# + +class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): +    """ +    A SyncTarget that encrypts data before sending and decrypts data after +    receiving. +    """ + +    # +    # Token auth methods. +    # + +    def set_token_credentials(self, uuid, token): +        """ +        Store given credentials so we can sign the request later. + +        @param uuid: The user's uuid. +        @type uuid: str +        @param token: The authentication token. +        @type token: str +        """ +        TokenBasedAuth.set_token_credentials(self, uuid, token) + +    def _sign_request(self, method, url_query, params): +        """ +        Return an authorization header to be included in the HTTP request. + +        @param method: The HTTP method. +        @type method: str +        @param url_query: The URL query string. +        @type url_query: str +        @param params: A list with encoded query parameters. 
+        @type param: list + +        @return: The Authorization header. +        @rtype: list of tuple +        """ +        return TokenBasedAuth._sign_request(self, method, url_query, params) + +    # +    # Modified HTTPSyncTarget methods. +    # + +    @staticmethod +    def connect(url, crypto=None): +        return SoledadSyncTarget(url, crypto=crypto) + +    def __init__(self, url, creds=None, crypto=None): +        """ +        Initialize the SoledadSyncTarget. + +        @param url: The url of the target replica to sync with. +        @type url: str +        @param creds: optional dictionary giving credentials. +            to authorize the operation with the server. +        @type creds: dict +        @param soledad: An instance of Soledad so we can encrypt/decrypt +            document contents when syncing. +        @type soledad: soledad.Soledad +        """ +        HTTPSyncTarget.__init__(self, url, creds) +        self._crypto = crypto + +    def _parse_sync_stream(self, data, return_doc_cb, ensure_callback=None): +        """ +        Parse incoming synchronization stream and insert documents in the +        local database. + +        If an incoming document's encryption scheme is equal to +        EncryptionSchemes.SYMKEY, then this method will decrypt it with +        Soledad's symmetric key. + +        @param data: The body of the HTTP response. +        @type data: str +        @param return_doc_cb: A callback to insert docs from target. +        @type return_doc_cb: function +        @param ensure_callback: A callback to ensure we have the correct +            target_replica_uid, if it was just created. +        @type ensure_callback: function + +        @raise BrokenSyncStream: If C{data} is malformed. + +        @return: A dictionary representing the first line of the response got +            from remote replica. 
+        @rtype: list of str +        """ +        parts = data.splitlines()  # one at a time +        if not parts or parts[0] != '[': +            raise BrokenSyncStream +        data = parts[1:-1] +        comma = False +        if data: +            line, comma = utils.check_and_strip_comma(data[0]) +            res = json.loads(line) +            if ensure_callback and 'replica_uid' in res: +                ensure_callback(res['replica_uid']) +            for entry in data[1:]: +                if not comma:  # missing in between comma +                    raise BrokenSyncStream +                line, comma = utils.check_and_strip_comma(entry) +                entry = json.loads(line) +                #------------------------------------------------------------- +                # symmetric decryption of document's contents +                #------------------------------------------------------------- +                # if arriving content was symmetrically encrypted, we decrypt +                # it. 
+                doc = SoledadDocument( +                    entry['id'], entry['rev'], entry['content']) +                if doc.content and ENC_SCHEME_KEY in doc.content: +                    if doc.content[ENC_SCHEME_KEY] == \ +                            EncryptionSchemes.SYMKEY: +                        doc.set_json(decrypt_doc(self._crypto, doc)) +                #------------------------------------------------------------- +                # end of symmetric decryption +                #------------------------------------------------------------- +                return_doc_cb(doc, entry['gen'], entry['trans_id']) +        if parts[-1] != ']': +            try: +                partdic = json.loads(parts[-1]) +            except ValueError: +                pass +            else: +                if isinstance(partdic, dict): +                    self._error(partdic) +            raise BrokenSyncStream +        if not data or comma:  # no entries or bad extra comma +            raise BrokenSyncStream +        return res + +    def sync_exchange(self, docs_by_generations, source_replica_uid, +                      last_known_generation, last_known_trans_id, +                      return_doc_cb, ensure_callback=None): +        """ +        Find out which documents the remote database does not know about, +        encrypt and send them. + +        This does the same as the parent's method but encrypts content before +        syncing. + +        @param docs_by_generations: A list of (doc_id, generation, trans_id) +            of local documents that were changed since the last local +            generation the remote replica knows about. +        @type docs_by_generations: list of tuples +        @param source_replica_uid: The uid of the source replica. +        @type source_replica_uid: str +        @param last_known_generation: Target's last known generation. 
+        @type last_known_generation: int +        @param last_known_trans_id: Target's last known transaction id. +        @type last_known_trans_id: str +        @param return_doc_cb: A callback for inserting received documents from +            target. +        @type return_doc_cb: function +        @param ensure_callback: A callback that ensures we know the target +            replica uid if the target replica was just created. +        @type ensure_callback: function + +        @return: The new generation and transaction id of the target replica. +        @rtype: tuple +        """ +        self._ensure_connection() +        if self._trace_hook:  # for tests +            self._trace_hook('sync_exchange') +        url = '%s/sync-from/%s' % (self._url.path, source_replica_uid) +        self._conn.putrequest('POST', url) +        self._conn.putheader('content-type', 'application/x-u1db-sync-stream') +        for header_name, header_value in self._sign_request('POST', url, {}): +            self._conn.putheader(header_name, header_value) +        entries = ['['] +        size = 1 + +        def prepare(**dic): +            entry = comma + '\r\n' + json.dumps(dic) +            entries.append(entry) +            return len(entry) + +        comma = '' +        size += prepare( +            last_known_generation=last_known_generation, +            last_known_trans_id=last_known_trans_id, +            ensure=ensure_callback is not None) +        comma = ',' +        for doc, gen, trans_id in docs_by_generations: +            # skip non-syncable docs +            if isinstance(doc, SoledadDocument) and not doc.syncable: +                continue +            #------------------------------------------------------------- +            # symmetric encryption of document's contents +            #------------------------------------------------------------- +            doc_json = doc.get_json() +            if not doc.is_tombstone(): +                doc_json = 
encrypt_doc(self._crypto, doc) +            #------------------------------------------------------------- +            # end of symmetric encryption +            #------------------------------------------------------------- +            size += prepare(id=doc.doc_id, rev=doc.rev, +                            content=doc_json, +                            gen=gen, trans_id=trans_id) +        entries.append('\r\n]') +        size += len(entries[-1]) +        self._conn.putheader('content-length', str(size)) +        self._conn.endheaders() +        for entry in entries: +            self._conn.send(entry) +        entries = None +        data, _ = self._response() +        res = self._parse_sync_stream(data, return_doc_cb, ensure_callback) +        data = None +        return res['new_generation'], res['new_transaction_id']  | 
