From 4cd843205514aea4feedbe3d1bbb9fa0c90ab7bf Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Wed, 12 Mar 2014 02:57:49 -0400 Subject: move encryption of syncing docs to local db --- client/pkg/requirements.pip | 3 +- client/src/leap/soledad/client/__init__.py | 190 ++++++++++++++++++++-- client/src/leap/soledad/client/crypto.py | 209 +++++++++++++----------- client/src/leap/soledad/client/sqlcipher.py | 16 +- client/src/leap/soledad/client/target.py | 239 ++++++++++++++++++++-------- 5 files changed, 480 insertions(+), 177 deletions(-) diff --git a/client/pkg/requirements.pip b/client/pkg/requirements.pip index 8733f320..ff1b4f35 100644 --- a/client/pkg/requirements.pip +++ b/client/pkg/requirements.pip @@ -21,4 +21,5 @@ oauth # pysqlite should not be a dep, see #2945 pysqlite -cchardet \ No newline at end of file +cchardet +taskthread diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 72db5d96..6b0fa6a6 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # __init__.py -# Copyright (C) 2013 LEAP +# Copyright (C) 2013, 2014 LEAP # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -27,12 +27,15 @@ import binascii import errno import httplib import logging +import multiprocessing import os import socket +import sqlite3 import ssl import urlparse import hmac +#from functools import partial from hashlib import sha256 from threading import Lock from collections import defaultdict @@ -42,6 +45,7 @@ try: except ImportError: import chardet +from taskthread import TimerTask from u1db.remote import http_client from u1db.remote.ssl_match_hostname import match_hostname @@ -107,7 +111,7 @@ from leap.soledad.client.crypto import SoledadCrypto from leap.soledad.client.shared_db import SoledadSharedDatabase from leap.soledad.client.sqlcipher import open as sqlcipher_open from leap.soledad.client.sqlcipher import SQLCipherDatabase -from leap.soledad.client.target import SoledadSyncTarget +from leap.soledad.client.target import SoledadSyncTarget, encrypt_docstr logger = logging.getLogger(name=__name__) @@ -149,6 +153,85 @@ class BootstrapSequenceError(Exception): """ +def encrypt_doc_task(doc_id, doc_rev, content, key, secret): + encrypted_content = encrypt_docstr( + content, doc_id, doc_rev, key, secret) + return doc_id, doc_rev, encrypted_content + + +class SyncEncrypterPool(object): + """ + Pool of workers that spawn subprocesses to execute the symmetric encryption + of documents to be synced. + """ + # TODO implement throttling to reduce cpu usage?? + # TODO move to its own module + + WORKERS = 10 + TABLE_NAME = "docs_tosync" + FIELD_NAMES = "doc_id", "rev", "content" + + def __init__(self, crypto, sync_db): + """ + Initialize the pool of encryption-workers. + + :param crypto: A SoledadCryto instance to perform the encryption. + :type crypto: leap.soledad.crypto.SoledadCrypto + + :param sync_db: a database connection handle + :type sync_db: handle + """ + self._pool = multiprocessing.Pool(self.WORKERS) + self._crypto = crypto + self._sync_db = sync_db + + def encrypt_doc(self, doc): + """ + Symmetrically encrypt a document. + + :param doc: The document with contents to be encrypted. + :type doc: SoledadDocument + """ + print "ENCRYPTING DOC --->", doc + soledad_assert(not doc.is_tombstone()) + docstr = doc.get_json() + key = self._crypto.doc_passphrase(doc.doc_id) + secret = self._crypto.secret + args = doc.doc_id, doc.rev, docstr, key, secret + + try: + self._pool.apply_async(encrypt_doc_task, args, + callback=self.encrypt_doc_cb) + except Exception as exc: + logger.exception(exc) + + def encrypt_doc_cb(self, result): + doc_id, doc_rev, content = result + self.insert_encrypted_doc(doc_id, doc_rev, content) + + def insert_encrypted_doc(self, doc_id, doc_rev, content): + """ + Insert the contents of the encrypted doc into the local sync + database. + + :param doc: The document with contents to be encrypted. + :type doc: SoledadDocument + :param content: The encrypted document. + :type content: str + """ + print ">>>>>>>>>>>> inserting encrypted doc: ", content + c = self._sync_db.cursor() + sql_del = "DELETE FROM '%s' WHERE doc_id=?" % (self.TABLE_NAME,) + c.execute(sql_del, (doc_id, )) + sql_ins = "INSERT INTO '%s' VALUES (?, ?, ?)" % (self.TABLE_NAME,) + print "inserting encrypted -------------", doc_id, doc_rev + print "content: ", content + c.execute(sql_ins, (doc_id, doc_rev, content)) + self._sync_db.commit() + + # TODO have to cleanly handle removals too + + class Soledad(object): """ Soledad provides encrypted data storage and sync. @@ -187,6 +270,12 @@ class Soledad(object): The name of the local SQLCipher U1DB database file. """ + LOCAL_SYMMETRIC_SYNC_FILE_NAME = 'sync.u1db' + """ + The name of the local symmetrically encrypted documents to + sync database file. + """ + STORAGE_SECRETS_FILE_NAME = "soledad.json" """ The name of the file where the storage secrets will be stored. @@ -249,30 +338,37 @@ class Soledad(object): """ syncing_lock = defaultdict(Lock) + encrypting_lock = Lock() """ A dictionary that hold locks which avoid multiple sync attempts from the same database replica. """ def __init__(self, uuid, passphrase, secrets_path, local_db_path, - server_url, cert_file, auth_token=None, secret_id=None): + server_url, cert_file, + auth_token=None, secret_id=None): """ Initialize configuration, cryptographic keys and dbs. :param uuid: User's uuid. :type uuid: str + :param passphrase: The passphrase for locking and unlocking encryption secrets for local and remote storage. :type passphrase: unicode + :param secrets_path: Path for storing encrypted key used for symmetric encryption. :type secrets_path: str + :param local_db_path: Path for local encrypted storage db. :type local_db_path: str + :param server_url: URL for Soledad server. This is used either to sync - with the user's remote db and to interact with the shared recovery - database. + with the user's remote db and to interact with the + shared recovery database. :type server_url: str + :param cert_file: Path to the certificate of the ca used to validate the SSL certificate used by the remote soledad server. @@ -290,8 +386,12 @@ class Soledad(object): # init crypto variables self._secrets = {} self._secret_id = secret_id + # init config (possibly with default values) - self._init_config(secrets_path, local_db_path, server_url) + sync_db_path = "%s-sync" % local_db_path + self._init_config(secrets_path, local_db_path, server_url, + sync_db_path) + self._set_token(auth_token) self._shared_db_instance = None # configure SSL certificate @@ -300,7 +400,13 @@ class Soledad(object): # initiate bootstrap sequence self._bootstrap() # might raise BootstrapSequenceError() - def _init_config(self, secrets_path, local_db_path, server_url): + # initialize syncing queue encryption pool + self._sync_pool = SyncEncrypterPool(self._crypto, self._sync_db) + self._sync_watcher = TimerTask(self._encrypt_syncing_docs, delay=10) + self._sync_watcher.start() + + def _init_config(self, secrets_path, local_db_path, server_url, + local_sync_path): """ Initialize configuration using default values for missing params. """ @@ -319,6 +425,12 @@ class Soledad(object): soledad_assert( self._server_url is not None, 'Missing URL for Soledad server.') + # initialize local_sync_path + self._local_sync_path = local_sync_path + print "INITIALIZING SYNC DB ---->", local_sync_path + if self._local_sync_path is None: + self._local_sync_path = os.path.join( + self.DEFAULT_PREFIX, self.LOCAL_SYMMETRIC_SYNC_FILE_NAME) # # initialization/destruction methods @@ -447,6 +559,9 @@ class Soledad(object): else: raise secrets_problem + # STAGE 5 - local sync documents and queue initialization + self._init_sync_db() + def _init_dirs(self): """ Create work directories. @@ -503,6 +618,27 @@ class Soledad(object): crypto=self._crypto, raw_key=True) + def _init_sync_db(self): + """ + Initialize the Symmetrically-Encrypted document to be synced database, + and the queue to communicate with subprocess workers. + """ + print "INITIALIZING SYNC DB" + self._sync_db = sqlite3.connect(self._local_sync_path, + check_same_thread=False) + self._create_sync_db() + self._sync_queue = multiprocessing.Queue() + + def _create_sync_db(self): + """ + Create local sync documents db if needed. + """ + sql = ("""CREATE TABLE IF NOT EXISTS %s """ + """(doc_id, rev, content)""" % SyncEncrypterPool.TABLE_NAME) + c = self._sync_db.cursor() + c.execute(sql) + self._sync_db.commit() + def close(self): """ Close underlying U1DB database. @@ -790,7 +926,10 @@ class Soledad(object): :rtype: str """ doc.content = self._convert_to_unicode(doc.content) - return self._db.put_doc(doc) + new_rev = self._db.put_doc(doc) + # enqueue the modified document for symmetric encryption before sync + self._sync_queue.put_nowait(doc) + return new_rev def delete_doc(self, doc): """ @@ -895,8 +1034,11 @@ class Soledad(object): :return: the new document :rtype: SoledadDocument """ - return self._db.create_doc( + doc = self._db.create_doc( self._convert_to_unicode(content), doc_id=doc_id) + # enqueue the modified document for symmetric encryption before sync + self._sync_queue.put_nowait(doc) + return doc def create_doc_from_json(self, json, doc_id=None): """ @@ -915,7 +1057,10 @@ class Soledad(object): :return: The new cocument :rtype: SoledadDocument """ - return self._db.create_doc_from_json(json, doc_id=doc_id) + doc = self._db.create_doc_from_json(json, doc_id=doc_id) + # enqueue the modified document for encryption before sync + self._sync_queue.put_nowait(doc) + return doc def create_index(self, index_name, *index_expressions): """ @@ -1314,6 +1459,31 @@ class Soledad(object): def _passphrase_as_string(self): return self._passphrase.encode('utf-8') + # + # Symmetric encryption + # + + def _encrypt_syncing_docs(self): + """ + Process the syncing queue and send the documents there + to be encrypted in the sync db. + """ + lock = self.encrypting_lock + # optional wait flag used to avoid blocking + if not lock.acquire(False): + return + else: + queue = self._sync_queue + try: + while not queue.empty(): + doc = queue.get_nowait() + self._sync_pool.encrypt_doc(doc) + except Exception as exc: + logger.error("Error while encrypting docs to sync") + logger.exception(exc) + finally: + lock.release() + #----------------------------------------------------------------------------- # Monkey patching u1db to be able to provide a custom SSL cert diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py index a6372107..c1f65171 100644 --- a/client/src/leap/soledad/client/crypto.py +++ b/client/src/leap/soledad/client/crypto.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # crypto.py -# Copyright (C) 2013 LEAP +# Copyright (C) 2013,2014 LEAP # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -36,6 +36,8 @@ from leap.soledad.common import ( soledad_assert_type, ) +MAC_KEY_LENGTH = 64 + class EncryptionMethods(object): """ @@ -59,13 +61,116 @@ class NoSymmetricSecret(Exception): """ +def encrypt_sym(data, key, method): + """ + Encrypt C{data} using a {password}. + + Currently, the only encryption methods supported are AES-256 in CTR + mode and XSalsa20. + + :param data: The data to be encrypted. + :type data: str + :param key: The key used to encrypt C{data} (must be 256 bits long). + :type key: str + :param method: The encryption method to use. + :type method: str + + :return: A tuple with the initial value and the encrypted data. + :rtype: (long, str) + """ + soledad_assert_type(key, str) + + soledad_assert( + len(key) == 32, # 32 x 8 = 256 bits. + 'Wrong key size: %s bits (must be 256 bits long).' % + (len(key) * 8)) + iv = None + # AES-256 in CTR mode + if method == EncryptionMethods.AES_256_CTR: + iv = os.urandom(16) + ciphertext = AES(key=key, iv=iv).process(data) + # XSalsa20 + elif method == EncryptionMethods.XSALSA20: + iv = os.urandom(24) + ciphertext = XSalsa20(key=key, iv=iv).process(data) + else: + # raise if method is unknown + raise UnknownEncryptionMethod('Unkwnown method: %s' % method) + return binascii.b2a_base64(iv), ciphertext + + +def decrypt_sym(data, key, method, **kwargs): + """ + Decrypt data using symmetric secret. + + Currently, the only encryption method supported is AES-256 CTR mode. + + :param data: The data to be decrypted. + :type data: str + :param key: The key used to decrypt C{data} (must be 256 bits long). + :type key: str + :param method: The encryption method to use. + :type method: str + :param kwargs: Other parameters specific to each encryption method. + :type kwargs: dict + + :return: The decrypted data. + :rtype: str + """ + soledad_assert_type(key, str) + # assert params + soledad_assert( + len(key) == 32, # 32 x 8 = 256 bits. + 'Wrong key size: %s (must be 256 bits long).' % len(key)) + soledad_assert( + 'iv' in kwargs, + '%s needs an initial value.' % method) + # AES-256 in CTR mode + if method == EncryptionMethods.AES_256_CTR: + return AES( + key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) + elif method == EncryptionMethods.XSALSA20: + return XSalsa20( + key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) + + # raise if method is unknown + raise UnknownEncryptionMethod('Unkwnown method: %s' % method) + + +def doc_mac_key(doc_id, secret): + """ + Generate a key for calculating a MAC for a document whose id is + C{doc_id}. + + The key is derived using HMAC having sha256 as underlying hash + function. The key used for HMAC is the first MAC_KEY_LENGTH characters + of Soledad's storage secret. The HMAC message is C{doc_id}. + + :param doc_id: The id of the document. + :type doc_id: str + + :param secret: soledad secret storage + :type secret: Soledad.storage_secret + + :return: The key. + :rtype: str + + :raise NoSymmetricSecret: if no symmetric secret was supplied. + """ + if secret is None: + raise NoSymmetricSecret() + + return hmac.new( + secret[:MAC_KEY_LENGTH], + doc_id, + hashlib.sha256).digest() + + class SoledadCrypto(object): """ General cryptographic functionality. """ - MAC_KEY_LENGTH = 64 - def __init__(self, soledad): """ Initialize the crypto object. @@ -77,78 +182,14 @@ class SoledadCrypto(object): def encrypt_sym(self, data, key, method=EncryptionMethods.AES_256_CTR): - """ - Encrypt C{data} using a {password}. - - Currently, the only encryption method supported is AES-256 CTR mode. - - :param data: The data to be encrypted. - :type data: str - :param key: The key used to encrypt C{data} (must be 256 bits long). - :type key: str - :param method: The encryption method to use. - :type method: str - - :return: A tuple with the initial value and the encrypted data. - :rtype: (long, str) - """ - soledad_assert_type(key, str) - - soledad_assert( - len(key) == 32, # 32 x 8 = 256 bits. - 'Wrong key size: %s bits (must be 256 bits long).' % - (len(key) * 8)) - iv = None - # AES-256 in CTR mode - if method == EncryptionMethods.AES_256_CTR: - iv = os.urandom(16) - ciphertext = AES(key=key, iv=iv).process(data) - # XSalsa20 - elif method == EncryptionMethods.XSALSA20: - iv = os.urandom(24) - ciphertext = XSalsa20(key=key, iv=iv).process(data) - else: - # raise if method is unknown - raise UnknownEncryptionMethod('Unkwnown method: %s' % method) - return binascii.b2a_base64(iv), ciphertext + return encrypt_sym(data, key, method) def decrypt_sym(self, data, key, method=EncryptionMethods.AES_256_CTR, **kwargs): - """ - Decrypt data using symmetric secret. - - Currently, the only encryption method supported is AES-256 CTR mode. - - :param data: The data to be decrypted. - :type data: str - :param key: The key used to decrypt C{data} (must be 256 bits long). - :type key: str - :param method: The encryption method to use. - :type method: str - :param kwargs: Other parameters specific to each encryption method. - :type kwargs: dict + return decrypt_sym(data, key, method, **kwargs) - :return: The decrypted data. - :rtype: str - """ - soledad_assert_type(key, str) - # assert params - soledad_assert( - len(key) == 32, # 32 x 8 = 256 bits. - 'Wrong key size: %s (must be 256 bits long).' % len(key)) - soledad_assert( - 'iv' in kwargs, - '%s needs an initial value.' % method) - # AES-256 in CTR mode - if method == EncryptionMethods.AES_256_CTR: - return AES( - key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) - elif method == EncryptionMethods.XSALSA20: - return XSalsa20( - key=key, iv=binascii.a2b_base64(kwargs['iv'])).process(data) - - # raise if method is unknown - raise UnknownEncryptionMethod('Unkwnown method: %s' % method) + def doc_mac_key(self, doc_id, secret): + return doc_mac_key(doc_id, self.secret) def doc_passphrase(self, doc_id): """ @@ -173,35 +214,11 @@ class SoledadCrypto(object): raise NoSymmetricSecret() return hmac.new( self.secret[ - self.MAC_KEY_LENGTH: + MAC_KEY_LENGTH: self._soledad.REMOTE_STORAGE_SECRET_LENGTH], doc_id, hashlib.sha256).digest() - def doc_mac_key(self, doc_id): - """ - Generate a key for calculating a MAC for a document whose id is - C{doc_id}. - - The key is derived using HMAC having sha256 as underlying hash - function. The key used for HMAC is the first MAC_KEY_LENGTH characters - of Soledad's storage secret. The HMAC message is C{doc_id}. - - :param doc_id: The id of the document. - :type doc_id: str - - :return: The key. - :rtype: str - - :raise NoSymmetricSecret: if no symmetric secret was supplied. - """ - if self.secret is None: - raise NoSymmetricSecret() - return hmac.new( - self.secret[:self.MAC_KEY_LENGTH], - doc_id, - hashlib.sha256).digest() - # # secret setters/getters # diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index a17c6a5f..9183afac 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # sqlcipher.py -# Copyright (C) 2013 LEAP +# Copyright (C) 2013, 2014 LEAP # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,8 +14,6 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . - - """ A U1DB backend that uses SQLCipher as its persistence layer. @@ -143,7 +141,9 @@ class NotAnHexString(Exception): # class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): - """A U1DB implementation that uses SQLCipher as its persistence layer.""" + """ + A U1DB implementation that uses SQLCipher as its persistence layer. + """ _index_storage_value = 'expand referenced encrypted' k_lock = threading.Lock() @@ -184,7 +184,10 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self.assert_db_is_encrypted( sqlcipher_file, password, raw_key, cipher, kdf_iter, cipher_page_size) - # connect to the database + + self._sync_db_path = "%s-sync" % sqlcipher_file + + # connect to the sqlcipher database with self.k_lock: self._db_handle = dbapi2.connect( sqlcipher_file, @@ -397,7 +400,8 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): self, SoledadSyncTarget(url, creds=creds, - crypto=self._crypto)) + crypto=self._crypto, + sync_db_path=self._sync_db_path)) def _extra_schema_init(self, c): """ diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 3b3d6870..56652b0b 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # target.py -# Copyright (C) 2013 LEAP +# Copyright (C) 2013, 2014 LEAP # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -24,6 +24,8 @@ import gzip import hashlib import hmac import logging +import os +import sqlite3 import urllib import simplejson as json @@ -56,6 +58,9 @@ from leap.soledad.client.crypto import ( EncryptionMethods, UnknownEncryptionMethod, ) +from leap.soledad.client.crypto import encrypt_sym, doc_mac_key + +from leap.common.check import leap_check logger = logging.getLogger(__name__) @@ -76,7 +81,7 @@ class DocumentNotEncrypted(Exception): # -def mac_doc(crypto, doc_id, doc_rev, ciphertext, mac_method): +def mac_doc(doc_id, doc_rev, ciphertext, mac_method, secret): """ Calculate a MAC for C{doc} using C{ciphertext}. @@ -86,8 +91,6 @@ def mac_doc(crypto, doc_id, doc_rev, ciphertext, mac_method): * msg: doc_id + doc_rev + ciphertext * digestmod: sha256 - :param crypto: A SoledadCryto instance used to perform the encryption. - :type crypto: leap.soledad.crypto.SoledadCrypto :param doc_id: The id of the document. :type doc_id: str :param doc_rev: The revision of the document. @@ -96,20 +99,22 @@ def mac_doc(crypto, doc_id, doc_rev, ciphertext, mac_method): :type ciphertext: str :param mac_method: The MAC method to use. :type mac_method: str + :param secret: soledad secret + :type secret: Soledad.secret_storage :return: The calculated MAC. :rtype: str """ if mac_method == MacMethods.HMAC: return hmac.new( - crypto.doc_mac_key(doc_id), + doc_mac_key(doc_id, secret), str(doc_id) + str(doc_rev) + ciphertext, hashlib.sha256).digest() # raise if we do not know how to handle this MAC method raise UnknownMacMethod('Unknown MAC method: %s.' % mac_method) -def encrypt_doc(crypto, doc): +def encrypt_docstr(docstr, doc_id, doc_rev, key, secret): """ Encrypt C{doc}'s content. @@ -125,21 +130,29 @@ def encrypt_doc(crypto, doc): MAC_METHOD_KEY: 'hmac' } - :param crypto: A SoledadCryto instance used to perform the encryption. - :type crypto: leap.soledad.crypto.SoledadCrypto - :param doc: The document with contents to be encrypted. - :type doc: SoledadDocument + :param docstr: A representation of the document to be encrypted. + :type docstr: str or unicode. + + :param doc_id: The document id. + :type doc_id: str + + :param doc_rev: The document revision. + :type doc_rev: str + + :param key: The key used to encrypt ``data`` (must be 256 bits long). + :type key: str + + :param secret: + :type secret: :return: The JSON serialization of the dict representing the encrypted content. :rtype: str """ - soledad_assert(doc.is_tombstone() is False) # encrypt content using AES-256 CTR mode - iv, ciphertext = crypto.encrypt_sym( - str(doc.get_json()), # encryption/decryption routines expect str - crypto.doc_passphrase(doc.doc_id), - method=EncryptionMethods.AES_256_CTR) + iv, ciphertext = encrypt_sym( + str(docstr), # encryption/decryption routines expect str + key, method=EncryptionMethods.AES_256_CTR) # Return a representation for the encrypted content. In the following, we # convert binary data to hexadecimal representation so the JSON # serialization does not complain about what it tries to serialize. @@ -150,9 +163,8 @@ def encrypt_doc(crypto, doc): ENC_METHOD_KEY: EncryptionMethods.AES_256_CTR, ENC_IV_KEY: iv, MAC_KEY: binascii.b2a_hex(mac_doc( # store the mac as hex. - crypto, doc.doc_id, doc.rev, - ciphertext, - MacMethods.HMAC)), + doc_id, doc_rev, ciphertext, + MacMethods.HMAC, secret)), MAC_METHOD_KEY: MacMethods.HMAC, }) @@ -197,9 +209,9 @@ def decrypt_doc(crypto, doc): ciphertext = binascii.a2b_hex( # content is stored as hex. doc.content[ENC_JSON_KEY]) mac = mac_doc( - crypto, doc.doc_id, doc.rev, + doc.doc_id, doc.rev, ciphertext, - doc.content[MAC_METHOD_KEY]) + doc.content[MAC_METHOD_KEY], crypto.secret) # we compare mac's hashes to avoid possible timing attacks that might # exploit python's builtin comparison operator behaviour, which fails # immediatelly when non-matching bytes are found. @@ -254,63 +266,50 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): """ A SyncTarget that encrypts data before sending and decrypts data after receiving. - """ - - # - # Token auth methods. - # - - def set_token_credentials(self, uuid, token): - """ - Store given credentials so we can sign the request later. - - :param uuid: The user's uuid. - :type uuid: str - :param token: The authentication token. - :type token: str - """ - TokenBasedAuth.set_token_credentials(self, uuid, token) - def _sign_request(self, method, url_query, params): - """ - Return an authorization header to be included in the HTTP request. - - :param method: The HTTP method. - :type method: str - :param url_query: The URL query string. - :type url_query: str - :param params: A list with encoded query parameters. - :type param: list - - :return: The Authorization header. - :rtype: list of tuple - """ - return TokenBasedAuth._sign_request(self, method, url_query, params) + Normally encryption will have been written to the sync database upon + document modification. The sync database is also used to write temporarily + the parsed documents that the remote send us, before being decrypted and + written to the main database. + """ # # Modified HTTPSyncTarget methods. # - @staticmethod - def connect(url, crypto=None): - return SoledadSyncTarget(url, crypto=crypto) - - def __init__(self, url, creds=None, crypto=None): + def __init__(self, url, creds=None, crypto=None, sync_db_path=None): """ Initialize the SoledadSyncTarget. :param url: The url of the target replica to sync with. :type url: str + :param creds: optional dictionary giving credentials. - to authorize the operation with the server. + to authorize the operation with the server. :type creds: dict + :param soledad: An instance of Soledad so we can encrypt/decrypt - document contents when syncing. + document contents when syncing. :type soledad: soledad.Soledad + + :param sync_db_path: Optional. Path to the db with the symmetric + encryption of the syncing documents. If + None, encryption will be done in-place, + instead of retreiving it from the dedicated + database. + :type sync_db_path: str """ HTTPSyncTarget.__init__(self, url, creds) self._crypto = crypto + self._sync_db = None + if sync_db_path is not None: + self._init_sync_db(sync_db_path) + + @staticmethod + def connect(url, crypto=None): + return SoledadSyncTarget(url, crypto=crypto) + def _parse_sync_stream(self, data, return_doc_cb, ensure_callback=None): """ Parse incoming synchronization stream and insert documents in the @@ -322,17 +321,19 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :param data: The body of the HTTP response. :type data: str + :param return_doc_cb: A callback to insert docs from target. :type return_doc_cb: function + :param ensure_callback: A callback to ensure we have the correct - target_replica_uid, if it was just created. + target_replica_uid, if it was just created. :type ensure_callback: function :raise BrokenSyncStream: If C{data} is malformed. :return: A dictionary representing the first line of the response got - from remote replica. - :rtype: list of str + from remote replica. + :rtype: dict """ parts = data.splitlines() # one at a time if not parts or parts[0] != '[': @@ -475,10 +476,11 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :param last_known_trans_id: Target's last known transaction id. :type last_known_trans_id: str :param return_doc_cb: A callback for inserting received documents from - target. + target. :type return_doc_cb: function :param ensure_callback: A callback that ensures we know the target - replica uid if the target replica was just created. + replica uid if the target replica was just + created. :type ensure_callback: function :return: The new generation and transaction id of the target replica. @@ -507,6 +509,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): last_known_trans_id=last_known_trans_id, ensure=ensure_callback is not None) comma = ',' + + synced = [] for doc, gen, trans_id in docs_by_generations: # skip non-syncable docs if isinstance(doc, SoledadDocument) and not doc.syncable: @@ -516,13 +520,31 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): #------------------------------------------------------------- doc_json = doc.get_json() if not doc.is_tombstone(): - doc_json = encrypt_doc(self._crypto, doc) + if self._sync_db is None: + # fallback case, for tests + doc_json = encrypt_docstr( + json.dumps(doc.get_json()), + doc.doc_id, doc.rev, self._crypto.secret) + else: + try: + doc_json = self.get_encrypted_doc_from_db( + doc.doc_id, doc.rev) + except Exception as exc: + logger.error("Error while getting " + "encrypted doc from db") + logger.exception(exc) + continue + if doc_json is None: + # Not marked as tombstone, but we got nothing + # from the sync db. Maybe not encrypted yet. + continue #------------------------------------------------------------- # end of symmetric encryption #------------------------------------------------------------- size += prepare(id=doc.doc_id, rev=doc.rev, content=doc_json, gen=gen, trans_id=trans_id) + synced.append((doc.doc_id, doc.rev)) entries.append('\r\n]') size += len(entries[-1]) self._conn.putheader('content-length', str(size)) @@ -533,5 +555,94 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): data, headers = self._response() res = self._parse_sync_stream(data, return_doc_cb, ensure_callback) + + # delete documents from the sync queue + self.delete_encrypted_docs_from_db(synced) + data = None return res['new_generation'], res['new_transaction_id'] + + # + # Token auth methods. + # + + def set_token_credentials(self, uuid, token): + """ + Store given credentials so we can sign the request later. + + :param uuid: The user's uuid. + :type uuid: str + :param token: The authentication token. + :type token: str + """ + TokenBasedAuth.set_token_credentials(self, uuid, token) + + def _sign_request(self, method, url_query, params): + """ + Return an authorization header to be included in the HTTP request. + + :param method: The HTTP method. + :type method: str + :param url_query: The URL query string. + :type url_query: str + :param params: A list with encoded query parameters. + :type param: list + + :return: The Authorization header. + :rtype: list of tuple + """ + return TokenBasedAuth._sign_request(self, method, url_query, params) + + # + # Syncing db + # + + def _init_sync_db(self, path): + """ + Open a connection to the local db of encrypted docs for sync. + + :param path: The path to the local db. + :type path: str + """ + leap_check(path is not None, "Need a path to initialize db") + if not os.path.isfile(path): + logger.warning("Cannot open db: non-existent file!") + return + self._sync_db = sqlite3.connect(path, check_same_thread=False) + + def get_encrypted_doc_from_db(self, doc_id, doc_rev): + """ + Retrieve encrypted document from the database of encrypted docs for + sync. + + :param doc_id: The Document id. + :type doc_id: str + + :param doc_rev: The document revision + :type doc_rev: str + """ + c = self._sync_db.cursor() + # XXX interpolate table name + sql = ("SELECT content FROM docs_tosync " + "WHERE doc_id=? and rev=?") + c.execute(sql, (doc_id, doc_rev)) + res = c.fetchall() + if len(res) != 0: + return res[0][0] + + def delete_encrypted_docs_from_db(self, docs_ids): + """ + Delete several encrypted documents from the database of symmetrically + encrypted docs to sync. + + :param docs_ids: an iterable with (doc_id, doc_rev) for all documents + to be deleted. + :type docs_ids: any iterable of tuples of str + """ + c = self._sync_db.cursor() + for doc_id, doc_rev in docs_ids: + # XXX interpolate table name + sql = ("DELETE FROM docs_tosync " + "WHERE doc_id=? and rev=?") + c.execute(sql, (doc_id, doc_rev)) + self._sync_db.commit() -- cgit v1.2.3