summaryrefslogtreecommitdiff
path: root/client/src/leap/soledad
diff options
context:
space:
mode:
Diffstat (limited to 'client/src/leap/soledad')
-rw-r--r--client/src/leap/soledad/__init__.py6
-rw-r--r--client/src/leap/soledad/client/__init__.py30
-rw-r--r--client/src/leap/soledad/client/_crypto.py557
-rw-r--r--client/src/leap/soledad/client/_db/__init__.py0
-rw-r--r--client/src/leap/soledad/client/_db/adbapi.py298
-rw-r--r--client/src/leap/soledad/client/_db/blobs.py554
-rw-r--r--client/src/leap/soledad/client/_db/dbschema.sql42
-rw-r--r--client/src/leap/soledad/client/_db/pragmas.py379
-rw-r--r--client/src/leap/soledad/client/_db/sqlcipher.py633
-rw-r--r--client/src/leap/soledad/client/_db/sqlite.py930
-rw-r--r--client/src/leap/soledad/client/_document.py254
-rw-r--r--client/src/leap/soledad/client/_http.py74
-rw-r--r--client/src/leap/soledad/client/_pipes.py78
-rw-r--r--client/src/leap/soledad/client/_recovery_code.py33
-rw-r--r--client/src/leap/soledad/client/_secrets/__init__.py129
-rw-r--r--client/src/leap/soledad/client/_secrets/crypto.py138
-rw-r--r--client/src/leap/soledad/client/_secrets/storage.py120
-rw-r--r--client/src/leap/soledad/client/_secrets/util.py63
-rw-r--r--client/src/leap/soledad/client/_version.py484
-rw-r--r--client/src/leap/soledad/client/api.py848
-rw-r--r--client/src/leap/soledad/client/auth.py69
-rw-r--r--client/src/leap/soledad/client/crypto.py448
-rw-r--r--client/src/leap/soledad/client/events.py54
-rw-r--r--client/src/leap/soledad/client/examples/README4
-rw-r--r--client/src/leap/soledad/client/examples/benchmarks/.gitignore1
-rwxr-xr-xclient/src/leap/soledad/client/examples/benchmarks/get_sample.sh3
-rw-r--r--client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py179
-rw-r--r--client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py179
-rw-r--r--client/src/leap/soledad/client/examples/compare.txt8
-rw-r--r--client/src/leap/soledad/client/examples/manifest.phk50
-rw-r--r--client/src/leap/soledad/client/examples/plot-async-db.py45
-rw-r--r--client/src/leap/soledad/client/examples/run_benchmark.py30
-rw-r--r--client/src/leap/soledad/client/examples/soledad_sync.py63
-rw-r--r--client/src/leap/soledad/client/examples/use_adbapi.py105
-rw-r--r--client/src/leap/soledad/client/examples/use_api.py69
-rw-r--r--client/src/leap/soledad/client/http_target/__init__.py94
-rw-r--r--client/src/leap/soledad/client/http_target/api.py248
-rw-r--r--client/src/leap/soledad/client/http_target/fetch.py161
-rw-r--r--client/src/leap/soledad/client/http_target/fetch_protocol.py157
-rw-r--r--client/src/leap/soledad/client/http_target/send.py107
-rw-r--r--client/src/leap/soledad/client/http_target/send_protocol.py75
-rw-r--r--client/src/leap/soledad/client/http_target/support.py220
-rw-r--r--client/src/leap/soledad/client/interfaces.py368
-rw-r--r--client/src/leap/soledad/client/shared_db.py134
-rw-r--r--client/src/leap/soledad/client/sync.py231
45 files changed, 0 insertions, 8752 deletions
diff --git a/client/src/leap/soledad/__init__.py b/client/src/leap/soledad/__init__.py
deleted file mode 100644
index f48ad105..00000000
--- a/client/src/leap/soledad/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages
-try:
- __import__('pkg_resources').declare_namespace(__name__)
-except ImportError:
- from pkgutil import extend_path
- __path__ = extend_path(__path__, __name__)
diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py
deleted file mode 100644
index bcad78db..00000000
--- a/client/src/leap/soledad/client/__init__.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# -*- coding: utf-8 -*-
-# __init__.py
-# Copyright (C) 2013, 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Soledad - Synchronization Of Locally Encrypted Data Among Devices.
-"""
-from leap.soledad.common import soledad_assert
-
-from .api import Soledad
-from ._document import Document, AttachmentStates
-from ._version import get_versions
-
-__version__ = get_versions()['version']
-del get_versions
-
-__all__ = ['soledad_assert', 'Soledad', 'Document', 'AttachmentStates',
- '__version__']
diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py
deleted file mode 100644
index 8cedf52e..00000000
--- a/client/src/leap/soledad/client/_crypto.py
+++ /dev/null
@@ -1,557 +0,0 @@
-# -*- coding: utf-8 -*-
-# _crypto.py
-# Copyright (C) 2016 LEAP Encryption Access Project
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-"""
-Cryptographic operations for the soledad client.
-
-This module implements streaming crypto operations.
-It replaces the old client.crypto module, that will be deprecated in soledad
-0.12.
-
-The algorithm for encrypting and decrypting is as follow:
-
-The KEY is a 32 bytes value.
-The IV is a random 16 bytes value.
-The PREAMBLE is a packed_structure with encryption metadata, such as IV.
-The SEPARATOR is a space.
-
-Encryption
-----------
-
-IV = os.urandom(16)
-PREAMBLE = BLOB_SIGNATURE_MAGIC, ENC_SCHEME, ENC_METHOD, time, IV, doc_id, rev,
-and size.
-
-PREAMBLE = base64_encoded(PREAMBLE)
-CIPHERTEXT = base64_encoded(AES_GCM(KEY, cleartext) + resulting_tag) if armor
-
-CIPHERTEXT = AES_GCM(KEY, cleartext) + resulting_tag if not armor
-# "resulting_tag" came from AES-GCM encryption. It will be the last 16 bytes of
-# our ciphertext.
-
-encrypted_payload = PREAMBLE + SEPARATOR + CIPHERTEXT
-
-Decryption
-----------
-
-Ciphertext and Tag CAN come encoded in base64 (with armor=True) or raw (with
-armor=False). Preamble will always come encoded in base64.
-
-PREAMBLE, CIPHERTEXT = PAYLOAD.SPLIT(' ', 1)
-
-PREAMBLE = base64_decode(PREAMBLE)
-CIPHERTEXT = base64_decode(CIPHERTEXT) if armor else CIPHERTEXT
-
-CIPHERTEXT, TAG = CIPHERTEXT[:-16], CIPHERTEXT[-16:]
-CLEARTEXT = aes_gcm_decrypt(KEY, IV, CIPHERTEXT, TAG, associated_data=PREAMBLE)
-
-AES-GCM will check preamble authenticity as well, since we are using
-Authenticated Encryption with Associated Data (AEAD). Ciphertext and associated
-data (PREAMBLE) authenticity will both be checked together during decryption.
-PREAMBLE consistency (if it matches the desired document, for instance) is
-checked during PREAMBLE reading.
-"""
-
-
-import base64
-import hashlib
-import warnings
-import hmac
-import os
-import struct
-import time
-
-from io import BytesIO
-from collections import namedtuple
-
-from twisted.internet import defer
-from twisted.internet import interfaces
-from twisted.web.client import FileBodyProducer
-
-from leap.soledad.common import soledad_assert
-from cryptography.exceptions import InvalidTag
-from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
-from cryptography.hazmat.backends import default_backend
-
-from zope.interface import implementer
-
-
-SECRET_LENGTH = 64
-SEPARATOR = ' ' # Anything that doesn't belong to base64 encoding
-
-CRYPTO_BACKEND = default_backend()
-
-PACMAN = struct.Struct('2sbbQ16s255p255pQ')
-LEGACY_PACMAN = struct.Struct('2sbbQ16s255p255p')
-BLOB_SIGNATURE_MAGIC = '\x13\x37'
-
-
-ENC_SCHEME = namedtuple('SCHEME', 'symkey')(1)
-ENC_METHOD = namedtuple('METHOD', 'aes_256_ctr aes_256_gcm')(1, 2)
-DocInfo = namedtuple('DocInfo', 'doc_id rev')
-
-
-class EncryptionDecryptionError(Exception):
- pass
-
-
-class InvalidBlob(Exception):
- pass
-
-
-class SoledadCrypto(object):
- """
- This class provides convenient methods for document encryption and
- decryption using BlobEncryptor and BlobDecryptor classes.
- """
- def __init__(self, secret):
- """
- Initialize the crypto object.
-
- :param secret: The Soledad remote storage secret.
- :type secret: str
- """
- self.secret = secret
-
- def encrypt_doc(self, doc):
- """
- Creates and configures a BlobEncryptor, asking it to start encryption
- and wrapping the result as a simple JSON string with a "raw" key.
-
- :param doc: the document to be encrypted.
- :type doc: Document
- :return: A deferred whose callback will be invoked with a JSON string
- containing the ciphertext as the value of "raw" key.
- :rtype: twisted.internet.defer.Deferred
- """
-
- def put_raw(blob):
- raw = blob.getvalue()
- return '{"raw": "' + raw + '"}'
-
- content = BytesIO(str(doc.get_json()))
- info = DocInfo(doc.doc_id, doc.rev)
- del doc
- encryptor = BlobEncryptor(info, content, secret=self.secret)
- d = encryptor.encrypt()
- d.addCallback(put_raw)
- return d
-
- def decrypt_doc(self, doc):
- """
- Creates and configures a BlobDecryptor, asking it decrypt and returning
- the decrypted cleartext content from the encrypted document.
-
- :param doc: the document to be decrypted.
- :type doc: Document
- :return: The decrypted cleartext content of the document.
- :rtype: str
- """
- info = DocInfo(doc.doc_id, doc.rev)
- ciphertext = BytesIO()
- payload = doc.content['raw']
- del doc
- ciphertext.write(str(payload))
- decryptor = BlobDecryptor(info, ciphertext, secret=self.secret)
- return decryptor.decrypt()
-
-
-def encrypt_sym(data, key, method=ENC_METHOD.aes_256_gcm):
- """
- Encrypt data using AES-256 cipher in selected mode.
-
- :param data: The data to be encrypted.
- :type data: str
- :param key: The key used to encrypt data (must be 256 bits long).
- :type key: str
-
- :return: A tuple with the initialization vector and the ciphertext, both
- encoded as base64.
- :rtype: (str, str)
- """
- mode = _mode_by_method(method)
- encryptor = AESWriter(key, mode=mode)
- encryptor.write(data)
- _, ciphertext = encryptor.end()
- iv = base64.b64encode(encryptor.iv)
- tag = encryptor.tag or ''
- return iv, ciphertext + tag
-
-
-def decrypt_sym(data, key, iv, method=ENC_METHOD.aes_256_gcm):
- """
- Decrypt data using AES-256 cipher in selected mode.
-
- :param data: The data to be decrypted.
- :type data: str
- :param key: The symmetric key used to decrypt data (must be 256 bits
- long).
- :type key: str
- :param iv: The base64 encoded initialization vector.
- :type iv: str
-
- :return: The decrypted data.
- :rtype: str
- """
- _iv = base64.b64decode(str(iv))
- mode = _mode_by_method(method)
- tag = None
- if mode == modes.GCM:
- data, tag = data[:-16], data[-16:]
- decryptor = AESWriter(key, _iv, tag=tag, mode=mode)
- decryptor.write(data)
- _, plaintext = decryptor.end()
- return plaintext
-
-
-# TODO maybe rename this to Encryptor, since it will be used by blobs an non
-# blobs in soledad.
-class BlobEncryptor(object):
- """
- Produces encrypted data from the cleartext data associated with a given
- Document using AES-256 cipher in GCM mode.
-
- The production happens using a Twisted's FileBodyProducer, which uses a
- Cooperator to schedule calls and can be paused/resumed. Each call takes at
- most 65536 bytes from the input.
-
- Both the production input and output are file descriptors, so they can be
- applied to a stream of data.
- """
- # TODO
- # This class needs further work to allow for proper streaming.
- # Right now we HAVE TO WAIT until the end of the stream before encoding the
- # result. It should be possible to do that just encoding the chunks and
- # passing them to a sink, but for that we have to encode the chunks at
- # proper alignment (3 bytes?) with b64 if armor is defined.
-
- def __init__(self, doc_info, content_fd, secret=None, armor=True,
- sink=None):
- if not secret:
- raise EncryptionDecryptionError('no secret given')
-
- self.doc_id = doc_info.doc_id
- self.rev = doc_info.rev
- self.armor = armor
-
- self._content_fd = content_fd
- self._content_size = self._get_rounded_size(content_fd)
- self._producer = FileBodyProducer(content_fd, readSize=2**16)
-
- self.sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret)
- self._aes = AESWriter(self.sym_key, _buffer=sink)
- self._aes.authenticate(self._encode_preamble())
-
- def _get_rounded_size(self, fd):
- """
- Returns a rounded value in order to minimize information leaks due to
- the original size being exposed.
- """
- fd.seek(0, os.SEEK_END)
- size = _ceiling(fd.tell())
- fd.seek(0)
- return size
-
- @property
- def iv(self):
- return self._aes.iv
-
- @property
- def tag(self):
- return self._aes.tag
-
- def encrypt(self):
- """
- Starts producing encrypted data from the cleartext data.
-
- :return: A deferred which will be fired when encryption ends and whose
- callback will be invoked with the resulting ciphertext.
- :rtype: twisted.internet.defer.Deferred
- """
- # XXX pass a sink to aes?
- d = self._producer.startProducing(self._aes)
- d.addCallback(lambda _: self._end_crypto_stream_and_encode_result())
- return d
-
- def _encode_preamble(self):
- current_time = int(time.time())
-
- preamble = PACMAN.pack(
- BLOB_SIGNATURE_MAGIC,
- ENC_SCHEME.symkey,
- ENC_METHOD.aes_256_gcm,
- current_time,
- self.iv,
- str(self.doc_id),
- str(self.rev),
- self._content_size)
- return preamble
-
- def _end_crypto_stream_and_encode_result(self):
-
- # TODO ---- this needs to be refactored to allow PROPER streaming
- # We should write the preamble as soon as possible,
- # Is it possible to write the AES stream as soon as it is encrypted by
- # chunks?
- # FIXME also, it needs to be able to encode chunks with base64 if armor
-
- preamble, encrypted = self._aes.end()
- result = BytesIO()
- result.write(
- base64.urlsafe_b64encode(preamble))
- result.write(SEPARATOR)
-
- if self.armor:
- result.write(
- base64.urlsafe_b64encode(encrypted + self.tag))
- else:
- result.write(encrypted + self.tag)
-
- result.seek(0)
- return defer.succeed(result)
-
-
-# TODO maybe rename this to just Decryptor, since it will be used by blobs
-# and non blobs in soledad.
-class BlobDecryptor(object):
- """
- Decrypts an encrypted blob associated with a given Document.
-
- Will raise an exception if the blob doesn't have the expected structure, or
- if the GCM tag doesn't verify.
- """
- def __init__(self, doc_info, ciphertext_fd, result=None,
- secret=None, armor=True, start_stream=True, tag=None):
- if not secret:
- raise EncryptionDecryptionError('no secret given')
-
- self.doc_id = doc_info.doc_id
- self.rev = doc_info.rev
- self.fd = ciphertext_fd
- self.armor = armor
- self._producer = None
- self.result = result or BytesIO()
- sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret)
- self.size = None
- self.tag = None
-
- preamble, iv = self._consume_preamble()
- soledad_assert(preamble)
- soledad_assert(iv)
-
- self._aes = AESWriter(sym_key, iv, self.result, tag=tag or self.tag)
- self._aes.authenticate(preamble)
- if start_stream:
- self._start_stream()
-
- @property
- def decrypted_content_size(self):
- return self._aes.written
-
- def _start_stream(self):
- self._producer = FileBodyProducer(self.fd, readSize=2**16)
-
- def _consume_preamble(self):
- """
- Consume the preamble and write remaining bytes as ciphertext. This
- function is called during a stream and can be holding both, so we need
- to consume only preamble and store the remaining.
- """
- self.fd.seek(0)
- try:
- parts = self.fd.getvalue().split(SEPARATOR, 1)
- preamble = base64.urlsafe_b64decode(parts[0])
- if len(parts) == 2:
- ciphertext = parts[1]
- if self.armor:
- ciphertext = base64.urlsafe_b64decode(ciphertext)
- self.tag, ciphertext = ciphertext[-16:], ciphertext[:-16]
- self.fd.seek(0)
- self.fd.write(ciphertext)
- self.fd.seek(len(ciphertext))
- self.fd.truncate()
- self.fd.seek(0)
-
- except (TypeError, ValueError):
- raise InvalidBlob
-
- try:
- if len(preamble) == LEGACY_PACMAN.size:
- warnings.warn("Decrypting a legacy document without size. " +
- "This will be deprecated in 0.12. Doc was: " +
- "doc_id: %s rev: %s" % (self.doc_id, self.rev),
- Warning)
- unpacked_data = LEGACY_PACMAN.unpack(preamble)
- magic, sch, meth, ts, iv, doc_id, rev = unpacked_data
- elif len(preamble) == PACMAN.size:
- unpacked_data = PACMAN.unpack(preamble)
- magic, sch, meth, ts, iv, doc_id, rev, doc_size = unpacked_data
- self.size = doc_size
- else:
- raise InvalidBlob("Unexpected preamble size %d", len(preamble))
- except struct.error as e:
- raise InvalidBlob(e)
-
- if magic != BLOB_SIGNATURE_MAGIC:
- raise InvalidBlob
- # TODO check timestamp. Just as a sanity check, but for instance
- # we can refuse to process something that is in the future or
- # too far in the past (1984 would be nice, hehe)
- if sch != ENC_SCHEME.symkey:
- raise InvalidBlob('Invalid scheme: %s' % sch)
- if meth != ENC_METHOD.aes_256_gcm:
- raise InvalidBlob('Invalid encryption scheme: %s' % meth)
- if rev != self.rev:
- msg = 'Invalid revision. Expected: %s, was: %s' % (self.rev, rev)
- raise InvalidBlob(msg)
- if doc_id != self.doc_id:
- msg = 'Invalid doc_id. '
- + 'Expected: %s, was: %s' % (self.doc_id, doc_id)
- raise InvalidBlob(msg)
-
- return preamble, iv
-
- def _end_stream(self):
- try:
- self._aes.end()
- except InvalidTag:
- raise InvalidBlob('Invalid Tag. Blob authentication failed.')
- fd = self.result
- fd.seek(0)
- return self.result
-
- def decrypt(self):
- """
- Starts producing encrypted data from the cleartext data.
-
- :return: A deferred which will be fired when encryption ends and whose
- callback will be invoked with the resulting ciphertext.
- :rtype: twisted.internet.defer.Deferred
- """
- d = self.startProducing()
- d.addCallback(lambda _: self._end_stream())
- return d
-
- def startProducing(self):
- if not self._producer:
- self._start_stream()
- return self._producer.startProducing(self._aes)
-
- def endStream(self):
- self._end_stream()
-
- def write(self, data):
- self._aes.write(data)
-
- def close(self):
- result = self._aes.end()
- return result
-
-
-@implementer(interfaces.IConsumer)
-class AESWriter(object):
- """
- A Twisted's Consumer implementation that takes an input file descriptor and
- applies AES-256 cipher in GCM mode.
-
- It is used both for encryption and decryption of a stream, depending of the
- value of the tag parameter. If you pass a tag, it will operate in
- decryption mode, verifying the authenticity of the preamble and ciphertext.
- If no tag is passed, encryption mode is assumed, which will generate a tag.
- """
-
- def __init__(self, key, iv=None, _buffer=None, tag=None, mode=modes.GCM):
- if len(key) != 32:
- raise EncryptionDecryptionError('key is not 256 bits')
-
- if tag is not None:
- # if tag, we're decrypting
- assert iv is not None
-
- self.iv = iv or os.urandom(16)
- self.buffer = _buffer or BytesIO()
- cipher = _get_aes_cipher(key, self.iv, tag, mode)
- cipher = cipher.decryptor() if tag else cipher.encryptor()
- self.cipher, self.aead = cipher, ''
- self.written = 0
-
- def authenticate(self, data):
- self.aead += data
- self.cipher.authenticate_additional_data(data)
-
- @property
- def tag(self):
- return getattr(self.cipher, 'tag', None)
-
- def write(self, data):
- self.written += len(data)
- self.buffer.write(self.cipher.update(data))
-
- def end(self):
- self.buffer.write(self.cipher.finalize())
- return self.aead, self.buffer.getvalue()
-
-
-def is_symmetrically_encrypted(content):
- """
- Returns True if the document was symmetrically encrypted.
- 'EzcB' is the base64 encoding of \x13\x37 magic number and 1 (symmetrically
- encrypted value for enc_scheme flag).
-
- :param doc: The document content as string
- :type doc: str
-
- :rtype: bool
- """
- sym_signature = '{"raw": "EzcB'
- return content and content.startswith(sym_signature)
-
-
-# utils
-
-
-def _hmac_sha256(key, data):
- return hmac.new(key, data, hashlib.sha256).digest()
-
-
-def _get_sym_key_for_doc(doc_id, secret):
- key = secret[SECRET_LENGTH:]
- return _hmac_sha256(key, doc_id)
-
-
-def _get_aes_cipher(key, iv, tag, mode=modes.GCM):
- mode = mode(iv, tag) if mode == modes.GCM else mode(iv)
- return Cipher(algorithms.AES(key), mode, backend=CRYPTO_BACKEND)
-
-
-def _mode_by_method(method):
- if method == ENC_METHOD.aes_256_gcm:
- return modes.GCM
- else:
- return modes.CTR
-
-
-def _ceiling(size):
- """
- Some simplistic ceiling scheme that uses powers of 2.
- We report everything below 4096 bytes as that minimum threshold.
- See #8759 for research pending for less simplistic/aggresive strategies.
- """
- for i in xrange(12, 31):
- step = 2 ** i
- if size < step:
- return step
diff --git a/client/src/leap/soledad/client/_db/__init__.py b/client/src/leap/soledad/client/_db/__init__.py
deleted file mode 100644
index e69de29b..00000000
--- a/client/src/leap/soledad/client/_db/__init__.py
+++ /dev/null
diff --git a/client/src/leap/soledad/client/_db/adbapi.py b/client/src/leap/soledad/client/_db/adbapi.py
deleted file mode 100644
index 5c28d108..00000000
--- a/client/src/leap/soledad/client/_db/adbapi.py
+++ /dev/null
@@ -1,298 +0,0 @@
-# -*- coding: utf-8 -*-
-# adbapi.py
-# Copyright (C) 2013, 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-An asyncrhonous interface to soledad using sqlcipher backend.
-It uses twisted.enterprise.adbapi.
-"""
-import re
-import sys
-
-from functools import partial
-
-from twisted.enterprise import adbapi
-from twisted.internet.defer import DeferredSemaphore
-from twisted.python import compat
-from zope.proxy import ProxyBase, setProxiedObject
-
-from leap.soledad.common.log import getLogger
-from leap.soledad.common.errors import DatabaseAccessError
-
-from . import sqlcipher
-from . import pragmas
-
-if sys.version_info[0] < 3:
- from pysqlcipher import dbapi2
-else:
- from pysqlcipher3 import dbapi2
-
-
-logger = getLogger(__name__)
-
-
-"""
-How long the SQLCipher connection should wait for the lock to go away until
-raising an exception.
-"""
-SQLCIPHER_CONNECTION_TIMEOUT = 10
-
-"""
-How many times a SQLCipher query should be retried in case of timeout.
-"""
-SQLCIPHER_MAX_RETRIES = 20
-
-
-def getConnectionPool(opts, openfun=None, driver="pysqlcipher"):
- """
- Return a connection pool.
-
- :param opts:
- Options for the SQLCipher connection.
- :type opts: SQLCipherOptions
- :param openfun:
- Callback invoked after every connect() on the underlying DB-API
- object.
- :type openfun: callable
- :param driver:
- The connection driver.
- :type driver: str
-
- :return: A U1DB connection pool.
- :rtype: U1DBConnectionPool
- """
- if openfun is None and driver == "pysqlcipher":
- openfun = partial(pragmas.set_init_pragmas, opts=opts)
- return U1DBConnectionPool(
- opts,
- # the following params are relayed "as is" to twisted's
- # ConnectionPool.
- "%s.dbapi2" % driver, opts.path, timeout=SQLCIPHER_CONNECTION_TIMEOUT,
- check_same_thread=False, cp_openfun=openfun)
-
-
-class U1DBConnection(adbapi.Connection):
- """
- A wrapper for a U1DB connection instance.
- """
-
- u1db_wrapper = sqlcipher.SoledadSQLCipherWrapper
- """
- The U1DB wrapper to use.
- """
-
- def __init__(self, pool, init_u1db=False):
- """
- :param pool: The pool of connections to that owns this connection.
- :type pool: adbapi.ConnectionPool
- :param init_u1db: Wether the u1db database should be initialized.
- :type init_u1db: bool
- """
- self.init_u1db = init_u1db
- try:
- adbapi.Connection.__init__(self, pool)
- except dbapi2.DatabaseError as e:
- raise DatabaseAccessError(
- 'Error initializing connection to sqlcipher database: %s'
- % str(e))
-
- def reconnect(self):
- """
- Reconnect to the U1DB database.
- """
- if self._connection is not None:
- self._pool.disconnect(self._connection)
- self._connection = self._pool.connect()
-
- if self.init_u1db:
- self._u1db = self.u1db_wrapper(
- self._connection,
- self._pool.opts)
-
- def __getattr__(self, name):
- """
- Route the requested attribute either to the U1DB wrapper or to the
- connection.
-
- :param name: The name of the attribute.
- :type name: str
- """
- if name.startswith('u1db_'):
- attr = re.sub('^u1db_', '', name)
- return getattr(self._u1db, attr)
- else:
- return getattr(self._connection, name)
-
-
-class U1DBTransaction(adbapi.Transaction):
- """
- A wrapper for a U1DB 'cursor' object.
- """
-
- def __getattr__(self, name):
- """
- Route the requested attribute either to the U1DB wrapper of the
- connection or to the actual connection cursor.
-
- :param name: The name of the attribute.
- :type name: str
- """
- if name.startswith('u1db_'):
- attr = re.sub('^u1db_', '', name)
- return getattr(self._connection._u1db, attr)
- else:
- return getattr(self._cursor, name)
-
-
-class U1DBConnectionPool(adbapi.ConnectionPool):
- """
- Represent a pool of connections to an U1DB database.
- """
-
- connectionFactory = U1DBConnection
- transactionFactory = U1DBTransaction
-
- def __init__(self, opts, *args, **kwargs):
- """
- Initialize the connection pool.
- """
- self.opts = opts
- try:
- adbapi.ConnectionPool.__init__(self, *args, **kwargs)
- except dbapi2.DatabaseError as e:
- raise DatabaseAccessError(
- 'Error initializing u1db connection pool: %s' % str(e))
-
- # all u1db connections, hashed by thread-id
- self._u1dbconnections = {}
-
- # The replica uid, primed by the connections on init.
- self.replica_uid = ProxyBase(None)
-
- try:
- conn = self.connectionFactory(
- self, init_u1db=True)
- replica_uid = conn._u1db._real_replica_uid
- setProxiedObject(self.replica_uid, replica_uid)
- except DatabaseAccessError as e:
- self.threadpool.stop()
- raise DatabaseAccessError(
- "Error initializing connection factory: %s" % str(e))
-
- def runU1DBQuery(self, meth, *args, **kw):
- """
- Execute a U1DB query in a thread, using a pooled connection.
-
- Concurrent threads trying to update the same database may timeout
- because of other threads holding the database lock. Because of this,
- we will retry SQLCIPHER_MAX_RETRIES times and fail after that.
-
- :param meth: The U1DB wrapper method name.
- :type meth: str
-
- :return: a Deferred which will fire the return value of
- 'self._runU1DBQuery(Transaction(...), *args, **kw)', or a Failure.
- :rtype: twisted.internet.defer.Deferred
- """
- meth = "u1db_%s" % meth
- semaphore = DeferredSemaphore(SQLCIPHER_MAX_RETRIES)
-
- def _run_interaction():
- return self.runInteraction(
- self._runU1DBQuery, meth, *args, **kw)
-
- def _errback(failure):
- failure.trap(dbapi2.OperationalError)
- if failure.getErrorMessage() == "database is locked":
- logger.warn("database operation timed out")
- should_retry = semaphore.acquire()
- if should_retry:
- logger.warn("trying again...")
- return _run_interaction()
- logger.warn("giving up!")
- return failure
-
- d = _run_interaction()
- d.addErrback(_errback)
- return d
-
- def _runU1DBQuery(self, trans, meth, *args, **kw):
- """
- Execute a U1DB query.
-
- :param trans: An U1DB transaction.
- :type trans: adbapi.Transaction
- :param meth: the U1DB wrapper method name.
- :type meth: str
- """
- meth = getattr(trans, meth)
- return meth(*args, **kw)
- # XXX should return a fetchall?
-
- # XXX add _runOperation too
-
- def _runInteraction(self, interaction, *args, **kw):
- """
- Interact with the database and return the result.
-
- :param interaction:
- A callable object whose first argument is an
- L{adbapi.Transaction}.
- :type interaction: callable
- :return: a Deferred which will fire the return value of
- 'interaction(Transaction(...), *args, **kw)', or a Failure.
- :rtype: twisted.internet.defer.Deferred
- """
- tid = self.threadID()
- u1db = self._u1dbconnections.get(tid)
- conn = self.connectionFactory(
- self, init_u1db=not bool(u1db))
-
- if self.replica_uid is None:
- replica_uid = conn._u1db._real_replica_uid
- setProxiedObject(self.replica_uid, replica_uid)
-
- if u1db is None:
- self._u1dbconnections[tid] = conn._u1db
- else:
- conn._u1db = u1db
-
- trans = self.transactionFactory(self, conn)
- try:
- result = interaction(trans, *args, **kw)
- trans.close()
- conn.commit()
- return result
- except:
- excType, excValue, excTraceback = sys.exc_info()
- try:
- conn.rollback()
- except:
- logger.error(None, "Rollback failed")
- compat.reraise(excValue, excTraceback)
-
- def finalClose(self):
- """
- A final close, only called by the shutdown trigger.
- """
- self.shutdownID = None
- if self.threadpool.started:
- self.threadpool.stop()
- self.running = False
- for conn in self.connections.values():
- self._close(conn)
- for u1db in self._u1dbconnections.values():
- self._close(u1db)
- self.connections.clear()
diff --git a/client/src/leap/soledad/client/_db/blobs.py b/client/src/leap/soledad/client/_db/blobs.py
deleted file mode 100644
index 10b90c71..00000000
--- a/client/src/leap/soledad/client/_db/blobs.py
+++ /dev/null
@@ -1,554 +0,0 @@
-# -*- coding: utf-8 -*-
-# _blobs.py
-# Copyright (C) 2017 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Clientside BlobBackend Storage.
-"""
-
-from urlparse import urljoin
-
-import binascii
-import os
-import base64
-
-from io import BytesIO
-from functools import partial
-
-from twisted.logger import Logger
-from twisted.enterprise import adbapi
-from twisted.internet import defer
-from twisted.web.client import FileBodyProducer
-
-import treq
-
-from leap.soledad.common.errors import SoledadError
-from leap.common.files import mkdir_p
-
-from .._document import BlobDoc
-from .._crypto import DocInfo
-from .._crypto import BlobEncryptor
-from .._crypto import BlobDecryptor
-from .._http import HTTPClient
-from .._pipes import TruncatedTailPipe
-from .._pipes import PreamblePipe
-
-from . import pragmas
-from . import sqlcipher
-
-
-logger = Logger()
-FIXED_REV = 'ImmutableRevision' # Blob content is immutable
-
-
-class BlobAlreadyExistsError(SoledadError):
- pass
-
-
-class ConnectionPool(adbapi.ConnectionPool):
-
- def insertAndGetLastRowid(self, *args, **kwargs):
- """
- Execute an SQL query and return the last rowid.
-
- See: https://sqlite.org/c3ref/last_insert_rowid.html
- """
- return self.runInteraction(
- self._insertAndGetLastRowid, *args, **kwargs)
-
- def _insertAndGetLastRowid(self, trans, *args, **kw):
- trans.execute(*args, **kw)
- return trans.lastrowid
-
- def blob(self, table, column, irow, flags):
- """
- Open a BLOB for incremental I/O.
-
- Return a handle to the BLOB that would be selected by:
-
- SELECT column FROM table WHERE rowid = irow;
-
- See: https://sqlite.org/c3ref/blob_open.html
-
- :param table: The table in which to lookup the blob.
- :type table: str
- :param column: The column where the BLOB is located.
- :type column: str
- :param rowid: The rowid of the BLOB.
- :type rowid: int
- :param flags: If zero, BLOB is opened for read-only. If non-zero,
- BLOB is opened for RW.
- :type flags: int
-
- :return: A BLOB handle.
- :rtype: pysqlcipher.dbapi.Blob
- """
- return self.runInteraction(self._blob, table, column, irow, flags)
-
- def _blob(self, trans, table, column, irow, flags):
- # TODO: should not use transaction private variable here
- handle = trans._connection.blob(table, column, irow, flags)
- return handle
-
-
-def check_http_status(code):
- if code == 409:
- raise BlobAlreadyExistsError()
- elif code != 200:
- raise SoledadError("Server Error")
-
-
-class DecrypterBuffer(object):
-
- def __init__(self, blob_id, secret, tag):
- self.doc_info = DocInfo(blob_id, FIXED_REV)
- self.secret = secret
- self.tag = tag
- self.preamble_pipe = PreamblePipe(self._make_decryptor)
-
- def _make_decryptor(self, preamble):
- self.decrypter = BlobDecryptor(
- self.doc_info, preamble,
- secret=self.secret,
- armor=False,
- start_stream=False,
- tag=self.tag)
- return TruncatedTailPipe(self.decrypter, tail_size=len(self.tag))
-
- def write(self, data):
- self.preamble_pipe.write(data)
-
- def close(self):
- real_size = self.decrypter.decrypted_content_size
- return self.decrypter._end_stream(), real_size
-
-
-class BlobManager(object):
- """
- Ideally, the decrypting flow goes like this:
-
- - GET a blob from remote server.
- - Decrypt the preamble
- - Allocate a zeroblob in the sqlcipher sink
- - Mark the blob as unusable (ie, not verified)
- - Decrypt the payload incrementally, and write chunks to sqlcipher
- ** Is it possible to use a small buffer for the aes writer w/o
- ** allocating all the memory in openssl?
- - Finalize the AES decryption
- - If preamble + payload verifies correctly, mark the blob as usable
-
- """
-
- def __init__(
- self, local_path, remote, key, secret, user, token=None,
- cert_file=None):
- if local_path:
- mkdir_p(os.path.dirname(local_path))
- self.local = SQLiteBlobBackend(local_path, key)
- self.remote = remote
- self.secret = secret
- self.user = user
- self._client = HTTPClient(user, token, cert_file)
-
- def close(self):
- if hasattr(self, 'local') and self.local:
- return self.local.close()
-
- @defer.inlineCallbacks
- def remote_list(self):
- uri = urljoin(self.remote, self.user + '/')
- data = yield self._client.get(uri)
- defer.returnValue((yield data.json()))
-
- def local_list(self):
- return self.local.list()
-
- @defer.inlineCallbacks
- def send_missing(self):
- our_blobs = yield self.local_list()
- server_blobs = yield self.remote_list()
- missing = [b_id for b_id in our_blobs if b_id not in server_blobs]
- logger.info("Amount of documents missing on server: %s" % len(missing))
- # TODO: Send concurrently when we are able to stream directly from db
- for blob_id in missing:
- fd = yield self.local.get(blob_id)
- logger.info("Upload local blob: %s" % blob_id)
- yield self._encrypt_and_upload(blob_id, fd)
-
- @defer.inlineCallbacks
- def fetch_missing(self):
- # TODO: Use something to prioritize user requests over general new docs
- our_blobs = yield self.local_list()
- server_blobs = yield self.remote_list()
- docs_we_want = [b_id for b_id in server_blobs if b_id not in our_blobs]
- logger.info("Fetching new docs from server: %s" % len(docs_we_want))
- # TODO: Fetch concurrently when we are able to stream directly into db
- for blob_id in docs_we_want:
- logger.info("Fetching new doc: %s" % blob_id)
- yield self.get(blob_id)
-
- @defer.inlineCallbacks
- def put(self, doc, size):
- if (yield self.local.exists(doc.blob_id)):
- error_message = "Blob already exists: %s" % doc.blob_id
- raise BlobAlreadyExistsError(error_message)
- fd = doc.blob_fd
- # TODO this is a tee really, but ok... could do db and upload
- # concurrently. not sure if we'd gain something.
- yield self.local.put(doc.blob_id, fd, size=size)
- # In fact, some kind of pipe is needed here, where each write on db
- # handle gets forwarded into a write on the connection handle
- fd = yield self.local.get(doc.blob_id)
- yield self._encrypt_and_upload(doc.blob_id, fd)
-
- @defer.inlineCallbacks
- def get(self, blob_id):
- local_blob = yield self.local.get(blob_id)
- if local_blob:
- logger.info("Found blob in local database: %s" % blob_id)
- defer.returnValue(local_blob)
-
- result = yield self._download_and_decrypt(blob_id)
-
- if not result:
- defer.returnValue(None)
- blob, size = result
-
- if blob:
- logger.info("Got decrypted blob of type: %s" % type(blob))
- blob.seek(0)
- yield self.local.put(blob_id, blob, size=size)
- defer.returnValue((yield self.local.get(blob_id)))
- else:
- # XXX we shouldn't get here, but we will...
- # lots of ugly error handling possible:
- # 1. retry, might be network error
- # 2. try later, maybe didn't finished streaming
- # 3.. resignation, might be error while verifying
- logger.error('sorry, dunno what happened')
-
- @defer.inlineCallbacks
- def _encrypt_and_upload(self, blob_id, fd):
- # TODO ------------------------------------------
- # this is wrong, is doing 2 stages.
- # the crypto producer can be passed to
- # the uploader and react as data is written.
- # try to rewrite as a tube: pass the fd to aes and let aes writer
- # produce data to the treq request fd.
- # ------------------------------------------------
- logger.info("Staring upload of blob: %s" % blob_id)
- doc_info = DocInfo(blob_id, FIXED_REV)
- uri = urljoin(self.remote, self.user + "/" + blob_id)
- crypter = BlobEncryptor(doc_info, fd, secret=self.secret,
- armor=False)
- fd = yield crypter.encrypt()
- response = yield self._client.put(uri, data=fd)
- check_http_status(response.code)
- logger.info("Finished upload: %s" % (blob_id,))
-
- @defer.inlineCallbacks
- def _download_and_decrypt(self, blob_id):
- logger.info("Staring download of blob: %s" % blob_id)
- # TODO this needs to be connected in a tube
- uri = urljoin(self.remote, self.user + '/' + blob_id)
- data = yield self._client.get(uri)
-
- if data.code == 404:
- logger.warn("Blob not found in server: %s" % blob_id)
- defer.returnValue(None)
- elif not data.headers.hasHeader('Tag'):
- logger.error("Server didn't send a tag header for: %s" % blob_id)
- defer.returnValue(None)
- tag = data.headers.getRawHeaders('Tag')[0]
- tag = base64.urlsafe_b64decode(tag)
- buf = DecrypterBuffer(blob_id, self.secret, tag)
-
- # incrementally collect the body of the response
- yield treq.collect(data, buf.write)
- fd, size = buf.close()
- logger.info("Finished download: (%s, %d)" % (blob_id, size))
- defer.returnValue((fd, size))
-
- @defer.inlineCallbacks
- def delete(self, blob_id):
- logger.info("Staring deletion of blob: %s" % blob_id)
- yield self._delete_from_remote(blob_id)
- if (yield self.local.exists(blob_id)):
- yield self.local.delete(blob_id)
-
- def _delete_from_remote(self, blob_id):
- # TODO this needs to be connected in a tube
- uri = urljoin(self.remote, self.user + '/' + blob_id)
- return self._client.delete(uri)
-
-
-class SQLiteBlobBackend(object):
-
- def __init__(self, path, key=None):
- self.path = os.path.abspath(
- os.path.join(path, 'soledad_blob.db'))
- mkdir_p(os.path.dirname(self.path))
- if not key:
- raise ValueError('key cannot be None')
- backend = 'pysqlcipher.dbapi2'
- opts = sqlcipher.SQLCipherOptions(
- '/tmp/ignored', binascii.b2a_hex(key),
- is_raw_key=True, create=True)
- pragmafun = partial(pragmas.set_init_pragmas, opts=opts)
- openfun = _sqlcipherInitFactory(pragmafun)
-
- self.dbpool = ConnectionPool(
- backend, self.path, check_same_thread=False, timeout=5,
- cp_openfun=openfun, cp_min=1, cp_max=2, cp_name='blob_pool')
-
- def close(self):
- from twisted._threads import AlreadyQuit
- try:
- self.dbpool.close()
- except AlreadyQuit:
- pass
-
- @defer.inlineCallbacks
- def put(self, blob_id, blob_fd, size=None):
- logger.info("Saving blob in local database...")
- insert = 'INSERT INTO blobs (blob_id, payload) VALUES (?, zeroblob(?))'
- irow = yield self.dbpool.insertAndGetLastRowid(insert, (blob_id, size))
- handle = yield self.dbpool.blob('blobs', 'payload', irow, 1)
- blob_fd.seek(0)
- # XXX I have to copy the buffer here so that I'm able to
- # return a non-closed file to the caller (blobmanager.get)
- # FIXME should remove this duplication!
- # have a look at how treq does cope with closing the handle
- # for uploading a file
- producer = FileBodyProducer(blob_fd)
- done = yield producer.startProducing(handle)
- logger.info("Finished saving blob in local database.")
- defer.returnValue(done)
-
- @defer.inlineCallbacks
- def get(self, blob_id):
- # TODO we can also stream the blob value using sqlite
- # incremental interface for blobs - and just return the raw fd instead
- select = 'SELECT payload FROM blobs WHERE blob_id = ?'
- result = yield self.dbpool.runQuery(select, (blob_id,))
- if result:
- defer.returnValue(BytesIO(str(result[0][0])))
-
- @defer.inlineCallbacks
- def list(self):
- query = 'select blob_id from blobs'
- result = yield self.dbpool.runQuery(query)
- if result:
- defer.returnValue([b_id[0] for b_id in result])
- else:
- defer.returnValue([])
-
- @defer.inlineCallbacks
- def exists(self, blob_id):
- query = 'SELECT blob_id from blobs WHERE blob_id = ?'
- result = yield self.dbpool.runQuery(query, (blob_id,))
- defer.returnValue(bool(len(result)))
-
- def delete(self, blob_id):
- query = 'DELETE FROM blobs WHERE blob_id = ?'
- return self.dbpool.runQuery(query, (blob_id,))
-
-
-def _init_blob_table(conn):
- maybe_create = (
- "CREATE TABLE IF NOT EXISTS "
- "blobs ("
- "blob_id PRIMARY KEY, "
- "payload BLOB)")
- conn.execute(maybe_create)
-
-
-def _sqlcipherInitFactory(fun):
- def _initialize(conn):
- fun(conn)
- _init_blob_table(conn)
- return _initialize
-
-
-#
-# testing facilities
-#
-
-@defer.inlineCallbacks
-def testit(reactor):
- # configure logging to stdout
- from twisted.python import log
- import sys
- log.startLogging(sys.stdout)
-
- # parse command line arguments
- import argparse
-
- parser = argparse.ArgumentParser()
- parser.add_argument('--url', default='http://localhost:9000/')
- parser.add_argument('--path', default='/tmp/blobs')
- parser.add_argument('--secret', default='secret')
- parser.add_argument('--uuid', default='user')
- parser.add_argument('--token', default=None)
- parser.add_argument('--cert-file', default='')
-
- subparsers = parser.add_subparsers(help='sub-command help', dest='action')
-
- # parse upload command
- parser_upload = subparsers.add_parser(
- 'upload', help='upload blob and bypass local db')
- parser_upload.add_argument('payload')
- parser_upload.add_argument('blob_id')
-
- # parse download command
- parser_download = subparsers.add_parser(
- 'download', help='download blob and bypass local db')
- parser_download.add_argument('blob_id')
- parser_download.add_argument('--output-file', default='/tmp/incoming-file')
-
- # parse put command
- parser_put = subparsers.add_parser(
- 'put', help='put blob in local db and upload')
- parser_put.add_argument('payload')
- parser_put.add_argument('blob_id')
-
- # parse get command
- parser_get = subparsers.add_parser(
- 'get', help='get blob from local db, get if needed')
- parser_get.add_argument('blob_id')
-
- # parse delete command
- parser_get = subparsers.add_parser(
- 'delete', help='delete blob from local and remote db')
- parser_get.add_argument('blob_id')
-
- # parse list command
- parser_get = subparsers.add_parser(
- 'list', help='list local and remote blob ids')
-
- # parse send_missing command
- parser_get = subparsers.add_parser(
- 'send_missing', help='send all pending upload blobs')
-
- # parse send_missing command
- parser_get = subparsers.add_parser(
- 'fetch_missing', help='fetch all new server blobs')
-
- # parse arguments
- args = parser.parse_args()
-
- # TODO convert these into proper unittests
-
- def _manager():
- mkdir_p(os.path.dirname(args.path))
- manager = BlobManager(
- args.path, args.url,
- 'A' * 32, args.secret,
- args.uuid, args.token, args.cert_file)
- return manager
-
- @defer.inlineCallbacks
- def _upload(blob_id, payload):
- logger.info(":: Starting upload only: %s" % str((blob_id, payload)))
- manager = _manager()
- with open(payload, 'r') as fd:
- yield manager._encrypt_and_upload(blob_id, fd)
- logger.info(":: Finished upload only: %s" % str((blob_id, payload)))
-
- @defer.inlineCallbacks
- def _download(blob_id):
- logger.info(":: Starting download only: %s" % blob_id)
- manager = _manager()
- result = yield manager._download_and_decrypt(blob_id)
- logger.info(":: Result of download: %s" % str(result))
- if result:
- fd, _ = result
- with open(args.output_file, 'w') as f:
- logger.info(":: Writing data to %s" % args.output_file)
- f.write(fd.read())
- logger.info(":: Finished download only: %s" % blob_id)
-
- @defer.inlineCallbacks
- def _put(blob_id, payload):
- logger.info(":: Starting full put: %s" % blob_id)
- manager = _manager()
- size = os.path.getsize(payload)
- with open(payload) as fd:
- doc = BlobDoc(fd, blob_id)
- result = yield manager.put(doc, size=size)
- logger.info(":: Result of put: %s" % str(result))
- logger.info(":: Finished full put: %s" % blob_id)
-
- @defer.inlineCallbacks
- def _get(blob_id):
- logger.info(":: Starting full get: %s" % blob_id)
- manager = _manager()
- fd = yield manager.get(blob_id)
- if fd:
- logger.info(":: Result of get: " + fd.getvalue())
- logger.info(":: Finished full get: %s" % blob_id)
-
- @defer.inlineCallbacks
- def _delete(blob_id):
- logger.info(":: Starting deletion of: %s" % blob_id)
- manager = _manager()
- yield manager.delete(blob_id)
- logger.info(":: Finished deletion of: %s" % blob_id)
-
- @defer.inlineCallbacks
- def _list():
- logger.info(":: Listing local blobs")
- manager = _manager()
- local_list = yield manager.local_list()
- logger.info(":: Local list: %s" % local_list)
- logger.info(":: Listing remote blobs")
- remote_list = yield manager.remote_list()
- logger.info(":: Remote list: %s" % remote_list)
-
- @defer.inlineCallbacks
- def _send_missing():
- logger.info(":: Sending local pending upload docs")
- manager = _manager()
- yield manager.send_missing()
- logger.info(":: Finished sending missing docs")
-
- @defer.inlineCallbacks
- def _fetch_missing():
- logger.info(":: Fetching remote new docs")
- manager = _manager()
- yield manager.fetch_missing()
- logger.info(":: Finished fetching new docs")
-
- if args.action == 'upload':
- yield _upload(args.blob_id, args.payload)
- elif args.action == 'download':
- yield _download(args.blob_id)
- elif args.action == 'put':
- yield _put(args.blob_id, args.payload)
- elif args.action == 'get':
- yield _get(args.blob_id)
- elif args.action == 'delete':
- yield _delete(args.blob_id)
- elif args.action == 'list':
- yield _list()
- elif args.action == 'send_missing':
- yield _send_missing()
- elif args.action == 'fetch_missing':
- yield _fetch_missing()
-
-
-if __name__ == '__main__':
- from twisted.internet.task import react
- react(testit)
diff --git a/client/src/leap/soledad/client/_db/dbschema.sql b/client/src/leap/soledad/client/_db/dbschema.sql
deleted file mode 100644
index ae027fc5..00000000
--- a/client/src/leap/soledad/client/_db/dbschema.sql
+++ /dev/null
@@ -1,42 +0,0 @@
--- Database schema
-CREATE TABLE transaction_log (
- generation INTEGER PRIMARY KEY AUTOINCREMENT,
- doc_id TEXT NOT NULL,
- transaction_id TEXT NOT NULL
-);
-CREATE TABLE document (
- doc_id TEXT PRIMARY KEY,
- doc_rev TEXT NOT NULL,
- content TEXT
-);
-CREATE TABLE document_fields (
- doc_id TEXT NOT NULL,
- field_name TEXT NOT NULL,
- value TEXT
-);
-CREATE INDEX document_fields_field_value_doc_idx
- ON document_fields(field_name, value, doc_id);
-
-CREATE TABLE sync_log (
- replica_uid TEXT PRIMARY KEY,
- known_generation INTEGER,
- known_transaction_id TEXT
-);
-CREATE TABLE conflicts (
- doc_id TEXT,
- doc_rev TEXT,
- content TEXT,
- CONSTRAINT conflicts_pkey PRIMARY KEY (doc_id, doc_rev)
-);
-CREATE TABLE index_definitions (
- name TEXT,
- offset INT,
- field TEXT,
- CONSTRAINT index_definitions_pkey PRIMARY KEY (name, offset)
-);
-create index index_definitions_field on index_definitions(field);
-CREATE TABLE u1db_config (
- name TEXT PRIMARY KEY,
- value TEXT
-);
-INSERT INTO u1db_config VALUES ('sql_schema', '0');
diff --git a/client/src/leap/soledad/client/_db/pragmas.py b/client/src/leap/soledad/client/_db/pragmas.py
deleted file mode 100644
index 870ed63e..00000000
--- a/client/src/leap/soledad/client/_db/pragmas.py
+++ /dev/null
@@ -1,379 +0,0 @@
-# -*- coding: utf-8 -*-
-# pragmas.py
-# Copyright (C) 2013, 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Different pragmas used in the initialization of the SQLCipher database.
-"""
-import string
-import threading
-import os
-
-from leap.soledad.common import soledad_assert
-from leap.soledad.common.log import getLogger
-
-
-logger = getLogger(__name__)
-
-
-_db_init_lock = threading.Lock()
-
-
-def set_init_pragmas(conn, opts=None, extra_queries=None):
- """
- Set the initialization pragmas.
-
- This includes the crypto pragmas, and any other options that must
- be passed early to sqlcipher db.
- """
- soledad_assert(opts is not None)
- extra_queries = [] if extra_queries is None else extra_queries
- with _db_init_lock:
- # only one execution path should initialize the db
- _set_init_pragmas(conn, opts, extra_queries)
-
-
-def _set_init_pragmas(conn, opts, extra_queries):
-
- sync_off = os.environ.get('LEAP_SQLITE_NOSYNC')
- memstore = os.environ.get('LEAP_SQLITE_MEMSTORE')
- nowal = os.environ.get('LEAP_SQLITE_NOWAL')
-
- set_crypto_pragmas(conn, opts)
-
- if not nowal:
- set_write_ahead_logging(conn)
- if sync_off:
- set_synchronous_off(conn)
- else:
- set_synchronous_normal(conn)
- if memstore:
- set_mem_temp_store(conn)
-
- for query in extra_queries:
- conn.cursor().execute(query)
-
-
-def set_crypto_pragmas(db_handle, sqlcipher_opts):
- """
- Set cryptographic params (key, cipher, KDF number of iterations and
- cipher page size).
-
- :param db_handle:
- :type db_handle:
- :param sqlcipher_opts: options for the SQLCipherDatabase
- :type sqlcipher_opts: SQLCipherOpts instance
- """
- # XXX assert CryptoOptions
- opts = sqlcipher_opts
- _set_key(db_handle, opts.key, opts.is_raw_key)
- _set_cipher(db_handle, opts.cipher)
- _set_kdf_iter(db_handle, opts.kdf_iter)
- _set_cipher_page_size(db_handle, opts.cipher_page_size)
-
-
-def _set_key(db_handle, key, is_raw_key):
- """
- Set the ``key`` for use with the database.
-
- The process of creating a new, encrypted database is called 'keying'
- the database. SQLCipher uses just-in-time key derivation at the point
- it is first needed for an operation. This means that the key (and any
- options) must be set before the first operation on the database. As
- soon as the database is touched (e.g. SELECT, CREATE TABLE, UPDATE,
- etc.) and pages need to be read or written, the key is prepared for
- use.
-
- Implementation Notes:
-
- * PRAGMA key should generally be called as the first operation on a
- database.
-
- :param key: The key for use with the database.
- :type key: str
- :param is_raw_key:
- Whether C{key} is a raw 64-char hex string or a passphrase that should
- be hashed to obtain the encyrption key.
- :type is_raw_key: bool
- """
- if is_raw_key:
- _set_key_raw(db_handle, key)
- else:
- _set_key_passphrase(db_handle, key)
-
-
-def _set_key_passphrase(db_handle, passphrase):
- """
- Set a passphrase for encryption key derivation.
-
- The key itself can be a passphrase, which is converted to a key using
- PBKDF2 key derivation. The result is used as the encryption key for
- the database. By using this method, there is no way to alter the KDF;
- if you want to do so you should use a raw key instead and derive the
- key using your own KDF.
-
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param passphrase: The passphrase used to derive the encryption key.
- :type passphrase: str
- """
- db_handle.cursor().execute("PRAGMA key = '%s'" % passphrase)
-
-
-def _set_key_raw(db_handle, key):
- """
- Set a raw hexadecimal encryption key.
-
- It is possible to specify an exact byte sequence using a blob literal.
- With this method, it is the calling application's responsibility to
- ensure that the data provided is a 64 character hex string, which will
- be converted directly to 32 bytes (256 bits) of key data.
-
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param key: A 64 character hex string.
- :type key: str
- """
- if not all(c in string.hexdigits for c in key):
- raise NotAnHexString(key)
- db_handle.cursor().execute('PRAGMA key = "x\'%s"' % key)
-
-
-def _set_cipher(db_handle, cipher='aes-256-cbc'):
- """
- Set the cipher and mode to use for symmetric encryption.
-
- SQLCipher uses aes-256-cbc as the default cipher and mode of
- operation. It is possible to change this, though not generally
- recommended, using PRAGMA cipher.
-
- SQLCipher makes direct use of libssl, so all cipher options available
- to libssl are also available for use with SQLCipher. See `man enc` for
- OpenSSL's supported ciphers.
-
- Implementation Notes:
-
- * PRAGMA cipher must be called after PRAGMA key and before the first
- actual database operation or it will have no effect.
-
- * If a non-default value is used PRAGMA cipher to create a database,
- it must also be called every time that database is opened.
-
- * SQLCipher does not implement its own encryption. Instead it uses the
- widely available and peer-reviewed OpenSSL libcrypto for all
- cryptographic functions.
-
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param cipher: The cipher and mode to use.
- :type cipher: str
- """
- db_handle.cursor().execute("PRAGMA cipher = '%s'" % cipher)
-
-
-def _set_kdf_iter(db_handle, kdf_iter=4000):
- """
- Set the number of iterations for the key derivation function.
-
- SQLCipher uses PBKDF2 key derivation to strengthen the key and make it
- resistent to brute force and dictionary attacks. The default
- configuration uses 4000 PBKDF2 iterations (effectively 16,000 SHA1
- operations). PRAGMA kdf_iter can be used to increase or decrease the
- number of iterations used.
-
- Implementation Notes:
-
- * PRAGMA kdf_iter must be called after PRAGMA key and before the first
- actual database operation or it will have no effect.
-
- * If a non-default value is used PRAGMA kdf_iter to create a database,
- it must also be called every time that database is opened.
-
- * It is not recommended to reduce the number of iterations if a
- passphrase is in use.
-
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param kdf_iter: The number of iterations to use.
- :type kdf_iter: int
- """
- db_handle.cursor().execute("PRAGMA kdf_iter = '%d'" % kdf_iter)
-
-
-def _set_cipher_page_size(db_handle, cipher_page_size=1024):
- """
- Set the page size of the encrypted database.
-
- SQLCipher 2 introduced the new PRAGMA cipher_page_size that can be
- used to adjust the page size for the encrypted database. The default
- page size is 1024 bytes, but it can be desirable for some applications
- to use a larger page size for increased performance. For instance,
- some recent testing shows that increasing the page size can noticeably
- improve performance (5-30%) for certain queries that manipulate a
- large number of pages (e.g. selects without an index, large inserts in
- a transaction, big deletes).
-
- To adjust the page size, call the pragma immediately after setting the
- key for the first time and each subsequent time that you open the
- database.
-
- Implementation Notes:
-
- * PRAGMA cipher_page_size must be called after PRAGMA key and before
- the first actual database operation or it will have no effect.
-
- * If a non-default value is used PRAGMA cipher_page_size to create a
- database, it must also be called every time that database is opened.
-
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param cipher_page_size: The page size.
- :type cipher_page_size: int
- """
- db_handle.cursor().execute(
- "PRAGMA cipher_page_size = '%d'" % cipher_page_size)
-
-
-# XXX UNUSED ?
-def set_rekey(db_handle, new_key, is_raw_key):
- """
- Change the key of an existing encrypted database.
-
- To change the key on an existing encrypted database, it must first be
- unlocked with the current encryption key. Once the database is
- readable and writeable, PRAGMA rekey can be used to re-encrypt every
- page in the database with a new key.
-
- * PRAGMA rekey must be called after PRAGMA key. It can be called at any
- time once the database is readable.
-
- * PRAGMA rekey can not be used to encrypted a standard SQLite
- database! It is only useful for changing the key on an existing
- database.
-
- * Previous versions of SQLCipher provided a PRAGMA rekey_cipher and
- code>PRAGMA rekey_kdf_iter. These are deprecated and should not be
- used. Instead, use sqlcipher_export().
-
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param new_key: The new key.
- :type new_key: str
- :param is_raw_key: Whether C{password} is a raw 64-char hex string or a
- passphrase that should be hashed to obtain the encyrption
- key.
- :type is_raw_key: bool
- """
- if is_raw_key:
- _set_rekey_raw(db_handle, new_key)
- else:
- _set_rekey_passphrase(db_handle, new_key)
-
-
-def _set_rekey_passphrase(db_handle, passphrase):
- """
- Change the passphrase for encryption key derivation.
-
- The key itself can be a passphrase, which is converted to a key using
- PBKDF2 key derivation. The result is used as the encryption key for
- the database.
-
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param passphrase: The passphrase used to derive the encryption key.
- :type passphrase: str
- """
- db_handle.cursor().execute("PRAGMA rekey = '%s'" % passphrase)
-
-
-def _set_rekey_raw(db_handle, key):
- """
- Change the raw hexadecimal encryption key.
-
- It is possible to specify an exact byte sequence using a blob literal.
- With this method, it is the calling application's responsibility to
- ensure that the data provided is a 64 character hex string, which will
- be converted directly to 32 bytes (256 bits) of key data.
-
- :param db_handle: A handle to the SQLCipher database.
- :type db_handle: pysqlcipher.Connection
- :param key: A 64 character hex string.
- :type key: str
- """
- if not all(c in string.hexdigits for c in key):
- raise NotAnHexString(key)
- db_handle.cursor().execute('PRAGMA rekey = "x\'%s"' % key)
-
-
-def set_synchronous_off(db_handle):
- """
- Change the setting of the "synchronous" flag to OFF.
- """
- logger.debug("sqlcipher: setting synchronous off")
- db_handle.cursor().execute('PRAGMA synchronous=OFF')
-
-
-def set_synchronous_normal(db_handle):
- """
- Change the setting of the "synchronous" flag to NORMAL.
- """
- logger.debug("sqlcipher: setting synchronous normal")
- db_handle.cursor().execute('PRAGMA synchronous=NORMAL')
-
-
-def set_mem_temp_store(db_handle):
- """
- Use a in-memory store for temporary tables.
- """
- logger.debug("sqlcipher: setting temp_store memory")
- db_handle.cursor().execute('PRAGMA temp_store=MEMORY')
-
-
-def set_write_ahead_logging(db_handle):
- """
- Enable write-ahead logging, and set the autocheckpoint to 50 pages.
-
- Setting the autocheckpoint to a small value, we make the reads not
- suffer too much performance degradation.
-
- From the sqlite docs:
-
- "There is a tradeoff between average read performance and average write
- performance. To maximize the read performance, one wants to keep the
- WAL as small as possible and hence run checkpoints frequently, perhaps
- as often as every COMMIT. To maximize write performance, one wants to
- amortize the cost of each checkpoint over as many writes as possible,
- meaning that one wants to run checkpoints infrequently and let the WAL
- grow as large as possible before each checkpoint. The decision of how
- often to run checkpoints may therefore vary from one application to
- another depending on the relative read and write performance
- requirements of the application. The default strategy is to run a
- checkpoint once the WAL reaches 1000 pages"
- """
- logger.debug("sqlcipher: setting write-ahead logging")
- db_handle.cursor().execute('PRAGMA journal_mode=WAL')
-
- # The optimum value can still use a little bit of tuning, but we favor
- # small sizes of the WAL file to get fast reads, since we assume that
- # the writes will be quick enough to not block too much.
-
- db_handle.cursor().execute('PRAGMA wal_autocheckpoint=50')
-
-
-class NotAnHexString(Exception):
- """
- Raised when trying to (raw) key the database with a non-hex string.
- """
- pass
diff --git a/client/src/leap/soledad/client/_db/sqlcipher.py b/client/src/leap/soledad/client/_db/sqlcipher.py
deleted file mode 100644
index d22017bd..00000000
--- a/client/src/leap/soledad/client/_db/sqlcipher.py
+++ /dev/null
@@ -1,633 +0,0 @@
-# -*- coding: utf-8 -*-
-# sqlcipher.py
-# Copyright (C) 2013, 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-A U1DB backend that uses SQLCipher as its persistence layer.
-
-The SQLCipher API (http://sqlcipher.net/sqlcipher-api/) is fully implemented,
-with the exception of the following statements:
-
- * PRAGMA cipher_use_hmac
- * PRAGMA cipher_default_use_mac
-
-SQLCipher 2.0 introduced a per-page HMAC to validate that the page data has
-not be tampered with. By default, when creating or opening a database using
-SQLCipher 2, SQLCipher will attempt to use an HMAC check. This change in
-database format means that SQLCipher 2 can't operate on version 1.1.x
-databases by default. Thus, in order to provide backward compatibility with
-SQLCipher 1.1.x, PRAGMA cipher_use_hmac can be used to disable the HMAC
-functionality on specific databases.
-
-In some very specific cases, it is not possible to call PRAGMA cipher_use_hmac
-as one of the first operations on a database. An example of this is when
-trying to ATTACH a 1.1.x database to the main database. In these cases PRAGMA
-cipher_default_use_hmac can be used to globally alter the default use of HMAC
-when opening a database.
-
-So, as the statements above were introduced for backwards compatibility with
-SQLCipher 1.1 databases, we do not implement them as all SQLCipher databases
-handled by Soledad should be created by SQLCipher >= 2.0.
-"""
-import os
-import sys
-
-from functools import partial
-
-from twisted.internet import reactor
-from twisted.internet import defer
-from twisted.enterprise import adbapi
-
-from leap.soledad.common.log import getLogger
-from leap.soledad.common.l2db import errors as u1db_errors
-from leap.soledad.common.errors import DatabaseAccessError
-
-from leap.soledad.client.http_target import SoledadHTTPSyncTarget
-from leap.soledad.client.sync import SoledadSynchronizer
-
-from .._document import Document
-from . import sqlite
-from . import pragmas
-
-if sys.version_info[0] < 3:
- from pysqlcipher import dbapi2 as sqlcipher_dbapi2
-else:
- from pysqlcipher3 import dbapi2 as sqlcipher_dbapi2
-
-logger = getLogger(__name__)
-
-
-# Monkey-patch u1db.backends.sqlite with pysqlcipher.dbapi2
-sqlite.dbapi2 = sqlcipher_dbapi2
-
-
-# we may want to collect statistics from the sync process
-DO_STATS = False
-if os.environ.get('SOLEDAD_STATS'):
- DO_STATS = True
-
-
-def initialize_sqlcipher_db(opts, on_init=None, check_same_thread=True):
- """
- Initialize a SQLCipher database.
-
- :param opts:
- :type opts: SQLCipherOptions
- :param on_init: a tuple of queries to be executed on initialization
- :type on_init: tuple
- :return: pysqlcipher.dbapi2.Connection
- """
- # Note: There seemed to be a bug in sqlite 3.5.9 (with python2.6)
- # where without re-opening the database on Windows, it
- # doesn't see the transaction that was just committed
- # Removing from here now, look at the pysqlite implementation if the
- # bug shows up in windows.
-
- if not os.path.isfile(opts.path) and not opts.create:
- raise u1db_errors.DatabaseDoesNotExist()
-
- conn = sqlcipher_dbapi2.connect(
- opts.path, check_same_thread=check_same_thread)
- pragmas.set_init_pragmas(conn, opts, extra_queries=on_init)
- return conn
-
-
-def initialize_sqlcipher_adbapi_db(opts, extra_queries=None):
- from leap.soledad.client import sqlcipher_adbapi
- return sqlcipher_adbapi.getConnectionPool(
- opts, extra_queries=extra_queries)
-
-
-class SQLCipherOptions(object):
- """
- A container with options for the initialization of an SQLCipher database.
- """
-
- @classmethod
- def copy(cls, source, path=None, key=None, create=None,
- is_raw_key=None, cipher=None, kdf_iter=None,
- cipher_page_size=None, sync_db_key=None):
- """
- Return a copy of C{source} with parameters different than None
- replaced by new values.
- """
- local_vars = locals()
- args = []
- kwargs = {}
-
- for name in ["path", "key"]:
- val = local_vars[name]
- if val is not None:
- args.append(val)
- else:
- args.append(getattr(source, name))
-
- for name in ["create", "is_raw_key", "cipher", "kdf_iter",
- "cipher_page_size", "sync_db_key"]:
- val = local_vars[name]
- if val is not None:
- kwargs[name] = val
- else:
- kwargs[name] = getattr(source, name)
-
- return SQLCipherOptions(*args, **kwargs)
-
- def __init__(self, path, key, create=True, is_raw_key=False,
- cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024,
- sync_db_key=None):
- """
- :param path: The filesystem path for the database to open.
- :type path: str
- :param create:
- True/False, should the database be created if it doesn't
- already exist?
- :param create: bool
- :param is_raw_key:
- Whether ``password`` is a raw 64-char hex string or a passphrase
- that should be hashed to obtain the encyrption key.
- :type raw_key: bool
- :param cipher: The cipher and mode to use.
- :type cipher: str
- :param kdf_iter: The number of iterations to use.
- :type kdf_iter: int
- :param cipher_page_size: The page size.
- :type cipher_page_size: int
- """
- self.path = path
- self.key = key
- self.is_raw_key = is_raw_key
- self.create = create
- self.cipher = cipher
- self.kdf_iter = kdf_iter
- self.cipher_page_size = cipher_page_size
- self.sync_db_key = sync_db_key
-
- def __str__(self):
- """
- Return string representation of options, for easy debugging.
-
- :return: String representation of options.
- :rtype: str
- """
- attr_names = filter(lambda a: not a.startswith('_'), dir(self))
- attr_str = []
- for a in attr_names:
- attr_str.append(a + "=" + str(getattr(self, a)))
- name = self.__class__.__name__
- return "%s(%s)" % (name, ', '.join(attr_str))
-
-
-#
-# The SQLCipher database
-#
-
-class SQLCipherDatabase(sqlite.SQLitePartialExpandDatabase):
- """
- A U1DB implementation that uses SQLCipher as its persistence layer.
- """
-
- # The attribute _index_storage_value will be used as the lookup key for the
- # implementation of the SQLCipher storage backend.
- _index_storage_value = 'expand referenced encrypted'
-
- def __init__(self, opts):
- """
- Connect to an existing SQLCipher database, creating a new sqlcipher
- database file if needed.
-
- *** IMPORTANT ***
-
- Don't forget to close the database after use by calling the close()
- method otherwise some resources might not be freed and you may
- experience several kinds of leakages.
-
- *** IMPORTANT ***
-
- :param opts: options for initialization of the SQLCipher database.
- :type opts: SQLCipherOptions
- """
- # ensure the db is encrypted if the file already exists
- if os.path.isfile(opts.path):
- self._db_handle = _assert_db_is_encrypted(opts)
- else:
- # connect to the sqlcipher database
- self._db_handle = initialize_sqlcipher_db(opts)
-
- # TODO ---------------------------------------------------
- # Everything else in this initialization has to be factored
- # out, so it can be used from SoledadSQLCipherWrapper.__init__
- # too.
- # ---------------------------------------------------------
-
- self._ensure_schema()
- self.set_document_factory(doc_factory)
- self._prime_replica_uid()
-
- def _prime_replica_uid(self):
- """
- In the u1db implementation, _replica_uid is a property
- that returns the value in _real_replica_uid, and does
- a db query if no value found.
- Here we prime the replica uid during initialization so
- that we don't have to wait for the query afterwards.
- """
- self._real_replica_uid = None
- self._get_replica_uid()
-
- def _extra_schema_init(self, c):
- """
- Add any extra fields, etc to the basic table definitions.
-
- This method is called by u1db.backends.sqlite_backend._initialize()
- method, which is executed when the database schema is created. Here,
- we use it to include the "syncable" property for LeapDocuments.
-
- :param c: The cursor for querying the database.
- :type c: dbapi2.cursor
- """
- c.execute(
- 'ALTER TABLE document '
- 'ADD COLUMN syncable BOOL NOT NULL DEFAULT TRUE')
-
- #
- # SQLCipher API methods
- #
-
- # Extra query methods: extensions to the base u1db sqlite implmentation.
-
- def get_count_from_index(self, index_name, *key_values):
- """
- Return the count for a given combination of index_name
- and key values.
-
- Extension method made from similar methods in u1db version 13.09
-
- :param index_name: The index to query
- :type index_name: str
- :param key_values: values to match. eg, if you have
- an index with 3 fields then you would have:
- get_from_index(index_name, val1, val2, val3)
- :type key_values: tuple
- :return: count.
- :rtype: int
- """
- c = self._db_handle.cursor()
- definition = self._get_index_definition(index_name)
-
- if len(key_values) != len(definition):
- raise u1db_errors.InvalidValueForIndex()
- tables = ["document_fields d%d" % i for i in range(len(definition))]
- novalue_where = ["d.doc_id = d%d.doc_id"
- " AND d%d.field_name = ?"
- % (i, i) for i in range(len(definition))]
- exact_where = [novalue_where[i] + (" AND d%d.value = ?" % (i,))
- for i in range(len(definition))]
- args = []
- where = []
- for idx, (field, value) in enumerate(zip(definition, key_values)):
- args.append(field)
- where.append(exact_where[idx])
- args.append(value)
-
- tables = ["document_fields d%d" % i for i in range(len(definition))]
- statement = (
- "SELECT COUNT(*) FROM document d, %s WHERE %s " % (
- ', '.join(tables),
- ' AND '.join(where),
- ))
- try:
- c.execute(statement, tuple(args))
- except sqlcipher_dbapi2.OperationalError as e:
- raise sqlcipher_dbapi2.OperationalError(
- str(e) + '\nstatement: %s\nargs: %s\n' % (statement, args))
- res = c.fetchall()
- return res[0][0]
-
- def close(self):
- """
- Close db connections.
- """
- # TODO should be handled by adbapi instead
- # TODO syncdb should be stopped first
-
- if logger is not None: # logger might be none if called from __del__
- logger.debug("SQLCipher backend: closing")
-
- # close the actual database
- if getattr(self, '_db_handle', False):
- self._db_handle.close()
- self._db_handle = None
-
- # indexes
-
- def _put_and_update_indexes(self, old_doc, doc):
- """
- Update a document and all indexes related to it.
-
- :param old_doc: The old version of the document.
- :type old_doc: u1db.Document
- :param doc: The new version of the document.
- :type doc: u1db.Document
- """
- sqlite.SQLitePartialExpandDatabase._put_and_update_indexes(
- self, old_doc, doc)
- c = self._db_handle.cursor()
- c.execute('UPDATE document SET syncable=? WHERE doc_id=?',
- (doc.syncable, doc.doc_id))
-
- def _get_doc(self, doc_id, check_for_conflicts=False):
- """
- Get just the document content, without fancy handling.
-
- :param doc_id: The unique document identifier
- :type doc_id: str
- :param include_deleted: If set to True, deleted documents will be
- returned with empty content. Otherwise asking for a deleted
- document will return None.
- :type include_deleted: bool
-
- :return: a Document object.
- :type: u1db.Document
- """
- doc = sqlite.SQLitePartialExpandDatabase._get_doc(
- self, doc_id, check_for_conflicts)
- if doc:
- c = self._db_handle.cursor()
- c.execute('SELECT syncable FROM document WHERE doc_id=?',
- (doc.doc_id,))
- result = c.fetchone()
- doc.syncable = bool(result[0])
- return doc
-
- def __del__(self):
- """
- Free resources when deleting or garbage collecting the database.
-
- This is only here to minimze problems if someone ever forgets to call
- the close() method after using the database; you should not rely on
- garbage collecting to free up the database resources.
- """
- self.close()
-
-
-class SQLCipherU1DBSync(SQLCipherDatabase):
- """
- Soledad syncer implementation.
- """
-
- """
- The name of the local symmetrically encrypted documents to
- sync database file.
- """
- LOCAL_SYMMETRIC_SYNC_FILE_NAME = 'sync.u1db'
-
- """
- Period or recurrence of the Looping Call that will do the encryption to the
- syncdb (in seconds).
- """
- ENCRYPT_LOOP_PERIOD = 1
-
- def __init__(self, opts, soledad_crypto, replica_uid, cert_file):
- self._opts = opts
- self._path = opts.path
- self._crypto = soledad_crypto
- self.__replica_uid = replica_uid
- self._cert_file = cert_file
-
- # storage for the documents received during a sync
- self.received_docs = []
-
- self.running = False
- self._db_handle = None
-
- # initialize the main db before scheduling a start
- self._initialize_main_db()
- self._reactor = reactor
- self._reactor.callWhenRunning(self._start)
-
- if DO_STATS:
- self.sync_phase = None
-
- def commit(self):
- self._db_handle.commit()
-
- @property
- def _replica_uid(self):
- return str(self.__replica_uid)
-
- def _start(self):
- if not self.running:
- self.running = True
-
- def _initialize_main_db(self):
- try:
- self._db_handle = initialize_sqlcipher_db(
- self._opts, check_same_thread=False)
- self._real_replica_uid = None
- self._ensure_schema()
- self.set_document_factory(doc_factory)
- except sqlcipher_dbapi2.DatabaseError as e:
- raise DatabaseAccessError(str(e))
-
- @defer.inlineCallbacks
- def sync(self, url, creds=None):
- """
- Synchronize documents with remote replica exposed at url.
-
- It is not safe to initiate more than one sync process and let them run
- concurrently. It is responsibility of the caller to ensure that there
- are no concurrent sync processes running. This is currently controlled
- by the main Soledad object because it may also run post-sync hooks,
- which should be run while the lock is locked.
-
- :param url: The url of the target replica to sync with.
- :type url: str
- :param creds: optional dictionary giving credentials to authorize the
- operation with the server.
- :type creds: dict
-
- :return:
- A Deferred, that will fire with the local generation (type `int`)
- before the synchronisation was performed.
- :rtype: Deferred
- """
- syncer = self._get_syncer(url, creds=creds)
- if DO_STATS:
- self.sync_phase = syncer.sync_phase
- self.syncer = syncer
- self.sync_exchange_phase = syncer.sync_exchange_phase
- local_gen_before_sync = yield syncer.sync()
- self.received_docs = syncer.received_docs
- defer.returnValue(local_gen_before_sync)
-
- def _get_syncer(self, url, creds=None):
- """
- Get a synchronizer for ``url`` using ``creds``.
-
- :param url: The url of the target replica to sync with.
- :type url: str
- :param creds: optional dictionary giving credentials.
- to authorize the operation with the server.
- :type creds: dict
-
- :return: A synchronizer.
- :rtype: Synchronizer
- """
- return SoledadSynchronizer(
- self,
- SoledadHTTPSyncTarget(
- url,
- # XXX is the replica_uid ready?
- self._replica_uid,
- creds=creds,
- crypto=self._crypto,
- cert_file=self._cert_file))
-
- #
- # Symmetric encryption of syncing docs
- #
-
- def get_generation(self):
- # FIXME
- # XXX this SHOULD BE a callback
- return self._get_generation()
-
-
-class U1DBSQLiteBackend(sqlite.SQLitePartialExpandDatabase):
- """
- A very simple wrapper for u1db around sqlcipher backend.
-
- Instead of initializing the database on the fly, it just uses an existing
- connection that is passed to it in the initializer.
-
- It can be used in tests and debug runs to initialize the adbapi with plain
- sqlite connections, decoupled from the sqlcipher layer.
- """
-
- def __init__(self, conn):
- self._db_handle = conn
- self._real_replica_uid = None
- self._ensure_schema()
- self._factory = Document
-
-
-class SoledadSQLCipherWrapper(SQLCipherDatabase):
- """
- A wrapper for u1db that uses the Soledad-extended sqlcipher backend.
-
- Instead of initializing the database on the fly, it just uses an existing
- connection that is passed to it in the initializer.
-
- It can be used from adbapi to initialize a soledad database after
- getting a regular connection to a sqlcipher database.
- """
- def __init__(self, conn, opts):
- self._db_handle = conn
- self._real_replica_uid = None
- self._ensure_schema()
- self.set_document_factory(doc_factory)
- self._prime_replica_uid()
-
-
-def _assert_db_is_encrypted(opts):
- """
- Assert that the sqlcipher file contains an encrypted database.
-
- When opening an existing database, PRAGMA key will not immediately
- throw an error if the key provided is incorrect. To test that the
- database can be successfully opened with the provided key, it is
- necessary to perform some operation on the database (i.e. read from
- it) and confirm it is success.
-
- The easiest way to do this is select off the sqlite_master table,
- which will attempt to read the first page of the database and will
- parse the schema.
-
- :param opts:
- """
- # We try to open an encrypted database with the regular u1db
- # backend should raise a DatabaseError exception.
- # If the regular backend succeeds, then we need to stop because
- # the database was not properly initialized.
- try:
- sqlite.SQLitePartialExpandDatabase(opts.path)
- except sqlcipher_dbapi2.DatabaseError:
- # assert that we can access it using SQLCipher with the given
- # key
- dummy_query = ('SELECT count(*) FROM sqlite_master',)
- return initialize_sqlcipher_db(opts, on_init=dummy_query)
- else:
- raise DatabaseIsNotEncrypted()
-
-#
-# Exceptions
-#
-
-
-class DatabaseIsNotEncrypted(Exception):
- """
- Exception raised when trying to open non-encrypted databases.
- """
- pass
-
-
-def doc_factory(doc_id=None, rev=None, json='{}', has_conflicts=False,
- syncable=True):
- """
- Return a default Soledad Document.
- Used in the initialization for SQLCipherDatabase
- """
- return Document(doc_id=doc_id, rev=rev, json=json,
- has_conflicts=has_conflicts, syncable=syncable)
-
-
-sqlite.SQLiteDatabase.register_implementation(SQLCipherDatabase)
-
-
-#
-# twisted.enterprise.adbapi SQLCipher implementation
-#
-
-SQLCIPHER_CONNECTION_TIMEOUT = 10
-
-
-def getConnectionPool(opts, extra_queries=None):
- openfun = partial(
- pragmas.set_init_pragmas,
- opts=opts,
- extra_queries=extra_queries)
- return SQLCipherConnectionPool(
- database=opts.path,
- check_same_thread=False,
- cp_openfun=openfun,
- timeout=SQLCIPHER_CONNECTION_TIMEOUT)
-
-
-class SQLCipherConnection(adbapi.Connection):
- pass
-
-
-class SQLCipherTransaction(adbapi.Transaction):
- pass
-
-
-class SQLCipherConnectionPool(adbapi.ConnectionPool):
-
- connectionFactory = SQLCipherConnection
- transactionFactory = SQLCipherTransaction
-
- def __init__(self, *args, **kwargs):
- adbapi.ConnectionPool.__init__(
- self, "pysqlcipher.dbapi2", *args, **kwargs)
diff --git a/client/src/leap/soledad/client/_db/sqlite.py b/client/src/leap/soledad/client/_db/sqlite.py
deleted file mode 100644
index 4f7b1259..00000000
--- a/client/src/leap/soledad/client/_db/sqlite.py
+++ /dev/null
@@ -1,930 +0,0 @@
-# Copyright 2011 Canonical Ltd.
-# Copyright 2016 LEAP Encryption Access Project
-#
-# This file is part of u1db.
-#
-# u1db is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License version 3
-# as published by the Free Software Foundation.
-#
-# u1db is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with u1db. If not, see <http://www.gnu.org/licenses/>.
-
-"""
-A L2DB implementation that uses SQLite as its persistence layer.
-"""
-
-import errno
-import os
-import json
-import sys
-import time
-import uuid
-import pkg_resources
-
-from sqlite3 import dbapi2
-
-from leap.soledad.common.l2db.backends import CommonBackend, CommonSyncTarget
-from leap.soledad.common.l2db import (
- Document, errors,
- query_parser, vectorclock)
-
-
-class SQLiteDatabase(CommonBackend):
- """A U1DB implementation that uses SQLite as its persistence layer."""
-
- _sqlite_registry = {}
-
- def __init__(self, sqlite_file, document_factory=None):
- """Create a new sqlite file."""
- self._db_handle = dbapi2.connect(sqlite_file)
- self._real_replica_uid = None
- self._ensure_schema()
- self._factory = document_factory or Document
-
- def set_document_factory(self, factory):
- self._factory = factory
-
- def get_sync_target(self):
- return SQLiteSyncTarget(self)
-
- @classmethod
- def _which_index_storage(cls, c):
- try:
- c.execute("SELECT value FROM u1db_config"
- " WHERE name = 'index_storage'")
- except dbapi2.OperationalError as e:
- # The table does not exist yet
- return None, e
- else:
- return c.fetchone()[0], None
-
- WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL = 0.5
-
- @classmethod
- def _open_database(cls, sqlite_file, document_factory=None):
- if not os.path.isfile(sqlite_file):
- raise errors.DatabaseDoesNotExist()
- tries = 2
- while True:
- # Note: There seems to be a bug in sqlite 3.5.9 (with python2.6)
- # where without re-opening the database on Windows, it
- # doesn't see the transaction that was just committed
- db_handle = dbapi2.connect(sqlite_file)
- c = db_handle.cursor()
- v, err = cls._which_index_storage(c)
- db_handle.close()
- if v is not None:
- break
- # possibly another process is initializing it, wait for it to be
- # done
- if tries == 0:
- raise err # go for the richest error?
- tries -= 1
- time.sleep(cls.WAIT_FOR_PARALLEL_INIT_HALF_INTERVAL)
- return SQLiteDatabase._sqlite_registry[v](
- sqlite_file, document_factory=document_factory)
-
- @classmethod
- def open_database(cls, sqlite_file, create, backend_cls=None,
- document_factory=None):
- try:
- return cls._open_database(
- sqlite_file, document_factory=document_factory)
- except errors.DatabaseDoesNotExist:
- if not create:
- raise
- if backend_cls is None:
- # default is SQLitePartialExpandDatabase
- backend_cls = SQLitePartialExpandDatabase
- return backend_cls(sqlite_file, document_factory=document_factory)
-
- @staticmethod
- def delete_database(sqlite_file):
- try:
- os.unlink(sqlite_file)
- except OSError as ex:
- if ex.errno == errno.ENOENT:
- raise errors.DatabaseDoesNotExist()
- raise
-
- @staticmethod
- def register_implementation(klass):
- """Register that we implement an SQLiteDatabase.
-
- The attribute _index_storage_value will be used as the lookup key.
- """
- SQLiteDatabase._sqlite_registry[klass._index_storage_value] = klass
-
- def _get_sqlite_handle(self):
- """Get access to the underlying sqlite database.
-
- This should only be used by the test suite, etc, for examining the
- state of the underlying database.
- """
- return self._db_handle
-
- def _close_sqlite_handle(self):
- """Release access to the underlying sqlite database."""
- self._db_handle.close()
-
- def close(self):
- self._close_sqlite_handle()
-
- def _is_initialized(self, c):
- """Check if this database has been initialized."""
- c.execute("PRAGMA case_sensitive_like=ON")
- try:
- c.execute("SELECT value FROM u1db_config"
- " WHERE name = 'sql_schema'")
- except dbapi2.OperationalError:
- # The table does not exist yet
- val = None
- else:
- val = c.fetchone()
- if val is not None:
- return True
- return False
-
- def _initialize(self, c):
- """Create the schema in the database."""
- # read the script with sql commands
- # TODO: Change how we set up the dependency. Most likely use something
- # like lp:dirspec to grab the file from a common resource
- # directory. Doesn't specifically need to be handled until we get
- # to the point of packaging this.
- schema_content = pkg_resources.resource_string(
- __name__, 'dbschema.sql')
- # Note: We'd like to use c.executescript() here, but it seems that
- # executescript always commits, even if you set
- # isolation_level = None, so if we want to properly handle
- # exclusive locking and rollbacks between processes, we need
- # to execute it line-by-line
- for line in schema_content.split(';'):
- if not line:
- continue
- c.execute(line)
- # add extra fields
- self._extra_schema_init(c)
- # A unique identifier should be set for this replica. Implementations
- # don't have to strictly use uuid here, but we do want the uid to be
- # unique amongst all databases that will sync with each other.
- # We might extend this to using something with hostname for easier
- # debugging.
- self._set_replica_uid_in_transaction(uuid.uuid4().hex)
- c.execute("INSERT INTO u1db_config VALUES" " ('index_storage', ?)",
- (self._index_storage_value,))
-
- def _ensure_schema(self):
- """Ensure that the database schema has been created."""
- old_isolation_level = self._db_handle.isolation_level
- c = self._db_handle.cursor()
- if self._is_initialized(c):
- return
- try:
- # autocommit/own mgmt of transactions
- self._db_handle.isolation_level = None
- with self._db_handle:
- # only one execution path should initialize the db
- c.execute("begin exclusive")
- if self._is_initialized(c):
- return
- self._initialize(c)
- finally:
- self._db_handle.isolation_level = old_isolation_level
-
- def _extra_schema_init(self, c):
- """Add any extra fields, etc to the basic table definitions."""
-
- def _parse_index_definition(self, index_field):
- """Parse a field definition for an index, returning a Getter."""
- # Note: We may want to keep a Parser object around, and cache the
- # Getter objects for a greater length of time. Specifically, if
- # you create a bunch of indexes, and then insert 50k docs, you'll
- # re-parse the indexes between puts. The time to insert the docs
- # is still likely to dominate put_doc time, though.
- parser = query_parser.Parser()
- getter = parser.parse(index_field)
- return getter
-
- def _update_indexes(self, doc_id, raw_doc, getters, db_cursor):
- """Update document_fields for a single document.
-
- :param doc_id: Identifier for this document
- :param raw_doc: The python dict representation of the document.
- :param getters: A list of [(field_name, Getter)]. Getter.get will be
- called to evaluate the index definition for this document, and the
- results will be inserted into the db.
- :param db_cursor: An sqlite Cursor.
- :return: None
- """
- values = []
- for field_name, getter in getters:
- for idx_value in getter.get(raw_doc):
- values.append((doc_id, field_name, idx_value))
- if values:
- db_cursor.executemany(
- "INSERT INTO document_fields VALUES (?, ?, ?)", values)
-
- def _set_replica_uid(self, replica_uid):
- """Force the replica_uid to be set."""
- with self._db_handle:
- self._set_replica_uid_in_transaction(replica_uid)
-
- def _set_replica_uid_in_transaction(self, replica_uid):
- """Set the replica_uid. A transaction should already be held."""
- c = self._db_handle.cursor()
- c.execute("INSERT OR REPLACE INTO u1db_config"
- " VALUES ('replica_uid', ?)",
- (replica_uid,))
- self._real_replica_uid = replica_uid
-
- def _get_replica_uid(self):
- if self._real_replica_uid is not None:
- return self._real_replica_uid
- c = self._db_handle.cursor()
- c.execute("SELECT value FROM u1db_config WHERE name = 'replica_uid'")
- val = c.fetchone()
- if val is None:
- return None
- self._real_replica_uid = val[0]
- return self._real_replica_uid
-
- _replica_uid = property(_get_replica_uid)
-
- def _get_generation(self):
- c = self._db_handle.cursor()
- c.execute('SELECT max(generation) FROM transaction_log')
- val = c.fetchone()[0]
- if val is None:
- return 0
- return val
-
- def _get_generation_info(self):
- c = self._db_handle.cursor()
- c.execute(
- 'SELECT max(generation), transaction_id FROM transaction_log ')
- val = c.fetchone()
- if val[0] is None:
- return(0, '')
- return val
-
- def _get_trans_id_for_gen(self, generation):
- if generation == 0:
- return ''
- c = self._db_handle.cursor()
- c.execute(
- 'SELECT transaction_id FROM transaction_log WHERE generation = ?',
- (generation,))
- val = c.fetchone()
- if val is None:
- raise errors.InvalidGeneration
- return val[0]
-
- def _get_transaction_log(self):
- c = self._db_handle.cursor()
- c.execute("SELECT doc_id, transaction_id FROM transaction_log"
- " ORDER BY generation")
- return c.fetchall()
-
- def _get_doc(self, doc_id, check_for_conflicts=False):
- """Get just the document content, without fancy handling."""
- c = self._db_handle.cursor()
- if check_for_conflicts:
- c.execute(
- "SELECT document.doc_rev, document.content, "
- "count(conflicts.doc_rev) FROM document LEFT OUTER JOIN "
- "conflicts ON conflicts.doc_id = document.doc_id WHERE "
- "document.doc_id = ? GROUP BY document.doc_id, "
- "document.doc_rev, document.content;", (doc_id,))
- else:
- c.execute(
- "SELECT doc_rev, content, 0 FROM document WHERE doc_id = ?",
- (doc_id,))
- val = c.fetchone()
- if val is None:
- return None
- doc_rev, content, conflicts = val
- doc = self._factory(doc_id, doc_rev, content)
- doc.has_conflicts = conflicts > 0
- return doc
-
- def _has_conflicts(self, doc_id):
- c = self._db_handle.cursor()
- c.execute("SELECT 1 FROM conflicts WHERE doc_id = ? LIMIT 1",
- (doc_id,))
- val = c.fetchone()
- if val is None:
- return False
- else:
- return True
-
- def get_doc(self, doc_id, include_deleted=False):
- doc = self._get_doc(doc_id, check_for_conflicts=True)
- if doc is None:
- return None
- if doc.is_tombstone() and not include_deleted:
- return None
- return doc
-
- def get_all_docs(self, include_deleted=False):
- """Get all documents from the database."""
- generation = self._get_generation()
- results = []
- c = self._db_handle.cursor()
- c.execute(
- "SELECT document.doc_id, document.doc_rev, document.content, "
- "count(conflicts.doc_rev) FROM document LEFT OUTER JOIN conflicts "
- "ON conflicts.doc_id = document.doc_id GROUP BY document.doc_id, "
- "document.doc_rev, document.content;")
- rows = c.fetchall()
- for doc_id, doc_rev, content, conflicts in rows:
- if content is None and not include_deleted:
- continue
- doc = self._factory(doc_id, doc_rev, content)
- doc.has_conflicts = conflicts > 0
- results.append(doc)
- return (generation, results)
-
- def put_doc(self, doc):
- if doc.doc_id is None:
- raise errors.InvalidDocId()
- self._check_doc_id(doc.doc_id)
- self._check_doc_size(doc)
- with self._db_handle:
- old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True)
- if old_doc and old_doc.has_conflicts:
- raise errors.ConflictedDoc()
- if old_doc and doc.rev is None and old_doc.is_tombstone():
- new_rev = self._allocate_doc_rev(old_doc.rev)
- else:
- if old_doc is not None:
- if old_doc.rev != doc.rev:
- raise errors.RevisionConflict()
- else:
- if doc.rev is not None:
- raise errors.RevisionConflict()
- new_rev = self._allocate_doc_rev(doc.rev)
- doc.rev = new_rev
- self._put_and_update_indexes(old_doc, doc)
- return new_rev
-
- def _expand_to_fields(self, doc_id, base_field, raw_doc, save_none):
- """Convert a dict representation into named fields.
-
- So something like: {'key1': 'val1', 'key2': 'val2'}
- gets converted into: [(doc_id, 'key1', 'val1', 0)
- (doc_id, 'key2', 'val2', 0)]
- :param doc_id: Just added to every record.
- :param base_field: if set, these are nested keys, so each field should
- be appropriately prefixed.
- :param raw_doc: The python dictionary.
- """
- # TODO: Handle lists
- values = []
- for field_name, value in raw_doc.iteritems():
- if value is None and not save_none:
- continue
- if base_field:
- full_name = base_field + '.' + field_name
- else:
- full_name = field_name
- if value is None or isinstance(value, (int, float, basestring)):
- values.append((doc_id, full_name, value, len(values)))
- else:
- subvalues = self._expand_to_fields(doc_id, full_name, value,
- save_none)
- for _, subfield_name, val, _ in subvalues:
- values.append((doc_id, subfield_name, val, len(values)))
- return values
-
- def _put_and_update_indexes(self, old_doc, doc):
- """Actually insert a document into the database.
-
- This both updates the existing documents content, and any indexes that
- refer to this document.
- """
- raise NotImplementedError(self._put_and_update_indexes)
-
- def whats_changed(self, old_generation=0):
- c = self._db_handle.cursor()
- c.execute("SELECT generation, doc_id, transaction_id"
- " FROM transaction_log"
- " WHERE generation > ? ORDER BY generation DESC",
- (old_generation,))
- results = c.fetchall()
- cur_gen = old_generation
- seen = set()
- changes = []
- newest_trans_id = ''
- for generation, doc_id, trans_id in results:
- if doc_id not in seen:
- changes.append((doc_id, generation, trans_id))
- seen.add(doc_id)
- if changes:
- cur_gen = changes[0][1] # max generation
- newest_trans_id = changes[0][2]
- changes.reverse()
- else:
- c.execute("SELECT generation, transaction_id"
- " FROM transaction_log ORDER BY generation DESC LIMIT 1")
- results = c.fetchone()
- if not results:
- cur_gen = 0
- newest_trans_id = ''
- else:
- cur_gen, newest_trans_id = results
-
- return cur_gen, newest_trans_id, changes
-
- def delete_doc(self, doc):
- with self._db_handle:
- old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True)
- if old_doc is None:
- raise errors.DocumentDoesNotExist
- if old_doc.rev != doc.rev:
- raise errors.RevisionConflict()
- if old_doc.is_tombstone():
- raise errors.DocumentAlreadyDeleted
- if old_doc.has_conflicts:
- raise errors.ConflictedDoc()
- new_rev = self._allocate_doc_rev(doc.rev)
- doc.rev = new_rev
- doc.make_tombstone()
- self._put_and_update_indexes(old_doc, doc)
- return new_rev
-
- def _get_conflicts(self, doc_id):
- c = self._db_handle.cursor()
- c.execute("SELECT doc_rev, content FROM conflicts WHERE doc_id = ?",
- (doc_id,))
- return [self._factory(doc_id, doc_rev, content)
- for doc_rev, content in c.fetchall()]
-
- def get_doc_conflicts(self, doc_id):
- with self._db_handle:
- conflict_docs = self._get_conflicts(doc_id)
- if not conflict_docs:
- return []
- this_doc = self._get_doc(doc_id)
- this_doc.has_conflicts = True
- return [this_doc] + conflict_docs
-
- def _get_replica_gen_and_trans_id(self, other_replica_uid):
- c = self._db_handle.cursor()
- c.execute("SELECT known_generation, known_transaction_id FROM sync_log"
- " WHERE replica_uid = ?",
- (other_replica_uid,))
- val = c.fetchone()
- if val is None:
- other_gen = 0
- trans_id = ''
- else:
- other_gen = val[0]
- trans_id = val[1]
- return other_gen, trans_id
-
- def _set_replica_gen_and_trans_id(self, other_replica_uid,
- other_generation, other_transaction_id):
- with self._db_handle:
- self._do_set_replica_gen_and_trans_id(
- other_replica_uid, other_generation, other_transaction_id)
-
- def _do_set_replica_gen_and_trans_id(self, other_replica_uid,
- other_generation,
- other_transaction_id):
- c = self._db_handle.cursor()
- c.execute("INSERT OR REPLACE INTO sync_log VALUES (?, ?, ?)",
- (other_replica_uid, other_generation,
- other_transaction_id))
-
- def _put_doc_if_newer(self, doc, save_conflict, replica_uid=None,
- replica_gen=None, replica_trans_id=None):
- return super(SQLiteDatabase, self)._put_doc_if_newer(
- doc,
- save_conflict=save_conflict,
- replica_uid=replica_uid, replica_gen=replica_gen,
- replica_trans_id=replica_trans_id)
-
- def _add_conflict(self, c, doc_id, my_doc_rev, my_content):
- c.execute("INSERT INTO conflicts VALUES (?, ?, ?)",
- (doc_id, my_doc_rev, my_content))
-
- def _delete_conflicts(self, c, doc, conflict_revs):
- deleting = [(doc.doc_id, c_rev) for c_rev in conflict_revs]
- c.executemany("DELETE FROM conflicts"
- " WHERE doc_id=? AND doc_rev=?", deleting)
- doc.has_conflicts = self._has_conflicts(doc.doc_id)
-
- def _prune_conflicts(self, doc, doc_vcr):
- if self._has_conflicts(doc.doc_id):
- autoresolved = False
- c_revs_to_prune = []
- for c_doc in self._get_conflicts(doc.doc_id):
- c_vcr = vectorclock.VectorClockRev(c_doc.rev)
- if doc_vcr.is_newer(c_vcr):
- c_revs_to_prune.append(c_doc.rev)
- elif doc.same_content_as(c_doc):
- c_revs_to_prune.append(c_doc.rev)
- doc_vcr.maximize(c_vcr)
- autoresolved = True
- if autoresolved:
- doc_vcr.increment(self._replica_uid)
- doc.rev = doc_vcr.as_str()
- c = self._db_handle.cursor()
- self._delete_conflicts(c, doc, c_revs_to_prune)
-
- def _force_doc_sync_conflict(self, doc):
- my_doc = self._get_doc(doc.doc_id)
- c = self._db_handle.cursor()
- self._prune_conflicts(doc, vectorclock.VectorClockRev(doc.rev))
- self._add_conflict(c, doc.doc_id, my_doc.rev, my_doc.get_json())
- doc.has_conflicts = True
- self._put_and_update_indexes(my_doc, doc)
-
- def resolve_doc(self, doc, conflicted_doc_revs):
- with self._db_handle:
- cur_doc = self._get_doc(doc.doc_id)
- # TODO: https://bugs.launchpad.net/u1db/+bug/928274
- # I think we have a logic bug in resolve_doc
- # Specifically, cur_doc.rev is always in the final vector
- # clock of revisions that we supersede, even if it wasn't in
- # conflicted_doc_revs. We still add it as a conflict, but the
- # fact that _put_doc_if_newer propagates resolutions means I
- # think that conflict could accidentally be resolved. We need
- # to add a test for this case first. (create a rev, create a
- # conflict, create another conflict, resolve the first rev
- # and first conflict, then make sure that the resolved
- # rev doesn't supersede the second conflict rev.) It *might*
- # not matter, because the superseding rev is in as a
- # conflict, but it does seem incorrect
- new_rev = self._ensure_maximal_rev(cur_doc.rev,
- conflicted_doc_revs)
- superseded_revs = set(conflicted_doc_revs)
- c = self._db_handle.cursor()
- doc.rev = new_rev
- if cur_doc.rev in superseded_revs:
- self._put_and_update_indexes(cur_doc, doc)
- else:
- self._add_conflict(c, doc.doc_id, new_rev, doc.get_json())
- # TODO: Is there some way that we could construct a rev that would
- # end up in superseded_revs, such that we add a conflict, and
- # then immediately delete it?
- self._delete_conflicts(c, doc, superseded_revs)
-
- def list_indexes(self):
- """Return the list of indexes and their definitions."""
- c = self._db_handle.cursor()
- # TODO: How do we test the ordering?
- c.execute("SELECT name, field FROM index_definitions"
- " ORDER BY name, offset")
- definitions = []
- cur_name = None
- for name, field in c.fetchall():
- if cur_name != name:
- definitions.append((name, []))
- cur_name = name
- definitions[-1][-1].append(field)
- return definitions
-
- def _get_index_definition(self, index_name):
- """Return the stored definition for a given index_name."""
- c = self._db_handle.cursor()
- c.execute("SELECT field FROM index_definitions"
- " WHERE name = ? ORDER BY offset", (index_name,))
- fields = [x[0] for x in c.fetchall()]
- if not fields:
- raise errors.IndexDoesNotExist
- return fields
-
- @staticmethod
- def _strip_glob(value):
- """Remove the trailing * from a value."""
- assert value[-1] == '*'
- return value[:-1]
-
- def _format_query(self, definition, key_values):
- # First, build the definition. We join the document_fields table
- # against itself, as many times as the 'width' of our definition.
- # We then do a query for each key_value, one-at-a-time.
- # Note: All of these strings are static, we could cache them, etc.
- tables = ["document_fields d%d" % i for i in range(len(definition))]
- novalue_where = ["d.doc_id = d%d.doc_id"
- " AND d%d.field_name = ?"
- % (i, i) for i in range(len(definition))]
- wildcard_where = [novalue_where[i] +
- (" AND d%d.value NOT NULL" % (i,))
- for i in range(len(definition))]
- exact_where = [novalue_where[i] +
- (" AND d%d.value = ?" % (i,))
- for i in range(len(definition))]
- like_where = [novalue_where[i] +
- (" AND d%d.value GLOB ?" % (i,))
- for i in range(len(definition))]
- is_wildcard = False
- # Merge the lists together, so that:
- # [field1, field2, field3], [val1, val2, val3]
- # Becomes:
- # (field1, val1, field2, val2, field3, val3)
- args = []
- where = []
- for idx, (field, value) in enumerate(zip(definition, key_values)):
- args.append(field)
- if value.endswith('*'):
- if value == '*':
- where.append(wildcard_where[idx])
- else:
- # This is a glob match
- if is_wildcard:
- # We can't have a partial wildcard following
- # another wildcard
- raise errors.InvalidGlobbing
- where.append(like_where[idx])
- args.append(value)
- is_wildcard = True
- else:
- if is_wildcard:
- raise errors.InvalidGlobbing
- where.append(exact_where[idx])
- args.append(value)
- statement = (
- "SELECT d.doc_id, d.doc_rev, d.content, count(c.doc_rev) FROM "
- "document d, %s LEFT OUTER JOIN conflicts c ON c.doc_id = "
- "d.doc_id WHERE %s GROUP BY d.doc_id, d.doc_rev, d.content ORDER "
- "BY %s;" % (', '.join(tables), ' AND '.join(where), ', '.join(
- ['d%d.value' % i for i in range(len(definition))])))
- return statement, args
-
- def get_from_index(self, index_name, *key_values):
- definition = self._get_index_definition(index_name)
- if len(key_values) != len(definition):
- raise errors.InvalidValueForIndex()
- statement, args = self._format_query(definition, key_values)
- c = self._db_handle.cursor()
- try:
- c.execute(statement, tuple(args))
- except dbapi2.OperationalError as e:
- raise dbapi2.OperationalError(
- str(e) +
- '\nstatement: %s\nargs: %s\n' % (statement, args))
- res = c.fetchall()
- results = []
- for row in res:
- doc = self._factory(row[0], row[1], row[2])
- doc.has_conflicts = row[3] > 0
- results.append(doc)
- return results
-
- def _format_range_query(self, definition, start_value, end_value):
- tables = ["document_fields d%d" % i for i in range(len(definition))]
- novalue_where = [
- "d.doc_id = d%d.doc_id AND d%d.field_name = ?" % (i, i) for i in
- range(len(definition))]
- wildcard_where = [
- novalue_where[i] + (" AND d%d.value NOT NULL" % (i,)) for i in
- range(len(definition))]
- like_where = [
- novalue_where[i] + (
- " AND (d%d.value < ? OR d%d.value GLOB ?)" % (i, i)) for i in
- range(len(definition))]
- range_where_lower = [
- novalue_where[i] + (" AND d%d.value >= ?" % (i,)) for i in
- range(len(definition))]
- range_where_upper = [
- novalue_where[i] + (" AND d%d.value <= ?" % (i,)) for i in
- range(len(definition))]
- args = []
- where = []
- if start_value:
- if isinstance(start_value, basestring):
- start_value = (start_value,)
- if len(start_value) != len(definition):
- raise errors.InvalidValueForIndex()
- is_wildcard = False
- for idx, (field, value) in enumerate(zip(definition, start_value)):
- args.append(field)
- if value.endswith('*'):
- if value == '*':
- where.append(wildcard_where[idx])
- else:
- # This is a glob match
- if is_wildcard:
- # We can't have a partial wildcard following
- # another wildcard
- raise errors.InvalidGlobbing
- where.append(range_where_lower[idx])
- args.append(self._strip_glob(value))
- is_wildcard = True
- else:
- if is_wildcard:
- raise errors.InvalidGlobbing
- where.append(range_where_lower[idx])
- args.append(value)
- if end_value:
- if isinstance(end_value, basestring):
- end_value = (end_value,)
- if len(end_value) != len(definition):
- raise errors.InvalidValueForIndex()
- is_wildcard = False
- for idx, (field, value) in enumerate(zip(definition, end_value)):
- args.append(field)
- if value.endswith('*'):
- if value == '*':
- where.append(wildcard_where[idx])
- else:
- # This is a glob match
- if is_wildcard:
- # We can't have a partial wildcard following
- # another wildcard
- raise errors.InvalidGlobbing
- where.append(like_where[idx])
- args.append(self._strip_glob(value))
- args.append(value)
- is_wildcard = True
- else:
- if is_wildcard:
- raise errors.InvalidGlobbing
- where.append(range_where_upper[idx])
- args.append(value)
- statement = (
- "SELECT d.doc_id, d.doc_rev, d.content, count(c.doc_rev) FROM "
- "document d, %s LEFT OUTER JOIN conflicts c ON c.doc_id = "
- "d.doc_id WHERE %s GROUP BY d.doc_id, d.doc_rev, d.content ORDER "
- "BY %s;" % (', '.join(tables), ' AND '.join(where), ', '.join(
- ['d%d.value' % i for i in range(len(definition))])))
- return statement, args
-
- def get_range_from_index(self, index_name, start_value=None,
- end_value=None):
- """Return all documents with key values in the specified range."""
- definition = self._get_index_definition(index_name)
- statement, args = self._format_range_query(
- definition, start_value, end_value)
- c = self._db_handle.cursor()
- try:
- c.execute(statement, tuple(args))
- except dbapi2.OperationalError as e:
- raise dbapi2.OperationalError(
- str(e) +
- '\nstatement: %s\nargs: %s\n' % (statement, args))
- res = c.fetchall()
- results = []
- for row in res:
- doc = self._factory(row[0], row[1], row[2])
- doc.has_conflicts = row[3] > 0
- results.append(doc)
- return results
-
- def get_index_keys(self, index_name):
- c = self._db_handle.cursor()
- definition = self._get_index_definition(index_name)
- value_fields = ', '.join([
- 'd%d.value' % i for i in range(len(definition))])
- tables = ["document_fields d%d" % i for i in range(len(definition))]
- novalue_where = [
- "d.doc_id = d%d.doc_id AND d%d.field_name = ?" % (i, i) for i in
- range(len(definition))]
- where = [
- novalue_where[i] + (" AND d%d.value NOT NULL" % (i,)) for i in
- range(len(definition))]
- statement = (
- "SELECT %s FROM document d, %s WHERE %s GROUP BY %s;" % (
- value_fields, ', '.join(tables), ' AND '.join(where),
- value_fields))
- try:
- c.execute(statement, tuple(definition))
- except dbapi2.OperationalError as e:
- raise dbapi2.OperationalError(
- str(e) +
- '\nstatement: %s\nargs: %s\n' % (statement, tuple(definition)))
- return c.fetchall()
-
- def delete_index(self, index_name):
- with self._db_handle:
- c = self._db_handle.cursor()
- c.execute("DELETE FROM index_definitions WHERE name = ?",
- (index_name,))
- c.execute(
- "DELETE FROM document_fields WHERE document_fields.field_name "
- " NOT IN (SELECT field from index_definitions)")
-
-
-class SQLiteSyncTarget(CommonSyncTarget):
-
- def get_sync_info(self, source_replica_uid):
- source_gen, source_trans_id = self._db._get_replica_gen_and_trans_id(
- source_replica_uid)
- my_gen, my_trans_id = self._db._get_generation_info()
- return (
- self._db._replica_uid, my_gen, my_trans_id, source_gen,
- source_trans_id)
-
- def record_sync_info(self, source_replica_uid, source_replica_generation,
- source_replica_transaction_id):
- if self._trace_hook:
- self._trace_hook('record_sync_info')
- self._db._set_replica_gen_and_trans_id(
- source_replica_uid, source_replica_generation,
- source_replica_transaction_id)
-
-
-class SQLitePartialExpandDatabase(SQLiteDatabase):
- """An SQLite Backend that expands documents into a document_field table.
-
- It stores the original document text in document.doc. For fields that are
- indexed, the data goes into document_fields.
- """
-
- _index_storage_value = 'expand referenced'
-
- def _get_indexed_fields(self):
- """Determine what fields are indexed."""
- c = self._db_handle.cursor()
- c.execute("SELECT field FROM index_definitions")
- return set([x[0] for x in c.fetchall()])
-
- def _evaluate_index(self, raw_doc, field):
- parser = query_parser.Parser()
- getter = parser.parse(field)
- return getter.get(raw_doc)
-
- def _put_and_update_indexes(self, old_doc, doc):
- c = self._db_handle.cursor()
- if doc and not doc.is_tombstone():
- raw_doc = json.loads(doc.get_json())
- else:
- raw_doc = {}
- if old_doc is not None:
- c.execute("UPDATE document SET doc_rev=?, content=?"
- " WHERE doc_id = ?",
- (doc.rev, doc.get_json(), doc.doc_id))
- c.execute("DELETE FROM document_fields WHERE doc_id = ?",
- (doc.doc_id,))
- else:
- c.execute("INSERT INTO document (doc_id, doc_rev, content)"
- " VALUES (?, ?, ?)",
- (doc.doc_id, doc.rev, doc.get_json()))
- indexed_fields = self._get_indexed_fields()
- if indexed_fields:
- # It is expected that len(indexed_fields) is shorter than
- # len(raw_doc)
- getters = [(field, self._parse_index_definition(field))
- for field in indexed_fields]
- self._update_indexes(doc.doc_id, raw_doc, getters, c)
- trans_id = self._allocate_transaction_id()
- c.execute("INSERT INTO transaction_log(doc_id, transaction_id)"
- " VALUES (?, ?)", (doc.doc_id, trans_id))
-
- def create_index(self, index_name, *index_expressions):
- with self._db_handle:
- c = self._db_handle.cursor()
- cur_fields = self._get_indexed_fields()
- definition = [(index_name, idx, field)
- for idx, field in enumerate(index_expressions)]
- try:
- c.executemany("INSERT INTO index_definitions VALUES (?, ?, ?)",
- definition)
- except dbapi2.IntegrityError as e:
- stored_def = self._get_index_definition(index_name)
- if stored_def == [x[-1] for x in definition]:
- return
- raise errors.IndexNameTakenError(
- str(e) +
- str(sys.exc_info()[2])
- )
- new_fields = set(
- [f for f in index_expressions if f not in cur_fields])
- if new_fields:
- self._update_all_indexes(new_fields)
-
- def _iter_all_docs(self):
- c = self._db_handle.cursor()
- c.execute("SELECT doc_id, content FROM document")
- while True:
- next_rows = c.fetchmany()
- if not next_rows:
- break
- for row in next_rows:
- yield row
-
- def _update_all_indexes(self, new_fields):
- """Iterate all the documents, and add content to document_fields.
-
- :param new_fields: The index definitions that need to be added.
- """
- getters = [(field, self._parse_index_definition(field))
- for field in new_fields]
- c = self._db_handle.cursor()
- for doc_id, doc in self._iter_all_docs():
- if doc is None:
- continue
- raw_doc = json.loads(doc)
- self._update_indexes(doc_id, raw_doc, getters, c)
-
-
-SQLiteDatabase.register_implementation(SQLitePartialExpandDatabase)
diff --git a/client/src/leap/soledad/client/_document.py b/client/src/leap/soledad/client/_document.py
deleted file mode 100644
index 9c8577cb..00000000
--- a/client/src/leap/soledad/client/_document.py
+++ /dev/null
@@ -1,254 +0,0 @@
-# -*- coding: utf-8 -*-
-# _document.py
-# Copyright (C) 2017 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Public interfaces for adding extra client features to the generic
-SoledadDocument.
-"""
-
-import weakref
-import uuid
-
-from twisted.internet import defer
-
-from zope.interface import Interface
-from zope.interface import implementer
-
-from leap.soledad.common.document import SoledadDocument
-
-
-class IDocumentWithAttachment(Interface):
- """
- A document that can have an attachment.
- """
-
- def set_store(self, store):
- """
- Set the store used by this file to manage attachments.
-
- :param store: The store used to manage attachments.
- :type store: Soledad
- """
-
- def put_attachment(self, fd):
- """
- Attach data to this document.
-
- Add the attachment to local storage, enqueue for upload.
-
- The document content will be updated with a pointer to the attachment,
- but the document has to be manually put in the database to reflect
- modifications.
-
- :param fd: A file-like object whose content will be attached to this
- document.
- :type fd: file-like
-
- :return: A deferred which fires when the attachment has been added to
- local storage.
- :rtype: Deferred
- """
-
- def get_attachment(self):
- """
- Return the data attached to this document.
-
- If document content contains a pointer to the attachment, try to get
- the attachment from local storage and, if not found, from remote
- storage.
-
- :return: A deferred which fires with a file like-object whose content
- is the attachment of this document, or None if nothing is
- attached.
- :rtype: Deferred
- """
-
- def delete_attachment(self):
- """
- Delete the attachment of this document.
-
- The pointer to the attachment will be removed from the document
- content, but the document has to be manually put in the database to
- reflect modifications.
-
- :return: A deferred which fires when the attachment has been deleted
- from local storage.
- :rtype: Deferred
- """
-
- def get_attachment_state(self):
- """
- Return the state of the attachment of this document.
-
- The state is a member of AttachmentStates and is of one of NONE,
- LOCAL, REMOTE or SYNCED.
-
- :return: A deferred which fires with The state of the attachment of
- this document.
- :rtype: Deferred
- """
-
- def is_dirty(self):
- """
- Return whether this document's content differs from the contents stored
- in local database.
-
- :return: A deferred which fires with True or False, depending on
- whether this document is dirty or not.
- :rtype: Deferred
- """
-
- def upload_attachment(self):
- """
- Upload this document's attachment.
-
- :return: A deferred which fires with the state of the attachment after
- it's been uploaded, or NONE if there's no attachment for this
- document.
- :rtype: Deferred
- """
-
- def download_attachment(self):
- """
- Download this document's attachment.
-
- :return: A deferred which fires with the state of the attachment after
- it's been downloaded, or NONE if there's no attachment for
- this document.
- :rtype: Deferred
- """
-
-
-class BlobDoc(object):
-
- # TODO probably not needed, but convenient for testing for now.
-
- def __init__(self, content, blob_id):
-
- self.blob_id = blob_id
- self.is_blob = True
- self.blob_fd = content
- if blob_id is None:
- blob_id = uuid.uuid4().get_hex()
- self.blob_id = blob_id
-
-
-class AttachmentStates(object):
- NONE = 0
- LOCAL = 1
- REMOTE = 2
- SYNCED = 4
-
-
-@implementer(IDocumentWithAttachment)
-class Document(SoledadDocument):
-
- def __init__(self, doc_id=None, rev=None, json='{}', has_conflicts=False,
- syncable=True, store=None):
- SoledadDocument.__init__(self, doc_id=doc_id, rev=rev, json=json,
- has_conflicts=has_conflicts,
- syncable=syncable)
- self.set_store(store)
-
- #
- # properties
- #
-
- @property
- def _manager(self):
- if not self.store or not hasattr(self.store, 'blobmanager'):
- raise Exception('No blob manager found to manage attachments.')
- return self.store.blobmanager
-
- @property
- def _blob_id(self):
- if self.content and 'blob_id' in self.content:
- return self.content['blob_id']
- return None
-
- def get_store(self):
- return self._store() if self._store else None
-
- def set_store(self, store):
- self._store = weakref.ref(store) if store else None
-
- store = property(get_store, set_store)
-
- #
- # attachment api
- #
-
- def put_attachment(self, fd):
- # add pointer to content
- blob_id = self._blob_id or str(uuid.uuid4())
- if not self.content:
- self.content = {}
- self.content['blob_id'] = blob_id
- # put using manager
- blob = BlobDoc(fd, blob_id)
- fd.seek(0, 2)
- size = fd.tell()
- fd.seek(0)
- return self._manager.put(blob, size)
-
- def get_attachment(self):
- if not self._blob_id:
- return defer.succeed(None)
- return self._manager.get(self._blob_id)
-
- def delete_attachment(self):
- raise NotImplementedError
-
- @defer.inlineCallbacks
- def get_attachment_state(self):
- state = AttachmentStates.NONE
-
- if not self._blob_id:
- defer.returnValue(state)
-
- local_list = yield self._manager.local_list()
- if self._blob_id in local_list:
- state |= AttachmentStates.LOCAL
-
- remote_list = yield self._manager.remote_list()
- if self._blob_id in remote_list:
- state |= AttachmentStates.REMOTE
-
- defer.returnValue(state)
-
- @defer.inlineCallbacks
- def is_dirty(self):
- stored = yield self.store.get_doc(self.doc_id)
- if stored.content != self.content:
- defer.returnValue(True)
- defer.returnValue(False)
-
- @defer.inlineCallbacks
- def upload_attachment(self):
- if not self._blob_id:
- defer.returnValue(AttachmentStates.NONE)
-
- fd = yield self._manager.get_blob(self._blob_id)
- # TODO: turn following method into a public one
- yield self._manager._encrypt_and_upload(self._blob_id, fd)
- defer.returnValue(self.get_attachment_state())
-
- @defer.inlineCallbacks
- def download_attachment(self):
- if not self._blob_id:
- defer.returnValue(None)
- yield self.get_attachment()
- defer.returnValue(self.get_attachment_state())
diff --git a/client/src/leap/soledad/client/_http.py b/client/src/leap/soledad/client/_http.py
deleted file mode 100644
index 2a6b9e39..00000000
--- a/client/src/leap/soledad/client/_http.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# -*- coding: utf-8 -*-
-# _http.py
-# Copyright (C) 2017 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-A twisted-based, TLS-pinned, token-authenticated HTTP client.
-"""
-import base64
-
-from twisted.internet import reactor
-from twisted.web.iweb import IAgent
-from twisted.web.client import Agent
-from twisted.web.http_headers import Headers
-
-from treq.client import HTTPClient as _HTTPClient
-
-from zope.interface import implementer
-
-from leap.common.certs import get_compatible_ssl_context_factory
-
-
-__all__ = ['HTTPClient', 'PinnedTokenAgent']
-
-
-class HTTPClient(_HTTPClient):
-
- def __init__(self, uuid, token, cert_file):
- self._agent = PinnedTokenAgent(uuid, token, cert_file)
- super(self.__class__, self).__init__(self._agent)
-
- def set_token(self, token):
- self._agent.set_token(token)
-
-
-@implementer(IAgent)
-class PinnedTokenAgent(Agent):
-
- def __init__(self, uuid, token, cert_file):
- self._uuid = uuid
- self._token = None
- self._creds = None
- self.set_token(token)
- # pin this agent with the platform TLS certificate
- factory = get_compatible_ssl_context_factory(cert_file)
- Agent.__init__(self, reactor, contextFactory=factory)
-
- def set_token(self, token):
- self._token = token
- self._creds = self._encoded_creds()
-
- def _encoded_creds(self):
- creds = '%s:%s' % (self._uuid, self._token)
- encoded = base64.b64encode(creds)
- return 'Token %s' % encoded
-
- def request(self, method, uri, headers=None, bodyProducer=None):
- # authenticate the request
- headers = headers or Headers()
- headers.addRawHeader('Authorization', self._creds)
- # perform the authenticated request
- return Agent.request(
- self, method, uri, headers=headers, bodyProducer=bodyProducer)
diff --git a/client/src/leap/soledad/client/_pipes.py b/client/src/leap/soledad/client/_pipes.py
deleted file mode 100644
index eef3f1f9..00000000
--- a/client/src/leap/soledad/client/_pipes.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# -*- coding: utf-8 -*-
-# _pipes.py
-# Copyright (C) 2017 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Components for piping data on streams.
-"""
-from io import BytesIO
-
-
-__all__ = ['TruncatedTailPipe', 'PreamblePipe']
-
-
-class TruncatedTailPipe(object):
- """
- Truncate the last `tail_size` bytes from the stream.
- """
-
- def __init__(self, output=None, tail_size=16):
- self.tail_size = tail_size
- self.output = output or BytesIO()
- self.buffer = BytesIO()
-
- def write(self, data):
- self.buffer.write(data)
- if self.buffer.tell() > self.tail_size:
- self._truncate_tail()
-
- def _truncate_tail(self):
- overflow_size = self.buffer.tell() - self.tail_size
- self.buffer.seek(0)
- self.output.write(self.buffer.read(overflow_size))
- remaining = self.buffer.read()
- self.buffer.seek(0)
- self.buffer.write(remaining)
- self.buffer.truncate()
-
- def close(self):
- return self.output
-
-
-class PreamblePipe(object):
- """
- Consumes data until a space is found, then calls a callback with it and
- starts forwarding data to consumer returned by this callback.
- """
-
- def __init__(self, callback):
- self.callback = callback
- self.preamble = BytesIO()
- self.output = None
-
- def write(self, data):
- if not self.output:
- self._write_preamble(data)
- else:
- self.output.write(data)
-
- def _write_preamble(self, data):
- if ' ' not in data:
- self.preamble.write(data)
- return
- preamble_chunk, remaining = data.split(' ', 1)
- self.preamble.write(preamble_chunk)
- self.output = self.callback(self.preamble)
- self.output.write(remaining)
diff --git a/client/src/leap/soledad/client/_recovery_code.py b/client/src/leap/soledad/client/_recovery_code.py
deleted file mode 100644
index 04235a29..00000000
--- a/client/src/leap/soledad/client/_recovery_code.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# -*- coding: utf-8 -*-
-# _recovery_code.py
-# Copyright (C) 2017 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import os
-import binascii
-
-from leap.soledad.common.log import getLogger
-
-logger = getLogger(__name__)
-
-
-class RecoveryCode(object):
-
- # When we turn this string to hex, it will double in size
- code_length = 6
-
- def generate(self):
- logger.info("generating new recovery code...")
- return binascii.hexlify(os.urandom(self.code_length))
diff --git a/client/src/leap/soledad/client/_secrets/__init__.py b/client/src/leap/soledad/client/_secrets/__init__.py
deleted file mode 100644
index b6c81cda..00000000
--- a/client/src/leap/soledad/client/_secrets/__init__.py
+++ /dev/null
@@ -1,129 +0,0 @@
-# -*- coding: utf-8 -*-
-# _secrets/__init__.py
-# Copyright (C) 2016 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import os
-import scrypt
-
-from leap.soledad.common.log import getLogger
-
-from leap.soledad.client._secrets.storage import SecretsStorage
-from leap.soledad.client._secrets.crypto import SecretsCrypto
-from leap.soledad.client._secrets.util import emit, UserDataMixin
-
-
-logger = getLogger(__name__)
-
-
-class Secrets(UserDataMixin):
-
- lengths = {
- 'remote_secret': 512, # remote_secret is used to encrypt remote data.
- 'local_salt': 64, # local_salt is used in conjunction with
- 'local_secret': 448, # local_secret to derive a local_key for storage
- }
-
- def __init__(self, soledad):
- self._soledad = soledad
- self._secrets = {}
- self.crypto = SecretsCrypto(soledad)
- self.storage = SecretsStorage(soledad)
- self._bootstrap()
-
- #
- # bootstrap
- #
-
- def _bootstrap(self):
-
- # attempt to load secrets from local storage
- encrypted = self.storage.load_local()
- if encrypted:
- self._secrets = self.crypto.decrypt(encrypted)
- # maybe update the format of storage of local secret.
- if encrypted['version'] < self.crypto.VERSION:
- self.store_secrets()
- return
-
- # no secret was found in local storage, so this is a first run of
- # soledad for this user in this device. It is mandatory that we check
- # if there's a secret stored in server.
- encrypted = self.storage.load_remote()
- if encrypted:
- self._secrets = self.crypto.decrypt(encrypted)
- self.store_secrets()
- return
-
- # we have *not* found a secret neither in local nor in remote storage,
- # so we have to generate a new one, and then store it.
- self._secrets = self._generate()
- self.store_secrets()
-
- #
- # generation
- #
-
- @emit('creating')
- def _generate(self):
- logger.info("generating new set of secrets...")
- secrets = {}
- for name, length in self.lengths.iteritems():
- secret = os.urandom(length)
- secrets[name] = secret
- logger.info("new set of secrets successfully generated")
- return secrets
-
- #
- # crypto
- #
-
- def store_secrets(self):
- # TODO: we have to improve the logic here, as we want to make sure that
- # whatever is stored locally should only be used after remote storage
- # is successful. Otherwise, this soledad could start encrypting with a
- # secret while another soledad in another device could start encrypting
- # with another secret, which would lead to decryption failures during
- # sync.
- encrypted = self.crypto.encrypt(self._secrets)
- self.storage.save_local(encrypted)
- self.storage.save_remote(encrypted)
-
- #
- # secrets
- #
-
- @property
- def remote_secret(self):
- return self._secrets.get('remote_secret')
-
- @property
- def local_salt(self):
- return self._secrets.get('local_salt')
-
- @property
- def local_secret(self):
- return self._secrets.get('local_secret')
-
- @property
- def local_key(self):
- # local storage key is scrypt-derived from `local_secret` and
- # `local_salt` above
- secret = scrypt.hash(
- password=self.local_secret,
- salt=self.local_salt,
- buflen=32, # we need a key with 256 bits (32 bytes)
- )
- return secret
diff --git a/client/src/leap/soledad/client/_secrets/crypto.py b/client/src/leap/soledad/client/_secrets/crypto.py
deleted file mode 100644
index 8148151d..00000000
--- a/client/src/leap/soledad/client/_secrets/crypto.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# -*- coding: utf-8 -*-
-# _secrets/crypto.py
-# Copyright (C) 2016 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import binascii
-import json
-import os
-import scrypt
-
-from leap.soledad.common import soledad_assert
-from leap.soledad.common.log import getLogger
-
-from leap.soledad.client._crypto import encrypt_sym, decrypt_sym, ENC_METHOD
-from leap.soledad.client._secrets.util import SecretsError
-
-
-logger = getLogger(__name__)
-
-
-class SecretsCrypto(object):
-
- VERSION = 2
-
- def __init__(self, soledad):
- self._soledad = soledad
-
- def _get_key(self, salt):
- passphrase = self._soledad.passphrase.encode('utf8')
- key = scrypt.hash(passphrase, salt, buflen=32)
- return key
-
- #
- # encryption
- #
-
- def encrypt(self, secrets):
- encoded = {}
- for name, value in secrets.iteritems():
- encoded[name] = binascii.b2a_base64(value)
- plaintext = json.dumps(encoded)
- salt = os.urandom(64) # TODO: get salt length from somewhere else
- key = self._get_key(salt)
- iv, ciphertext = encrypt_sym(plaintext, key,
- method=ENC_METHOD.aes_256_gcm)
- encrypted = {
- 'version': self.VERSION,
- 'kdf': 'scrypt',
- 'kdf_salt': binascii.b2a_base64(salt),
- 'kdf_length': len(key),
- 'cipher': ENC_METHOD.aes_256_gcm,
- 'length': len(plaintext),
- 'iv': str(iv),
- 'secrets': binascii.b2a_base64(ciphertext),
- }
- return encrypted
-
- #
- # decryption
- #
-
- def decrypt(self, data):
- version = data.setdefault('version', 1)
- method = getattr(self, '_decrypt_v%d' % version)
- try:
- return method(data)
- except Exception as e:
- logger.error('error decrypting secrets: %r' % e)
- raise SecretsError(e)
-
- def _decrypt_v1(self, data):
- # get encrypted secret from dictionary: the old format allowed for
- # storage of more than one secret, but this feature was never used and
- # soledad has been using only one secret so far. As there is a corner
- # case where the old 'active_secret' key might not be set, we just
- # ignore it and pop the only secret found in the 'storage_secrets' key.
- secret_id = data['storage_secrets'].keys().pop()
- encrypted = data['storage_secrets'][secret_id]
-
- # assert that we know how to decrypt the secret
- soledad_assert('cipher' in encrypted)
- cipher = encrypted['cipher']
- if cipher == 'aes256':
- cipher = ENC_METHOD.aes_256_ctr
- soledad_assert(cipher in ENC_METHOD)
-
- # decrypt
- salt = binascii.a2b_base64(encrypted['kdf_salt'])
- key = self._get_key(salt)
- separator = ':'
- iv, ciphertext = encrypted['secret'].split(separator, 1)
- ciphertext = binascii.a2b_base64(ciphertext)
- plaintext = self._decrypt(key, iv, ciphertext, encrypted, cipher)
-
- # create secrets dictionary
- secrets = {
- 'remote_secret': plaintext[0:512],
- 'local_salt': plaintext[512:576],
- 'local_secret': plaintext[576:1024],
- }
- return secrets
-
- def _decrypt_v2(self, encrypted):
- cipher = encrypted['cipher']
- soledad_assert(cipher in ENC_METHOD)
-
- salt = binascii.a2b_base64(encrypted['kdf_salt'])
- key = self._get_key(salt)
- iv = encrypted['iv']
- ciphertext = binascii.a2b_base64(encrypted['secrets'])
- plaintext = self._decrypt(
- key, iv, ciphertext, encrypted, cipher)
- encoded = json.loads(plaintext)
- secrets = {}
- for name, value in encoded.iteritems():
- secrets[name] = binascii.a2b_base64(value)
- return secrets
-
- def _decrypt(self, key, iv, ciphertext, encrypted, method):
- # assert some properties of the stored secret
- soledad_assert(encrypted['kdf'] == 'scrypt')
- soledad_assert(encrypted['kdf_length'] == len(key))
- # decrypt
- plaintext = decrypt_sym(ciphertext, key, iv, method)
- soledad_assert(encrypted['length'] == len(plaintext))
- return plaintext
diff --git a/client/src/leap/soledad/client/_secrets/storage.py b/client/src/leap/soledad/client/_secrets/storage.py
deleted file mode 100644
index 85713a48..00000000
--- a/client/src/leap/soledad/client/_secrets/storage.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# -*- coding: utf-8 -*-
-# _secrets/storage.py
-# Copyright (C) 2016 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import json
-import six.moves.urllib.parse as urlparse
-
-from hashlib import sha256
-
-from leap.soledad.common import SHARED_DB_NAME
-from leap.soledad.common.log import getLogger
-
-from leap.soledad.client.shared_db import SoledadSharedDatabase
-from leap.soledad.client._document import Document
-from leap.soledad.client._secrets.util import emit, UserDataMixin
-
-
-logger = getLogger(__name__)
-
-
-class SecretsStorage(UserDataMixin):
-
- def __init__(self, soledad):
- self._soledad = soledad
- self._shared_db = self._soledad.shared_db or self._init_shared_db()
- self.__remote_doc = None
-
- @property
- def _creds(self):
- uuid = self._soledad.uuid
- token = self._soledad.token
- return {'token': {'uuid': uuid, 'token': token}}
-
- #
- # local storage
- #
-
- def load_local(self):
- path = self._soledad.secrets_path
- logger.info("trying to load secrets from disk: %s" % path)
- try:
- with open(path, 'r') as f:
- encrypted = json.loads(f.read())
- logger.info("secrets loaded successfully from disk")
- return encrypted
- except IOError:
- logger.warn("secrets not found in disk")
- return None
-
- def save_local(self, encrypted):
- path = self._soledad.secrets_path
- json_data = json.dumps(encrypted)
- with open(path, 'w') as f:
- f.write(json_data)
-
- #
- # remote storage
- #
-
- def _init_shared_db(self):
- url = urlparse.urljoin(self._soledad.server_url, SHARED_DB_NAME)
- creds = self._creds
- db = SoledadSharedDatabase.open_database(url, creds)
- return db
-
- def _remote_doc_id(self):
- passphrase = self._soledad.passphrase.encode('utf8')
- uuid = self._soledad.uuid
- text = '%s%s' % (passphrase, uuid)
- digest = sha256(text).hexdigest()
- return digest
-
- @property
- def _remote_doc(self):
- if not self.__remote_doc and self._shared_db:
- doc = self._get_remote_doc()
- self.__remote_doc = doc
- return self.__remote_doc
-
- @emit('downloading')
- def _get_remote_doc(self):
- logger.info('trying to load secrets from server...')
- doc = self._shared_db.get_doc(self._remote_doc_id())
- if doc:
- logger.info('secrets loaded successfully from server')
- else:
- logger.warn('secrets not found in server')
- return doc
-
- def load_remote(self):
- doc = self._remote_doc
- if not doc:
- return None
- encrypted = doc.content
- return encrypted
-
- @emit('uploading')
- def save_remote(self, encrypted):
- doc = self._remote_doc
- if not doc:
- doc = Document(doc_id=self._remote_doc_id())
- doc.content = encrypted
- db = self._shared_db
- if not db:
- logger.warn('no shared db found')
- return
- db.put_doc(doc)
diff --git a/client/src/leap/soledad/client/_secrets/util.py b/client/src/leap/soledad/client/_secrets/util.py
deleted file mode 100644
index 6401889b..00000000
--- a/client/src/leap/soledad/client/_secrets/util.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# -*- coding:utf-8 -*-
-# _secrets/util.py
-# Copyright (C) 2016 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-from leap.soledad.client import events
-
-
-class SecretsError(Exception):
- pass
-
-
-class UserDataMixin(object):
- """
- When emitting an event, we have to pass a dictionary containing user data.
- This class only defines a property so we don't have to define it in
- multiple places.
- """
-
- @property
- def _user_data(self):
- uuid = self._soledad.uuid
- userid = self._soledad.userid
- # TODO: seems that uuid and userid hold the same value! We should check
- # whether we should pass something different or if the events api
- # really needs two different values.
- return {'uuid': uuid, 'userid': userid}
-
-
-def emit(verb):
- def _decorator(method):
- def _decorated(self, *args, **kwargs):
-
- # emit starting event
- user_data = self._user_data
- name = 'SOLEDAD_' + verb.upper() + '_KEYS'
- event = getattr(events, name)
- events.emit_async(event, user_data)
-
- # run the method
- result = method(self, *args, **kwargs)
-
- # emit a finished event
- name = 'SOLEDAD_DONE_' + verb.upper() + '_KEYS'
- event = getattr(events, name)
- events.emit_async(event, user_data)
-
- return result
- return _decorated
- return _decorator
diff --git a/client/src/leap/soledad/client/_version.py b/client/src/leap/soledad/client/_version.py
deleted file mode 100644
index 3ee3f81b..00000000
--- a/client/src/leap/soledad/client/_version.py
+++ /dev/null
@@ -1,484 +0,0 @@
-
-# This file helps to compute a version number in source trees obtained from
-# git-archive tarball (such as those provided by githubs download-from-tag
-# feature). Distribution tarballs (built by setup.py sdist) and build
-# directories (produced by setup.py build) will contain a much shorter file
-# that just contains the computed version number.
-
-# This file is released into the public domain. Generated by
-# versioneer-0.16 (https://github.com/warner/python-versioneer)
-
-"""Git implementation of _version.py."""
-
-import errno
-import os
-import re
-import subprocess
-import sys
-
-
-def get_keywords():
- """Get the keywords needed to look up the version information."""
- # these strings will be replaced by git during git-archive.
- # setup.py/versioneer.py will grep for the variable names, so they must
- # each be defined on a line of their own. _version.py will just call
- # get_keywords().
- git_refnames = "$Format:%d$"
- git_full = "$Format:%H$"
- keywords = {"refnames": git_refnames, "full": git_full}
- return keywords
-
-
-class VersioneerConfig:
- """Container for Versioneer configuration parameters."""
-
-
-def get_config():
- """Create, populate and return the VersioneerConfig() object."""
- # these strings are filled in when 'setup.py versioneer' creates
- # _version.py
- cfg = VersioneerConfig()
- cfg.VCS = "git"
- cfg.style = "pep440"
- cfg.tag_prefix = ""
- cfg.parentdir_prefix = "None"
- cfg.versionfile_source = "src/leap/soledad/client/_version.py"
- cfg.verbose = False
- return cfg
-
-
-class NotThisMethod(Exception):
- """Exception raised if a method is not valid for the current scenario."""
-
-
-LONG_VERSION_PY = {}
-HANDLERS = {}
-
-
-def register_vcs_handler(vcs, method): # decorator
- """Decorator to mark a method as the handler for a particular VCS."""
- def decorate(f):
- """Store f in HANDLERS[vcs][method]."""
- if vcs not in HANDLERS:
- HANDLERS[vcs] = {}
- HANDLERS[vcs][method] = f
- return f
- return decorate
-
-
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
- """Call the given command(s)."""
- assert isinstance(commands, list)
- p = None
- for c in commands:
- try:
- dispcmd = str([c] + args)
- # remember shell=False, so use git.cmd on windows, not just git
- p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
- stderr=(subprocess.PIPE if hide_stderr
- else None))
- break
- except EnvironmentError:
- e = sys.exc_info()[1]
- if e.errno == errno.ENOENT:
- continue
- if verbose:
- print("unable to run %s" % dispcmd)
- print(e)
- return None
- else:
- if verbose:
- print("unable to find command, tried %s" % (commands,))
- return None
- stdout = p.communicate()[0].strip()
- if sys.version_info[0] >= 3:
- stdout = stdout.decode()
- if p.returncode != 0:
- if verbose:
- print("unable to run %s (error)" % dispcmd)
- return None
- return stdout
-
-
-def versions_from_parentdir(parentdir_prefix, root, verbose):
- """Try to determine the version from the parent directory name.
-
- Source tarballs conventionally unpack into a directory that includes
- both the project name and a version string.
- """
- dirname = os.path.basename(root)
- if not dirname.startswith(parentdir_prefix):
- if verbose:
- print("guessing rootdir is '%s', but '%s' doesn't start with "
- "prefix '%s'" % (root, dirname, parentdir_prefix))
- raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
- return {"version": dirname[len(parentdir_prefix):],
- "full-revisionid": None,
- "dirty": False, "error": None}
-
-
-@register_vcs_handler("git", "get_keywords")
-def git_get_keywords(versionfile_abs):
- """Extract version information from the given file."""
- # the code embedded in _version.py can just fetch the value of these
- # keywords. When used from setup.py, we don't want to import _version.py,
- # so we do it with a regexp instead. This function is not used from
- # _version.py.
- keywords = {}
- try:
- f = open(versionfile_abs, "r")
- for line in f.readlines():
- if line.strip().startswith("git_refnames ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["refnames"] = mo.group(1)
- if line.strip().startswith("git_full ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["full"] = mo.group(1)
- f.close()
- except EnvironmentError:
- pass
- return keywords
-
-
-@register_vcs_handler("git", "keywords")
-def git_versions_from_keywords(keywords, tag_prefix, verbose):
- """Get version information from git keywords."""
- if not keywords:
- raise NotThisMethod("no keywords at all, weird")
- refnames = keywords["refnames"].strip()
- if refnames.startswith("$Format"):
- if verbose:
- print("keywords are unexpanded, not using")
- raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
- refs = set([r.strip() for r in refnames.strip("()").split(",")])
- # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
- # just "foo-1.0". If we see a "tag: " prefix, prefer those.
- TAG = "tag: "
- tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
- if not tags:
- # Either we're using git < 1.8.3, or there really are no tags. We use
- # a heuristic: assume all version tags have a digit. The old git %d
- # expansion behaves like git log --decorate=short and strips out the
- # refs/heads/ and refs/tags/ prefixes that would let us distinguish
- # between branches and tags. By ignoring refnames without digits, we
- # filter out many common branch names like "release" and
- # "stabilization", as well as "HEAD" and "master".
- tags = set([r for r in refs if re.search(r'\d', r)])
- if verbose:
- print("discarding '%s', no digits" % ",".join(refs-tags))
- if verbose:
- print("likely tags: %s" % ",".join(sorted(tags)))
- for ref in sorted(tags):
- # sorting will prefer e.g. "2.0" over "2.0rc1"
- if ref.startswith(tag_prefix):
- r = ref[len(tag_prefix):]
- if verbose:
- print("picking %s" % r)
- return {"version": r,
- "full-revisionid": keywords["full"].strip(),
- "dirty": False, "error": None
- }
- # no suitable tags, so version is "0+unknown", but full hex is still there
- if verbose:
- print("no suitable tags, using unknown + full revision id")
- return {"version": "0+unknown",
- "full-revisionid": keywords["full"].strip(),
- "dirty": False, "error": "no suitable tags"}
-
-
-@register_vcs_handler("git", "pieces_from_vcs")
-def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
- """Get version from 'git describe' in the root of the source tree.
-
- This only gets called if the git-archive 'subst' keywords were *not*
- expanded, and _version.py hasn't already been rewritten with a short
- version string, meaning we're inside a checked out source tree.
- """
- if not os.path.exists(os.path.join(root, ".git")):
- if verbose:
- print("no .git in %s" % root)
- raise NotThisMethod("no .git directory")
-
- GITS = ["git"]
- if sys.platform == "win32":
- GITS = ["git.cmd", "git.exe"]
- # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
- # if there isn't one, this yields HEX[-dirty] (no NUM)
- describe_out = run_command(GITS, ["describe", "--tags", "--dirty",
- "--always", "--long",
- "--match", "%s*" % tag_prefix],
- cwd=root)
- # --long was added in git-1.5.5
- if describe_out is None:
- raise NotThisMethod("'git describe' failed")
- describe_out = describe_out.strip()
- full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
- if full_out is None:
- raise NotThisMethod("'git rev-parse' failed")
- full_out = full_out.strip()
-
- pieces = {}
- pieces["long"] = full_out
- pieces["short"] = full_out[:7] # maybe improved later
- pieces["error"] = None
-
- # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
- # TAG might have hyphens.
- git_describe = describe_out
-
- # look for -dirty suffix
- dirty = git_describe.endswith("-dirty")
- pieces["dirty"] = dirty
- if dirty:
- git_describe = git_describe[:git_describe.rindex("-dirty")]
-
- # now we have TAG-NUM-gHEX or HEX
-
- if "-" in git_describe:
- # TAG-NUM-gHEX
- mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
- if not mo:
- # unparseable. Maybe git-describe is misbehaving?
- pieces["error"] = ("unable to parse git-describe output: '%s'"
- % describe_out)
- return pieces
-
- # tag
- full_tag = mo.group(1)
- if not full_tag.startswith(tag_prefix):
- if verbose:
- fmt = "tag '%s' doesn't start with prefix '%s'"
- print(fmt % (full_tag, tag_prefix))
- pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
- % (full_tag, tag_prefix))
- return pieces
- pieces["closest-tag"] = full_tag[len(tag_prefix):]
-
- # distance: number of commits since tag
- pieces["distance"] = int(mo.group(2))
-
- # commit: short hex revision ID
- pieces["short"] = mo.group(3)
-
- else:
- # HEX: no tags
- pieces["closest-tag"] = None
- count_out = run_command(GITS, ["rev-list", "HEAD", "--count"],
- cwd=root)
- pieces["distance"] = int(count_out) # total number of commits
-
- return pieces
-
-
-def plus_or_dot(pieces):
- """Return a + if we don't already have one, else return a ."""
- if "+" in pieces.get("closest-tag", ""):
- return "."
- return "+"
-
-
-def render_pep440(pieces):
- """Build up version string, with post-release "local version identifier".
-
- Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
- get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
-
- Exceptions:
- 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
- """
- if pieces["closest-tag"]:
- rendered = pieces["closest-tag"]
- if pieces["distance"] or pieces["dirty"]:
- rendered += plus_or_dot(pieces)
- rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
- if pieces["dirty"]:
- rendered += ".dirty"
- else:
- # exception #1
- rendered = "0+untagged.%d.g%s" % (pieces["distance"],
- pieces["short"])
- if pieces["dirty"]:
- rendered += ".dirty"
- return rendered
-
-
-def render_pep440_pre(pieces):
- """TAG[.post.devDISTANCE] -- No -dirty.
-
- Exceptions:
- 1: no tags. 0.post.devDISTANCE
- """
- if pieces["closest-tag"]:
- rendered = pieces["closest-tag"]
- if pieces["distance"]:
- rendered += ".post.dev%d" % pieces["distance"]
- else:
- # exception #1
- rendered = "0.post.dev%d" % pieces["distance"]
- return rendered
-
-
-def render_pep440_post(pieces):
- """TAG[.postDISTANCE[.dev0]+gHEX] .
-
- The ".dev0" means dirty. Note that .dev0 sorts backwards
- (a dirty tree will appear "older" than the corresponding clean one),
- but you shouldn't be releasing software with -dirty anyways.
-
- Exceptions:
- 1: no tags. 0.postDISTANCE[.dev0]
- """
- if pieces["closest-tag"]:
- rendered = pieces["closest-tag"]
- if pieces["distance"] or pieces["dirty"]:
- rendered += ".post%d" % pieces["distance"]
- if pieces["dirty"]:
- rendered += ".dev0"
- rendered += plus_or_dot(pieces)
- rendered += "g%s" % pieces["short"]
- else:
- # exception #1
- rendered = "0.post%d" % pieces["distance"]
- if pieces["dirty"]:
- rendered += ".dev0"
- rendered += "+g%s" % pieces["short"]
- return rendered
-
-
-def render_pep440_old(pieces):
- """TAG[.postDISTANCE[.dev0]] .
-
- The ".dev0" means dirty.
-
- Eexceptions:
- 1: no tags. 0.postDISTANCE[.dev0]
- """
- if pieces["closest-tag"]:
- rendered = pieces["closest-tag"]
- if pieces["distance"] or pieces["dirty"]:
- rendered += ".post%d" % pieces["distance"]
- if pieces["dirty"]:
- rendered += ".dev0"
- else:
- # exception #1
- rendered = "0.post%d" % pieces["distance"]
- if pieces["dirty"]:
- rendered += ".dev0"
- return rendered
-
-
-def render_git_describe(pieces):
- """TAG[-DISTANCE-gHEX][-dirty].
-
- Like 'git describe --tags --dirty --always'.
-
- Exceptions:
- 1: no tags. HEX[-dirty] (note: no 'g' prefix)
- """
- if pieces["closest-tag"]:
- rendered = pieces["closest-tag"]
- if pieces["distance"]:
- rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
- else:
- # exception #1
- rendered = pieces["short"]
- if pieces["dirty"]:
- rendered += "-dirty"
- return rendered
-
-
-def render_git_describe_long(pieces):
- """TAG-DISTANCE-gHEX[-dirty].
-
- Like 'git describe --tags --dirty --always -long'.
- The distance/hash is unconditional.
-
- Exceptions:
- 1: no tags. HEX[-dirty] (note: no 'g' prefix)
- """
- if pieces["closest-tag"]:
- rendered = pieces["closest-tag"]
- rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
- else:
- # exception #1
- rendered = pieces["short"]
- if pieces["dirty"]:
- rendered += "-dirty"
- return rendered
-
-
-def render(pieces, style):
- """Render the given version pieces into the requested style."""
- if pieces["error"]:
- return {"version": "unknown",
- "full-revisionid": pieces.get("long"),
- "dirty": None,
- "error": pieces["error"]}
-
- if not style or style == "default":
- style = "pep440" # the default
-
- if style == "pep440":
- rendered = render_pep440(pieces)
- elif style == "pep440-pre":
- rendered = render_pep440_pre(pieces)
- elif style == "pep440-post":
- rendered = render_pep440_post(pieces)
- elif style == "pep440-old":
- rendered = render_pep440_old(pieces)
- elif style == "git-describe":
- rendered = render_git_describe(pieces)
- elif style == "git-describe-long":
- rendered = render_git_describe_long(pieces)
- else:
- raise ValueError("unknown style '%s'" % style)
-
- return {"version": rendered, "full-revisionid": pieces["long"],
- "dirty": pieces["dirty"], "error": None}
-
-
-def get_versions():
- """Get version information or return default if unable to do so."""
- # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
- # __file__, we can work backwards from there to the root. Some
- # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
- # case we can only use expanded keywords.
-
- cfg = get_config()
- verbose = cfg.verbose
-
- try:
- return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
- verbose)
- except NotThisMethod:
- pass
-
- try:
- root = os.path.realpath(__file__)
- # versionfile_source is the relative path from the top of the source
- # tree (where the .git directory might live) to this file. Invert
- # this to find the root from __file__.
- for i in cfg.versionfile_source.split('/'):
- root = os.path.dirname(root)
- except NameError:
- return {"version": "0+unknown", "full-revisionid": None,
- "dirty": None,
- "error": "unable to find root of source tree"}
-
- try:
- pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
- return render(pieces, cfg.style)
- except NotThisMethod:
- pass
-
- try:
- if cfg.parentdir_prefix:
- return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
- except NotThisMethod:
- pass
-
- return {"version": "0+unknown", "full-revisionid": None,
- "dirty": None,
- "error": "unable to compute version"}
diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py
deleted file mode 100644
index c62b43f0..00000000
--- a/client/src/leap/soledad/client/api.py
+++ /dev/null
@@ -1,848 +0,0 @@
-# -*- coding: utf-8 -*-
-# api.py
-# Copyright (C) 2013, 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Soledad - Synchronization Of Locally Encrypted Data Among Devices.
-
-This module holds the public api for Soledad.
-
-Soledad is the part of LEAP that manages storage and synchronization of
-application data. It is built on top of U1DB reference Python API and
-implements (1) a SQLCipher backend for local storage in the client, (2) a
-SyncTarget that encrypts data before syncing, and (3) a CouchDB backend for
-remote storage in the server side.
-"""
-import binascii
-import errno
-import os
-import socket
-import ssl
-import uuid
-
-from itertools import chain
-import six.moves.http_client as httplib
-import six.moves.urllib.parse as urlparse
-from six import StringIO
-from collections import defaultdict
-
-from twisted.internet import defer
-from zope.interface import implementer
-
-from leap.common.config import get_path_prefix
-from leap.common.plugins import collect_plugins
-
-from leap.soledad.common import soledad_assert
-from leap.soledad.common import soledad_assert_type
-from leap.soledad.common.log import getLogger
-from leap.soledad.common.l2db.remote import http_client
-from leap.soledad.common.l2db.remote.ssl_match_hostname import match_hostname
-from leap.soledad.common.errors import DatabaseAccessError
-
-from . import events as soledad_events
-from . import interfaces as soledad_interfaces
-from ._crypto import SoledadCrypto
-from ._db import adbapi
-from ._db import blobs
-from ._db import sqlcipher
-from ._recovery_code import RecoveryCode
-from ._secrets import Secrets
-
-
-logger = getLogger(__name__)
-
-
-# we may want to collect statistics from the sync process
-DO_STATS = False
-if os.environ.get('SOLEDAD_STATS'):
- DO_STATS = True
-
-
-#
-# Constants
-#
-
-"""
-Path to the certificate file used to certify the SSL connection between
-Soledad client and server.
-"""
-SOLEDAD_CERT = None
-
-
-@implementer(soledad_interfaces.ILocalStorage,
- soledad_interfaces.ISyncableStorage,
- soledad_interfaces.ISecretsStorage)
-class Soledad(object):
- """
- Soledad provides encrypted data storage and sync.
-
- A Soledad instance is used to store and retrieve data in a local encrypted
- database and synchronize this database with Soledad server.
-
- This class is also responsible for bootstrapping users' account by
- creating cryptographic secrets and/or storing/fetching them on Soledad
- server.
- """
-
- local_db_file_name = 'soledad.u1db'
- secrets_file_name = "soledad.json"
- default_prefix = os.path.join(get_path_prefix(), 'leap', 'soledad')
-
- """
- A dictionary that holds locks which avoid multiple sync attempts from the
- same database replica. The dictionary indexes are the paths to each local
- db, so we guarantee that only one sync happens for a local db at a time.
- """
- _sync_lock = defaultdict(defer.DeferredLock)
-
- def __init__(self, uuid, passphrase, secrets_path, local_db_path,
- server_url, cert_file, shared_db=None,
- auth_token=None):
- """
- Initialize configuration, cryptographic keys and dbs.
-
- :param uuid: User's uuid.
- :type uuid: str
-
- :param passphrase:
- The passphrase for locking and unlocking encryption secrets for
- local and remote storage.
- :type passphrase: unicode
-
- :param secrets_path:
- Path for storing encrypted key used for symmetric encryption.
- :type secrets_path: str
-
- :param local_db_path: Path for local encrypted storage db.
- :type local_db_path: str
-
- :param server_url:
- URL for Soledad server. This is used either to sync with the user's
- remote db and to interact with the shared recovery database.
- :type server_url: str
-
- :param cert_file:
- Path to the certificate of the ca used to validate the SSL
- certificate used by the remote soledad server.
- :type cert_file: str
-
- :param shared_db:
- The shared database.
- :type shared_db: HTTPDatabase
-
- :param auth_token:
- Authorization token for accessing remote databases.
- :type auth_token: str
-
- :raise BootstrapSequenceError:
- Raised when the secret initialization sequence (i.e. retrieval
- from server or generation and storage on server) has failed for
- some reason.
- """
- # store config params
- self.uuid = uuid
- self.passphrase = passphrase
- self.secrets_path = secrets_path
- self._local_db_path = local_db_path
- self.server_url = server_url
- self.shared_db = shared_db
- self.token = auth_token
-
- self._dbsyncer = None
-
- # configure SSL certificate
- global SOLEDAD_CERT
- SOLEDAD_CERT = cert_file
-
- self._init_config_with_defaults()
- self._init_working_dirs()
-
- self._recovery_code = RecoveryCode()
- self._secrets = Secrets(self)
- self._crypto = SoledadCrypto(self._secrets.remote_secret)
- self._init_blobmanager()
-
- try:
- # initialize database access, trap any problems so we can shutdown
- # smoothly.
- self._init_u1db_sqlcipher_backend()
- self._init_u1db_syncer()
- except DatabaseAccessError:
- # oops! something went wrong with backend initialization. We
- # have to close any thread-related stuff we have already opened
- # here, otherwise there might be zombie threads that may clog the
- # reactor.
- if hasattr(self, '_dbpool'):
- self._dbpool.close()
- raise
-
- #
- # initialization/destruction methods
- #
-
- def _init_config_with_defaults(self):
- """
- Initialize configuration using default values for missing params.
- """
- soledad_assert_type(self.passphrase, unicode)
-
- def initialize(attr, val):
- return ((getattr(self, attr, None) is None) and
- setattr(self, attr, val))
-
- initialize("_secrets_path", os.path.join(
- self.default_prefix, self.secrets_file_name))
- initialize("_local_db_path", os.path.join(
- self.default_prefix, self.local_db_file_name))
- # initialize server_url
- soledad_assert(self.server_url is not None,
- 'Missing URL for Soledad server.')
-
- def _init_working_dirs(self):
- """
- Create work directories.
-
- :raise OSError: in case file exists and is not a dir.
- """
- paths = map(lambda x: os.path.dirname(x), [
- self._local_db_path, self._secrets_path])
- for path in paths:
- create_path_if_not_exists(path)
-
- def _init_u1db_sqlcipher_backend(self):
- """
- Initialize the U1DB SQLCipher database for local storage.
-
- Instantiates a modified twisted adbapi that will maintain a threadpool
- with a u1db-sqclipher connection for each thread, and will return
- deferreds for each u1db query.
-
- Currently, Soledad uses the default SQLCipher cipher, i.e.
- 'aes-256-cbc'. We use scrypt to derive a 256-bit encryption key,
- and internally the SQLCipherDatabase initialization uses the 'raw
- PRAGMA key' format to handle the key to SQLCipher.
- """
- tohex = binascii.b2a_hex
- # sqlcipher only accepts the hex version
- key = tohex(self._secrets.local_key)
-
- opts = sqlcipher.SQLCipherOptions(
- self._local_db_path, key,
- is_raw_key=True, create=True)
- self._sqlcipher_opts = opts
- self._dbpool = adbapi.getConnectionPool(opts)
-
- def _init_u1db_syncer(self):
- """
- Initialize the U1DB synchronizer.
- """
- replica_uid = self._dbpool.replica_uid
- self._dbsyncer = sqlcipher.SQLCipherU1DBSync(
- self._sqlcipher_opts, self._crypto, replica_uid,
- SOLEDAD_CERT)
-
- def sync_stats(self):
- sync_phase = 0
- if getattr(self._dbsyncer, 'sync_phase', None):
- sync_phase = self._dbsyncer.sync_phase[0]
- sync_exchange_phase = 0
- if getattr(self._dbsyncer, 'syncer', None):
- if getattr(self._dbsyncer.syncer, 'sync_exchange_phase', None):
- _p = self._dbsyncer.syncer.sync_exchange_phase[0]
- sync_exchange_phase = _p
- return sync_phase, sync_exchange_phase
-
- def _init_blobmanager(self):
- path = os.path.join(os.path.dirname(self._local_db_path), 'blobs')
- url = urlparse.urljoin(self.server_url, 'blobs/%s' % uuid)
- key = self._secrets.local_key
- self.blobmanager = blobs.BlobManager(path, url, key, self.uuid,
- self.token, SOLEDAD_CERT)
-
- #
- # Closing methods
- #
-
- def close(self):
- """
- Close underlying U1DB database.
- """
- logger.debug("closing soledad")
- self._dbpool.close()
- self.blobmanager.close()
- if getattr(self, '_dbsyncer', None):
- self._dbsyncer.close()
-
- #
- # ILocalStorage
- #
-
- def _defer(self, meth, *args, **kw):
- """
- Defer a method to be run on a U1DB connection pool.
-
- :param meth: A method to defer to the U1DB connection pool.
- :type meth: callable
- :return: A deferred.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._dbpool.runU1DBQuery(meth, *args, **kw)
-
- def put_doc(self, doc):
- """
- Update a document.
-
- If the document currently has conflicts, put will fail.
- If the database specifies a maximum document size and the document
- exceeds it, put will fail and raise a DocumentTooBig exception.
-
- ============================== WARNING ==============================
- This method converts the document's contents to unicode in-place. This
- means that after calling `put_doc(doc)`, the contents of the
- document, i.e. `doc.content`, might be different from before the
- call.
- ============================== WARNING ==============================
-
- :param doc: A document with new content.
- :type doc: leap.soledad.common.document.Document
- :return: A deferred whose callback will be invoked with the new
- revision identifier for the document. The document object will
- also be updated.
- :rtype: twisted.internet.defer.Deferred
- """
- d = self._defer("put_doc", doc)
- return d
-
- def delete_doc(self, doc):
- """
- Mark a document as deleted.
-
- Will abort if the current revision doesn't match doc.rev.
- This will also set doc.content to None.
-
- :param doc: A document to be deleted.
- :type doc: leap.soledad.common.document.Document
- :return: A deferred.
- :rtype: twisted.internet.defer.Deferred
- """
- soledad_assert(doc is not None, "delete_doc doesn't accept None.")
- return self._defer("delete_doc", doc)
-
- def get_doc(self, doc_id, include_deleted=False):
- """
- Get the JSON string for the given document.
-
- :param doc_id: The unique document identifier
- :type doc_id: str
- :param include_deleted: If set to True, deleted documents will be
- returned with empty content. Otherwise asking for a deleted
- document will return None.
- :type include_deleted: bool
- :return: A deferred whose callback will be invoked with a document
- object.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer(
- "get_doc", doc_id, include_deleted=include_deleted)
-
- def get_docs(
- self, doc_ids, check_for_conflicts=True, include_deleted=False):
- """
- Get the JSON content for many documents.
-
- :param doc_ids: A list of document identifiers.
- :type doc_ids: list
- :param check_for_conflicts: If set to False, then the conflict check
- will be skipped, and 'None' will be returned instead of True/False.
- :type check_for_conflicts: bool
- :param include_deleted: If set to True, deleted documents will be
- returned with empty content. Otherwise deleted documents will not
- be included in the results.
- :type include_deleted: bool
- :return: A deferred whose callback will be invoked with an iterable
- giving the document object for each document id in matching
- doc_ids order.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer(
- "get_docs", doc_ids, check_for_conflicts=check_for_conflicts,
- include_deleted=include_deleted)
-
- def get_all_docs(self, include_deleted=False):
- """
- Get the JSON content for all documents in the database.
-
- :param include_deleted: If set to True, deleted documents will be
- returned with empty content. Otherwise deleted documents will not
- be included in the results.
- :type include_deleted: bool
-
- :return: A deferred which, when fired, will pass the a tuple
- containing (generation, [Document]) to the callback, with the
- current generation of the database, followed by a list of all the
- documents in the database.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer("get_all_docs", include_deleted)
-
- @defer.inlineCallbacks
- def create_doc(self, content, doc_id=None):
- """
- Create a new document.
-
- You can optionally specify the document identifier, but the document
- must not already exist. See 'put_doc' if you want to override an
- existing document.
- If the database specifies a maximum document size and the document
- exceeds it, create will fail and raise a DocumentTooBig exception.
-
- :param content: A Python dictionary.
- :type content: dict
- :param doc_id: An optional identifier specifying the document id.
- :type doc_id: str
- :return: A deferred whose callback will be invoked with a document.
- :rtype: twisted.internet.defer.Deferred
- """
- # TODO we probably should pass an optional "encoding" parameter to
- # create_doc (and probably to put_doc too). There are cases (mail
- # payloads for example) in which we already have the encoding in the
- # headers, so we don't need to guess it.
- doc = yield self._defer("create_doc", content, doc_id=doc_id)
- doc.set_store(self)
- defer.returnValue(doc)
-
- def create_doc_from_json(self, json, doc_id=None):
- """
- Create a new document.
-
- You can optionally specify the document identifier, but the document
- must not already exist. See 'put_doc' if you want to override an
- existing document.
- If the database specifies a maximum document size and the document
- exceeds it, create will fail and raise a DocumentTooBig exception.
-
- :param json: The JSON document string
- :type json: dict
- :param doc_id: An optional identifier specifying the document id.
- :type doc_id: str
- :return: A deferred whose callback will be invoked with a document.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer("create_doc_from_json", json, doc_id=doc_id)
-
- def create_index(self, index_name, *index_expressions):
- """
- Create a named index, which can then be queried for future lookups.
-
- Creating an index which already exists is not an error, and is cheap.
- Creating an index which does not match the index_expressions of the
- existing index is an error.
- Creating an index will block until the expressions have been evaluated
- and the index generated.
-
- :param index_name: A unique name which can be used as a key prefix
- :type index_name: str
- :param index_expressions: index expressions defining the index
- information.
-
- Examples:
-
- "fieldname", or "fieldname.subfieldname" to index alphabetically
- sorted on the contents of a field.
-
- "number(fieldname, width)", "lower(fieldname)"
- :type index_expresions: list of str
- :return: A deferred.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer("create_index", index_name, *index_expressions)
-
- def delete_index(self, index_name):
- """
- Remove a named index.
-
- :param index_name: The name of the index we are removing
- :type index_name: str
- :return: A deferred.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer("delete_index", index_name)
-
- def list_indexes(self):
- """
- List the definitions of all known indexes.
-
- :return: A deferred whose callback will be invoked with a list of
- [('index-name', ['field', 'field2'])] definitions.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer("list_indexes")
-
- def get_from_index(self, index_name, *key_values):
- """
- Return documents that match the keys supplied.
-
- You must supply exactly the same number of values as have been defined
- in the index. It is possible to do a prefix match by using '*' to
- indicate a wildcard match. You can only supply '*' to trailing entries,
- (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.)
- It is also possible to append a '*' to the last supplied value (eg
- 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*')
-
- :param index_name: The index to query
- :type index_name: str
- :param key_values: values to match. eg, if you have
- an index with 3 fields then you would have:
- get_from_index(index_name, val1, val2, val3)
- :type key_values: list
- :return: A deferred whose callback will be invoked with a list of
- [Document].
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer("get_from_index", index_name, *key_values)
-
- def get_count_from_index(self, index_name, *key_values):
- """
- Return the count for a given combination of index_name
- and key values.
-
- Extension method made from similar methods in u1db version 13.09
-
- :param index_name: The index to query
- :type index_name: str
- :param key_values: values to match. eg, if you have
- an index with 3 fields then you would have:
- get_from_index(index_name, val1, val2, val3)
- :type key_values: tuple
- :return: A deferred whose callback will be invoked with the count.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer("get_count_from_index", index_name, *key_values)
-
- def get_range_from_index(self, index_name, start_value, end_value):
- """
- Return documents that fall within the specified range.
-
- Both ends of the range are inclusive. For both start_value and
- end_value, one must supply exactly the same number of values as have
- been defined in the index, or pass None. In case of a single column
- index, a string is accepted as an alternative for a tuple with a single
- value. It is possible to do a prefix match by using '*' to indicate
- a wildcard match. You can only supply '*' to trailing entries, (eg
- 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also
- possible to append a '*' to the last supplied value (eg 'val*', '*',
- '*' or 'val', 'val*', '*', but not 'val*', 'val', '*')
-
- :param index_name: The index to query
- :type index_name: str
- :param start_values: tuples of values that define the lower bound of
- the range. eg, if you have an index with 3 fields then you would
- have: (val1, val2, val3)
- :type start_values: tuple
- :param end_values: tuples of values that define the upper bound of the
- range. eg, if you have an index with 3 fields then you would have:
- (val1, val2, val3)
- :type end_values: tuple
- :return: A deferred whose callback will be invoked with a list of
- [Document].
- :rtype: twisted.internet.defer.Deferred
- """
-
- return self._defer(
- "get_range_from_index", index_name, start_value, end_value)
-
- def get_index_keys(self, index_name):
- """
- Return all keys under which documents are indexed in this index.
-
- :param index_name: The index to query
- :type index_name: str
- :return: A deferred whose callback will be invoked with a list of
- tuples of indexed keys.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer("get_index_keys", index_name)
-
- def get_doc_conflicts(self, doc_id):
- """
- Get the list of conflicts for the given document.
-
- The order of the conflicts is such that the first entry is the value
- that would be returned by "get_doc".
-
- :param doc_id: The unique document identifier
- :type doc_id: str
- :return: A deferred whose callback will be invoked with a list of the
- Document entries that are conflicted.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer("get_doc_conflicts", doc_id)
-
- def resolve_doc(self, doc, conflicted_doc_revs):
- """
- Mark a document as no longer conflicted.
-
- We take the list of revisions that the client knows about that it is
- superseding. This may be a different list from the actual current
- conflicts, in which case only those are removed as conflicted. This
- may fail if the conflict list is significantly different from the
- supplied information. (sync could have happened in the background from
- the time you GET_DOC_CONFLICTS until the point where you RESOLVE)
-
- :param doc: A Document with the new content to be inserted.
- :type doc: Document
- :param conflicted_doc_revs: A list of revisions that the new content
- supersedes.
- :type conflicted_doc_revs: list(str)
- :return: A deferred.
- :rtype: twisted.internet.defer.Deferred
- """
- return self._defer("resolve_doc", doc, conflicted_doc_revs)
-
- @property
- def local_db_path(self):
- return self._local_db_path
-
- @property
- def userid(self):
- return self.uuid
-
- #
- # ISyncableStorage
- #
-
- def sync(self):
- """
- Synchronize documents with the server replica.
-
- This method uses a lock to prevent multiple concurrent sync processes
- over the same local db file.
-
- :return: A deferred lock that will run the actual sync process when
- the lock is acquired, and which will fire with with the local
- generation before the synchronization was performed.
- :rtype: twisted.internet.defer.Deferred
- """
- # maybe bypass sync
- # TODO: That's because bitmask may not provide us a token, but
- # this should be handled on the caller side. Here, calling us without
- # a token is a real error.
- if not self.token:
- generation = self._dbsyncer.get_generation()
- return defer.succeed(generation)
-
- d = self.sync_lock.run(
- self._sync)
- return d
-
- def _sync(self):
- """
- Synchronize documents with the server replica.
-
- :return: A deferred whose callback will be invoked with the local
- generation before the synchronization was performed.
- :rtype: twisted.internet.defer.Deferred
- """
- sync_url = urlparse.urljoin(self.server_url, 'user-%s' % self.uuid)
- if not self._dbsyncer:
- return
- creds = {'token': {'uuid': self.uuid, 'token': self.token}}
- d = self._dbsyncer.sync(sync_url, creds=creds)
-
- def _sync_callback(local_gen):
- self._last_received_docs = docs = self._dbsyncer.received_docs
-
- # Post-Sync Hooks
- if docs:
- iface = soledad_interfaces.ISoledadPostSyncPlugin
- suitable_plugins = collect_plugins(iface)
- for plugin in suitable_plugins:
- watched = plugin.watched_doc_types
- r = [filter(
- lambda s: s.startswith(preffix),
- docs) for preffix in watched]
- filtered = list(chain(*r))
- plugin.process_received_docs(filtered)
-
- return local_gen
-
- def _sync_errback(failure):
- s = StringIO()
- failure.printDetailedTraceback(file=s)
- msg = "got exception when syncing!\n" + s.getvalue()
- logger.error(msg)
- return failure
-
- def _emit_done_data_sync(passthrough):
- user_data = {'uuid': self.uuid, 'userid': self.userid}
- soledad_events.emit_async(
- soledad_events.SOLEDAD_DONE_DATA_SYNC, user_data)
- return passthrough
-
- d.addCallbacks(_sync_callback, _sync_errback)
- d.addCallback(_emit_done_data_sync)
- return d
-
- @property
- def sync_lock(self):
- """
- Class based lock to prevent concurrent syncs using the same local db
- file.
-
- :return: A shared lock based on this instance's db file path.
- :rtype: DeferredLock
- """
- return self._sync_lock[self._local_db_path]
-
- @property
- def syncing(self):
- """
- Return wether Soledad is currently synchronizing with the server.
-
- :return: Wether Soledad is currently synchronizing with the server.
- :rtype: bool
- """
- return self.sync_lock.locked
-
- #
- # ISecretsStorage
- #
-
- @property
- def secrets(self):
- """
- Return the secrets object.
-
- :return: The secrets object.
- :rtype: Secrets
- """
- return self._secrets
-
- def change_passphrase(self, new_passphrase):
- """
- Change the passphrase that encrypts the storage secret.
-
- :param new_passphrase: The new passphrase.
- :type new_passphrase: unicode
-
- :raise NoStorageSecret: Raised if there's no storage secret available.
- """
- self.passphrase = new_passphrase
- self._secrets.store_secrets()
-
- #
- # Raw SQLCIPHER Queries
- #
-
- def raw_sqlcipher_query(self, *args, **kw):
- """
- Run a raw sqlcipher query in the local database, and return a deferred
- that will be fired with the result.
- """
- return self._dbpool.runQuery(*args, **kw)
-
- def raw_sqlcipher_operation(self, *args, **kw):
- """
- Run a raw sqlcipher operation in the local database, and return a
- deferred that will be fired with None.
- """
- return self._dbpool.runOperation(*args, **kw)
-
- #
- # Service authentication
- #
-
- @defer.inlineCallbacks
- def get_or_create_service_token(self, service):
- """
- Return the stored token for a given service, or generates and stores a
- random one if it does not exist.
-
- These tokens can be used to authenticate services.
- """
- # FIXME this could use the local sqlcipher database, to avoid
- # problems with different replicas creating different tokens.
-
- yield self.create_index('by-servicetoken', 'type', 'service')
- docs = yield self._get_token_for_service(service)
- if docs:
- doc = docs[0]
- defer.returnValue(doc.content['token'])
- else:
- token = str(uuid.uuid4()).replace('-', '')[-24:]
- yield self._set_token_for_service(service, token)
- defer.returnValue(token)
-
- def _get_token_for_service(self, service):
- return self.get_from_index('by-servicetoken', 'servicetoken', service)
-
- def _set_token_for_service(self, service, token):
- doc = {'type': 'servicetoken', 'service': service, 'token': token}
- return self.create_doc(doc)
-
- def create_recovery_code(self):
- return self._recovery_code.generate()
-
-
-def create_path_if_not_exists(path):
- try:
- if not os.path.isdir(path):
- logger.info('creating directory: %s.' % path)
- os.makedirs(path)
- except OSError as exc:
- if exc.errno == errno.EEXIST and os.path.isdir(path):
- pass
- else:
- raise
-
-# ----------------------------------------------------------------------------
-# Monkey patching u1db to be able to provide a custom SSL cert
-# ----------------------------------------------------------------------------
-
-
-# We need a more reasonable timeout (in seconds)
-SOLEDAD_TIMEOUT = 120
-
-
-class VerifiedHTTPSConnection(httplib.HTTPSConnection):
- """
- HTTPSConnection verifying server side certificates.
- """
- # derived from httplib.py
-
- def connect(self):
- """
- Connect to a host on a given (SSL) port.
- """
- try:
- source = self.source_address
- sock = socket.create_connection((self.host, self.port),
- SOLEDAD_TIMEOUT, source)
- except AttributeError:
- # source_address was introduced in 2.7
- sock = socket.create_connection((self.host, self.port),
- SOLEDAD_TIMEOUT)
- if self._tunnel_host:
- self.sock = sock
- self._tunnel()
-
- self.sock = ssl.wrap_socket(sock,
- ca_certs=SOLEDAD_CERT,
- cert_reqs=ssl.CERT_REQUIRED)
- match_hostname(self.sock.getpeercert(), self.host)
-
-
-old__VerifiedHTTPSConnection = http_client._VerifiedHTTPSConnection
-http_client._VerifiedHTTPSConnection = VerifiedHTTPSConnection
diff --git a/client/src/leap/soledad/client/auth.py b/client/src/leap/soledad/client/auth.py
deleted file mode 100644
index 78e9bf1b..00000000
--- a/client/src/leap/soledad/client/auth.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# -*- coding: utf-8 -*-
-# auth.py
-# Copyright (C) 2013 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Methods for token-based authentication.
-
-These methods have to be included in all classes that extend HTTPClient so
-they can do token-based auth requests to the Soledad server.
-"""
-import base64
-
-from leap.soledad.common.l2db import errors
-
-
-class TokenBasedAuth(object):
- """
- Encapsulate token-auth methods for classes that inherit from
- u1db.remote.http_client.HTTPClient.
- """
-
- def set_token_credentials(self, uuid, token):
- """
- Store given credentials so we can sign the request later.
-
- :param uuid: The user's uuid.
- :type uuid: str
- :param token: The authentication token.
- :type token: str
- """
- self._creds = {'token': (uuid, token)}
-
- def _sign_request(self, method, url_query, params):
- """
- Return an authorization header to be included in the HTTP request, in
- the form:
-
- [('Authorization', 'Token <(base64 encoded) uuid:token>')]
-
- :param method: The HTTP method.
- :type method: str
- :param url_query: The URL query string.
- :type url_query: str
- :param params: A list with encoded query parameters.
- :type param: list
-
- :return: The Authorization header.
- :rtype: list of tuple
- """
- if 'token' in self._creds:
- uuid, token = self._creds['token']
- auth = '%s:%s' % (uuid, token)
- b64_token = base64.b64encode(auth)
- return [('Authorization', 'Token %s' % b64_token)]
- else:
- raise errors.UnknownAuthMethod(
- 'Wrong credentials: %s' % self._creds)
diff --git a/client/src/leap/soledad/client/crypto.py b/client/src/leap/soledad/client/crypto.py
deleted file mode 100644
index 0f19c964..00000000
--- a/client/src/leap/soledad/client/crypto.py
+++ /dev/null
@@ -1,448 +0,0 @@
-# -*- coding: utf-8 -*-
-# crypto.py
-# Copyright (C) 2013, 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Cryptographic utilities for Soledad.
-"""
-import os
-import binascii
-import hmac
-import hashlib
-import json
-
-from cryptography.hazmat.backends import default_backend
-from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
-
-from leap.soledad.common import soledad_assert
-from leap.soledad.common import soledad_assert_type
-from leap.soledad.common import crypto
-from leap.soledad.common.log import getLogger
-import warnings
-
-
-logger = getLogger(__name__)
-warnings.warn("'soledad.client.crypto' MODULE DEPRECATED",
- DeprecationWarning, stacklevel=2)
-
-
-MAC_KEY_LENGTH = 64
-
-crypto_backend = default_backend()
-
-
-def encrypt_sym(data, key):
- """
- Encrypt data using AES-256 cipher in CTR mode.
-
- :param data: The data to be encrypted.
- :type data: str
- :param key: The key used to encrypt data (must be 256 bits long).
- :type key: str
-
- :return: A tuple with the initialization vector and the encrypted data.
- :rtype: (long, str)
- """
- soledad_assert_type(key, str)
- soledad_assert(
- len(key) == 32, # 32 x 8 = 256 bits.
- 'Wrong key size: %s bits (must be 256 bits long).' %
- (len(key) * 8))
-
- iv = os.urandom(16)
- cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend)
- encryptor = cipher.encryptor()
- ciphertext = encryptor.update(data) + encryptor.finalize()
-
- return binascii.b2a_base64(iv), ciphertext
-
-
-def decrypt_sym(data, key, iv):
- """
- Decrypt some data previously encrypted using AES-256 cipher in CTR mode.
-
- :param data: The data to be decrypted.
- :type data: str
- :param key: The symmetric key used to decrypt data (must be 256 bits
- long).
- :type key: str
- :param iv: The initialization vector.
- :type iv: long
-
- :return: The decrypted data.
- :rtype: str
- """
- soledad_assert_type(key, str)
- # assert params
- soledad_assert(
- len(key) == 32, # 32 x 8 = 256 bits.
- 'Wrong key size: %s (must be 256 bits long).' % len(key))
- iv = binascii.a2b_base64(iv)
- cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=crypto_backend)
- decryptor = cipher.decryptor()
- return decryptor.update(data) + decryptor.finalize()
-
-
-def doc_mac_key(doc_id, secret):
- """
- Generate a key for calculating a MAC for a document whose id is
- C{doc_id}.
-
- The key is derived using HMAC having sha256 as underlying hash
- function. The key used for HMAC is the first MAC_KEY_LENGTH characters
- of Soledad's storage secret. The HMAC message is C{doc_id}.
-
- :param doc_id: The id of the document.
- :type doc_id: str
-
- :param secret: The Soledad storage secret
- :type secret: str
-
- :return: The key.
- :rtype: str
- """
- soledad_assert(secret is not None)
- return hmac.new(
- secret[:MAC_KEY_LENGTH],
- doc_id,
- hashlib.sha256).digest()
-
-
-class SoledadCrypto(object):
- """
- General cryptographic functionality encapsulated in a
- object that can be passed along.
- """
- def __init__(self, secret):
- """
- Initialize the crypto object.
-
- :param secret: The Soledad remote storage secret.
- :type secret: str
- """
- self._secret = secret
-
- def doc_mac_key(self, doc_id):
- return doc_mac_key(doc_id, self._secret)
-
- def doc_passphrase(self, doc_id):
- """
- Generate a passphrase for symmetric encryption of document's contents.
-
- The password is derived using HMAC having sha256 as underlying hash
- function. The key used for HMAC are the first
- C{soledad.REMOTE_STORAGE_SECRET_LENGTH} bytes of Soledad's storage
- secret stripped from the first MAC_KEY_LENGTH characters. The HMAC
- message is C{doc_id}.
-
- :param doc_id: The id of the document that will be encrypted using
- this passphrase.
- :type doc_id: str
-
- :return: The passphrase.
- :rtype: str
- """
- soledad_assert(self._secret is not None)
- return hmac.new(
- self._secret[MAC_KEY_LENGTH:],
- doc_id,
- hashlib.sha256).digest()
-
- def encrypt_doc(self, doc):
- """
- Wrapper around encrypt_docstr that accepts the document as argument.
-
- :param doc: the document.
- :type doc: Document
- """
- key = self.doc_passphrase(doc.doc_id)
-
- return encrypt_docstr(
- doc.get_json(), doc.doc_id, doc.rev, key, self._secret)
-
- def decrypt_doc(self, doc):
- """
- Wrapper around decrypt_doc_dict that accepts the document as argument.
-
- :param doc: the document.
- :type doc: Document
-
- :return: json string with the decrypted document
- :rtype: str
- """
- key = self.doc_passphrase(doc.doc_id)
- return decrypt_doc_dict(
- doc.content, doc.doc_id, doc.rev, key, self._secret)
-
- @property
- def secret(self):
- return self._secret
-
-
-#
-# Crypto utilities for a Document.
-#
-
-def mac_doc(doc_id, doc_rev, ciphertext, enc_scheme, enc_method, enc_iv,
- mac_method, secret):
- """
- Calculate a MAC for C{doc} using C{ciphertext}.
-
- Current MAC method used is HMAC, with the following parameters:
-
- * key: sha256(storage_secret, doc_id)
- * msg: doc_id + doc_rev + ciphertext
- * digestmod: sha256
-
- :param doc_id: The id of the document.
- :type doc_id: str
- :param doc_rev: The revision of the document.
- :type doc_rev: str
- :param ciphertext: The content of the document.
- :type ciphertext: str
- :param enc_scheme: The encryption scheme.
- :type enc_scheme: str
- :param enc_method: The encryption method.
- :type enc_method: str
- :param enc_iv: The encryption initialization vector.
- :type enc_iv: str
- :param mac_method: The MAC method to use.
- :type mac_method: str
- :param secret: The Soledad storage secret
- :type secret: str
-
- :return: The calculated MAC.
- :rtype: str
-
- :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown.
- """
- try:
- soledad_assert(mac_method == crypto.MacMethods.HMAC)
- except AssertionError:
- raise crypto.UnknownMacMethodError
- template = "{doc_id}{doc_rev}{ciphertext}{enc_scheme}{enc_method}{enc_iv}"
- content = template.format(
- doc_id=doc_id,
- doc_rev=doc_rev,
- ciphertext=ciphertext,
- enc_scheme=enc_scheme,
- enc_method=enc_method,
- enc_iv=enc_iv)
- return hmac.new(
- doc_mac_key(doc_id, secret),
- content,
- hashlib.sha256).digest()
-
-
-def encrypt_docstr(docstr, doc_id, doc_rev, key, secret):
- """
- Encrypt C{doc}'s content.
-
- Encrypt doc's contents using AES-256 CTR mode and return a valid JSON
- string representing the following:
-
- {
- crypto.ENC_JSON_KEY: '<encrypted doc JSON string>',
- crypto.ENC_SCHEME_KEY: 'symkey',
- crypto.ENC_METHOD_KEY: crypto.EncryptionMethods.AES_256_CTR,
- crypto.ENC_IV_KEY: '<the initial value used to encrypt>',
- MAC_KEY: '<mac>'
- crypto.MAC_METHOD_KEY: 'hmac'
- }
-
- :param docstr: A representation of the document to be encrypted.
- :type docstr: str or unicode.
-
- :param doc_id: The document id.
- :type doc_id: str
-
- :param doc_rev: The document revision.
- :type doc_rev: str
-
- :param key: The key used to encrypt ``data`` (must be 256 bits long).
- :type key: str
-
- :param secret: The Soledad storage secret (used for MAC auth).
- :type secret: str
-
- :return: The JSON serialization of the dict representing the encrypted
- content.
- :rtype: str
- """
- enc_scheme = crypto.EncryptionSchemes.SYMKEY
- enc_method = crypto.EncryptionMethods.AES_256_CTR
- mac_method = crypto.MacMethods.HMAC
- enc_iv, ciphertext = encrypt_sym(
- str(docstr), # encryption/decryption routines expect str
- key)
- mac = binascii.b2a_hex( # store the mac as hex.
- mac_doc(
- doc_id,
- doc_rev,
- ciphertext,
- enc_scheme,
- enc_method,
- enc_iv,
- mac_method,
- secret))
- # Return a representation for the encrypted content. In the following, we
- # convert binary data to hexadecimal representation so the JSON
- # serialization does not complain about what it tries to serialize.
- hex_ciphertext = binascii.b2a_hex(ciphertext)
- logger.debug("encrypting doc: %s" % doc_id)
- return json.dumps({
- crypto.ENC_JSON_KEY: hex_ciphertext,
- crypto.ENC_SCHEME_KEY: enc_scheme,
- crypto.ENC_METHOD_KEY: enc_method,
- crypto.ENC_IV_KEY: enc_iv,
- crypto.MAC_KEY: mac,
- crypto.MAC_METHOD_KEY: mac_method,
- })
-
-
-def _verify_doc_mac(doc_id, doc_rev, ciphertext, enc_scheme, enc_method,
- enc_iv, mac_method, secret, doc_mac):
- """
- Verify that C{doc_mac} is a correct MAC for the given document.
-
- :param doc_id: The id of the document.
- :type doc_id: str
- :param doc_rev: The revision of the document.
- :type doc_rev: str
- :param ciphertext: The content of the document.
- :type ciphertext: str
- :param enc_scheme: The encryption scheme.
- :type enc_scheme: str
- :param enc_method: The encryption method.
- :type enc_method: str
- :param enc_iv: The encryption initialization vector.
- :type enc_iv: str
- :param mac_method: The MAC method to use.
- :type mac_method: str
- :param secret: The Soledad storage secret
- :type secret: str
- :param doc_mac: The MAC to be verified against.
- :type doc_mac: str
-
- :raise crypto.UnknownMacMethodError: Raised when C{mac_method} is unknown.
- :raise crypto.WrongMacError: Raised when MAC could not be verified.
- """
- calculated_mac = mac_doc(
- doc_id,
- doc_rev,
- ciphertext,
- enc_scheme,
- enc_method,
- enc_iv,
- mac_method,
- secret)
- # we compare mac's hashes to avoid possible timing attacks that might
- # exploit python's builtin comparison operator behaviour, which fails
- # immediatelly when non-matching bytes are found.
- doc_mac_hash = hashlib.sha256(
- binascii.a2b_hex( # the mac is stored as hex
- doc_mac)).digest()
- calculated_mac_hash = hashlib.sha256(calculated_mac).digest()
-
- if doc_mac_hash != calculated_mac_hash:
- logger.warn("wrong MAC while decrypting doc...")
- raise crypto.WrongMacError("Could not authenticate document's "
- "contents.")
-
-
-def decrypt_doc_dict(doc_dict, doc_id, doc_rev, key, secret):
- """
- Decrypt a symmetrically encrypted C{doc}'s content.
-
- Return the JSON string representation of the document's decrypted content.
-
- The passed doc_dict argument should have the following structure:
-
- {
- crypto.ENC_JSON_KEY: '<enc_blob>',
- crypto.ENC_SCHEME_KEY: '<enc_scheme>',
- crypto.ENC_METHOD_KEY: '<enc_method>',
- crypto.ENC_IV_KEY: '<initial value used to encrypt>', # (optional)
- MAC_KEY: '<mac>'
- crypto.MAC_METHOD_KEY: 'hmac'
- }
-
- C{enc_blob} is the encryption of the JSON serialization of the document's
- content. For now Soledad just deals with documents whose C{enc_scheme} is
- crypto.EncryptionSchemes.SYMKEY and C{enc_method} is
- crypto.EncryptionMethods.AES_256_CTR.
-
- :param doc_dict: The content of the document to be decrypted.
- :type doc_dict: dict
-
- :param doc_id: The document id.
- :type doc_id: str
-
- :param doc_rev: The document revision.
- :type doc_rev: str
-
- :param key: The key used to encrypt ``data`` (must be 256 bits long).
- :type key: str
-
- :param secret: The Soledad storage secret.
- :type secret: str
-
- :return: The JSON serialization of the decrypted content.
- :rtype: str
-
- :raise UnknownEncryptionMethodError: Raised when trying to decrypt from an
- unknown encryption method.
- """
- # assert document dictionary structure
- expected_keys = set([
- crypto.ENC_JSON_KEY,
- crypto.ENC_SCHEME_KEY,
- crypto.ENC_METHOD_KEY,
- crypto.ENC_IV_KEY,
- crypto.MAC_KEY,
- crypto.MAC_METHOD_KEY,
- ])
- soledad_assert(expected_keys.issubset(set(doc_dict.keys())))
-
- ciphertext = binascii.a2b_hex(doc_dict[crypto.ENC_JSON_KEY])
- enc_scheme = doc_dict[crypto.ENC_SCHEME_KEY]
- enc_method = doc_dict[crypto.ENC_METHOD_KEY]
- enc_iv = doc_dict[crypto.ENC_IV_KEY]
- doc_mac = doc_dict[crypto.MAC_KEY]
- mac_method = doc_dict[crypto.MAC_METHOD_KEY]
-
- soledad_assert(enc_scheme == crypto.EncryptionSchemes.SYMKEY)
-
- _verify_doc_mac(
- doc_id, doc_rev, ciphertext, enc_scheme, enc_method,
- enc_iv, mac_method, secret, doc_mac)
-
- return decrypt_sym(ciphertext, key, enc_iv)
-
-
-def is_symmetrically_encrypted(doc):
- """
- Return True if the document was symmetrically encrypted.
-
- :param doc: The document to check.
- :type doc: Document
-
- :rtype: bool
- """
- if doc.content and crypto.ENC_SCHEME_KEY in doc.content:
- if doc.content[crypto.ENC_SCHEME_KEY] \
- == crypto.EncryptionSchemes.SYMKEY:
- return True
- return False
diff --git a/client/src/leap/soledad/client/events.py b/client/src/leap/soledad/client/events.py
deleted file mode 100644
index 058be59c..00000000
--- a/client/src/leap/soledad/client/events.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# -*- coding: utf-8 -*-
-# signal.py
-# Copyright (C) 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-"""
-Signaling functions.
-"""
-
-from leap.common.events import emit_async
-from leap.common.events import catalog
-
-
-SOLEDAD_CREATING_KEYS = catalog.SOLEDAD_CREATING_KEYS
-SOLEDAD_DONE_CREATING_KEYS = catalog.SOLEDAD_DONE_CREATING_KEYS
-SOLEDAD_DOWNLOADING_KEYS = catalog.SOLEDAD_DOWNLOADING_KEYS
-SOLEDAD_DONE_DOWNLOADING_KEYS = \
- catalog.SOLEDAD_DONE_DOWNLOADING_KEYS
-SOLEDAD_UPLOADING_KEYS = catalog.SOLEDAD_UPLOADING_KEYS
-SOLEDAD_DONE_UPLOADING_KEYS = \
- catalog.SOLEDAD_DONE_UPLOADING_KEYS
-SOLEDAD_NEW_DATA_TO_SYNC = catalog.SOLEDAD_NEW_DATA_TO_SYNC
-SOLEDAD_DONE_DATA_SYNC = catalog.SOLEDAD_DONE_DATA_SYNC
-SOLEDAD_SYNC_SEND_STATUS = catalog.SOLEDAD_SYNC_SEND_STATUS
-SOLEDAD_SYNC_RECEIVE_STATUS = catalog.SOLEDAD_SYNC_RECEIVE_STATUS
-
-
-__all__ = [
- "catalog",
- "emit_async",
- "SOLEDAD_CREATING_KEYS",
- "SOLEDAD_DONE_CREATING_KEYS",
- "SOLEDAD_DOWNLOADING_KEYS",
- "SOLEDAD_DONE_DOWNLOADING_KEYS",
- "SOLEDAD_UPLOADING_KEYS",
- "SOLEDAD_DONE_UPLOADING_KEYS",
- "SOLEDAD_NEW_DATA_TO_SYNC",
- "SOLEDAD_DONE_DATA_SYNC",
- "SOLEDAD_SYNC_SEND_STATUS",
- "SOLEDAD_SYNC_RECEIVE_STATUS",
-]
diff --git a/client/src/leap/soledad/client/examples/README b/client/src/leap/soledad/client/examples/README
deleted file mode 100644
index 3aed8377..00000000
--- a/client/src/leap/soledad/client/examples/README
+++ /dev/null
@@ -1,4 +0,0 @@
-Right now, you can find here both an example of use
-and the benchmarking scripts.
-TODO move benchmark scripts to root scripts/ folder,
-and leave here only a minimal example.
diff --git a/client/src/leap/soledad/client/examples/benchmarks/.gitignore b/client/src/leap/soledad/client/examples/benchmarks/.gitignore
deleted file mode 100644
index 2211df63..00000000
--- a/client/src/leap/soledad/client/examples/benchmarks/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.txt
diff --git a/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh
deleted file mode 100755
index 1995eee1..00000000
--- a/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/sh
-mkdir tmp
-wget http://www.gutenberg.org/cache/epub/101/pg101.txt -O hacker_crackdown.txt
diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py
deleted file mode 100644
index f9349758..00000000
--- a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# -*- coding: utf-8 -*-
-# measure_index_times.py
-# Copyright (C) 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Measure u1db retrieval times for different u1db index situations.
-"""
-from __future__ import print_function
-from functools import partial
-import datetime
-import hashlib
-import os
-import sys
-
-from twisted.internet import defer, reactor
-
-from leap.soledad.common import l2db
-from leap.soledad.client import adbapi
-from leap.soledad.client._db.sqlcipher import SQLCipherOptions
-
-
-folder = os.environ.get("TMPDIR", "tmp")
-numdocs = int(os.environ.get("DOCS", "1000"))
-silent = os.environ.get("SILENT", False)
-tmpdb = os.path.join(folder, "test.soledad")
-
-
-sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt")
-sample_path = os.path.join(os.curdir, sample_file)
-
-try:
- with open(sample_file) as f:
- SAMPLE = f.readlines()
-except Exception:
- print("[!] Problem opening sample file. Did you download "
- "the sample, or correctly set 'SAMPLE' env var?")
- sys.exit(1)
-
-if numdocs > len(SAMPLE):
- print("[!] Sorry! The requested DOCS number is larger than "
- "the num of lines in our sample file")
- sys.exit(1)
-
-
-def debug(*args):
- if not silent:
- print(*args)
-
-
-debug("[+] db path:", tmpdb)
-debug("[+] num docs", numdocs)
-
-if os.path.isfile(tmpdb):
- debug("[+] Removing existing db file...")
- os.remove(tmpdb)
-
-start_time = datetime.datetime.now()
-
-opts = SQLCipherOptions(tmpdb, "secret", create=True)
-dbpool = adbapi.getConnectionPool(opts)
-
-
-def createDoc(doc):
- return dbpool.runU1DBQuery("create_doc", doc)
-
-
-db_indexes = {
- 'by-chash': ['chash'],
- 'by-number': ['number']}
-
-
-def create_indexes(_):
- deferreds = []
- for index, definition in db_indexes.items():
- d = dbpool.runU1DBQuery("create_index", index, *definition)
- deferreds.append(d)
- return defer.gatherResults(deferreds)
-
-
-class TimeWitness(object):
- def __init__(self, init_time):
- self.init_time = init_time
-
- def get_time_count(self):
- return datetime.datetime.now() - self.init_time
-
-
-def get_from_index(_):
- init_time = datetime.datetime.now()
- debug("GETTING FROM INDEX...", init_time)
-
- def printValue(res, time):
- print("RESULT->", res)
- print("Index Query Took: ", time.get_time_count())
- return res
-
- d = dbpool.runU1DBQuery(
- "get_from_index", "by-chash",
- # "1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5")
- # XXX this is line 89 from the hacker crackdown...
- # Should accept any other optional hash as an enviroment variable.
- "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469")
- d.addCallback(printValue, TimeWitness(init_time))
- return d
-
-
-def getAllDocs():
- return dbpool.runU1DBQuery("get_all_docs")
-
-
-def errBack(e):
- debug("[!] ERROR FOUND!!!")
- e.printTraceback()
- reactor.stop()
-
-
-def countDocs(_):
- debug("counting docs...")
- d = getAllDocs()
- d.addCallbacks(printResult, errBack)
- d.addCallbacks(allDone, errBack)
- return d
-
-
-def printResult(r, **kwargs):
- if kwargs:
- debug(*kwargs.values())
- elif isinstance(r, l2db.Document):
- debug(r.doc_id, r.content['number'])
- else:
- len_results = len(r[1])
- debug("GOT %s results" % len(r[1]))
-
- if len_results == numdocs:
- debug("ALL GOOD")
- else:
- debug("[!] MISSING DOCS!!!!!")
- raise ValueError("We didn't expect this result len")
-
-
-def allDone(_):
- debug("ALL DONE!")
-
- end_time = datetime.datetime.now()
- print((end_time - start_time).total_seconds())
- reactor.stop()
-
-
-def insert_docs(_):
- deferreds = []
- for i in range(numdocs):
- payload = SAMPLE[i]
- chash = hashlib.sha256(payload).hexdigest()
- doc = {"number": i, "payload": payload, 'chash': chash}
- d = createDoc(doc)
- d.addCallbacks(partial(printResult, i=i, chash=chash, payload=payload),
- lambda e: e.printTraceback())
- deferreds.append(d)
- return defer.gatherResults(deferreds, consumeErrors=True)
-
-
-d = create_indexes(None)
-d.addCallback(insert_docs)
-d.addCallback(get_from_index)
-d.addCallback(countDocs)
-
-reactor.run()
diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py
deleted file mode 100644
index 4f273c64..00000000
--- a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# -*- coding: utf-8 -*-
-# measure_index_times.py
-# Copyright (C) 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Measure u1db retrieval times for different u1db index situations.
-"""
-from __future__ import print_function
-from functools import partial
-import datetime
-import hashlib
-import os
-import sys
-
-from twisted.internet import defer, reactor
-
-from leap.soledad.client import adbapi
-from leap.soledad.client._db.sqlcipher import SQLCipherOptions
-from leap.soledad.common import l2db
-
-
-folder = os.environ.get("TMPDIR", "tmp")
-numdocs = int(os.environ.get("DOCS", "1000"))
-silent = os.environ.get("SILENT", False)
-tmpdb = os.path.join(folder, "test.soledad")
-
-
-sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt")
-sample_path = os.path.join(os.curdir, sample_file)
-
-try:
- with open(sample_file) as f:
- SAMPLE = f.readlines()
-except Exception:
- print("[!] Problem opening sample file. Did you download "
- "the sample, or correctly set 'SAMPLE' env var?")
- sys.exit(1)
-
-if numdocs > len(SAMPLE):
- print("[!] Sorry! The requested DOCS number is larger than "
- "the num of lines in our sample file")
- sys.exit(1)
-
-
-def debug(*args):
- if not silent:
- print(*args)
-
-
-debug("[+] db path:", tmpdb)
-debug("[+] num docs", numdocs)
-
-if os.path.isfile(tmpdb):
- debug("[+] Removing existing db file...")
- os.remove(tmpdb)
-
-start_time = datetime.datetime.now()
-
-opts = SQLCipherOptions(tmpdb, "secret", create=True)
-dbpool = adbapi.getConnectionPool(opts)
-
-
-def createDoc(doc, doc_id):
- return dbpool.runU1DBQuery("create_doc", doc, doc_id=doc_id)
-
-
-db_indexes = {
- 'by-chash': ['chash'],
- 'by-number': ['number']}
-
-
-def create_indexes(_):
- deferreds = []
- for index, definition in db_indexes.items():
- d = dbpool.runU1DBQuery("create_index", index, *definition)
- deferreds.append(d)
- return defer.gatherResults(deferreds)
-
-
-class TimeWitness(object):
- def __init__(self, init_time):
- self.init_time = init_time
-
- def get_time_count(self):
- return datetime.datetime.now() - self.init_time
-
-
-def get_from_index(_):
- init_time = datetime.datetime.now()
- debug("GETTING FROM INDEX...", init_time)
-
- def printValue(res, time):
- print("RESULT->", res)
- print("Index Query Took: ", time.get_time_count())
- return res
-
- d = dbpool.runU1DBQuery(
- "get_doc",
- # "1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5")
- # XXX this is line 89 from the hacker crackdown...
- # Should accept any other optional hash as an enviroment variable.
- "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469")
- d.addCallback(printValue, TimeWitness(init_time))
- return d
-
-
-def getAllDocs():
- return dbpool.runU1DBQuery("get_all_docs")
-
-
-def errBack(e):
- debug("[!] ERROR FOUND!!!")
- e.printTraceback()
- reactor.stop()
-
-
-def countDocs(_):
- debug("counting docs...")
- d = getAllDocs()
- d.addCallbacks(printResult, errBack)
- d.addCallbacks(allDone, errBack)
- return d
-
-
-def printResult(r, **kwargs):
- if kwargs:
- debug(*kwargs.values())
- elif isinstance(r, l2db.Document):
- debug(r.doc_id, r.content['number'])
- else:
- len_results = len(r[1])
- debug("GOT %s results" % len(r[1]))
-
- if len_results == numdocs:
- debug("ALL GOOD")
- else:
- debug("[!] MISSING DOCS!!!!!")
- raise ValueError("We didn't expect this result len")
-
-
-def allDone(_):
- debug("ALL DONE!")
-
- end_time = datetime.datetime.now()
- print((end_time - start_time).total_seconds())
- reactor.stop()
-
-
-def insert_docs(_):
- deferreds = []
- for i in range(numdocs):
- payload = SAMPLE[i]
- chash = hashlib.sha256(payload).hexdigest()
- doc = {"number": i, "payload": payload, 'chash': chash}
- d = createDoc(doc, doc_id=chash)
- d.addCallbacks(partial(printResult, i=i, chash=chash, payload=payload),
- lambda e: e.printTraceback())
- deferreds.append(d)
- return defer.gatherResults(deferreds, consumeErrors=True)
-
-
-d = create_indexes(None)
-d.addCallback(insert_docs)
-d.addCallback(get_from_index)
-d.addCallback(countDocs)
-
-reactor.run()
diff --git a/client/src/leap/soledad/client/examples/compare.txt b/client/src/leap/soledad/client/examples/compare.txt
deleted file mode 100644
index 19a1325a..00000000
--- a/client/src/leap/soledad/client/examples/compare.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-TIMES=100 TMPDIR=/media/sdb5/leap python use_adbapi.py 1.34s user 0.16s system 53% cpu 2.832 total
-TIMES=100 TMPDIR=/media/sdb5/leap python use_api.py 1.22s user 0.14s system 62% cpu 2.181 total
-
-TIMES=1000 TMPDIR=/media/sdb5/leap python use_api.py 2.18s user 0.34s system 27% cpu 9.213 total
-TIMES=1000 TMPDIR=/media/sdb5/leap python use_adbapi.py 2.40s user 0.34s system 39% cpu 7.004 total
-
-TIMES=5000 TMPDIR=/media/sdb5/leap python use_api.py 6.63s user 1.27s system 13% cpu 57.882 total
-TIMES=5000 TMPDIR=/media/sdb5/leap python use_adbapi.py 6.84s user 1.26s system 36% cpu 22.367 total
diff --git a/client/src/leap/soledad/client/examples/manifest.phk b/client/src/leap/soledad/client/examples/manifest.phk
deleted file mode 100644
index 2c86c07d..00000000
--- a/client/src/leap/soledad/client/examples/manifest.phk
+++ /dev/null
@@ -1,50 +0,0 @@
-The Hacker's Manifesto
-
-The Hacker's Manifesto
-by: The Mentor
-
-Another one got caught today, it's all over the papers. "Teenager
-Arrested in Computer Crime Scandal", "Hacker Arrested after Bank
-Tampering." "Damn kids. They're all alike." But did you, in your
-three-piece psychology and 1950's technobrain, ever take a look behind
-the eyes of the hacker? Did you ever wonder what made him tick, what
-forces shaped him, what may have molded him? I am a hacker, enter my
-world. Mine is a world that begins with school. I'm smarter than most of
-the other kids, this crap they teach us bores me. "Damn underachiever.
-They're all alike." I'm in junior high or high school. I've listened to
-teachers explain for the fifteenth time how to reduce a fraction. I
-understand it. "No, Ms. Smith, I didn't show my work. I did it in
-my head." "Damn kid. Probably copied it. They're all alike." I made a
-discovery today. I found a computer. Wait a second, this is cool. It does
-what I want it to. If it makes a mistake, it's because I screwed it up.
-Not because it doesn't like me, or feels threatened by me, or thinks I'm
-a smart ass, or doesn't like teaching and shouldn't be here. Damn kid.
-All he does is play games. They're all alike. And then it happened... a
-door opened to a world... rushing through the phone line like heroin
-through an addict's veins, an electronic pulse is sent out, a refuge from
-the day-to-day incompetencies is sought... a board is found. "This is
-it... this is where I belong..." I know everyone here... even if I've
-never met them, never talked to them, may never hear from them again... I
-know you all... Damn kid. Tying up the phone line again. They're all
-alike... You bet your ass we're all alike... we've been spoon-fed baby
-food at school when we hungered for steak... the bits of meat that you
-did let slip through were pre-chewed and tasteless. We've been dominated
-by sadists, or ignored by the apathetic. The few that had something to
-teach found us willing pupils, but those few are like drops of water in
-the desert. This is our world now... the world of the electron and the
-switch, the beauty of the baud. We make use of a service already existing
-without paying for what could be dirt-cheap if it wasn't run by
-profiteering gluttons, and you call us criminals. We explore... and you
-call us criminals. We seek after knowledge... and you call us criminals.
-We exist without skin color, without nationality, without religious
-bias... and you call us criminals. You build atomic bombs, you wage wars,
-you murder, cheat, and lie to us and try to make us believe it's for our
-own good, yet we're the criminals. Yes, I am a criminal. My crime is that
-of curiosity. My crime is that of judging people by what they say and
-think, not what they look like. My crime is that of outsmarting you,
-something that you will never forgive me for. I am a hacker, and this is
-my manifesto. You may stop this individual, but you can't stop us all...
-after all, we're all alike.
-
-This was the last published file written by The Mentor. Shortly after
-releasing it, he was busted by the FBI. The Mentor, sadly missed.
diff --git a/client/src/leap/soledad/client/examples/plot-async-db.py b/client/src/leap/soledad/client/examples/plot-async-db.py
deleted file mode 100644
index 018a1a1d..00000000
--- a/client/src/leap/soledad/client/examples/plot-async-db.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import csv
-from matplotlib import pyplot as plt
-
-FILE = "bench.csv"
-
-# config the plot
-plt.xlabel('number of inserts')
-plt.ylabel('time (seconds)')
-plt.title('SQLCipher parallelization')
-
-kwargs = {
- 'linewidth': 1.0,
- 'linestyle': '-',
-}
-
-series = (('sync', 'r'),
- ('async', 'g'))
-
-data = {'mark': [],
- 'sync': [],
- 'async': []}
-
-with open(FILE, 'rb') as csvfile:
- series_reader = csv.reader(csvfile, delimiter=',')
- for m, s, a in series_reader:
- data['mark'].append(int(m))
- data['sync'].append(float(s))
- data['async'].append(float(a))
-
-xmax = max(data['mark'])
-xmin = min(data['mark'])
-ymax = max(data['sync'] + data['async'])
-ymin = min(data['sync'] + data['async'])
-
-for run in series:
- name = run[0]
- color = run[1]
- plt.plot(data['mark'], data[name], label=name, color=color, **kwargs)
-
-plt.axes().annotate("", xy=(xmax, ymax))
-plt.axes().annotate("", xy=(xmin, ymin))
-
-plt.grid()
-plt.legend()
-plt.show()
diff --git a/client/src/leap/soledad/client/examples/run_benchmark.py b/client/src/leap/soledad/client/examples/run_benchmark.py
deleted file mode 100644
index ddedf433..00000000
--- a/client/src/leap/soledad/client/examples/run_benchmark.py
+++ /dev/null
@@ -1,30 +0,0 @@
-"""
-Run a mini-benchmark between regular api and dbapi
-"""
-import commands
-import os
-import time
-
-TMPDIR = os.environ.get("TMPDIR", "/tmp")
-CSVFILE = 'bench.csv'
-
-cmd = "SILENT=1 TIMES={times} TMPDIR={tmpdir} python ./use_{version}api.py"
-
-
-def parse_time(r):
- return r.split('\n')[-1]
-
-
-with open(CSVFILE, 'w') as log:
-
- for times in range(0, 10000, 500):
- cmd1 = cmd.format(times=times, tmpdir=TMPDIR, version="")
- sync_time = parse_time(commands.getoutput(cmd1))
-
- cmd2 = cmd.format(times=times, tmpdir=TMPDIR, version="adb")
- async_time = parse_time(commands.getoutput(cmd2))
-
- print times, sync_time, async_time
- log.write("%s, %s, %s\n" % (times, sync_time, async_time))
- log.flush()
- time.sleep(2)
diff --git a/client/src/leap/soledad/client/examples/soledad_sync.py b/client/src/leap/soledad/client/examples/soledad_sync.py
deleted file mode 100644
index 3aed10eb..00000000
--- a/client/src/leap/soledad/client/examples/soledad_sync.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from leap.bitmask.config.providerconfig import ProviderConfig
-from leap.bitmask.crypto.srpauth import SRPAuth
-from leap.soledad.client import Soledad
-from twisted.internet import reactor
-import logging
-logging.basicConfig(level=logging.DEBUG)
-
-
-# EDIT THIS --------------------------------------------
-user = u"USERNAME"
-uuid = u"USERUUID"
-_pass = u"USERPASS"
-server_url = "https://soledad.server.example.org:2323"
-# EDIT THIS --------------------------------------------
-
-secrets_path = "/tmp/%s.secrets" % uuid
-local_db_path = "/tmp/%s.soledad" % uuid
-cert_file = "/tmp/cacert.pem"
-provider_config = '/tmp/cdev.json'
-
-
-provider = ProviderConfig()
-provider.load(provider_config)
-
-soledad = None
-
-
-def printStuff(r):
- print r
-
-
-def printErr(err):
- logging.exception(err.value)
-
-
-def init_soledad(_):
- token = srpauth.get_token()
- print "token", token
-
- global soledad
- soledad = Soledad(uuid, _pass, secrets_path, local_db_path,
- server_url, cert_file,
- auth_token=token)
-
- def getall(_):
- d = soledad.get_all_docs()
- return d
-
- d1 = soledad.create_doc({"test": 42})
- d1.addCallback(getall)
- d1.addCallbacks(printStuff, printErr)
-
- d2 = soledad.sync()
- d2.addCallbacks(printStuff, printErr)
- d2.addBoth(lambda r: reactor.stop())
-
-
-srpauth = SRPAuth(provider)
-
-d = srpauth.authenticate(user, _pass)
-d.addCallbacks(init_soledad, printErr)
-
-reactor.run()
diff --git a/client/src/leap/soledad/client/examples/use_adbapi.py b/client/src/leap/soledad/client/examples/use_adbapi.py
deleted file mode 100644
index ddb1eaae..00000000
--- a/client/src/leap/soledad/client/examples/use_adbapi.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# -*- coding: utf-8 -*-
-# use_adbapi.py
-# Copyright (C) 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Example of use of the asynchronous soledad api.
-"""
-from __future__ import print_function
-import datetime
-import os
-
-from twisted.internet import defer, reactor
-
-from leap.soledad.client import adbapi
-from leap.soledad.client._db.sqlcipher import SQLCipherOptions
-from leap.soledad.common import l2db
-
-
-folder = os.environ.get("TMPDIR", "tmp")
-times = int(os.environ.get("TIMES", "1000"))
-silent = os.environ.get("SILENT", False)
-
-tmpdb = os.path.join(folder, "test.soledad")
-
-
-def debug(*args):
- if not silent:
- print(*args)
-
-
-debug("[+] db path:", tmpdb)
-debug("[+] times", times)
-
-if os.path.isfile(tmpdb):
- debug("[+] Removing existing db file...")
- os.remove(tmpdb)
-
-start_time = datetime.datetime.now()
-
-opts = SQLCipherOptions(tmpdb, "secret", create=True)
-dbpool = adbapi.getConnectionPool(opts)
-
-
-def createDoc(doc):
- return dbpool.runU1DBQuery("create_doc", doc)
-
-
-def getAllDocs():
- return dbpool.runU1DBQuery("get_all_docs")
-
-
-def countDocs(_):
- debug("counting docs...")
- d = getAllDocs()
- d.addCallbacks(printResult, lambda e: e.printTraceback())
- d.addBoth(allDone)
-
-
-def printResult(r):
- if isinstance(r, l2db.Document):
- debug(r.doc_id, r.content['number'])
- else:
- len_results = len(r[1])
- debug("GOT %s results" % len(r[1]))
-
- if len_results == times:
- debug("ALL GOOD")
- else:
- raise ValueError("We didn't expect this result len")
-
-
-def allDone(_):
- debug("ALL DONE!")
- if silent:
- end_time = datetime.datetime.now()
- print((end_time - start_time).total_seconds())
- reactor.stop()
-
-
-deferreds = []
-payload = open('manifest.phk').read()
-
-for i in range(times):
- doc = {"number": i, "payload": payload}
- d = createDoc(doc)
- d.addCallbacks(printResult, lambda e: e.printTraceback())
- deferreds.append(d)
-
-
-all_done = defer.gatherResults(deferreds, consumeErrors=True)
-all_done.addCallback(countDocs)
-
-reactor.run()
diff --git a/client/src/leap/soledad/client/examples/use_api.py b/client/src/leap/soledad/client/examples/use_api.py
deleted file mode 100644
index db77c4b3..00000000
--- a/client/src/leap/soledad/client/examples/use_api.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# -*- coding: utf-8 -*-
-# use_api.py
-# Copyright (C) 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Example of use of the soledad api.
-"""
-from __future__ import print_function
-import datetime
-import os
-
-from leap.soledad.client import sqlcipher
-from leap.soledad.client.sqlcipher import SQLCipherOptions
-
-
-folder = os.environ.get("TMPDIR", "tmp")
-times = int(os.environ.get("TIMES", "1000"))
-silent = os.environ.get("SILENT", False)
-
-tmpdb = os.path.join(folder, "test.soledad")
-
-
-def debug(*args):
- if not silent:
- print(*args)
-
-
-debug("[+] db path:", tmpdb)
-debug("[+] times", times)
-
-if os.path.isfile(tmpdb):
- debug("[+] Removing existing db file...")
- os.remove(tmpdb)
-
-start_time = datetime.datetime.now()
-
-opts = SQLCipherOptions(tmpdb, "secret", create=True)
-db = sqlcipher.SQLCipherDatabase(opts)
-
-
-def allDone():
- debug("ALL DONE!")
-
-
-payload = open('manifest.phk').read()
-
-for i in range(times):
- doc = {"number": i, "payload": payload}
- d = db.create_doc(doc)
- debug(d.doc_id, d.content['number'])
-
-debug("Count", len(db.get_all_docs()[1]))
-if silent:
- end_time = datetime.datetime.now()
- print((end_time - start_time).total_seconds())
-
-allDone()
diff --git a/client/src/leap/soledad/client/http_target/__init__.py b/client/src/leap/soledad/client/http_target/__init__.py
deleted file mode 100644
index b67d03f6..00000000
--- a/client/src/leap/soledad/client/http_target/__init__.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# -*- coding: utf-8 -*-
-# __init__.py
-# Copyright (C) 2015 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-"""
-A U1DB backend for encrypting data before sending to server and decrypting
-after receiving.
-"""
-
-
-import os
-
-from twisted.web.client import Agent
-from twisted.internet import reactor
-
-from leap.common.certs import get_compatible_ssl_context_factory
-from leap.soledad.common.log import getLogger
-from leap.soledad.client.http_target.send import HTTPDocSender
-from leap.soledad.client.http_target.api import SyncTargetAPI
-from leap.soledad.client.http_target.fetch import HTTPDocFetcher
-from leap.soledad.client import crypto as old_crypto
-
-
-logger = getLogger(__name__)
-
-
-# we may want to collect statistics from the sync process
-DO_STATS = False
-if os.environ.get('SOLEDAD_STATS'):
- DO_STATS = True
-
-
-class SoledadHTTPSyncTarget(SyncTargetAPI, HTTPDocSender, HTTPDocFetcher):
-
- """
- A SyncTarget that encrypts data before sending and decrypts data after
- receiving.
-
- Normally encryption will have been written to the sync database upon
- document modification. The sync database is also used to write temporarily
- the parsed documents that the remote send us, before being decrypted and
- written to the main database.
- """
- def __init__(self, url, source_replica_uid, creds, crypto, cert_file):
- """
- Initialize the sync target.
-
- :param url: The server sync url.
- :type url: str
- :param source_replica_uid: The source replica uid which we use when
- deferring decryption.
- :type source_replica_uid: str
- :param creds: A dictionary containing the uuid and token.
- :type creds: creds
- :param crypto: An instance of SoledadCrypto so we can encrypt/decrypt
- document contents when syncing.
- :type crypto: soledad._crypto.SoledadCrypto
- :param cert_file: Path to the certificate of the ca used to validate
- the SSL certificate used by the remote soledad
- server.
- :type cert_file: str
- """
- if url.endswith("/"):
- url = url[:-1]
- self._url = str(url) + "/sync-from/" + str(source_replica_uid)
- self.source_replica_uid = source_replica_uid
- self._auth_header = None
- self._uuid = None
- self.set_creds(creds)
- self._crypto = crypto
- # TODO: DEPRECATED CRYPTO
- self._deprecated_crypto = old_crypto.SoledadCrypto(crypto.secret)
- self._insert_doc_cb = None
-
- # Twisted default Agent with our own ssl context factory
- factory = get_compatible_ssl_context_factory(cert_file)
- self._http = Agent(reactor, factory)
-
- if DO_STATS:
- self.sync_exchange_phase = [0]
diff --git a/client/src/leap/soledad/client/http_target/api.py b/client/src/leap/soledad/client/http_target/api.py
deleted file mode 100644
index c68185c6..00000000
--- a/client/src/leap/soledad/client/http_target/api.py
+++ /dev/null
@@ -1,248 +0,0 @@
-# -*- coding: utf-8 -*-
-# api.py
-# Copyright (C) 2015 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import os
-import json
-import base64
-
-from six import StringIO
-from uuid import uuid4
-
-from twisted.internet import defer
-from twisted.web.http_headers import Headers
-from twisted.web.client import FileBodyProducer
-
-from leap.soledad.client.http_target.support import readBody
-from leap.soledad.common.errors import InvalidAuthTokenError
-from leap.soledad.common.l2db.errors import HTTPError
-from leap.soledad.common.l2db import SyncTarget
-
-
-# we may want to collect statistics from the sync process
-DO_STATS = False
-if os.environ.get('SOLEDAD_STATS'):
- DO_STATS = True
-
-
-class SyncTargetAPI(SyncTarget):
- """
- Declares public methods and implements u1db.SyncTarget.
- """
-
- @property
- def uuid(self):
- return self._uuid
-
- def set_creds(self, creds):
- """
- Update credentials.
-
- :param creds: A dictionary containing the uuid and token.
- :type creds: dict
- """
- uuid = creds['token']['uuid']
- token = creds['token']['token']
- self._uuid = uuid
- auth = '%s:%s' % (uuid, token)
- b64_token = base64.b64encode(auth)
- self._auth_header = {'Authorization': ['Token %s' % b64_token]}
-
- @property
- def _base_header(self):
- return self._auth_header.copy() if self._auth_header else {}
-
- def _http_request(self, url, method='GET', body=None, headers=None,
- content_type=None, body_reader=readBody,
- body_producer=None):
- headers = headers or self._base_header
- if content_type:
- headers.update({'content-type': [content_type]})
- if not body_producer and body:
- body = FileBodyProducer(StringIO(body))
- elif body_producer:
- # Upload case, check send.py
- body = body_producer(body)
- d = self._http.request(
- method, url, headers=Headers(headers), bodyProducer=body)
- d.addCallback(body_reader)
- d.addErrback(_unauth_to_invalid_token_error)
- return d
-
- @defer.inlineCallbacks
- def get_sync_info(self, source_replica_uid):
- """
- Return information about known state of remote database.
-
- Return the replica_uid and the current database generation of the
- remote database, and its last-seen database generation for the client
- replica.
-
- :param source_replica_uid: The client-size replica uid.
- :type source_replica_uid: str
-
- :return: A deferred which fires with (target_replica_uid,
- target_replica_generation, target_trans_id,
- source_replica_last_known_generation,
- source_replica_last_known_transaction_id)
- :rtype: twisted.internet.defer.Deferred
- """
- raw = yield self._http_request(self._url)
- res = json.loads(raw)
- defer.returnValue((
- res['target_replica_uid'],
- res['target_replica_generation'],
- res['target_replica_transaction_id'],
- res['source_replica_generation'],
- res['source_transaction_id']
- ))
-
- def record_sync_info(
- self, source_replica_uid, source_replica_generation,
- source_replica_transaction_id):
- """
- Record tip information for another replica.
-
- After sync_exchange has been processed, the caller will have
- received new content from this replica. This call allows the
- source replica instigating the sync to inform us what their
- generation became after applying the documents we returned.
-
- This is used to allow future sync operations to not need to repeat data
- that we just talked about. It also means that if this is called at the
- wrong time, there can be database records that will never be
- synchronized.
-
- :param source_replica_uid: The identifier for the source replica.
- :type source_replica_uid: str
- :param source_replica_generation: The database generation for the
- source replica.
- :type source_replica_generation: int
- :param source_replica_transaction_id: The transaction id associated
- with the source replica
- generation.
- :type source_replica_transaction_id: str
-
- :return: A deferred which fires with the result of the query.
- :rtype: twisted.internet.defer.Deferred
- """
- data = json.dumps({
- 'generation': source_replica_generation,
- 'transaction_id': source_replica_transaction_id
- })
- return self._http_request(
- self._url,
- method='PUT',
- body=data,
- content_type='application/json')
-
- @defer.inlineCallbacks
- def sync_exchange(self, docs_by_generation, source_replica_uid,
- last_known_generation, last_known_trans_id,
- insert_doc_cb, ensure_callback=None,
- sync_id=None):
- """
- Find out which documents the remote database does not know about,
- encrypt and send them. After that, receive documents from the remote
- database.
-
- :param docs_by_generations: A list of (doc_id, generation, trans_id)
- of local documents that were changed since
- the last local generation the remote
- replica knows about.
- :type docs_by_generations: list of tuples
-
- :param source_replica_uid: The uid of the source replica.
- :type source_replica_uid: str
-
- :param last_known_generation: Target's last known generation.
- :type last_known_generation: int
-
- :param last_known_trans_id: Target's last known transaction id.
- :type last_known_trans_id: str
-
- :param insert_doc_cb: A callback for inserting received documents from
- target. If not overriden, this will call u1db
- insert_doc_from_target in synchronizer, which
- implements the TAKE OTHER semantics.
- :type insert_doc_cb: function
-
- :param ensure_callback: A callback that ensures we know the target
- replica uid if the target replica was just
- created.
- :type ensure_callback: function
-
- :return: A deferred which fires with the new generation and
- transaction id of the target replica.
- :rtype: twisted.internet.defer.Deferred
- """
- # ---------- phase 1: send docs to server ----------------------------
- if DO_STATS:
- self.sync_exchange_phase[0] += 1
- # --------------------------------------------------------------------
-
- self._ensure_callback = ensure_callback
-
- if sync_id is None:
- sync_id = str(uuid4())
- self.source_replica_uid = source_replica_uid
-
- # save a reference to the callback so we can use it after decrypting
- self._insert_doc_cb = insert_doc_cb
-
- gen_after_send, trans_id_after_send = yield self._send_docs(
- docs_by_generation,
- last_known_generation,
- last_known_trans_id,
- sync_id)
-
- # ---------- phase 2: receive docs -----------------------------------
- if DO_STATS:
- self.sync_exchange_phase[0] += 1
- # --------------------------------------------------------------------
-
- cur_target_gen, cur_target_trans_id = yield self._receive_docs(
- last_known_generation, last_known_trans_id,
- ensure_callback, sync_id)
-
- # update gen and trans id info in case we just sent and did not
- # receive docs.
- if gen_after_send is not None and gen_after_send > cur_target_gen:
- cur_target_gen = gen_after_send
- cur_target_trans_id = trans_id_after_send
-
- # ---------- phase 3: sync exchange is over --------------------------
- if DO_STATS:
- self.sync_exchange_phase[0] += 1
- # --------------------------------------------------------------------
-
- defer.returnValue([cur_target_gen, cur_target_trans_id])
-
-
-def _unauth_to_invalid_token_error(failure):
- """
- An errback to translate unauthorized errors to our own invalid token
- class.
-
- :param failure: The original failure.
- :type failure: twisted.python.failure.Failure
-
- :return: Either the original failure or an invalid auth token error.
- :rtype: twisted.python.failure.Failure
- """
- failure.trap(HTTPError)
- if failure.value.status == 401:
- raise InvalidAuthTokenError
- return failure
diff --git a/client/src/leap/soledad/client/http_target/fetch.py b/client/src/leap/soledad/client/http_target/fetch.py
deleted file mode 100644
index 9d456830..00000000
--- a/client/src/leap/soledad/client/http_target/fetch.py
+++ /dev/null
@@ -1,161 +0,0 @@
-# -*- coding: utf-8 -*-
-# fetch.py
-# Copyright (C) 2015 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import json
-from twisted.internet import defer
-from twisted.internet import threads
-
-from leap.soledad.client.events import SOLEDAD_SYNC_RECEIVE_STATUS
-from leap.soledad.client.events import emit_async
-from leap.soledad.client.http_target.support import RequestBody
-from leap.soledad.common.log import getLogger
-from leap.soledad.client._crypto import is_symmetrically_encrypted
-from leap.soledad.common.l2db import errors
-from leap.soledad.client import crypto as old_crypto
-
-from .._document import Document
-from . import fetch_protocol
-
-logger = getLogger(__name__)
-
-
-class HTTPDocFetcher(object):
- """
- Handles Document fetching from Soledad server, using HTTP as transport.
- Steps:
- * Prepares metadata by asking server for one document
- * Fetch the total on response and prepare to ask all remaining
- * (async) Documents will come encrypted.
- So we parse, decrypt and insert locally as they arrive.
- """
-
- # The uuid of the local replica.
- # Any class inheriting from this one should provide a meaningful attribute
- # if the sync status event is meant to be used somewhere else.
-
- uuid = 'undefined'
- userid = 'undefined'
-
- @defer.inlineCallbacks
- def _receive_docs(self, last_known_generation, last_known_trans_id,
- ensure_callback, sync_id):
- new_generation = last_known_generation
- new_transaction_id = last_known_trans_id
- # Acts as a queue, ensuring line order on async processing
- # as `self._insert_doc_cb` cant be run concurrently or out of order.
- # DeferredSemaphore solves the concurrency and its implementation uses
- # a queue, solving the ordering.
- # FIXME: Find a proper solution to avoid surprises on Twisted changes
- self.semaphore = defer.DeferredSemaphore(1)
-
- metadata = yield self._fetch_all(
- last_known_generation, last_known_trans_id,
- sync_id)
- number_of_changes, ngen, ntrans = self._parse_metadata(metadata)
-
- # wait for pending inserts
- yield self.semaphore.acquire()
-
- if ngen:
- new_generation = ngen
- new_transaction_id = ntrans
-
- defer.returnValue([new_generation, new_transaction_id])
-
- def _fetch_all(self, last_known_generation,
- last_known_trans_id, sync_id):
- # add remote replica metadata to the request
- body = RequestBody(
- last_known_generation=last_known_generation,
- last_known_trans_id=last_known_trans_id,
- sync_id=sync_id,
- ensure=self._ensure_callback is not None)
- self._received_docs = 0
- # build a stream reader with _doc_parser as a callback
- body_reader = fetch_protocol.build_body_reader(self._doc_parser)
- # start download stream
- return self._http_request(
- self._url,
- method='POST',
- body=str(body),
- content_type='application/x-soledad-sync-get',
- body_reader=body_reader)
-
- @defer.inlineCallbacks
- def _doc_parser(self, doc_info, content, total):
- """
- Insert a received document into the local replica, decrypting
- if necessary. The case where it's not decrypted is when a doc gets
- inserted from Server side with a GPG encrypted content.
-
- :param doc_info: Dictionary representing Document information.
- :type doc_info: dict
- :param content: The Document's content.
- :type idx: str
- :param total: The total number of operations.
- :type total: int
- """
- yield self.semaphore.run(self.__atomic_doc_parse, doc_info, content,
- total)
-
- @defer.inlineCallbacks
- def __atomic_doc_parse(self, doc_info, content, total):
- doc = Document(doc_info['id'], doc_info['rev'], content)
- if is_symmetrically_encrypted(content):
- content = (yield self._crypto.decrypt_doc(doc)).getvalue()
- elif old_crypto.is_symmetrically_encrypted(doc):
- content = self._deprecated_crypto.decrypt_doc(doc)
- doc.set_json(content)
-
- # TODO insert blobs here on the blob backend
- # FIXME: This is wrong. Using the very same SQLite connection object
- # from multiple threads is dangerous. We should bring the dbpool here
- # or find an alternative. Deferring to a thread only helps releasing
- # the reactor for other tasks as this is an IO intensive call.
- yield threads.deferToThread(self._insert_doc_cb,
- doc, doc_info['gen'], doc_info['trans_id'])
- self._received_docs += 1
- user_data = {'uuid': self.uuid, 'userid': self.userid}
- _emit_receive_status(user_data, self._received_docs, total=total)
-
- def _parse_metadata(self, metadata):
- """
- Parse the response from the server containing the sync metadata.
-
- :param response: Metadata as string
- :type response: str
-
- :return: (number_of_changes, new_gen, new_trans_id)
- :rtype: tuple
- """
- try:
- metadata = json.loads(metadata)
- # make sure we have replica_uid from fresh new dbs
- if self._ensure_callback and 'replica_uid' in metadata:
- self._ensure_callback(metadata['replica_uid'])
- return (metadata['number_of_changes'], metadata['new_generation'],
- metadata['new_transaction_id'])
- except (ValueError, KeyError):
- raise errors.BrokenSyncStream('Metadata parsing failed')
-
-
-def _emit_receive_status(user_data, received_docs, total):
- content = {'received': received_docs, 'total': total}
- emit_async(SOLEDAD_SYNC_RECEIVE_STATUS, user_data, content)
-
- if received_docs % 20 == 0:
- msg = "%d/%d" % (received_docs, total)
- logger.debug("Sync receive status: %s" % msg)
diff --git a/client/src/leap/soledad/client/http_target/fetch_protocol.py b/client/src/leap/soledad/client/http_target/fetch_protocol.py
deleted file mode 100644
index 851eb3a1..00000000
--- a/client/src/leap/soledad/client/http_target/fetch_protocol.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# -*- coding: utf-8 -*-
-# fetch_protocol.py
-# Copyright (C) 2016 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import json
-from functools import partial
-from six import StringIO
-from twisted.web._newclient import ResponseDone
-from leap.soledad.common.l2db import errors
-from leap.soledad.common.l2db.remote import utils
-from leap.soledad.common.log import getLogger
-from .support import ReadBodyProtocol
-from .support import readBody
-
-logger = getLogger(__name__)
-
-
-class DocStreamReceiver(ReadBodyProtocol):
- """
- A protocol implementation that can parse incoming data from server based
- on a line format specified on u1db implementation. Except that we split doc
- attributes from content to ease parsing and increment throughput for larger
- documents.
- [\r\n
- {metadata},\r\n
- {doc_info},\r\n
- {content},\r\n
- ...
- {doc_info},\r\n
- {content},\r\n
- ]
- """
-
- def __init__(self, response, deferred, doc_reader):
- self.deferred = deferred
- self.status = response.code if response else None
- self.message = response.phrase if response else None
- self.headers = response.headers if response else {}
- self.delimiter = '\r\n'
- self.metadata = ''
- self._doc_reader = doc_reader
- self.reset()
-
- def reset(self):
- self._line = 0
- self._buffer = StringIO()
- self._properly_finished = False
-
- def connectionLost(self, reason):
- """
- Deliver the accumulated response bytes to the waiting L{Deferred}, if
- the response body has been completely received without error.
- """
- if self.deferred.called:
- return
- try:
- if reason.check(ResponseDone):
- self.dataBuffer = self.metadata
- else:
- self.dataBuffer = self.finish()
- except errors.BrokenSyncStream as e:
- return self.deferred.errback(e)
- return ReadBodyProtocol.connectionLost(self, reason)
-
- def consumeBufferLines(self):
- """
- Consumes lines from buffer and rewind it, writing remaining data
- that didn't formed a line back into buffer.
- """
- content = self._buffer.getvalue()[0:self._buffer.tell()]
- self._buffer.seek(0)
- lines = content.split(self.delimiter)
- self._buffer.write(lines.pop(-1))
- return lines
-
- def dataReceived(self, data):
- """
- Buffer incoming data until a line breaks comes in. We check only
- the incoming data for efficiency.
- """
- self._buffer.write(data)
- if '\n' not in data:
- return
- lines = self.consumeBufferLines()
- while lines:
- line, _ = utils.check_and_strip_comma(lines.pop(0))
- self.lineReceived(line)
- self._line += 1
-
- def lineReceived(self, line):
- """
- Protocol implementation.
- 0: [\r\n
- 1: {metadata},\r\n
- (even): {doc_info},\r\n
- (odd): {data},\r\n
- (last): ]
- """
- if self._properly_finished:
- raise errors.BrokenSyncStream("Reading a finished stream")
- if ']' == line:
- self._properly_finished = True
- elif self._line == 0:
- if line is not '[':
- raise errors.BrokenSyncStream("Invalid start")
- elif self._line == 1:
- self.metadata = line
- if 'error' in self.metadata:
- raise errors.BrokenSyncStream("Error from server: %s" % line)
- self.total = json.loads(line).get('number_of_changes', -1)
- elif (self._line % 2) == 0:
- self.current_doc = json.loads(line)
- if 'error' in self.current_doc:
- raise errors.BrokenSyncStream("Error from server: %s" % line)
- else:
- d = self._doc_reader(
- self.current_doc, line.strip() or None, self.total)
- d.addErrback(self.deferred.errback)
-
- def finish(self):
- """
- Checks that ']' came and stream was properly closed.
- """
- if not self._properly_finished:
- raise errors.BrokenSyncStream('Stream not properly closed')
- content = self._buffer.getvalue()[0:self._buffer.tell()]
- self._buffer.close()
- return content
-
-
-def build_body_reader(doc_reader):
- """
- Get the documents from a sync stream and call doc_reader on each
- doc received.
-
- @param doc_reader: Function to be called for processing an incoming doc.
- Will be called with doc metadata (dict parsed from 1st line) and doc
- content (string)
- @type doc_reader: function
-
- @return: A function that can be called by the http Agent to create and
- configure the proper protocol.
- """
- protocolClass = partial(DocStreamReceiver, doc_reader=doc_reader)
- return partial(readBody, protocolClass=protocolClass)
diff --git a/client/src/leap/soledad/client/http_target/send.py b/client/src/leap/soledad/client/http_target/send.py
deleted file mode 100644
index 2b286ec5..00000000
--- a/client/src/leap/soledad/client/http_target/send.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# -*- coding: utf-8 -*-
-# send.py
-# Copyright (C) 2015 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import json
-
-from twisted.internet import defer
-
-from leap.soledad.common.log import getLogger
-from leap.soledad.client.events import emit_async
-from leap.soledad.client.events import SOLEDAD_SYNC_SEND_STATUS
-from leap.soledad.client.http_target.support import RequestBody
-from .send_protocol import DocStreamProducer
-
-logger = getLogger(__name__)
-
-
-class HTTPDocSender(object):
- """
- Handles Document uploading from Soledad server, using HTTP as transport.
- They need to be encrypted and metadata prepared before sending.
- """
-
- # The uuid of the local replica.
- # Any class inheriting from this one should provide a meaningful attribute
- # if the sync status event is meant to be used somewhere else.
-
- uuid = 'undefined'
- userid = 'undefined'
-
- @defer.inlineCallbacks
- def _send_docs(self, docs_by_generation, last_known_generation,
- last_known_trans_id, sync_id):
-
- if not docs_by_generation:
- defer.returnValue([None, None])
-
- # add remote replica metadata to the request
- body = RequestBody(
- last_known_generation=last_known_generation,
- last_known_trans_id=last_known_trans_id,
- sync_id=sync_id,
- ensure=self._ensure_callback is not None)
- result = yield self._send_batch(body, docs_by_generation)
- response_dict = json.loads(result)[0]
- gen_after_send = response_dict['new_generation']
- trans_id_after_send = response_dict['new_transaction_id']
- defer.returnValue([gen_after_send, trans_id_after_send])
-
- @defer.inlineCallbacks
- def _send_batch(self, body, docs):
- total, calls = len(docs), []
- for i, entry in enumerate(docs):
- calls.append((self._prepare_one_doc,
- entry, body, i + 1, total))
- result = yield self._send_request(body, calls)
- _emit_send_status(self.uuid, body.consumed, total)
-
- defer.returnValue(result)
-
- def _send_request(self, body, calls):
- return self._http_request(
- self._url,
- method='POST',
- body=(body, calls),
- content_type='application/x-soledad-sync-put',
- body_producer=DocStreamProducer)
-
- @defer.inlineCallbacks
- def _prepare_one_doc(self, entry, body, idx, total):
- get_doc_call, gen, trans_id = entry
- doc, content = yield self._encrypt_doc(get_doc_call)
- body.insert_info(
- id=doc.doc_id, rev=doc.rev, content=content, gen=gen,
- trans_id=trans_id, number_of_docs=total,
- doc_idx=idx)
- _emit_send_status(self.uuid, body.consumed, total)
-
- @defer.inlineCallbacks
- def _encrypt_doc(self, get_doc_call):
- f, args, kwargs = get_doc_call
- doc = yield f(*args, **kwargs)
- if doc.is_tombstone():
- defer.returnValue((doc, None))
- else:
- content = yield self._crypto.encrypt_doc(doc)
- defer.returnValue((doc, content))
-
-
-def _emit_send_status(user_data, idx, total):
- content = {'sent': idx, 'total': total}
- emit_async(SOLEDAD_SYNC_SEND_STATUS, user_data, content)
-
- msg = "%d/%d" % (idx, total)
- logger.debug("Sync send status: %s" % msg)
diff --git a/client/src/leap/soledad/client/http_target/send_protocol.py b/client/src/leap/soledad/client/http_target/send_protocol.py
deleted file mode 100644
index 4941aa34..00000000
--- a/client/src/leap/soledad/client/http_target/send_protocol.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# -*- coding: utf-8 -*-
-# send_protocol.py
-# Copyright (C) 2016 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-from zope.interface import implementer
-from twisted.internet import defer
-from twisted.internet import reactor
-from twisted.web.iweb import IBodyProducer
-from twisted.web.iweb import UNKNOWN_LENGTH
-
-
-@implementer(IBodyProducer)
-class DocStreamProducer(object):
- """
- A producer that writes the body of a request to a consumer.
- """
-
- def __init__(self, producer):
- """
- Initialize the string produer.
-
- :param producer: A RequestBody instance and a list of producer calls
- :type producer: (.support.RequestBody, [(function, *args)])
- """
- self.body, self.producer = producer
- self.length = UNKNOWN_LENGTH
- self.pause = False
- self.stop = False
-
- @defer.inlineCallbacks
- def startProducing(self, consumer):
- """
- Write the body to the consumer.
-
- :param consumer: Any IConsumer provider.
- :type consumer: twisted.internet.interfaces.IConsumer
-
- :return: A Deferred that fires when production ends.
- :rtype: twisted.internet.defer.Deferred
- """
- while self.producer and not self.stop:
- if self.pause:
- yield self.sleep(0.001)
- continue
- call = self.producer.pop(0)
- fun, args = call[0], call[1:]
- yield fun(*args)
- consumer.write(self.body.pop(1, leave_open=True))
- consumer.write(self.body.pop(0)) # close stream
-
- def sleep(self, secs):
- d = defer.Deferred()
- reactor.callLater(secs, d.callback, None)
- return d
-
- def pauseProducing(self):
- self.pause = True
-
- def stopProducing(self):
- self.stop = True
-
- def resumeProducing(self):
- self.pause = False
diff --git a/client/src/leap/soledad/client/http_target/support.py b/client/src/leap/soledad/client/http_target/support.py
deleted file mode 100644
index d8d8e420..00000000
--- a/client/src/leap/soledad/client/http_target/support.py
+++ /dev/null
@@ -1,220 +0,0 @@
-# -*- coding: utf-8 -*-
-# support.py
-# Copyright (C) 2015 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import warnings
-import json
-
-from twisted.internet import defer
-from twisted.web.client import _ReadBodyProtocol
-from twisted.web.client import PartialDownloadError
-from twisted.web._newclient import ResponseDone
-from twisted.web._newclient import PotentialDataLoss
-
-from leap.soledad.common.l2db import errors
-from leap.soledad.common.l2db.remote import http_errors
-
-# we want to make sure that HTTP errors will raise appropriate u1db errors,
-# that is, fire errbacks with the appropriate failures, in the context of
-# twisted. Because of that, we redefine the http body reader used by the HTTP
-# client below.
-
-
-class ReadBodyProtocol(_ReadBodyProtocol):
- """
- From original Twisted implementation, focused on adding our error
- handling and ensuring that the proper u1db error is raised.
- """
-
- def __init__(self, response, deferred):
- """
- Initialize the protocol, additionally storing the response headers.
- """
- _ReadBodyProtocol.__init__(
- self, response.code, response.phrase, deferred)
- self.headers = response.headers
-
- # ---8<--- snippet from u1db.remote.http_client, modified to use errbacks
- def _error(self, respdic):
- descr = respdic.get("error")
- exc_cls = errors.wire_description_to_exc.get(descr)
- if exc_cls is not None:
- message = respdic.get("message")
- self.deferred.errback(exc_cls(message))
- else:
- self.deferred.errback(
- errors.HTTPError(self.status, respdic, self.headers))
- # ---8<--- end of snippet from u1db.remote.http_client
-
- def connectionLost(self, reason):
- """
- Deliver the accumulated response bytes to the waiting L{Deferred}, if
- the response body has been completely received without error.
- """
- if reason.check(ResponseDone):
-
- body = b''.join(self.dataBuffer)
-
- # ---8<--- snippet from u1db.remote.http_client
- if self.status in (200, 201):
- self.deferred.callback(body)
- elif self.status in http_errors.ERROR_STATUSES:
- try:
- respdic = json.loads(body)
- except ValueError:
- self.deferred.errback(
- errors.HTTPError(self.status, body, self.headers))
- else:
- self._error(respdic)
- # special cases
- elif self.status == 503:
- self.deferred.errback(errors.Unavailable(body, self.headers))
- else:
- self.deferred.errback(
- errors.HTTPError(self.status, body, self.headers))
- # ---8<--- end of snippet from u1db.remote.http_client
-
- elif reason.check(PotentialDataLoss):
- self.deferred.errback(
- PartialDownloadError(self.status, self.message,
- b''.join(self.dataBuffer)))
- else:
- self.deferred.errback(reason)
-
-
-def readBody(response, protocolClass=ReadBodyProtocol):
- """
- Get the body of an L{IResponse} and return it as a byte string.
-
- This is a helper function for clients that don't want to incrementally
- receive the body of an HTTP response.
-
- @param response: The HTTP response for which the body will be read.
- @type response: L{IResponse} provider
-
- @return: A L{Deferred} which will fire with the body of the response.
- Cancelling it will close the connection to the server immediately.
- """
- def cancel(deferred):
- """
- Cancel a L{readBody} call, close the connection to the HTTP server
- immediately, if it is still open.
-
- @param deferred: The cancelled L{defer.Deferred}.
- """
- abort = getAbort()
- if abort is not None:
- abort()
-
- d = defer.Deferred(cancel)
- protocol = protocolClass(response, d)
-
- def getAbort():
- return getattr(protocol.transport, 'abortConnection', None)
-
- response.deliverBody(protocol)
-
- if protocol.transport is not None and getAbort() is None:
- warnings.warn(
- 'Using readBody with a transport that does not have an '
- 'abortConnection method',
- category=DeprecationWarning,
- stacklevel=2)
-
- return d
-
-
-class RequestBody(object):
- """
- This class is a helper to generate send and fetch requests.
- The expected format is something like:
- [
- {headers},
- {entry1},
- {...},
- {entryN},
- ]
- """
-
- def __init__(self, **header_dict):
- """
- Creates a new RequestBody holding header information.
-
- :param header_dict: A dictionary with the headers.
- :type header_dict: dict
- """
- self.headers = header_dict
- self.entries = []
- self.consumed = 0
-
- def insert_info(self, **entry_dict):
- """
- Dumps an entry into JSON format and add it to entries list.
- Adds 'content' key on a new line if it's present.
-
- :param entry_dict: Entry as a dictionary
- :type entry_dict: dict
- """
- content = ''
- if 'content' in entry_dict:
- content = ',\r\n' + (entry_dict['content'] or '')
- entry = json.dumps(entry_dict) + content
- self.entries.append(entry)
-
- def pop(self, amount=10, leave_open=False):
- """
- Removes entries and returns it formatted and ready
- to be sent.
-
- :param amount: number of entries to pop and format
- :type amount: int
-
- :param leave_open: flag to skip stream closing
- :type amount: bool
-
- :return: formatted body ready to be sent
- :rtype: str
- """
- start = self.consumed == 0
- amount = min([len(self.entries), amount])
- entries = [self.entries.pop(0) for i in xrange(amount)]
- self.consumed += amount
- end = len(self.entries) == 0 if not leave_open else False
- return self.entries_to_str(entries, start, end)
-
- def __str__(self):
- return self.pop(len(self.entries))
-
- def __len__(self):
- return len(self.entries)
-
- def entries_to_str(self, entries=None, start=True, end=True):
- """
- Format a list of entries into the body format expected
- by the server.
-
- :param entries: entries to format
- :type entries: list
-
- :return: formatted body ready to be sent
- :rtype: str
- """
- data = ''
- if start:
- data = '[\r\n' + json.dumps(self.headers)
- data += ''.join(',\r\n' + entry for entry in entries)
- if end:
- data += '\r\n]'
- return data
diff --git a/client/src/leap/soledad/client/interfaces.py b/client/src/leap/soledad/client/interfaces.py
deleted file mode 100644
index 0600449f..00000000
--- a/client/src/leap/soledad/client/interfaces.py
+++ /dev/null
@@ -1,368 +0,0 @@
-# -*- coding: utf-8 -*-
-# interfaces.py
-# Copyright (C) 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Interfaces used by the Soledad Client.
-"""
-from zope.interface import Interface, Attribute
-
-#
-# Plugins
-#
-
-
-class ISoledadPostSyncPlugin(Interface):
- """
- I implement the minimal methods and attributes for a plugin that can be
- called after a soledad synchronization has ended.
- """
-
- def process_received_docs(self, doc_id_list):
- """
- Do something with the passed list of doc_ids received after the last
- sync.
-
- :param doc_id_list: a list of strings for the received doc_ids
- """
-
- watched_doc_types = Attribute("""
- a tuple of the watched doc types for this plugin. So far, the
- `doc-types` convention is just the preffix of the doc_id, which is
- basically its first character, followed by a dash. So, for instance,
- `M-` is used for meta-docs in mail, and `F-` is used for flag-docs in
- mail. For now there's no central register of all the doc-types
- used.""")
-
-
-#
-# Soledad storage
-#
-
-class ILocalStorage(Interface):
- """
- I implement core methods for the u1db local storage of documents and
- indexes.
- """
- local_db_path = Attribute(
- "The path for the local database replica")
- local_db_file_name = Attribute(
- "The name of the local SQLCipher U1DB database file")
- uuid = Attribute("The user uuid")
- default_prefix = Attribute(
- "Prefix for default values for path")
-
- def put_doc(self, doc):
- """
- Update a document in the local encrypted database.
-
- :param doc: the document to update
- :type doc: Document
-
- :return:
- a deferred that will fire with the new revision identifier for
- the document
- :rtype: Deferred
- """
-
- def delete_doc(self, doc):
- """
- Delete a document from the local encrypted database.
-
- :param doc: the document to delete
- :type doc: Document
-
- :return:
- a deferred that will fire with ...
- :rtype: Deferred
- """
-
- def get_doc(self, doc_id, include_deleted=False):
- """
- Retrieve a document from the local encrypted database.
-
- :param doc_id: the unique document identifier
- :type doc_id: str
- :param include_deleted:
- if True, deleted documents will be returned with empty content;
- otherwise asking for a deleted document will return None
- :type include_deleted: bool
-
- :return:
- A deferred that will fire with the document object, containing a
- Document, or None if it could not be found
- :rtype: Deferred
- """
-
- def get_docs(self, doc_ids, check_for_conflicts=True,
- include_deleted=False):
- """
- Get the content for many documents.
-
- :param doc_ids: a list of document identifiers
- :type doc_ids: list
- :param check_for_conflicts: if set False, then the conflict check will
- be skipped, and 'None' will be returned instead of True/False
- :type check_for_conflicts: bool
-
- :return:
- A deferred that will fire with an iterable giving the Document
- object for each document id in matching doc_ids order.
- :rtype: Deferred
- """
-
- def get_all_docs(self, include_deleted=False):
- """
- Get the JSON content for all documents in the database.
-
- :param include_deleted: If set to True, deleted documents will be
- returned with empty content. Otherwise deleted
- documents will not be included in the results.
- :return:
- A deferred that will fire with (generation, [Document]): that is,
- the current generation of the database, followed by a list of all
- the documents in the database.
- :rtype: Deferred
- """
-
- def create_doc(self, content, doc_id=None):
- """
- Create a new document in the local encrypted database.
-
- :param content: the contents of the new document
- :type content: dict
- :param doc_id: an optional identifier specifying the document id
- :type doc_id: str
-
- :return:
- A deferred tht will fire with the new document (Document
- instance).
- :rtype: Deferred
- """
-
- def create_doc_from_json(self, json, doc_id=None):
- """
- Create a new document.
-
- You can optionally specify the document identifier, but the document
- must not already exist. See 'put_doc' if you want to override an
- existing document.
- If the database specifies a maximum document size and the document
- exceeds it, create will fail and raise a DocumentTooBig exception.
-
- :param json: The JSON document string
- :type json: str
- :param doc_id: An optional identifier specifying the document id.
- :type doc_id:
- :return:
- A deferred that will fire with the new document (A Document
- instance)
- :rtype: Deferred
- """
-
- def create_index(self, index_name, *index_expressions):
- """
- Create an named index, which can then be queried for future lookups.
- Creating an index which already exists is not an error, and is cheap.
- Creating an index which does not match the index_expressions of the
- existing index is an error.
- Creating an index will block until the expressions have been evaluated
- and the index generated.
-
- :param index_name: A unique name which can be used as a key prefix
- :type index_name: str
- :param index_expressions:
- index expressions defining the index information.
- :type index_expressions: dict
-
- Examples:
-
- "fieldname", or "fieldname.subfieldname" to index alphabetically
- sorted on the contents of a field.
-
- "number(fieldname, width)", "lower(fieldname)"
- """
-
- def delete_index(self, index_name):
- """
- Remove a named index.
-
- :param index_name: The name of the index we are removing
- :type index_name: str
- """
-
- def list_indexes(self):
- """
- List the definitions of all known indexes.
-
- :return: A list of [('index-name', ['field', 'field2'])] definitions.
- :rtype: Deferred
- """
-
- def get_from_index(self, index_name, *key_values):
- """
- Return documents that match the keys supplied.
-
- You must supply exactly the same number of values as have been defined
- in the index. It is possible to do a prefix match by using '*' to
- indicate a wildcard match. You can only supply '*' to trailing entries,
- (eg 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.)
- It is also possible to append a '*' to the last supplied value (eg
- 'val*', '*', '*' or 'val', 'val*', '*', but not 'val*', 'val', '*')
-
- :param index_name: The index to query
- :type index_name: str
- :param key_values: values to match. eg, if you have
- an index with 3 fields then you would have:
- get_from_index(index_name, val1, val2, val3)
- :type key_values: tuple
- :return: List of [Document]
- :rtype: list
- """
-
- def get_count_from_index(self, index_name, *key_values):
- """
- Return the count of the documents that match the keys and
- values supplied.
-
- :param index_name: The index to query
- :type index_name: str
- :param key_values: values to match. eg, if you have
- an index with 3 fields then you would have:
- get_from_index(index_name, val1, val2, val3)
- :type key_values: tuple
- :return: count.
- :rtype: int
- """
-
- def get_range_from_index(self, index_name, start_value, end_value):
- """
- Return documents that fall within the specified range.
-
- Both ends of the range are inclusive. For both start_value and
- end_value, one must supply exactly the same number of values as have
- been defined in the index, or pass None. In case of a single column
- index, a string is accepted as an alternative for a tuple with a single
- value. It is possible to do a prefix match by using '*' to indicate
- a wildcard match. You can only supply '*' to trailing entries, (eg
- 'val', '*', '*' is allowed, but '*', 'val', 'val' is not.) It is also
- possible to append a '*' to the last supplied value (eg 'val*', '*',
- '*' or 'val', 'val*', '*', but not 'val*', 'val', '*')
-
- :param index_name: The index to query
- :type index_name: str
- :param start_values: tuples of values that define the lower bound of
- the range. eg, if you have an index with 3 fields then you would
- have: (val1, val2, val3)
- :type start_values: tuple
- :param end_values: tuples of values that define the upper bound of the
- range. eg, if you have an index with 3 fields then you would have:
- (val1, val2, val3)
- :type end_values: tuple
- :return: A deferred that will fire with a list of [Document]
- :rtype: Deferred
- """
-
- def get_index_keys(self, index_name):
- """
- Return all keys under which documents are indexed in this index.
-
- :param index_name: The index to query
- :type index_name: str
- :return:
- A deferred that will fire with a list of tuples of indexed keys.
- :rtype: Deferred
- """
-
- def get_doc_conflicts(self, doc_id):
- """
- Get the list of conflicts for the given document.
-
- :param doc_id: the document id
- :type doc_id: str
-
- :return:
- A deferred that will fire with a list of the document entries that
- are conflicted.
- :rtype: Deferred
- """
-
- def resolve_doc(self, doc, conflicted_doc_revs):
- """
- Mark a document as no longer conflicted.
-
- :param doc: a document with the new content to be inserted.
- :type doc: Document
- :param conflicted_doc_revs:
- A deferred that will fire with a list of revisions that the new
- content supersedes.
- :type conflicted_doc_revs: list
- """
-
-
-class ISyncableStorage(Interface):
- """
- I implement methods to synchronize with a remote replica.
- """
- replica_uid = Attribute("The uid of the local replica")
- syncing = Attribute(
- "Property, True if the syncer is syncing.")
- token = Attribute("The authentication Token.")
-
- def sync(self):
- """
- Synchronize the local encrypted replica with a remote replica.
-
- This method blocks until a syncing lock is acquired, so there are no
- attempts of concurrent syncs from the same client replica.
-
- :param url: the url of the target replica to sync with
- :type url: str
-
- :return:
- A deferred that will fire with the local generation before the
- synchronisation was performed.
- :rtype: str
- """
-
- def stop_sync(self):
- """
- Stop the current syncing process.
- """
-
-
-class ISecretsStorage(Interface):
- """
- I implement methods needed for initializing and accessing secrets, that are
- synced against the Shared Recovery Database.
- """
- secrets_file_name = Attribute(
- "The name of the file where the storage secrets will be stored")
-
- # XXX this used internally from secrets, so it might be good to preserve
- # as a public boundary with other components.
-
- # We should also probably document its interface.
- secrets = Attribute("A SoledadSecrets object containing access to secrets")
-
- def change_passphrase(self, new_passphrase):
- """
- Change the passphrase that encrypts the storage secret.
-
- :param new_passphrase: The new passphrase.
- :type new_passphrase: unicode
-
- :raise NoStorageSecret: Raised if there's no storage secret available.
- """
diff --git a/client/src/leap/soledad/client/shared_db.py b/client/src/leap/soledad/client/shared_db.py
deleted file mode 100644
index 4f70c74b..00000000
--- a/client/src/leap/soledad/client/shared_db.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# -*- coding: utf-8 -*-
-# shared_db.py
-# Copyright (C) 2013 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-A shared database for storing/retrieving encrypted key material.
-"""
-from leap.soledad.common.l2db.remote.http_database import HTTPDatabase
-
-from leap.soledad.client.auth import TokenBasedAuth
-
-
-# ----------------------------------------------------------------------------
-# Soledad shared database
-# ----------------------------------------------------------------------------
-
-# TODO could have a hierarchy of soledad exceptions.
-
-
-class NoTokenForAuth(Exception):
- """
- No token was found for token-based authentication.
- """
-
-
-class Unauthorized(Exception):
- """
- User does not have authorization to perform task.
- """
-
-
-class ImproperlyConfiguredError(Exception):
- """
- Wrong parameters in the database configuration.
- """
-
-
-class SoledadSharedDatabase(HTTPDatabase, TokenBasedAuth):
- """
- This is a shared recovery database that enables users to store their
- encryption secrets in the server and retrieve them afterwards.
- """
- # TODO: prevent client from messing with the shared DB.
- # TODO: define and document API.
-
- #
- # Token auth methods.
- #
-
- def set_token_credentials(self, uuid, token):
- """
- Store given credentials so we can sign the request later.
-
- :param uuid: The user's uuid.
- :type uuid: str
- :param token: The authentication token.
- :type token: str
- """
- TokenBasedAuth.set_token_credentials(self, uuid, token)
-
- def _sign_request(self, method, url_query, params):
- """
- Return an authorization header to be included in the HTTP request.
-
- :param method: The HTTP method.
- :type method: str
- :param url_query: The URL query string.
- :type url_query: str
- :param params: A list with encoded query parameters.
- :type param: list
-
- :return: The Authorization header.
- :rtype: list of tuple
- """
- return TokenBasedAuth._sign_request(self, method, url_query, params)
-
- #
- # Modified HTTPDatabase methods.
- #
-
- @staticmethod
- def open_database(url, creds=None):
- """
- Open a Soledad shared database.
-
- :param url: URL of the remote database.
- :type url: str
- :param creds: A tuple containing the authentication method and
- credentials.
- :type creds: tuple
-
- :return: The shared database in the given url.
- :rtype: SoledadSharedDatabase
- """
- db = SoledadSharedDatabase(url, creds=creds)
- return db
-
- @staticmethod
- def delete_database(url):
- """
- Dummy method that prevents from deleting shared database.
-
- :raise: This will always raise an Unauthorized exception.
-
- :param url: The database URL.
- :type url: str
- """
- raise Unauthorized("Can't delete shared database.")
-
- def __init__(self, url, document_factory=None, creds=None):
- """
- Initialize database with auth token and encryption powers.
-
- :param url: URL of the remote database.
- :type url: str
- :param document_factory: A factory for U1BD documents.
- :type document_factory: u1db.Document
- :param creds: A tuple containing the authentication method and
- credentials.
- :type creds: tuple
- """
- HTTPDatabase.__init__(self, url, document_factory, creds)
diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py
deleted file mode 100644
index 2a927189..00000000
--- a/client/src/leap/soledad/client/sync.py
+++ /dev/null
@@ -1,231 +0,0 @@
-# -*- coding: utf-8 -*-
-# sync.py
-# Copyright (C) 2014 LEAP
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-"""
-Soledad synchronization utilities.
-"""
-import os
-
-from twisted.internet import defer
-
-from leap.soledad.common.log import getLogger
-from leap.soledad.common.l2db import errors
-from leap.soledad.common.l2db.sync import Synchronizer
-from leap.soledad.common.errors import BackendNotReadyError
-
-
-logger = getLogger(__name__)
-
-
-# we may want to collect statistics from the sync process
-DO_STATS = False
-if os.environ.get('SOLEDAD_STATS'):
- DO_STATS = True
-
-
-class SoledadSynchronizer(Synchronizer):
- """
- Collect the state around synchronizing 2 U1DB replicas.
-
- Synchronization is bi-directional, in that new items in the source are sent
- to the target, and new items in the target are returned to the source.
- However, it still recognizes that one side is initiating the request. Also,
- at the moment, conflicts are only created in the source.
-
- Also modified to allow for interrupting the synchronization process.
- """
- received_docs = []
-
- def __init__(self, *args, **kwargs):
- Synchronizer.__init__(self, *args, **kwargs)
- if DO_STATS:
- self.sync_phase = [0]
- self.sync_exchange_phase = None
-
- @defer.inlineCallbacks
- def sync(self):
- """
- Synchronize documents between source and target.
-
- :return: A deferred which will fire after the sync has finished with
- the local generation before the synchronization was performed.
- :rtype: twisted.internet.defer.Deferred
- """
-
- sync_target = self.sync_target
- self.received_docs = []
-
- # ---------- phase 1: get sync info from server ----------------------
- if DO_STATS:
- self.sync_phase[0] += 1
- self.sync_exchange_phase = self.sync_target.sync_exchange_phase
- # --------------------------------------------------------------------
-
- # get target identifier, its current generation,
- # and its last-seen database generation for this source
- ensure_callback = None
- try:
- (self.target_replica_uid, target_gen, target_trans_id,
- target_my_gen, target_my_trans_id) = yield \
- sync_target.get_sync_info(self.source._replica_uid)
- except (errors.DatabaseDoesNotExist, BackendNotReadyError) as e:
- logger.warn("Database isn't ready on server. Will be created.")
- logger.warn("Reason: %s" % e.__class__)
- self.target_replica_uid = None
- target_gen, target_trans_id = 0, ''
- target_my_gen, target_my_trans_id = 0, ''
-
- logger.debug("target replica uid: %s" % self.target_replica_uid)
- logger.debug("target generation: %d" % target_gen)
- logger.debug("target trans id: %s" % target_trans_id)
- logger.debug("target my gen: %d" % target_my_gen)
- logger.debug("target my trans_id: %s" % target_my_trans_id)
- logger.debug("source replica_uid: %s" % self.source._replica_uid)
-
- # make sure we'll have access to target replica uid once it exists
- if self.target_replica_uid is None:
-
- def ensure_callback(replica_uid):
- self.target_replica_uid = replica_uid
-
- # make sure we're not syncing one replica with itself
- if self.target_replica_uid == self.source._replica_uid:
- raise errors.InvalidReplicaUID
-
- # validate the info the target has about the source replica
- self.source.validate_gen_and_trans_id(
- target_my_gen, target_my_trans_id)
-
- # ---------- phase 2: what's changed ---------------------------------
- if DO_STATS:
- self.sync_phase[0] += 1
- # --------------------------------------------------------------------
-
- # what's changed since that generation and this current gen
- my_gen, _, changes = self.source.whats_changed(target_my_gen)
- logger.debug("there are %d documents to send" % len(changes))
-
- # get source last-seen database generation for the target
- if self.target_replica_uid is None:
- target_last_known_gen, target_last_known_trans_id = 0, ''
- else:
- target_last_known_gen, target_last_known_trans_id = \
- self.source._get_replica_gen_and_trans_id(
- self.target_replica_uid)
- logger.debug(
- "last known target gen: %d" % target_last_known_gen)
- logger.debug(
- "last known target trans_id: %s" % target_last_known_trans_id)
-
- # validate transaction ids
- if not changes and target_last_known_gen == target_gen:
- if target_trans_id != target_last_known_trans_id:
- raise errors.InvalidTransactionId
- defer.returnValue(my_gen)
-
- # ---------- phase 3: sync exchange ----------------------------------
- if DO_STATS:
- self.sync_phase[0] += 1
- # --------------------------------------------------------------------
-
- docs_by_generation = self._docs_by_gen_from_changes(changes)
-
- # exchange documents and try to insert the returned ones with
- # the target, return target synced-up-to gen.
- new_gen, new_trans_id = yield sync_target.sync_exchange(
- docs_by_generation, self.source._replica_uid,
- target_last_known_gen, target_last_known_trans_id,
- self._insert_doc_from_target, ensure_callback=ensure_callback)
- ids_sent = [doc_id for doc_id, _, _ in changes]
- logger.debug("target gen after sync: %d" % new_gen)
- logger.debug("target trans_id after sync: %s" % new_trans_id)
- if hasattr(self.source, 'commit'): # sqlcipher backend speed up
- self.source.commit() # insert it all in a single transaction
- info = {
- "target_replica_uid": self.target_replica_uid,
- "new_gen": new_gen,
- "new_trans_id": new_trans_id,
- "my_gen": my_gen
- }
- self._syncing_info = info
-
- # ---------- phase 4: complete sync ----------------------------------
- if DO_STATS:
- self.sync_phase[0] += 1
- # --------------------------------------------------------------------
-
- yield self.complete_sync()
-
- _, _, changes = self.source.whats_changed(target_my_gen)
- changed_doc_ids = [doc_id for doc_id, _, _ in changes]
-
- just_received = list(set(changed_doc_ids) - set(ids_sent))
- self.received_docs = just_received
-
- # ---------- phase 5: sync is over -----------------------------------
- if DO_STATS:
- self.sync_phase[0] += 1
- # --------------------------------------------------------------------
-
- defer.returnValue(my_gen)
-
- def _docs_by_gen_from_changes(self, changes):
- docs_by_generation = []
- kwargs = {'include_deleted': True}
- for doc_id, gen, trans in changes:
- get_doc = (self.source.get_doc, (doc_id,), kwargs)
- docs_by_generation.append((get_doc, gen, trans))
- return docs_by_generation
-
- def complete_sync(self):
- """
- Last stage of the synchronization:
- (a) record last known generation and transaction uid for the remote
- replica, and
- (b) make target aware of our current reached generation.
-
- :return: A deferred which will fire when the sync has been completed.
- :rtype: twisted.internet.defer.Deferred
- """
- logger.debug("completing deferred last step in sync...")
-
- # record target synced-up-to generation including applying what we
- # sent
- info = self._syncing_info
- self.source._set_replica_gen_and_trans_id(
- info["target_replica_uid"], info["new_gen"], info["new_trans_id"])
-
- # if gapless record current reached generation with target
- return self._record_sync_info_with_the_target(info["my_gen"])
-
- def _record_sync_info_with_the_target(self, start_generation):
- """
- Store local replica metadata in server.
-
- :param start_generation: The local generation when the sync was
- started.
- :type start_generation: int
-
- :return: A deferred which will fire when the operation has been
- completed.
- :rtype: twisted.internet.defer.Deferred
- """
- cur_gen, trans_id = self.source._get_generation_info()
- if (cur_gen == start_generation + self.num_inserted and
- self.num_inserted > 0):
- return self.sync_target.record_sync_info(
- self.source._replica_uid, cur_gen, trans_id)
- return defer.succeed(None)