diff options
Diffstat (limited to 'src/leap/soledad/client/_crypto.py')
-rw-r--r-- | src/leap/soledad/client/_crypto.py | 557 |
1 files changed, 557 insertions, 0 deletions
# -*- coding: utf-8 -*-
# _crypto.py
# Copyright (C) 2016 LEAP Encryption Access Project
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
Cryptographic operations for the soledad client.

This module implements streaming crypto operations.
It replaces the old client.crypto module, that will be deprecated in soledad
0.12.

The algorithm for encrypting and decrypting is as follows:

The KEY is a 32 bytes value.
The IV is a random 16 bytes value.
The PREAMBLE is a packed_structure with encryption metadata, such as IV.
The SEPARATOR is a space.

Encryption
----------

IV = os.urandom(16)
PREAMBLE = BLOB_SIGNATURE_MAGIC, ENC_SCHEME, ENC_METHOD, time, IV, doc_id, rev,
and size.

PREAMBLE = base64_encoded(PREAMBLE)
CIPHERTEXT = base64_encoded(AES_GCM(KEY, cleartext) + resulting_tag) if armor

CIPHERTEXT = AES_GCM(KEY, cleartext) + resulting_tag if not armor
# "resulting_tag" came from AES-GCM encryption. It will be the last 16 bytes of
# our ciphertext.

encrypted_payload = PREAMBLE + SEPARATOR + CIPHERTEXT

Decryption
----------

Ciphertext and Tag CAN come encoded in base64 (with armor=True) or raw (with
armor=False). Preamble will always come encoded in base64.

PREAMBLE, CIPHERTEXT = PAYLOAD.SPLIT(' ', 1)

PREAMBLE = base64_decode(PREAMBLE)
CIPHERTEXT = base64_decode(CIPHERTEXT) if armor else CIPHERTEXT

CIPHERTEXT, TAG = CIPHERTEXT[:-16], CIPHERTEXT[-16:]
CLEARTEXT = aes_gcm_decrypt(KEY, IV, CIPHERTEXT, TAG, associated_data=PREAMBLE)

AES-GCM will check preamble authenticity as well, since we are using
Authenticated Encryption with Associated Data (AEAD). Ciphertext and associated
data (PREAMBLE) authenticity will both be checked together during decryption.
PREAMBLE consistency (if it matches the desired document, for instance) is
checked during PREAMBLE reading.
"""


import base64
import hashlib
import warnings
import hmac
import os
import struct
import time

from io import BytesIO
from collections import namedtuple

from twisted.internet import defer
from twisted.internet import interfaces
from twisted.web.client import FileBodyProducer

from leap.soledad.common import soledad_assert
from cryptography.exceptions import InvalidTag
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend

from zope.interface import implementer


# Offset into the storage secret: everything after the first SECRET_LENGTH
# bytes is used as the HMAC key for per-document key derivation (see
# _get_sym_key_for_doc).
SECRET_LENGTH = 64
SEPARATOR = ' '  # Anything that doesn't belong to base64 encoding

CRYPTO_BACKEND = default_backend()

# Preamble layout, in the order used by BlobEncryptor._encode_preamble:
# magic (2s), scheme (b), method (b), timestamp (Q), iv (16s),
# doc_id (255p), rev (255p) and, for the current format only, size (Q).
# NOTE(review): there is no byte-order prefix, so struct uses native size
# and alignment; the two formats are told apart by their packed size.
PACMAN = struct.Struct('2sbbQ16s255p255pQ')
LEGACY_PACMAN = struct.Struct('2sbbQ16s255p255p')
BLOB_SIGNATURE_MAGIC = '\x13\x37'


ENC_SCHEME = namedtuple('SCHEME', 'symkey')(1)
ENC_METHOD = namedtuple('METHOD', 'aes_256_ctr aes_256_gcm')(1, 2)
DocInfo = namedtuple('DocInfo', 'doc_id rev')


class EncryptionDecryptionError(Exception):
    """
    Raised when a crypto object cannot be set up, e.g. because no secret was
    given or the key does not have the expected length.
    """
    pass


class InvalidBlob(Exception):
    """
    Raised when an encrypted blob is malformed, does not match the expected
    document, or fails authentication.
    """
    pass


class SoledadCrypto(object):
    """
    This class provides convenient methods for document encryption and
    decryption using BlobEncryptor and BlobDecryptor classes.
    """
    def __init__(self, secret):
        """
        Initialize the crypto object.

        :param secret: The Soledad remote storage secret.
        :type secret: str
        """
        self.secret = secret

    def encrypt_doc(self, doc):
        """
        Creates and configures a BlobEncryptor, asking it to start encryption
        and wrapping the result as a simple JSON string with a "raw" key.

        :param doc: the document to be encrypted.
        :type doc: Document
        :return: A deferred whose callback will be invoked with a JSON string
                 containing the ciphertext as the value of "raw" key.
        :rtype: twisted.internet.defer.Deferred
        """

        def put_raw(blob):
            # The payload is base64 text plus a space separator (armor is
            # left at its default of True), so it is embedded in the JSON
            # string without any escaping.
            raw = blob.getvalue()
            return '{"raw": "' + raw + '"}'

        content = BytesIO(str(doc.get_json()))
        info = DocInfo(doc.doc_id, doc.rev)
        del doc
        encryptor = BlobEncryptor(info, content, secret=self.secret)
        d = encryptor.encrypt()
        d.addCallback(put_raw)
        return d

    def decrypt_doc(self, doc):
        """
        Creates and configures a BlobDecryptor, asking it to decrypt and
        returning the decrypted cleartext content from the encrypted document.

        :param doc: the document to be decrypted. Its content must hold the
                    encrypted payload under the "raw" key.
        :type doc: Document
        :return: A deferred whose callback will be invoked with a file-like
                 object holding the decrypted cleartext content of the
                 document.
        :rtype: twisted.internet.defer.Deferred
        """
        info = DocInfo(doc.doc_id, doc.rev)
        ciphertext = BytesIO()
        payload = doc.content['raw']
        del doc
        ciphertext.write(str(payload))
        decryptor = BlobDecryptor(info, ciphertext, secret=self.secret)
        return decryptor.decrypt()


def encrypt_sym(data, key, method=ENC_METHOD.aes_256_gcm):
    """
    Encrypt data using AES-256 cipher in selected mode.

    :param data: The data to be encrypted.
    :type data: str
    :param key: The key used to encrypt data (must be 256 bits long).
    :type key: str
    :param method: One of the ENC_METHOD values, selecting the cipher mode.
    :type method: int

    :return: A tuple with the base64 encoded initialization vector and the
             raw ciphertext. When the cipher produces a tag (GCM), the tag
             is appended to the ciphertext.
    :rtype: (str, str)
    """
    mode = _mode_by_method(method)
    encryptor = AESWriter(key, mode=mode)
    encryptor.write(data)
    _, ciphertext = encryptor.end()
    iv = base64.b64encode(encryptor.iv)
    # No tag is available when the cipher is not an AEAD one (CTR).
    tag = encryptor.tag or ''
    return iv, ciphertext + tag


def decrypt_sym(data, key, iv, method=ENC_METHOD.aes_256_gcm):
    """
    Decrypt data using AES-256 cipher in selected mode.

    :param data: The data to be decrypted. In GCM mode the last 16 bytes
                 are taken as the authentication tag.
    :type data: str
    :param key: The symmetric key used to decrypt data (must be 256 bits
                long).
    :type key: str
    :param iv: The base64 encoded initialization vector.
    :type iv: str
    :param method: One of the ENC_METHOD values, selecting the cipher mode.
    :type method: int

    :return: The decrypted data.
    :rtype: str
    """
    _iv = base64.b64decode(str(iv))
    mode = _mode_by_method(method)
    tag = None
    if mode == modes.GCM:
        data, tag = data[:-16], data[-16:]
    decryptor = AESWriter(key, _iv, tag=tag, mode=mode)
    decryptor.write(data)
    _, plaintext = decryptor.end()
    return plaintext


# TODO maybe rename this to Encryptor, since it will be used by blobs and non
# blobs in soledad.
class BlobEncryptor(object):
    """
    Produces encrypted data from the cleartext data associated with a given
    Document using AES-256 cipher in GCM mode.

    The production happens using a Twisted's FileBodyProducer, which uses a
    Cooperator to schedule calls and can be paused/resumed. Each call takes at
    most 65536 bytes from the input.

    Both the production input and output are file descriptors, so they can be
    applied to a stream of data.
    """
    # TODO
    # This class needs further work to allow for proper streaming.
    # Right now we HAVE TO WAIT until the end of the stream before encoding the
    # result. It should be possible to do that just encoding the chunks and
    # passing them to a sink, but for that we have to encode the chunks at
    # proper alignment (3 bytes?) with b64 if armor is defined.

    def __init__(self, doc_info, content_fd, secret=None, armor=True,
                 sink=None):
        """
        Set up the cipher and authenticate the preamble.

        :param doc_info: The id and revision of the document being encrypted.
        :type doc_info: DocInfo
        :param content_fd: A seekable file-like object with the cleartext.
        :type content_fd: file-like object
        :param secret: The Soledad remote storage secret.
        :type secret: str
        :param armor: Whether the ciphertext should be base64 encoded.
        :type armor: bool
        :param sink: An optional buffer where the ciphertext is written to.
        :type sink: file-like object

        :raise EncryptionDecryptionError: if no secret is given.
        """
        if not secret:
            raise EncryptionDecryptionError('no secret given')

        self.doc_id = doc_info.doc_id
        self.rev = doc_info.rev
        self.armor = armor

        self._content_fd = content_fd
        self._content_size = self._get_rounded_size(content_fd)
        self._producer = FileBodyProducer(content_fd, readSize=2**16)

        self.sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret)
        self._aes = AESWriter(self.sym_key, _buffer=sink)
        # The preamble needs the IV, which only exists once the AESWriter
        # has been created.
        self._aes.authenticate(self._encode_preamble())

    def _get_rounded_size(self, fd):
        """
        Returns a rounded value in order to minimize information leaks due to
        the original size being exposed.
        """
        fd.seek(0, os.SEEK_END)
        size = _ceiling(fd.tell())
        fd.seek(0)
        return size

    @property
    def iv(self):
        return self._aes.iv

    @property
    def tag(self):
        return self._aes.tag

    def encrypt(self):
        """
        Starts producing encrypted data from the cleartext data.

        :return: A deferred which will be fired when encryption ends and whose
                 callback will be invoked with the resulting ciphertext.
        :rtype: twisted.internet.defer.Deferred
        """
        # XXX pass a sink to aes?
        d = self._producer.startProducing(self._aes)
        d.addCallback(lambda _: self._end_crypto_stream_and_encode_result())
        return d

    def _encode_preamble(self):
        """
        Pack the encryption metadata (magic, scheme, method, current time,
        IV, doc_id, rev and rounded content size) using the PACMAN format.

        :return: The packed preamble.
        :rtype: str
        """
        current_time = int(time.time())

        preamble = PACMAN.pack(
            BLOB_SIGNATURE_MAGIC,
            ENC_SCHEME.symkey,
            ENC_METHOD.aes_256_gcm,
            current_time,
            self.iv,
            str(self.doc_id),
            str(self.rev),
            self._content_size)
        return preamble

    def _end_crypto_stream_and_encode_result(self):
        """
        Finalize the cipher and build the encrypted payload, which is the
        base64 encoded preamble, the separator and the ciphertext followed
        by the tag (base64 encoded if armor is set).

        :return: A fired deferred holding a BytesIO with the payload.
        :rtype: twisted.internet.defer.Deferred
        """

        # TODO ---- this needs to be refactored to allow PROPER streaming
        # We should write the preamble as soon as possible,
        # Is it possible to write the AES stream as soon as it is encrypted by
        # chunks?
        # FIXME also, it needs to be able to encode chunks with base64 if armor

        preamble, encrypted = self._aes.end()
        result = BytesIO()
        result.write(
            base64.urlsafe_b64encode(preamble))
        result.write(SEPARATOR)

        if self.armor:
            result.write(
                base64.urlsafe_b64encode(encrypted + self.tag))
        else:
            result.write(encrypted + self.tag)

        result.seek(0)
        return defer.succeed(result)


# TODO maybe rename this to just Decryptor, since it will be used by blobs
# and non blobs in soledad.
class BlobDecryptor(object):
    """
    Decrypts an encrypted blob associated with a given Document.

    Will raise an exception if the blob doesn't have the expected structure, or
    if the GCM tag doesn't verify.
    """
    def __init__(self, doc_info, ciphertext_fd, result=None,
                 secret=None, armor=True, start_stream=True, tag=None):
        """
        Read and validate the preamble and set up the cipher.

        :param doc_info: The id and revision the blob is expected to be for.
        :type doc_info: DocInfo
        :param ciphertext_fd: A buffer holding the encrypted payload. It has
                              to provide getvalue(), as BytesIO does.
        :type ciphertext_fd: file-like object
        :param result: An optional buffer where the cleartext is written to.
        :type result: file-like object
        :param secret: The Soledad remote storage secret.
        :type secret: str
        :param armor: Whether the ciphertext is base64 encoded.
        :type armor: bool
        :param start_stream: Whether to create the producer right away.
        :type start_stream: bool
        :param tag: The GCM tag. If given, it takes precedence over the one
                    found at the end of the ciphertext.
        :type tag: str

        :raise EncryptionDecryptionError: if no secret is given.
        :raise InvalidBlob: if the preamble is malformed or doesn't match
                            the expected document.
        """
        if not secret:
            raise EncryptionDecryptionError('no secret given')

        self.doc_id = doc_info.doc_id
        self.rev = doc_info.rev
        self.fd = ciphertext_fd
        self.armor = armor
        self._producer = None
        self.result = result or BytesIO()
        sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret)
        self.size = None
        self.tag = None

        # May set self.tag and self.size as a side effect.
        preamble, iv = self._consume_preamble()
        soledad_assert(preamble)
        soledad_assert(iv)

        # NOTE(review): if no tag was passed and none could be read from the
        # payload, AESWriter gets tag=None and works in encryption mode.
        self._aes = AESWriter(sym_key, iv, self.result, tag=tag or self.tag)
        self._aes.authenticate(preamble)
        if start_stream:
            self._start_stream()

    @property
    def decrypted_content_size(self):
        """
        The number of bytes that were fed to the cipher so far.
        """
        return self._aes.written

    def _start_stream(self):
        self._producer = FileBodyProducer(self.fd, readSize=2**16)

    def _consume_preamble(self):
        """
        Consume the preamble and write remaining bytes as ciphertext. This
        function is called during a stream and can be holding both, so we need
        to consume only preamble and store the remaining.

        After this call the ciphertext buffer holds only the raw ciphertext,
        without preamble, armor or tag.

        :return: A tuple with the packed preamble and the IV.
        :rtype: (str, str)

        :raise InvalidBlob: if the payload can't be decoded, the preamble
                            can't be unpacked or its contents don't match
                            what is expected for this document.
        """
        self.fd.seek(0)
        try:
            parts = self.fd.getvalue().split(SEPARATOR, 1)
            preamble = base64.urlsafe_b64decode(parts[0])
            if len(parts) == 2:
                ciphertext = parts[1]
                if self.armor:
                    ciphertext = base64.urlsafe_b64decode(ciphertext)
                self.tag, ciphertext = ciphertext[-16:], ciphertext[:-16]
                # Overwrite the buffer so it contains only the ciphertext.
                self.fd.seek(0)
                self.fd.write(ciphertext)
                self.fd.seek(len(ciphertext))
                self.fd.truncate()
                self.fd.seek(0)

        except (TypeError, ValueError):
            raise InvalidBlob

        try:
            if len(preamble) == LEGACY_PACMAN.size:
                warnings.warn("Decrypting a legacy document without size. " +
                              "This will be deprecated in 0.12. Doc was: " +
                              "doc_id: %s rev: %s" % (self.doc_id, self.rev),
                              Warning)
                unpacked_data = LEGACY_PACMAN.unpack(preamble)
                magic, sch, meth, ts, iv, doc_id, rev = unpacked_data
            elif len(preamble) == PACMAN.size:
                unpacked_data = PACMAN.unpack(preamble)
                magic, sch, meth, ts, iv, doc_id, rev, doc_size = unpacked_data
                self.size = doc_size
            else:
                # The size has to be interpolated with %; passing it as a
                # second argument leaves the message unformatted.
                raise InvalidBlob(
                    "Unexpected preamble size %d" % len(preamble))
        except struct.error as e:
            raise InvalidBlob(e)

        if magic != BLOB_SIGNATURE_MAGIC:
            raise InvalidBlob
        # TODO check timestamp. Just as a sanity check, but for instance
        # we can refuse to process something that is in the future or
        # too far in the past (1984 would be nice, hehe)
        if sch != ENC_SCHEME.symkey:
            raise InvalidBlob('Invalid scheme: %s' % sch)
        if meth != ENC_METHOD.aes_256_gcm:
            raise InvalidBlob('Invalid encryption scheme: %s' % meth)
        if rev != self.rev:
            msg = 'Invalid revision. Expected: %s, was: %s' % (self.rev, rev)
            raise InvalidBlob(msg)
        if doc_id != self.doc_id:
            # Build the message in a single expression: splitting it in two
            # statements applies a unary plus to a string, which raises
            # TypeError instead of InvalidBlob.
            msg = ('Invalid doc_id. Expected: %s, was: %s'
                   % (self.doc_id, doc_id))
            raise InvalidBlob(msg)

        return preamble, iv

    def _end_stream(self):
        """
        Finalize the cipher, which verifies the tag, and rewind the result.

        :return: The buffer holding the cleartext.
        :rtype: file-like object

        :raise InvalidBlob: if the tag verification fails.
        """
        try:
            self._aes.end()
        except InvalidTag:
            raise InvalidBlob('Invalid Tag. Blob authentication failed.')
        fd = self.result
        fd.seek(0)
        return self.result

    def decrypt(self):
        """
        Starts producing cleartext data from the encrypted data.

        :return: A deferred which will be fired when decryption ends and whose
                 callback will be invoked with a file-like object holding the
                 resulting cleartext.
        :rtype: twisted.internet.defer.Deferred
        """
        d = self.startProducing()
        d.addCallback(lambda _: self._end_stream())
        return d

    def startProducing(self):
        """
        Start feeding the ciphertext to the cipher, creating the producer
        first if that was not done yet.

        :return: The deferred returned by the producer.
        :rtype: twisted.internet.defer.Deferred
        """
        if not self._producer:
            self._start_stream()
        return self._producer.startProducing(self._aes)

    def endStream(self):
        self._end_stream()

    def write(self, data):
        self._aes.write(data)

    def close(self):
        # Unlike _end_stream, an InvalidTag raised here is not translated
        # into InvalidBlob.
        result = self._aes.end()
        return result


@implementer(interfaces.IConsumer)
class AESWriter(object):
    """
    A Twisted's Consumer implementation that takes an input file descriptor and
    applies AES-256 cipher in GCM mode.

    It is used both for encryption and decryption of a stream, depending on
    the value of the tag parameter. If you pass a tag, it will operate in
    decryption mode, verifying the authenticity of the preamble and ciphertext.
    If no tag is passed, encryption mode is assumed, which will generate a tag.
    """

    def __init__(self, key, iv=None, _buffer=None, tag=None, mode=modes.GCM):
        """
        :param key: The symmetric key (must be 256 bits long).
        :type key: str
        :param iv: The initialization vector. A random one is generated if
                   it is not given.
        :type iv: str
        :param _buffer: The buffer where the cipher output is written to.
                        Defaults to a new BytesIO.
        :type _buffer: file-like object
        :param tag: The authentication tag. Passing one selects decryption.
        :type tag: str
        :param mode: The cipher mode class, either modes.GCM or modes.CTR.
        :type mode: class

        :raise EncryptionDecryptionError: if the key is not 32 bytes long.
        """
        if len(key) != 32:
            raise EncryptionDecryptionError('key is not 256 bits')

        if tag is not None:
            # if tag, we're decrypting
            assert iv is not None

        self.iv = iv or os.urandom(16)
        self.buffer = _buffer or BytesIO()
        cipher = _get_aes_cipher(key, self.iv, tag, mode)
        cipher = cipher.decryptor() if tag else cipher.encryptor()
        self.cipher, self.aead = cipher, ''
        self.written = 0

    def authenticate(self, data):
        """
        Add data to be authenticated, but not encrypted, by the cipher.
        """
        self.aead += data
        self.cipher.authenticate_additional_data(data)

    @property
    def tag(self):
        # Not every cipher context has a tag attribute, hence the default.
        return getattr(self.cipher, 'tag', None)

    def write(self, data):
        """
        Feed data to the cipher and write its output to the buffer.
        """
        self.written += len(data)
        self.buffer.write(self.cipher.update(data))

    def end(self):
        """
        Finalize the cipher.

        :return: A tuple with the authenticated data and the whole content
                 of the buffer.
        :rtype: (str, str)
        """
        self.buffer.write(self.cipher.finalize())
        return self.aead, self.buffer.getvalue()


def is_symmetrically_encrypted(content):
    """
    Returns True if the document was symmetrically encrypted.
    'EzcB' is the base64 encoding of \\x13\\x37 magic number and 1
    (symmetrically encrypted value for enc_scheme flag).

    :param content: The document content as string
    :type content: str

    :rtype: bool
    """
    sym_signature = '{"raw": "EzcB'
    # bool() makes empty or missing content yield False instead of the
    # falsy content itself.
    return bool(content) and content.startswith(sym_signature)


# utils


def _hmac_sha256(key, data):
    return hmac.new(key, data, hashlib.sha256).digest()


def _get_sym_key_for_doc(doc_id, secret):
    """
    Derive the key for a document as the HMAC-SHA256 of its id, keyed with
    the part of the secret that follows the first SECRET_LENGTH bytes.
    """
    key = secret[SECRET_LENGTH:]
    return _hmac_sha256(key, doc_id)


def _get_aes_cipher(key, iv, tag, mode=modes.GCM):
    # Only GCM takes a tag; the other modes are built from the IV alone.
    mode = mode(iv, tag) if mode == modes.GCM else mode(iv)
    return Cipher(algorithms.AES(key), mode, backend=CRYPTO_BACKEND)


def _mode_by_method(method):
    if method == ENC_METHOD.aes_256_gcm:
        return modes.GCM
    else:
        return modes.CTR


def _ceiling(size):
    """
    Some simplistic ceiling scheme that uses powers of 2.
    We report everything below 4096 bytes as that minimum threshold.
    See #8759 for research pending for less simplistic/aggressive strategies.

    :param size: The real size, in bytes.
    :type size: int

    :return: The smallest power of 2 that is greater than size, and at
             least 4096.
    :rtype: int
    """
    step = 2 ** 12
    # Keep doubling with no upper bound, so that sizes of 2 ** 30 and above
    # get a value too instead of an implicit None.
    while size >= step:
        step *= 2
    return step