summaryrefslogtreecommitdiff
path: root/src/leap/soledad/client/_crypto.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/leap/soledad/client/_crypto.py')
-rw-r--r--src/leap/soledad/client/_crypto.py557
1 files changed, 557 insertions, 0 deletions
diff --git a/src/leap/soledad/client/_crypto.py b/src/leap/soledad/client/_crypto.py
new file mode 100644
index 00000000..8cedf52e
--- /dev/null
+++ b/src/leap/soledad/client/_crypto.py
@@ -0,0 +1,557 @@
+# -*- coding: utf-8 -*-
+# _crypto.py
+# Copyright (C) 2016 LEAP Encryption Access Project
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""
+Cryptographic operations for the soledad client.
+
+This module implements streaming crypto operations.
+It replaces the old client.crypto module, that will be deprecated in soledad
+0.12.
+
+The algorithm for encrypting and decrypting is as follows:
+
+The KEY is a 32 bytes value.
+The IV is a random 16 bytes value.
+The PREAMBLE is a packed_structure with encryption metadata, such as IV.
+The SEPARATOR is a space.
+
+Encryption
+----------
+
+IV = os.urandom(16)
+PREAMBLE = BLOB_SIGNATURE_MAGIC, ENC_SCHEME, ENC_METHOD, time, IV, doc_id, rev,
+and size.
+
+PREAMBLE = base64_encoded(PREAMBLE)
+CIPHERTEXT = base64_encoded(AES_GCM(KEY, cleartext) + resulting_tag) if armor
+
+CIPHERTEXT = AES_GCM(KEY, cleartext) + resulting_tag if not armor
+# "resulting_tag" came from AES-GCM encryption. It will be the last 16 bytes of
+# our ciphertext.
+
+encrypted_payload = PREAMBLE + SEPARATOR + CIPHERTEXT
+
+Decryption
+----------
+
+Ciphertext and Tag CAN come encoded in base64 (with armor=True) or raw (with
+armor=False). Preamble will always come encoded in base64.
+
+PREAMBLE, CIPHERTEXT = PAYLOAD.SPLIT(' ', 1)
+
+PREAMBLE = base64_decode(PREAMBLE)
+CIPHERTEXT = base64_decode(CIPHERTEXT) if armor else CIPHERTEXT
+
+CIPHERTEXT, TAG = CIPHERTEXT[:-16], CIPHERTEXT[-16:]
+CLEARTEXT = aes_gcm_decrypt(KEY, IV, CIPHERTEXT, TAG, associated_data=PREAMBLE)
+
+AES-GCM will check preamble authenticity as well, since we are using
+Authenticated Encryption with Associated Data (AEAD). Ciphertext and associated
+data (PREAMBLE) authenticity will both be checked together during decryption.
+PREAMBLE consistency (if it matches the desired document, for instance) is
+checked during PREAMBLE reading.
+"""
+
+
+import base64
+import hashlib
+import warnings
+import hmac
+import os
+import struct
+import time
+
+from io import BytesIO
+from collections import namedtuple
+
+from twisted.internet import defer
+from twisted.internet import interfaces
+from twisted.web.client import FileBodyProducer
+
+from leap.soledad.common import soledad_assert
+from cryptography.exceptions import InvalidTag
+from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
+from cryptography.hazmat.backends import default_backend
+
+from zope.interface import implementer
+
+
# Offset into the storage secret: everything from this position onwards is
# used as the HMAC key when deriving per-document symmetric keys.
SECRET_LENGTH = 64
SEPARATOR = ' '  # Anything that doesn't belong to base64 encoding

CRYPTO_BACKEND = default_backend()

# Binary layout of the preamble, in the order the fields are packed.
PACMAN = struct.Struct(
    '2s'    # magic signature
    'b'     # encryption scheme
    'b'     # encryption method
    'Q'     # timestamp
    '16s'   # IV
    '255p'  # doc_id
    '255p'  # rev
    'Q'     # (rounded) content size
)
# Same layout as above, minus the trailing content size.
LEGACY_PACMAN = struct.Struct(
    '2s'    # magic signature
    'b'     # encryption scheme
    'b'     # encryption method
    'Q'     # timestamp
    '16s'   # IV
    '255p'  # doc_id
    '255p'  # rev
)
BLOB_SIGNATURE_MAGIC = '\x13\x37'


ENC_SCHEME = namedtuple('SCHEME', ['symkey'])(symkey=1)
ENC_METHOD = namedtuple(
    'METHOD', ['aes_256_ctr', 'aes_256_gcm'])(aes_256_ctr=1, aes_256_gcm=2)
DocInfo = namedtuple('DocInfo', ['doc_id', 'rev'])
+
+
class EncryptionDecryptionError(Exception):
    """
    Raised when an encryption or decryption operation cannot be set up, for
    instance when no secret is given or the key has the wrong length.
    """
+
+
class InvalidBlob(Exception):
    """
    Raised when an encrypted blob is malformed, does not match the expected
    document, or fails authentication.
    """
+
+
class SoledadCrypto(object):
    """
    Thin facade over BlobEncryptor and BlobDecryptor that offers document
    level encryption and decryption.
    """

    def __init__(self, secret):
        """
        Store the secret used for all subsequent operations.

        :param secret: The Soledad remote storage secret.
        :type secret: str
        """
        self.secret = secret

    def encrypt_doc(self, doc):
        """
        Encrypt the JSON content of a document with a BlobEncryptor and wrap
        the outcome in a minimal JSON string under the "raw" key.

        :param doc: the document to be encrypted.
        :type doc: Document
        :return: A deferred whose callback will be invoked with a JSON string
                 containing the ciphertext as the value of "raw" key.
        :rtype: twisted.internet.defer.Deferred
        """
        cleartext_fd = BytesIO(str(doc.get_json()))
        doc_info = DocInfo(doc.doc_id, doc.rev)
        # Everything needed has been copied out; drop the local reference.
        del doc

        def wrap_as_json(encrypted_fd):
            return ''.join(['{"raw": "', encrypted_fd.getvalue(), '"}'])

        blob_encryptor = BlobEncryptor(
            doc_info, cleartext_fd, secret=self.secret)
        deferred = blob_encryptor.encrypt()
        deferred.addCallback(wrap_as_json)
        return deferred

    def decrypt_doc(self, doc):
        """
        Decrypt the payload found under the "raw" key of the content of an
        encrypted document, using a BlobDecryptor.

        :param doc: the document to be decrypted.
        :type doc: Document
        :return: Whatever BlobDecryptor.decrypt() returns: a deferred that
                 fires with a file-like object holding the cleartext.
        :rtype: twisted.internet.defer.Deferred
        """
        doc_info = DocInfo(doc.doc_id, doc.rev)
        ciphertext_fd = BytesIO()
        raw_payload = doc.content['raw']
        # Everything needed has been copied out; drop the local reference.
        del doc
        ciphertext_fd.write(str(raw_payload))
        blob_decryptor = BlobDecryptor(
            doc_info, ciphertext_fd, secret=self.secret)
        return blob_decryptor.decrypt()
+
+
def encrypt_sym(data, key, method=ENC_METHOD.aes_256_gcm):
    """
    Encrypt data with AES-256, in the cipher mode selected by ``method``.

    :param data: The data to be encrypted.
    :type data: str
    :param key: The key used to encrypt data (must be 256 bits long).
    :type key: str
    :param method: One of the ENC_METHOD values. Anything other than
                   aes_256_gcm selects CTR mode.
    :type method: int

    :return: A tuple with the base64 encoded initialization vector and the
             raw (not encoded) ciphertext. If the cipher produced a tag, it
             is appended to the ciphertext.
    :rtype: (str, str)
    """
    writer = AESWriter(key, mode=_mode_by_method(method))
    writer.write(data)
    ciphertext = writer.end()[1]
    encoded_iv = base64.b64encode(writer.iv)
    auth_tag = writer.tag
    if not auth_tag:
        # No tag available for this mode: append nothing.
        auth_tag = ''
    return encoded_iv, ciphertext + auth_tag
+
+
def decrypt_sym(data, key, iv, method=ENC_METHOD.aes_256_gcm):
    """
    Decrypt data with AES-256, in the cipher mode selected by ``method``.

    :param data: The data to be decrypted. In GCM mode its last 16 bytes are
                 taken to be the authentication tag.
    :type data: str
    :param key: The symmetric key used to decrypt data (must be 256 bits
                long).
    :type key: str
    :param iv: The base64 encoded initialization vector.
    :type iv: str
    :param method: One of the ENC_METHOD values. Anything other than
                   aes_256_gcm selects CTR mode.
    :type method: int

    :return: The decrypted data.
    :rtype: str
    """
    raw_iv = base64.b64decode(str(iv))
    cipher_mode = _mode_by_method(method)
    if cipher_mode == modes.GCM:
        # The tag travels glued to the end of the ciphertext.
        auth_tag = data[-16:]
        data = data[:-16]
    else:
        auth_tag = None
    reader = AESWriter(key, raw_iv, tag=auth_tag, mode=cipher_mode)
    reader.write(data)
    return reader.end()[1]
+
+
+# TODO maybe rename this to Encryptor, since it will be used by blobs and non
+# blobs in soledad.
class BlobEncryptor(object):
    """
    Encrypts the cleartext associated with a given Document using AES-256 in
    GCM mode.

    The cleartext is pushed through a Twisted FileBodyProducer, which reads
    the input in chunks of at most 65536 bytes and hands them to an AESWriter.

    Input and output are both file-like objects, so that the operation can be
    applied to a stream of data.
    """
    # TODO
    # This class needs further work to allow for proper streaming.
    # Right now we HAVE TO WAIT until the end of the stream before encoding the
    # result. It should be possible to do that just encoding the chunks and
    # passing them to a sink, but for that we have to encode the chunks at
    # proper alignment (3 bytes?) with b64 if armor is defined.

    def __init__(self, doc_info, content_fd, secret=None, armor=True,
                 sink=None):
        """
        :param doc_info: identifies the document (needs doc_id and rev).
        :type doc_info: DocInfo
        :param content_fd: seekable file-like object holding the cleartext.
        :param secret: the secret from which the document key is derived.
        :param armor: whether the ciphertext should be base64 encoded.
        :type armor: bool
        :param sink: optional file-like object handed to the AESWriter as its
                     output buffer.

        :raise EncryptionDecryptionError: if no secret is given.
        """
        if not secret:
            raise EncryptionDecryptionError('no secret given')

        self.doc_id, self.rev = doc_info.doc_id, doc_info.rev
        self.armor = armor

        self._content_fd = content_fd
        self._content_size = self._get_rounded_size(content_fd)
        self._producer = FileBodyProducer(content_fd, readSize=65536)

        self.sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret)
        self._aes = AESWriter(self.sym_key, _buffer=sink)
        # The preamble embeds the IV, so it can only be built once the
        # AESWriter exists. It is fed to the cipher as associated data.
        preamble = self._encode_preamble()
        self._aes.authenticate(preamble)

    def _get_rounded_size(self, fd):
        """
        Return the size of the content of ``fd`` rounded up by _ceiling, so
        that the exact original size is not exposed. The file position is
        left at the start.
        """
        fd.seek(0, os.SEEK_END)
        exact_size = fd.tell()
        rounded = _ceiling(exact_size)
        fd.seek(0)
        return rounded

    @property
    def iv(self):
        """The IV in use by the underlying AESWriter."""
        return self._aes.iv

    @property
    def tag(self):
        """The tag as reported by the underlying AESWriter."""
        return self._aes.tag

    def encrypt(self):
        """
        Starts producing encrypted data from the cleartext data.

        :return: A deferred which will be fired when encryption ends and whose
                 callback will be invoked with the resulting ciphertext.
        :rtype: twisted.internet.defer.Deferred
        """
        # XXX pass a sink to aes?
        def finish(_):
            return self._end_crypto_stream_and_encode_result()

        deferred = self._producer.startProducing(self._aes)
        deferred.addCallback(finish)
        return deferred

    def _encode_preamble(self):
        """
        Pack the encryption metadata (magic, scheme, method, current time,
        IV, doc_id, rev and rounded content size) into its binary form.
        """
        now = int(time.time())
        fields = (
            BLOB_SIGNATURE_MAGIC,
            ENC_SCHEME.symkey,
            ENC_METHOD.aes_256_gcm,
            now,
            self.iv,
            str(self.doc_id),
            str(self.rev),
            self._content_size,
        )
        return PACMAN.pack(*fields)

    def _end_crypto_stream_and_encode_result(self):
        """
        Finish the cipher stream and assemble the final payload: the base64
        encoded preamble, the separator, and the ciphertext followed by the
        tag (base64 encoded as a whole when armor is set).

        :return: an already fired deferred carrying a BytesIO positioned at
                 the start of the payload.
        :rtype: twisted.internet.defer.Deferred
        """
        # TODO ---- this needs to be refactored to allow PROPER streaming
        # We should write the preamble as soon as possible,
        # Is it possible to write the AES stream as soon as it is encrypted by
        # chunks?
        # FIXME also, it needs to be able to encode chunks with base64 if armor

        preamble, encrypted = self._aes.end()
        payload = BytesIO()
        payload.write(base64.urlsafe_b64encode(preamble))
        payload.write(SEPARATOR)

        body = encrypted + self.tag
        if self.armor:
            body = base64.urlsafe_b64encode(body)
        payload.write(body)

        payload.seek(0)
        return defer.succeed(payload)
+
+
+# TODO maybe rename this to just Decryptor, since it will be used by blobs
+# and non blobs in soledad.
class BlobDecryptor(object):
    """
    Decrypts an encrypted blob associated with a given Document.

    Will raise an exception if the blob doesn't have the expected structure, or
    if the GCM tag doesn't verify.
    """

    def __init__(self, doc_info, ciphertext_fd, result=None,
                 secret=None, armor=True, start_stream=True, tag=None):
        """
        Parse and validate the preamble and prepare the AES stream.

        :param doc_info: identifies the expected document (doc_id and rev).
        :type doc_info: DocInfo
        :param ciphertext_fd: in-memory file-like object holding the encrypted
                              payload. It must support ``getvalue()`` and is
                              rewritten in place so that it only holds the raw
                              ciphertext.
        :param result: optional file-like object receiving the cleartext. A
                       new BytesIO is used if none is given.
        :param secret: the secret from which the document key is derived.
        :param armor: whether the ciphertext is base64 encoded.
        :type armor: bool
        :param start_stream: whether to create the producer right away.
        :type start_stream: bool
        :param tag: optional tag. When given, it takes precedence over the one
                    extracted from the payload.

        :raise EncryptionDecryptionError: if no secret is given.
        :raise InvalidBlob: if the preamble is malformed or does not match
                            the expected document.
        """
        if not secret:
            raise EncryptionDecryptionError('no secret given')

        self.doc_id = doc_info.doc_id
        self.rev = doc_info.rev
        self.fd = ciphertext_fd
        self.armor = armor
        self._producer = None
        self.result = result or BytesIO()
        sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret)
        # Both are filled in by _consume_preamble when available.
        self.size = None
        self.tag = None

        preamble, iv = self._consume_preamble()
        soledad_assert(preamble)
        soledad_assert(iv)

        self._aes = AESWriter(sym_key, iv, self.result, tag=tag or self.tag)
        # The preamble is associated data: it gets authenticated together
        # with the ciphertext.
        self._aes.authenticate(preamble)
        if start_stream:
            self._start_stream()

    @property
    def decrypted_content_size(self):
        """Number of bytes written to the underlying AESWriter so far."""
        return self._aes.written

    def _start_stream(self):
        """Create the producer that will read from the ciphertext fd."""
        self._producer = FileBodyProducer(self.fd, readSize=2**16)

    def _consume_preamble(self):
        """
        Consume the preamble and write remaining bytes as ciphertext. This
        function is called during a stream and can be holding both, so we need
        to consume only preamble and store the remaining.

        As side effects, ``self.tag`` is set to the last 16 bytes of the
        (decoded) ciphertext and ``self.size`` to the size found in a
        non-legacy preamble.

        :return: a tuple with the raw preamble and the IV found in it.
        :rtype: (str, str)

        :raise InvalidBlob: if the payload cannot be decoded, or the preamble
                            has an unexpected size or values.
        """
        self.fd.seek(0)
        try:
            parts = self.fd.getvalue().split(SEPARATOR, 1)
            preamble = base64.urlsafe_b64decode(parts[0])
            if len(parts) == 2:
                ciphertext = parts[1]
                if self.armor:
                    ciphertext = base64.urlsafe_b64decode(ciphertext)
                self.tag, ciphertext = ciphertext[-16:], ciphertext[:-16]
                # Replace the content of the fd with the bare ciphertext.
                self.fd.seek(0)
                self.fd.write(ciphertext)
                self.fd.seek(len(ciphertext))
                self.fd.truncate()
                self.fd.seek(0)

        except (TypeError, ValueError):
            raise InvalidBlob

        try:
            if len(preamble) == LEGACY_PACMAN.size:
                warnings.warn("Decrypting a legacy document without size. " +
                              "This will be deprecated in 0.12. Doc was: " +
                              "doc_id: %s rev: %s" % (self.doc_id, self.rev),
                              Warning)
                unpacked_data = LEGACY_PACMAN.unpack(preamble)
                magic, sch, meth, ts, iv, doc_id, rev = unpacked_data
            elif len(preamble) == PACMAN.size:
                unpacked_data = PACMAN.unpack(preamble)
                magic, sch, meth, ts, iv, doc_id, rev, doc_size = unpacked_data
                self.size = doc_size
            else:
                # Format the size into the message instead of passing it as a
                # second, never-interpolated exception argument.
                raise InvalidBlob(
                    "Unexpected preamble size %d" % len(preamble))
        except struct.error as e:
            raise InvalidBlob(e)

        if magic != BLOB_SIGNATURE_MAGIC:
            raise InvalidBlob
        # TODO check timestamp. Just as a sanity check, but for instance
        # we can refuse to process something that is in the future or
        # too far in the past (1984 would be nice, hehe)
        if sch != ENC_SCHEME.symkey:
            raise InvalidBlob('Invalid scheme: %s' % sch)
        if meth != ENC_METHOD.aes_256_gcm:
            raise InvalidBlob('Invalid encryption scheme: %s' % meth)
        if rev != self.rev:
            msg = 'Invalid revision. Expected: %s, was: %s' % (self.rev, rev)
            raise InvalidBlob(msg)
        if doc_id != self.doc_id:
            # The message must be a single parenthesized expression: a
            # continuation line starting with "+" is a separate statement
            # that applies unary plus to a string and raises TypeError.
            msg = ('Invalid doc_id. Expected: %s, was: %s'
                   % (self.doc_id, doc_id))
            raise InvalidBlob(msg)

        return preamble, iv

    def _end_stream(self):
        """
        Finalize the AES stream and rewind the result.

        :return: the file-like object holding the cleartext.

        :raise InvalidBlob: if the tag does not verify.
        """
        try:
            self._aes.end()
        except InvalidTag:
            raise InvalidBlob('Invalid Tag. Blob authentication failed.')
        self.result.seek(0)
        return self.result

    def decrypt(self):
        """
        Starts feeding the ciphertext to the AES stream in order to produce
        the cleartext.

        :return: A deferred which will be fired when decryption ends and whose
                 callback will be invoked with the file-like object holding
                 the resulting cleartext.
        :rtype: twisted.internet.defer.Deferred
        """
        d = self.startProducing()
        d.addCallback(lambda _: self._end_stream())
        return d

    def startProducing(self):
        """
        Start the producer (creating it first if needed), with the AES stream
        as its consumer.

        :return: the deferred returned by the producer.
        :rtype: twisted.internet.defer.Deferred
        """
        if not self._producer:
            self._start_stream()
        return self._producer.startProducing(self._aes)

    def endStream(self):
        """
        Finalize the stream. See _end_stream.

        :return: the file-like object holding the cleartext.
        """
        return self._end_stream()

    def write(self, data):
        """Feed a chunk of ciphertext directly to the AES stream."""
        self._aes.write(data)

    def close(self):
        """
        Finalize the AES stream and return whatever AESWriter.end() returns.
        Unlike _end_stream, an InvalidTag error is not converted here.
        """
        result = self._aes.end()
        return result
+
+
@implementer(interfaces.IConsumer)
class AESWriter(object):
    """
    A Twisted consumer that runs whatever is written to it through an AES-256
    cipher (GCM mode by default) and stores the output in a buffer.

    It is used both for encryption and decryption of a stream, depending on
    the value of the tag parameter. If you pass a tag, it will operate in
    decryption mode, verifying the authenticity of the preamble and ciphertext.
    If no tag is passed, encryption mode is assumed, which will generate a tag.
    """

    def __init__(self, key, iv=None, _buffer=None, tag=None, mode=modes.GCM):
        """
        :param key: the 32 bytes long key.
        :param iv: the initialization vector. A random 16 bytes value is
                   generated if none is given.
        :param _buffer: file-like object receiving the output. A new BytesIO
                        is used if none is given.
        :param tag: the authentication tag, when decrypting.
        :param mode: the cipher mode class to be used.

        :raise EncryptionDecryptionError: if the key is not 32 bytes long.
        """
        if len(key) != 32:
            raise EncryptionDecryptionError('key is not 256 bits')

        decrypting = tag is not None
        if decrypting:
            # if tag, we're decrypting
            assert iv is not None

        self.iv = iv if iv else os.urandom(16)
        self.buffer = _buffer if _buffer else BytesIO()
        aes = _get_aes_cipher(key, self.iv, tag, mode)
        if tag:
            context = aes.decryptor()
        else:
            context = aes.encryptor()
        self.cipher = context
        # Accumulates every piece of associated data that gets authenticated.
        self.aead = ''
        self.written = 0

    def authenticate(self, data):
        """
        Record ``data`` and pass it to the cipher as additional data to be
        authenticated.
        """
        self.aead += data
        self.cipher.authenticate_additional_data(data)

    @property
    def tag(self):
        """The ``tag`` attribute of the cipher, or None if it has none."""
        return getattr(self.cipher, 'tag', None)

    def write(self, data):
        """
        Run ``data`` through the cipher, append the output to the buffer and
        account for the amount of input received.
        """
        self.written = self.written + len(data)
        processed = self.cipher.update(data)
        self.buffer.write(processed)

    def end(self):
        """
        Finalize the cipher, appending any remaining output to the buffer.

        :return: a tuple with the accumulated associated data and the whole
                 content of the buffer.
        """
        remaining = self.cipher.finalize()
        self.buffer.write(remaining)
        return self.aead, self.buffer.getvalue()
+
+
def is_symmetrically_encrypted(content):
    """
    Returns True if the document was symmetrically encrypted.
    'EzcB' is the base64 encoding of \\x13\\x37 magic number and 1
    (symmetrically encrypted value for enc_scheme flag).

    :param content: The document content as string. May be empty or None.
    :type content: str

    :return: True if content starts with the signature of a symmetrically
             encrypted document, False otherwise (including for empty or
             missing content).
    :rtype: bool
    """
    sym_signature = '{"raw": "EzcB'
    # bool() makes sure that an empty or None content yields False instead of
    # leaking the falsy value itself to the caller.
    return bool(content) and content.startswith(sym_signature)
+
+
+# utils
+
+
+def _hmac_sha256(key, data):
+ return hmac.new(key, data, hashlib.sha256).digest()
+
+
def _get_sym_key_for_doc(doc_id, secret):
    """
    Derive the symmetric key for a document: the HMAC-SHA256 of its doc_id,
    keyed with the portion of the secret that follows its first SECRET_LENGTH
    characters.
    """
    return _hmac_sha256(secret[SECRET_LENGTH:], doc_id)
+
+
def _get_aes_cipher(key, iv, tag, mode=modes.GCM):
    """
    Build an AES Cipher object for the given key, in the given mode.

    The tag is only passed along when the mode is GCM; any other mode is
    instantiated with the IV alone.
    """
    if mode == modes.GCM:
        mode_instance = mode(iv, tag)
    else:
        mode_instance = mode(iv)
    algorithm = algorithms.AES(key)
    return Cipher(algorithm, mode_instance, backend=CRYPTO_BACKEND)
+
+
def _mode_by_method(method):
    """
    Map an ENC_METHOD value to a cipher mode class: GCM for aes_256_gcm, CTR
    for anything else.
    """
    return modes.GCM if method == ENC_METHOD.aes_256_gcm else modes.CTR
+
+
+def _ceiling(size):
+ """
+ Some simplistic ceiling scheme that uses powers of 2.
+ We report everything below 4096 bytes as that minimum threshold.
+ See #8759 for research pending for less simplistic/aggresive strategies.
+ """
+ for i in xrange(12, 31):
+ step = 2 ** i
+ if size < step:
+ return step