summary refs log tree commit diff
path: root/client/src/leap/soledad/client/_crypto.py
diff options
context:
space:
mode:
Diffstat (limited to 'client/src/leap/soledad/client/_crypto.py')
-rw-r--r-- client/src/leap/soledad/client/_crypto.py 557
1 files changed, 0 insertions, 557 deletions
diff --git a/client/src/leap/soledad/client/_crypto.py b/client/src/leap/soledad/client/_crypto.py
deleted file mode 100644
index 8cedf52e..00000000
--- a/client/src/leap/soledad/client/_crypto.py
+++ /dev/null
@@ -1,557 +0,0 @@
-# -*- coding: utf-8 -*-
-# _crypto.py
-# Copyright (C) 2016 LEAP Encryption Access Project
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-"""
-Cryptographic operations for the soledad client.
-
-This module implements streaming crypto operations.
-It replaces the old client.crypto module, which will be deprecated in soledad
-0.12.
-
-The algorithm for encrypting and decrypting is as follows:
-
-The KEY is a 32 bytes value.
-The IV is a random 16 bytes value.
-The PREAMBLE is a packed_structure with encryption metadata, such as IV.
-The SEPARATOR is a space.
-
-Encryption
-----------
-
-IV = os.urandom(16)
-PREAMBLE = BLOB_SIGNATURE_MAGIC, ENC_SCHEME, ENC_METHOD, time, IV, doc_id, rev,
-and size.
-
-PREAMBLE = base64_encoded(PREAMBLE)
-CIPHERTEXT = base64_encoded(AES_GCM(KEY, cleartext) + resulting_tag) if armor
-
-CIPHERTEXT = AES_GCM(KEY, cleartext) + resulting_tag if not armor
-# "resulting_tag" came from AES-GCM encryption. It will be the last 16 bytes of
-# our ciphertext.
-
-encrypted_payload = PREAMBLE + SEPARATOR + CIPHERTEXT
-
-Decryption
-----------
-
-Ciphertext and Tag CAN come encoded in base64 (with armor=True) or raw (with
-armor=False). Preamble will always come encoded in base64.
-
-PREAMBLE, CIPHERTEXT = PAYLOAD.SPLIT(' ', 1)
-
-PREAMBLE = base64_decode(PREAMBLE)
-CIPHERTEXT = base64_decode(CIPHERTEXT) if armor else CIPHERTEXT
-
-CIPHERTEXT, TAG = CIPHERTEXT[:-16], CIPHERTEXT[-16:]
-CLEARTEXT = aes_gcm_decrypt(KEY, IV, CIPHERTEXT, TAG, associated_data=PREAMBLE)
-
-AES-GCM will check preamble authenticity as well, since we are using
-Authenticated Encryption with Associated Data (AEAD). Ciphertext and associated
-data (PREAMBLE) authenticity will both be checked together during decryption.
-PREAMBLE consistency (if it matches the desired document, for instance) is
-checked during PREAMBLE reading.
-"""
-
-
-import base64
-import hashlib
-import warnings
-import hmac
-import os
-import struct
-import time
-
-from io import BytesIO
-from collections import namedtuple
-
-from twisted.internet import defer
-from twisted.internet import interfaces
-from twisted.web.client import FileBodyProducer
-
-from leap.soledad.common import soledad_assert
-from cryptography.exceptions import InvalidTag
-from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
-from cryptography.hazmat.backends import default_backend
-
-from zope.interface import implementer
-
-
-SECRET_LENGTH = 64
-SEPARATOR = ' ' # Anything that doesn't belong to base64 encoding
-
-CRYPTO_BACKEND = default_backend()
-
-PACMAN = struct.Struct('2sbbQ16s255p255pQ')
-LEGACY_PACMAN = struct.Struct('2sbbQ16s255p255p')
-BLOB_SIGNATURE_MAGIC = '\x13\x37'
-
-
-ENC_SCHEME = namedtuple('SCHEME', 'symkey')(1)
-ENC_METHOD = namedtuple('METHOD', 'aes_256_ctr aes_256_gcm')(1, 2)
-DocInfo = namedtuple('DocInfo', 'doc_id rev')
-
-
-class EncryptionDecryptionError(Exception):
- pass
-
-
-class InvalidBlob(Exception):
- pass
-
-
-class SoledadCrypto(object):
- """
- This class provides convenient methods for document encryption and
- decryption using BlobEncryptor and BlobDecryptor classes.
- """
- def __init__(self, secret):
- """
- Initialize the crypto object.
-
- :param secret: The Soledad remote storage secret.
- :type secret: str
- """
- self.secret = secret
-
- def encrypt_doc(self, doc):
- """
- Creates and configures a BlobEncryptor, asking it to start encryption
- and wrapping the result as a simple JSON string with a "raw" key.
-
- :param doc: the document to be encrypted.
- :type doc: Document
- :return: A deferred whose callback will be invoked with a JSON string
- containing the ciphertext as the value of "raw" key.
- :rtype: twisted.internet.defer.Deferred
- """
-
- def put_raw(blob):
- raw = blob.getvalue()
- return '{"raw": "' + raw + '"}'
-
- content = BytesIO(str(doc.get_json()))
- info = DocInfo(doc.doc_id, doc.rev)
- del doc
- encryptor = BlobEncryptor(info, content, secret=self.secret)
- d = encryptor.encrypt()
- d.addCallback(put_raw)
- return d
-
- def decrypt_doc(self, doc):
- """
- Creates and configures a BlobDecryptor, asking it to decrypt and returning
- the decrypted cleartext content from the encrypted document.
-
- :param doc: the document to be decrypted.
- :type doc: Document
- :return: A deferred firing with a BytesIO holding the decrypted cleartext.
- :rtype: twisted.internet.defer.Deferred
- """
- info = DocInfo(doc.doc_id, doc.rev)
- ciphertext = BytesIO()
- payload = doc.content['raw']
- del doc
- ciphertext.write(str(payload))
- decryptor = BlobDecryptor(info, ciphertext, secret=self.secret)
- return decryptor.decrypt()
-
-
-def encrypt_sym(data, key, method=ENC_METHOD.aes_256_gcm):
- """
- Encrypt data using AES-256 cipher in selected mode.
-
- :param data: The data to be encrypted.
- :type data: str
- :param key: The key used to encrypt data (must be 256 bits long).
- :type key: str
-
- :return: A tuple with the base64-encoded initialization vector and the raw
- ciphertext (with the tag appended, if the cipher provides one).
- :rtype: (str, str)
- """
- mode = _mode_by_method(method)
- encryptor = AESWriter(key, mode=mode)
- encryptor.write(data)
- _, ciphertext = encryptor.end()
- iv = base64.b64encode(encryptor.iv)
- tag = encryptor.tag or ''
- return iv, ciphertext + tag
-
-
-def decrypt_sym(data, key, iv, method=ENC_METHOD.aes_256_gcm):
- """
- Decrypt data using AES-256 cipher in selected mode.
-
- :param data: The data to be decrypted.
- :type data: str
- :param key: The symmetric key used to decrypt data (must be 256 bits
- long).
- :type key: str
- :param iv: The base64 encoded initialization vector.
- :type iv: str
-
- :return: The decrypted data.
- :rtype: str
- """
- _iv = base64.b64decode(str(iv))
- mode = _mode_by_method(method)
- tag = None
- if mode == modes.GCM:
- data, tag = data[:-16], data[-16:]
- decryptor = AESWriter(key, _iv, tag=tag, mode=mode)
- decryptor.write(data)
- _, plaintext = decryptor.end()
- return plaintext
-
-
-# TODO maybe rename this to Encryptor, since it will be used by blobs and
-# non blobs in soledad.
-class BlobEncryptor(object):
- """
- Produces encrypted data from the cleartext data associated with a given
- Document using AES-256 cipher in GCM mode.
-
- The production happens using Twisted's FileBodyProducer, which uses a
- Cooperator to schedule calls and can be paused/resumed. Each call takes at
- most 65536 bytes from the input.
-
- Both the production input and output are file descriptors, so they can be
- applied to a stream of data.
- """
- # TODO
- # This class needs further work to allow for proper streaming.
- # Right now we HAVE TO WAIT until the end of the stream before encoding the
- # result. It should be possible to do that just encoding the chunks and
- # passing them to a sink, but for that we have to encode the chunks at
- # proper alignment (3 bytes?) with b64 if armor is defined.
-
- def __init__(self, doc_info, content_fd, secret=None, armor=True,
- sink=None):
- if not secret:
- raise EncryptionDecryptionError('no secret given')
-
- self.doc_id = doc_info.doc_id
- self.rev = doc_info.rev
- self.armor = armor
-
- self._content_fd = content_fd
- self._content_size = self._get_rounded_size(content_fd)
- self._producer = FileBodyProducer(content_fd, readSize=2**16)
-
- self.sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret)
- self._aes = AESWriter(self.sym_key, _buffer=sink)
- self._aes.authenticate(self._encode_preamble())
-
- def _get_rounded_size(self, fd):
- """
- Returns a rounded value in order to minimize information leaks due to
- the original size being exposed.
- """
- fd.seek(0, os.SEEK_END)
- size = _ceiling(fd.tell())
- fd.seek(0)
- return size
-
- @property
- def iv(self):
- return self._aes.iv
-
- @property
- def tag(self):
- return self._aes.tag
-
- def encrypt(self):
- """
- Starts producing encrypted data from the cleartext data.
-
- :return: A deferred which will be fired when encryption ends and whose
- callback will be invoked with the resulting ciphertext.
- :rtype: twisted.internet.defer.Deferred
- """
- # XXX pass a sink to aes?
- d = self._producer.startProducing(self._aes)
- d.addCallback(lambda _: self._end_crypto_stream_and_encode_result())
- return d
-
- def _encode_preamble(self):
- current_time = int(time.time())
-
- preamble = PACMAN.pack(
- BLOB_SIGNATURE_MAGIC,
- ENC_SCHEME.symkey,
- ENC_METHOD.aes_256_gcm,
- current_time,
- self.iv,
- str(self.doc_id),
- str(self.rev),
- self._content_size)
- return preamble
-
- def _end_crypto_stream_and_encode_result(self):
-
- # TODO ---- this needs to be refactored to allow PROPER streaming
- # We should write the preamble as soon as possible,
- # Is it possible to write the AES stream as soon as it is encrypted by
- # chunks?
- # FIXME also, it needs to be able to encode chunks with base64 if armor
-
- preamble, encrypted = self._aes.end()
- result = BytesIO()
- result.write(
- base64.urlsafe_b64encode(preamble))
- result.write(SEPARATOR)
-
- if self.armor:
- result.write(
- base64.urlsafe_b64encode(encrypted + self.tag))
- else:
- result.write(encrypted + self.tag)
-
- result.seek(0)
- return defer.succeed(result)
-
-
-# TODO maybe rename this to just Decryptor, since it will be used by blobs
-# and non blobs in soledad.
-class BlobDecryptor(object):
- """
- Decrypts an encrypted blob associated with a given Document.
-
- Will raise an exception if the blob doesn't have the expected structure, or
- if the GCM tag doesn't verify.
- """
- def __init__(self, doc_info, ciphertext_fd, result=None,
- secret=None, armor=True, start_stream=True, tag=None):
- if not secret:
- raise EncryptionDecryptionError('no secret given')
-
- self.doc_id = doc_info.doc_id
- self.rev = doc_info.rev
- self.fd = ciphertext_fd
- self.armor = armor
- self._producer = None
- self.result = result or BytesIO()
- sym_key = _get_sym_key_for_doc(doc_info.doc_id, secret)
- self.size = None
- self.tag = None
-
- preamble, iv = self._consume_preamble()
- soledad_assert(preamble)
- soledad_assert(iv)
-
- self._aes = AESWriter(sym_key, iv, self.result, tag=tag or self.tag)
- self._aes.authenticate(preamble)
- if start_stream:
- self._start_stream()
-
- @property
- def decrypted_content_size(self):
- return self._aes.written
-
- def _start_stream(self):
- self._producer = FileBodyProducer(self.fd, readSize=2**16)
-
- def _consume_preamble(self):
- """
- Consume the preamble and write remaining bytes as ciphertext. This
- function is called during a stream and can be holding both, so we need
- to consume only preamble and store the remaining.
- """
- self.fd.seek(0)
- try:
- parts = self.fd.getvalue().split(SEPARATOR, 1)
- preamble = base64.urlsafe_b64decode(parts[0])
- if len(parts) == 2:
- ciphertext = parts[1]
- if self.armor:
- ciphertext = base64.urlsafe_b64decode(ciphertext)
- self.tag, ciphertext = ciphertext[-16:], ciphertext[:-16]
- self.fd.seek(0)
- self.fd.write(ciphertext)
- self.fd.seek(len(ciphertext))
- self.fd.truncate()
- self.fd.seek(0)
-
- except (TypeError, ValueError):
- raise InvalidBlob
-
- try:
- if len(preamble) == LEGACY_PACMAN.size:
- warnings.warn("Decrypting a legacy document without size. " +
- "This will be deprecated in 0.12. Doc was: " +
- "doc_id: %s rev: %s" % (self.doc_id, self.rev),
- Warning)
- unpacked_data = LEGACY_PACMAN.unpack(preamble)
- magic, sch, meth, ts, iv, doc_id, rev = unpacked_data
- elif len(preamble) == PACMAN.size:
- unpacked_data = PACMAN.unpack(preamble)
- magic, sch, meth, ts, iv, doc_id, rev, doc_size = unpacked_data
- self.size = doc_size
- else:
- raise InvalidBlob("Unexpected preamble size %d", len(preamble))
- except struct.error as e:
- raise InvalidBlob(e)
-
- if magic != BLOB_SIGNATURE_MAGIC:
- raise InvalidBlob
- # TODO check timestamp. Just as a sanity check, but for instance
- # we can refuse to process something that is in the future or
- # too far in the past (1984 would be nice, hehe)
- if sch != ENC_SCHEME.symkey:
- raise InvalidBlob('Invalid scheme: %s' % sch)
- if meth != ENC_METHOD.aes_256_gcm:
- raise InvalidBlob('Invalid encryption scheme: %s' % meth)
- if rev != self.rev:
- msg = 'Invalid revision. Expected: %s, was: %s' % (self.rev, rev)
- raise InvalidBlob(msg)
- if doc_id != self.doc_id:
- msg = 'Invalid doc_id. '
- + 'Expected: %s, was: %s' % (self.doc_id, doc_id)
- raise InvalidBlob(msg)
-
- return preamble, iv
-
- def _end_stream(self):
- try:
- self._aes.end()
- except InvalidTag:
- raise InvalidBlob('Invalid Tag. Blob authentication failed.')
- fd = self.result
- fd.seek(0)
- return self.result
-
- def decrypt(self):
- """
- Starts producing decrypted data from the ciphertext.
-
- :return: A deferred which will be fired when decryption ends and whose
- callback will be invoked with the resulting cleartext.
- :rtype: twisted.internet.defer.Deferred
- """
- d = self.startProducing()
- d.addCallback(lambda _: self._end_stream())
- return d
-
- def startProducing(self):
- if not self._producer:
- self._start_stream()
- return self._producer.startProducing(self._aes)
-
- def endStream(self):
- self._end_stream()
-
- def write(self, data):
- self._aes.write(data)
-
- def close(self):
- result = self._aes.end()
- return result
-
-
-@implementer(interfaces.IConsumer)
-class AESWriter(object):
- """
- A Twisted Consumer implementation that takes an input file descriptor and
- applies AES-256 cipher in GCM mode.
-
- It is used both for encryption and decryption of a stream, depending on the
- value of the tag parameter. If you pass a tag, it will operate in
- decryption mode, verifying the authenticity of the preamble and ciphertext.
- If no tag is passed, encryption mode is assumed, which will generate a tag.
- """
-
- def __init__(self, key, iv=None, _buffer=None, tag=None, mode=modes.GCM):
- if len(key) != 32:
- raise EncryptionDecryptionError('key is not 256 bits')
-
- if tag is not None:
- # if tag, we're decrypting
- assert iv is not None
-
- self.iv = iv or os.urandom(16)
- self.buffer = _buffer or BytesIO()
- cipher = _get_aes_cipher(key, self.iv, tag, mode)
- cipher = cipher.decryptor() if tag else cipher.encryptor()
- self.cipher, self.aead = cipher, ''
- self.written = 0
-
- def authenticate(self, data):
- self.aead += data
- self.cipher.authenticate_additional_data(data)
-
- @property
- def tag(self):
- return getattr(self.cipher, 'tag', None)
-
- def write(self, data):
- self.written += len(data)
- self.buffer.write(self.cipher.update(data))
-
- def end(self):
- self.buffer.write(self.cipher.finalize())
- return self.aead, self.buffer.getvalue()
-
-
-def is_symmetrically_encrypted(content):
- """
- Returns True if the document was symmetrically encrypted.
- 'EzcB' is the base64 encoding of \x13\x37 magic number and 1 (symmetrically
- encrypted value for enc_scheme flag).
-
- :param content: The document content as string
- :type content: str
-
- :rtype: bool
- """
- sym_signature = '{"raw": "EzcB'
- return content and content.startswith(sym_signature)
-
-
-# utils
-
-
-def _hmac_sha256(key, data):
- return hmac.new(key, data, hashlib.sha256).digest()
-
-
-def _get_sym_key_for_doc(doc_id, secret):
- key = secret[SECRET_LENGTH:]
- return _hmac_sha256(key, doc_id)
-
-
-def _get_aes_cipher(key, iv, tag, mode=modes.GCM):
- mode = mode(iv, tag) if mode == modes.GCM else mode(iv)
- return Cipher(algorithms.AES(key), mode, backend=CRYPTO_BACKEND)
-
-
-def _mode_by_method(method):
- if method == ENC_METHOD.aes_256_gcm:
- return modes.GCM
- else:
- return modes.CTR
-
-
-def _ceiling(size):
- """
- Some simplistic ceiling scheme that uses powers of 2.
- We report everything below 4096 bytes as that minimum threshold.
- See #8759 for research pending for less simplistic/aggressive strategies.
- """
- for i in xrange(12, 31):
- step = 2 ** i
- if size < step:
- return step