From 2e59f9740a29439df7c7a56cf0ae83dec3081d31 Mon Sep 17 00:00:00 2001 From: Micah Anderson Date: Mon, 11 Aug 2014 13:49:21 -0400 Subject: initial import of debian version from mentors --- .../aes128ctr/portable/afternm_aes128ctr.c | 159 +++++ .../crypto_stream/aes128ctr/portable/api.h | 13 + .../aes128ctr/portable/beforenm_aes128ctr.c | 59 ++ .../crypto_stream/aes128ctr/portable/common.h | 788 +++++++++++++++++++++ .../aes128ctr/portable/common_aes128ctr.c | 64 ++ .../crypto_stream/aes128ctr/portable/consts.h | 28 + .../aes128ctr/portable/consts_aes128ctr.c | 14 + .../crypto_stream/aes128ctr/portable/int128.h | 56 ++ .../aes128ctr/portable/int128_aes128ctr.c | 131 ++++ .../aes128ctr/portable/stream_aes128ctr.c | 28 + .../crypto_stream/aes128ctr/portable/types.h | 10 + .../aes128ctr/portable/xor_afternm_aes128ctr.c | 181 +++++ 12 files changed, 1531 insertions(+) create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/api.h create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/common.h create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/consts.h create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/consts_aes128ctr.c create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/int128.h create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/types.h create mode 100644 src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c (limited to 'src/libsodium/crypto_stream/aes128ctr/portable') diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c new file mode 100644 index 0000000..a5a9a7a --- /dev/null +++ b/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c @@ -0,0 +1,159 @@ +/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper + * Date: 2009-03-19 + * Public domain */ + +#include "api.h" +#include "int128.h" +#include "common.h" +#include "consts.h" + +int crypto_stream_afternm(unsigned char *out, unsigned long long len, const unsigned char *nonce, const unsigned char *c) +{ + + int128 xmm0; + int128 xmm1; + int128 xmm2; + int128 xmm3; + int128 xmm4; + int128 xmm5; + int128 xmm6; + int128 xmm7; + + int128 xmm8; + int128 xmm9; + int128 xmm10; + int128 xmm11; + int128 xmm12; + int128 xmm13; + int128 xmm14; + int128 xmm15; + + int128 nonce_stack; + unsigned long long lensav; + unsigned char bl[128]; + unsigned char *blp; + unsigned char *np; + unsigned char b; + + uint32 tmp; + + /* Copy nonce on the stack */ + copy2(&nonce_stack, (const int128 *) (nonce + 0)); + np = (unsigned char *)&nonce_stack; + + enc_block: + + xmm0 = *(int128 *) (np + 0); + copy2(&xmm1, &xmm0); + shufb(&xmm1, SWAP32); + copy2(&xmm2, &xmm1); + copy2(&xmm3, &xmm1); + copy2(&xmm4, &xmm1); + copy2(&xmm5, &xmm1); + copy2(&xmm6, &xmm1); + copy2(&xmm7, &xmm1); + + add_uint32_big(&xmm1, 1); + add_uint32_big(&xmm2, 2); + add_uint32_big(&xmm3, 3); + add_uint32_big(&xmm4, 4); + add_uint32_big(&xmm5, 5); + add_uint32_big(&xmm6, 6); + add_uint32_big(&xmm7, 7); + + shufb(&xmm0, M0); + shufb(&xmm1, M0SWAP); 
+ shufb(&xmm2, M0SWAP); + shufb(&xmm3, M0SWAP); + shufb(&xmm4, M0SWAP); + shufb(&xmm5, M0SWAP); + shufb(&xmm6, M0SWAP); + shufb(&xmm7, M0SWAP); + + bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8) + + aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + + bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0) + + if(len < 128) goto partial; + if(len == 128) goto full; + + tmp = load32_bigendian(np + 12); + tmp += 8; + store32_bigendian(np + 12, tmp); + + *(int128 *) (out + 0) = xmm8; + *(int128 *) (out + 16) = xmm9; + *(int128 *) (out + 32) = xmm12; + *(int128 *) (out + 48) = xmm14; + *(int128 *) (out + 64) = xmm11; + *(int128 *) (out + 80) = xmm15; + *(int128 *) (out + 96) = xmm10; + *(int128 *) (out + 112) = xmm13; + + len -= 128; + out += 128; + + goto enc_block; + + partial: + + lensav = len; + len >>= 4; + + tmp = load32_bigendian(np + 12); + tmp += len; + store32_bigendian(np + 12, tmp); + + blp = bl; + *(int128 *)(blp + 0) = xmm8; + *(int128 *)(blp + 16) = xmm9; + *(int128 *)(blp + 32) = xmm12; + *(int128 *)(blp + 48) = xmm14; + *(int128 *)(blp + 64) = xmm11; + *(int128 *)(blp + 80) = xmm15; + *(int128 *)(blp + 96) = xmm10; + *(int128 *)(blp + 112) = xmm13; + + bytes: + + if(lensav == 0) goto end; + + b = blp[0]; /* clang false positive */ + *(unsigned char *)(out + 0) = b; + + blp += 1; + out +=1; + lensav -= 1; + + goto bytes; + + full: + + tmp = load32_bigendian(np + 12); + tmp += 8; + store32_bigendian(np + 12, tmp); + + *(int128 *) (out + 0) = xmm8; + *(int128 *) (out + 16) = xmm9; + *(int128 *) (out + 32) = xmm12; + *(int128 *) (out + 48) = xmm14; + *(int128 *) (out + 64) = xmm11; + *(int128 *) (out + 80) = xmm15; + *(int128 *) (out + 96) = xmm10; + *(int128 *) (out + 112) = xmm13; + + end: + return 0; + +} diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/api.h b/src/libsodium/crypto_stream/aes128ctr/portable/api.h new file mode 100644 index 0000000..3c53fb9 --- /dev/null +++ b/src/libsodium/crypto_stream/aes128ctr/portable/api.h @@ -0,0 +1,13 @@ + +#include "crypto_stream_aes128ctr.h" + +#define crypto_stream crypto_stream_aes128ctr +#define crypto_stream_xor crypto_stream_aes128ctr_xor +#define crypto_stream_beforenm crypto_stream_aes128ctr_beforenm +#define crypto_stream_afternm crypto_stream_aes128ctr_afternm +#define crypto_stream_xor_afternm crypto_stream_aes128ctr_xor_afternm +#define crypto_stream_KEYBYTES 
crypto_stream_aes128ctr_KEYBYTES +#define crypto_stream_NONCEBYTES crypto_stream_aes128ctr_NONCEBYTES +#define crypto_stream_BEFORENMBYTES crypto_stream_aes128ctr_BEFORENMBYTES +#define crypto_stream_IMPLEMENTATION crypto_stream_aes128ctr_IMPLEMENTATION +#define crypto_stream_VERSION crypto_stream_aes128ctr_VERSION diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c new file mode 100644 index 0000000..f8623dd --- /dev/null +++ b/src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c @@ -0,0 +1,59 @@ +/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper + * Date: 2009-03-19 + * Public domain */ + +#include "api.h" +#include "consts.h" +#include "int128.h" +#include "common.h" + +int crypto_stream_beforenm(unsigned char *c, const unsigned char *k) +{ + + /* + int64 x0; + int64 x1; + int64 x2; + int64 x3; + int64 e; + int64 q0; + int64 q1; + int64 q2; + int64 q3; + */ + + int128 xmm0; + int128 xmm1; + int128 xmm2; + int128 xmm3; + int128 xmm4; + int128 xmm5; + int128 xmm6; + int128 xmm7; + int128 xmm8; + int128 xmm9; + int128 xmm10; + int128 xmm11; + int128 xmm12; + int128 xmm13; + int128 xmm14; + int128 xmm15; + int128 t; + + bitslicekey0(k, c) + + keyexpbs1(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + keyexpbs(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm1);, 2,c) + keyexpbs(xmm0, xmm1, xmm3, xmm2, xmm6, xmm5, xmm4, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm6);, 3,c) + keyexpbs(xmm0, xmm1, xmm6, xmm4, xmm2, xmm7, xmm3, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 4,c) + + keyexpbs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 5,c) + keyexpbs(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm5);, 6,c) + keyexpbs(xmm0, xmm1, xmm3, xmm2, xmm6, xmm5, xmm4, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 7,c) + keyexpbs(xmm0, xmm1, xmm6, xmm4, xmm2, xmm7, xmm3, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm7);, 8,c) + + keyexpbs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm0); xor_rcon(&xmm1); xor_rcon(&xmm6); xor_rcon(&xmm3);, 9,c) + keyexpbs10(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + + return 0; +} diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/common.h b/src/libsodium/crypto_stream/aes128ctr/portable/common.h new file mode 100644 index 0000000..3923c02 --- /dev/null +++ b/src/libsodium/crypto_stream/aes128ctr/portable/common.h @@ -0,0 +1,788 @@ +/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper + Date: 2009-03-19 + Public domain */ +#ifndef COMMON_H +#define COMMON_H + +#include "types.h" + +#define load32_bigendian crypto_stream_aes128ctr_portable_load32_bigendian +uint32 load32_bigendian(const unsigned char *x); + +#define store32_bigendian crypto_stream_aes128ctr_portable_store32_bigendian +void store32_bigendian(unsigned char *x,uint32 u); + +#define load32_littleendian crypto_stream_aes128ctr_portable_load32_littleendian +uint32 load32_littleendian(const unsigned char *x); + +#define 
store32_littleendian crypto_stream_aes128ctr_portable_store32_littleendian +void store32_littleendian(unsigned char *x,uint32 u); + +#define load64_littleendian crypto_stream_aes128ctr_portable_load64_littleendian +uint64 load64_littleendian(const unsigned char *x); + +#define store64_littleendian crypto_stream_aes128ctr_portable_store64_littleendian +void store64_littleendian(unsigned char *x,uint64 u); + +/* Macros required only for key expansion */ + +#define keyexpbs1(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \ + rotbyte(&b0);\ + rotbyte(&b1);\ + rotbyte(&b2);\ + rotbyte(&b3);\ + rotbyte(&b4);\ + rotbyte(&b5);\ + rotbyte(&b6);\ + rotbyte(&b7);\ + ;\ + sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\ + ;\ + xor_rcon(&b0);\ + shufb(&b0, EXPB0);\ + shufb(&b1, EXPB0);\ + shufb(&b4, EXPB0);\ + shufb(&b6, EXPB0);\ + shufb(&b3, EXPB0);\ + shufb(&b7, EXPB0);\ + shufb(&b2, EXPB0);\ + shufb(&b5, EXPB0);\ + shufb(&b0, EXPB0);\ + ;\ + t0 = *(int128 *)(bskey + 0);\ + t1 = *(int128 *)(bskey + 16);\ + t2 = *(int128 *)(bskey + 32);\ + t3 = *(int128 *)(bskey + 48);\ + t4 = *(int128 *)(bskey + 64);\ + t5 = *(int128 *)(bskey + 80);\ + t6 = *(int128 *)(bskey + 96);\ + t7 = *(int128 *)(bskey + 112);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + *(int128 *)(bskey + 128) = b0;\ + *(int128 *)(bskey + 144) = b1;\ + *(int128 *)(bskey + 160) = b4;\ + *(int128 *)(bskey + 176) = b6;\ + *(int128 *)(bskey + 192) = b3;\ + *(int128 *)(bskey + 208) = b7;\ + *(int128 *)(bskey + 224) = b2;\ + *(int128 *)(bskey + 240) = b5;\ + +#define keyexpbs10(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) ;\ + toggle(&b0);\ + toggle(&b1);\ + toggle(&b5);\ + toggle(&b6);\ + rotbyte(&b0);\ + rotbyte(&b1);\ + rotbyte(&b2);\ + rotbyte(&b3);\ + rotbyte(&b4);\ + rotbyte(&b5);\ + rotbyte(&b6);\ + rotbyte(&b7);\ + ;\ + sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\ + ;\ + xor_rcon(&b1);\ + xor_rcon(&b4);\ + xor_rcon(&b3);\ + xor_rcon(&b7);\ + shufb(&b0, EXPB0);\ + shufb(&b1, EXPB0);\ + shufb(&b4, EXPB0);\ + shufb(&b6, EXPB0);\ + shufb(&b3, EXPB0);\ + 
shufb(&b7, EXPB0);\ + shufb(&b2, EXPB0);\ + shufb(&b5, EXPB0);\ + ;\ + t0 = *(int128 *)(bskey + 9 * 128 + 0);\ + t1 = *(int128 *)(bskey + 9 * 128 + 16);\ + t2 = *(int128 *)(bskey + 9 * 128 + 32);\ + t3 = *(int128 *)(bskey + 9 * 128 + 48);\ + t4 = *(int128 *)(bskey + 9 * 128 + 64);\ + t5 = *(int128 *)(bskey + 9 * 128 + 80);\ + t6 = *(int128 *)(bskey + 9 * 128 + 96);\ + t7 = *(int128 *)(bskey + 9 * 128 + 112);\ + ;\ + toggle(&t0);\ + toggle(&t1);\ + toggle(&t5);\ + toggle(&t6);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + shufb(&b0, M0);\ + shufb(&b1, M0);\ + shufb(&b2, M0);\ + shufb(&b3, M0);\ + shufb(&b4, M0);\ + shufb(&b5, M0);\ + shufb(&b6, M0);\ + shufb(&b7, M0);\ + ;\ + *(int128 *)(bskey + 1280) = b0;\ + *(int128 *)(bskey + 1296) = b1;\ + *(int128 *)(bskey + 1312) = b4;\ + *(int128 *)(bskey + 1328) = b6;\ + *(int128 *)(bskey + 1344) = b3;\ + *(int128 *)(bskey + 1360) = b7;\ + *(int128 *)(bskey + 1376) = b2;\ + *(int128 *)(bskey + 1392) = b5;\ + + +#define keyexpbs(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, rcon, i, bskey) \ + toggle(&b0);\ + toggle(&b1);\ + toggle(&b5);\ + toggle(&b6);\ + rotbyte(&b0);\ + rotbyte(&b1);\ + rotbyte(&b2);\ + rotbyte(&b3);\ + rotbyte(&b4);\ + rotbyte(&b5);\ + rotbyte(&b6);\ + rotbyte(&b7);\ + ;\ + sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\ + ;\ + rcon;\ + shufb(&b0, EXPB0);\ + shufb(&b1, EXPB0);\ + shufb(&b4, EXPB0);\ + shufb(&b6, EXPB0);\ + shufb(&b3, EXPB0);\ + shufb(&b7, EXPB0);\ + shufb(&b2, EXPB0);\ + shufb(&b5, EXPB0);\ + ;\ + t0 = *(int128 *)(bskey + (i-1) * 128 + 0);\ + t1 = *(int128 *)(bskey + (i-1) * 128 + 16);\ + t2 = *(int128 *)(bskey + (i-1) * 128 + 32);\ + t3 = *(int128 *)(bskey + (i-1) * 128 + 48);\ + t4 = *(int128 *)(bskey + (i-1) * 128 + 64);\ + t5 = *(int128 *)(bskey + (i-1) * 128 + 80);\ + t6 = *(int128 *)(bskey + (i-1) * 128 + 96);\ + t7 = *(int128 *)(bskey + (i-1) * 128 + 112);\ + ;\ + toggle(&t0);\ + toggle(&t1);\ + toggle(&t5);\ + toggle(&t6);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, 
&t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + *(int128 *)(bskey + i*128 + 0) = b0;\ + *(int128 *)(bskey + i*128 + 16) = b1;\ + *(int128 *)(bskey + i*128 + 32) = b4;\ + *(int128 *)(bskey + i*128 + 48) = b6;\ + *(int128 *)(bskey + i*128 + 64) = b3;\ + *(int128 *)(bskey + i*128 + 80) = b7;\ + *(int128 *)(bskey + i*128 + 96) = b2;\ + *(int128 *)(bskey + i*128 + 112) = b5;\ + +/* Macros used in multiple contexts */ + +#define bitslicekey0(key, bskey) \ + xmm0 = *(const int128 *) (key + 0);\ + shufb(&xmm0, M0);\ + copy2(&xmm1, &xmm0);\ + copy2(&xmm2, &xmm0);\ + copy2(&xmm3, &xmm0);\ + copy2(&xmm4, &xmm0);\ + copy2(&xmm5, &xmm0);\ + copy2(&xmm6, &xmm0);\ + copy2(&xmm7, &xmm0);\ + ;\ + bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\ + ;\ + *(int128 *) (bskey + 0) = xmm0;\ + *(int128 *) (bskey + 16) = xmm1;\ + *(int128 *) (bskey + 32) = xmm2;\ + *(int128 *) (bskey + 48) = xmm3;\ + *(int128 *) (bskey + 64) = xmm4;\ + *(int128 *) (bskey + 80) = xmm5;\ + *(int128 *) (bskey + 96) = xmm6;\ + *(int128 *) (bskey + 112) = xmm7;\ + + +#define bitslicekey10(key, bskey) \ + xmm0 = *(int128 *) (key + 0);\ + copy2(xmm1, xmm0);\ + copy2(xmm2, xmm0);\ + copy2(xmm3, xmm0);\ + copy2(xmm4, xmm0);\ + copy2(xmm5, xmm0);\ + copy2(xmm6, xmm0);\ + copy2(xmm7, xmm0);\ + ;\ + bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\ + ;\ + toggle(&xmm6);\ + toggle(&xmm5);\ + toggle(&xmm1);\ + toggle(&xmm0);\ + ;\ + *(int128 *) (bskey + 0 + 1280) = xmm0;\ + *(int128 *) (bskey + 16 + 1280) = xmm1;\ + *(int128 *) (bskey + 32 + 1280) = xmm2;\ + *(int128 *) (bskey + 48 + 1280) = xmm3;\ + *(int128 *) (bskey + 64 + 1280) = xmm4;\ + *(int128 *) (bskey + 80 + 1280) = xmm5;\ + *(int128 *) (bskey + 96 + 1280) = xmm6;\ + *(int128 *) (bskey + 112 + 1280) = xmm7;\ + + +#define bitslicekey(i,key,bskey) \ + xmm0 = *(int128 *) (key + 0);\ + shufb(&xmm0, M0);\ + copy2(&xmm1, &xmm0);\ + copy2(&xmm2, &xmm0);\ + copy2(&xmm3, &xmm0);\ + copy2(&xmm4, &xmm0);\ + copy2(&xmm5, &xmm0);\ + copy2(&xmm6, &xmm0);\ + copy2(&xmm7, &xmm0);\ + ;\ + bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\ + ;\ + toggle(&xmm6);\ + toggle(&xmm5);\ + toggle(&xmm1);\ + toggle(&xmm0);\ + ;\ + *(int128 *) (bskey + 0 + 128*i) = xmm0;\ + *(int128 *) (bskey + 16 + 128*i) = xmm1;\ + *(int128 *) 
(bskey + 32 + 128*i) = xmm2;\ + *(int128 *) (bskey + 48 + 128*i) = xmm3;\ + *(int128 *) (bskey + 64 + 128*i) = xmm4;\ + *(int128 *) (bskey + 80 + 128*i) = xmm5;\ + *(int128 *) (bskey + 96 + 128*i) = xmm6;\ + *(int128 *) (bskey + 112 + 128*i) = xmm7;\ + + +#define bitslice(x0, x1, x2, x3, x4, x5, x6, x7, t) \ + swapmove(x0, x1, 1, BS0, t);\ + swapmove(x2, x3, 1, BS0, t);\ + swapmove(x4, x5, 1, BS0, t);\ + swapmove(x6, x7, 1, BS0, t);\ + ;\ + swapmove(x0, x2, 2, BS1, t);\ + swapmove(x1, x3, 2, BS1, t);\ + swapmove(x4, x6, 2, BS1, t);\ + swapmove(x5, x7, 2, BS1, t);\ + ;\ + swapmove(x0, x4, 4, BS2, t);\ + swapmove(x1, x5, 4, BS2, t);\ + swapmove(x2, x6, 4, BS2, t);\ + swapmove(x3, x7, 4, BS2, t);\ + + +#define swapmove(a, b, n, m, t) \ + copy2(&t, &b);\ + rshift64_littleendian(&t, n);\ + xor2(&t, &a);\ + and2(&t, &m);\ + xor2(&a, &t);\ + lshift64_littleendian(&t, n);\ + xor2(&b, &t); + +#define rotbyte(x) \ + shufb(x, ROTB) /* TODO: Make faster */ + + +/* Macros used for encryption (and decryption) */ + +#define shiftrows(x0, x1, x2, x3, x4, x5, x6, x7, i, M, bskey) \ + xor2(&x0, (const int128 *)(bskey + 128*(i-1) + 0));\ + shufb(&x0, M);\ + xor2(&x1, (const int128 *)(bskey + 128*(i-1) + 16));\ + shufb(&x1, M);\ + xor2(&x2, (const int128 *)(bskey + 128*(i-1) + 32));\ + shufb(&x2, M);\ + xor2(&x3, (const int128 *)(bskey + 128*(i-1) + 48));\ + shufb(&x3, M);\ + xor2(&x4, (const int128 *)(bskey + 128*(i-1) + 64));\ + shufb(&x4, M);\ + xor2(&x5, (const int128 *)(bskey + 128*(i-1) + 80));\ + shufb(&x5, M);\ + xor2(&x6, (const int128 *)(bskey + 128*(i-1) + 96));\ + shufb(&x6, M);\ + xor2(&x7, (const int128 *)(bskey + 128*(i-1) + 112));\ + shufb(&x7, M);\ + + +#define mixcolumns(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, t7) \ + shufd(&t0, &x0, 0x93);\ + shufd(&t1, &x1, 0x93);\ + shufd(&t2, &x2, 0x93);\ + shufd(&t3, &x3, 0x93);\ + shufd(&t4, &x4, 0x93);\ + shufd(&t5, &x5, 0x93);\ + shufd(&t6, &x6, 0x93);\ + shufd(&t7, &x7, 0x93);\ + ;\ + xor2(&x0, &t0);\ + xor2(&x1, &t1);\ + xor2(&x2, &t2);\ + xor2(&x3, &t3);\ + xor2(&x4, &t4);\ + xor2(&x5, &t5);\ + xor2(&x6, &t6);\ + xor2(&x7, &t7);\ + ;\ + xor2(&t0, &x7);\ + xor2(&t1, &x0);\ + xor2(&t2, &x1);\ + xor2(&t1, &x7);\ + xor2(&t3, &x2);\ + xor2(&t4, &x3);\ + xor2(&t5, &x4);\ + xor2(&t3, &x7);\ + xor2(&t6, &x5);\ + xor2(&t7, &x6);\ + xor2(&t4, &x7);\ + ;\ + shufd(&x0, &x0, 0x4e);\ + shufd(&x1, &x1, 0x4e);\ + shufd(&x2, &x2, 0x4e);\ + shufd(&x3, &x3, 0x4e);\ + shufd(&x4, &x4, 0x4e);\ + shufd(&x5, &x5, 0x4e);\ + shufd(&x6, &x6, 0x4e);\ + shufd(&x7, &x7, 0x4e);\ + ;\ + xor2(&t0, &x0);\ + xor2(&t1, &x1);\ + xor2(&t2, &x2);\ + xor2(&t3, &x3);\ + xor2(&t4, &x4);\ + xor2(&t5, &x5);\ + xor2(&t6, &x6);\ + xor2(&t7, &x7);\ + + +#define aesround(i, b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \ + shiftrows(b0, b1, b2, b3, b4, b5, b6, b7, i, SR, bskey);\ + sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\ + mixcolumns(b0, b1, b4, b6, b3, b7, b2, b5, t0, t1, t2, t3, t4, t5, t6, t7);\ + + +#define lastround(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \ + shiftrows(b0, b1, b2, b3, b4, b5, b6, b7, 10, SRM0, bskey);\ + sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\ + xor2(&b0,(const int128 *)(bskey + 128*10));\ + xor2(&b1,(const int128 *)(bskey + 128*10+16));\ + xor2(&b4,(const int128 *)(bskey + 128*10+32));\ + xor2(&b6,(const int128 *)(bskey + 128*10+48));\ + xor2(&b3,(const int128 *)(bskey + 128*10+64));\ + xor2(&b7,(const int128 *)(bskey + 128*10+80));\ + 
xor2(&b2,(const int128 *)(bskey + 128*10+96));\
+ xor2(&b5,(const int128 *)(bskey + 128*10+112));\
+
+
+#define sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, s0, s1, s2, s3) \
+ InBasisChange(b0, b1, b2, b3, b4, b5, b6, b7); \
+ Inv_GF256(b6, b5, b0, b3, b7, b1, b4, b2, t0, t1, t2, t3, s0, s1, s2, s3); \
+ OutBasisChange(b7, b1, b4, b2, b6, b5, b0, b3); \
+
+
+#define InBasisChange(b0, b1, b2, b3, b4, b5, b6, b7) \
+ xor2(&b5, &b6);\
+ xor2(&b2, &b1);\
+ xor2(&b5, &b0);\
+ xor2(&b6, &b2);\
+ xor2(&b3, &b0);\
+ ;\
+ xor2(&b6, &b3);\
+ xor2(&b3, &b7);\
+ xor2(&b3, &b4);\
+ xor2(&b7, &b5);\
+ xor2(&b3, &b1);\
+ ;\
+ xor2(&b4, &b5);\
+ xor2(&b2, &b7);\
+ xor2(&b1, &b5);\
+
+#define OutBasisChange(b0, b1, b2, b3, b4, b5, b6, b7) \
+ xor2(&b0, &b6);\
+ xor2(&b1, &b4);\
+ xor2(&b2, &b0);\
+ xor2(&b4, &b6);\
+ xor2(&b6, &b1);\
+ ;\
+ xor2(&b1, &b5);\
+ xor2(&b5, &b3);\
+ xor2(&b2, &b5);\
+ xor2(&b3, &b7);\
+ xor2(&b7, &b5);\
+ ;\
+ xor2(&b4, &b7);\
+
+#define Mul_GF4(x0, x1, y0, y1, t0) \
+ copy2(&t0, &y0);\
+ xor2(&t0, &y1);\
+ and2(&t0, &x0);\
+ xor2(&x0, &x1);\
+ and2(&x0, &y1);\
+ and2(&x1, &y0);\
+ xor2(&x0, &x1);\
+ xor2(&x1, &t0);\
+
+#define Mul_GF4_N(x0, x1, y0, y1, t0) \
+ copy2(&t0, &y0);\
+ xor2(&t0, &y1);\
+ and2(&t0, &x0);\
+ xor2(&x0, &x1);\
+ and2(&x0, &y1);\
+ and2(&x1, &y0);\
+ xor2(&x1, &x0);\
+ xor2(&x0, &t0);\
+
+#define Mul_GF4_2(x0, x1, x2, x3, y0, y1, t0, t1) \
+ copy2(&t0, &y0);\
+ xor2(&t0, &y1);\
+ copy2(&t1, &t0);\
+ and2(&t0, &x0);\
+ and2(&t1, &x2);\
+ xor2(&x0, &x1);\
+ xor2(&x2, &x3);\
+ and2(&x0, &y1);\
+ and2(&x2, &y1);\
+ and2(&x1, &y0);\
+ and2(&x3, &y0);\
+ xor2(&x0, &x1);\
+ xor2(&x2, &x3);\
+ xor2(&x1, &t0);\
+ xor2(&x3, &t1);\
+
+#define Mul_GF16(x0, x1, x2, x3, y0, y1, y2, y3, t0, t1, t2, t3) \
+ copy2(&t0, &x0);\
+ copy2(&t1, &x1);\
+ Mul_GF4(x0, x1, y0, y1, t2);\
+ xor2(&t0, &x2);\
+ xor2(&t1, &x3);\
+ xor2(&y0, &y2);\
+ xor2(&y1, &y3);\
+ Mul_GF4_N(t0, t1, y0, y1, t2);\
+ Mul_GF4(x2, x3, y2, y3, t3);\
+ ;\
+ xor2(&x0, &t0);\
+ xor2(&x2, &t0);\
+ xor2(&x1, &t1);\
+ xor2(&x3, &t1);\
+
+#define Mul_GF16_2(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, t0, t1, t2, t3) \
+ copy2(&t0, &x0);\
+ copy2(&t1, &x1);\
+ Mul_GF4(x0, x1, y0, y1, t2);\
+ xor2(&t0, &x2);\
+ xor2(&t1, &x3);\
+ xor2(&y0, &y2);\
+ xor2(&y1, &y3);\
+ Mul_GF4_N(t0, t1, y0, y1, t3);\
+ Mul_GF4(x2, x3, y2, y3, t2);\
+ ;\
+ xor2(&x0, &t0);\
+ xor2(&x2, &t0);\
+ xor2(&x1, &t1);\
+ xor2(&x3, &t1);\
+ ;\
+ copy2(&t0, &x4);\
+ copy2(&t1, &x5);\
+ xor2(&t0, &x6);\
+ xor2(&t1, &x7);\
+ Mul_GF4_N(t0, t1, y0, y1, t3);\
+ Mul_GF4(x6, x7, y2, y3, t2);\
+ xor2(&y0, &y2);\
+ xor2(&y1, &y3);\
+ Mul_GF4(x4, x5, y0, y1, t3);\
+ ;\
+ xor2(&x4, &t0);\
+ xor2(&x6, &t0);\
+ xor2(&x5, &t1);\
+ xor2(&x7, &t1);\
+
+#define Inv_GF16(x0, x1, x2, x3, t0, t1, t2, t3) \
+ copy2(&t0, &x1);\
+ copy2(&t1, &x0);\
+ and2(&t0, &x3);\
+ or2(&t1, &x2);\
+ copy2(&t2, &x1);\
+ copy2(&t3, &x0);\
+ or2(&t2, &x2);\
+ or2(&t3, &x3);\
+ xor2(&t2, &t3);\
+ ;\
+ xor2(&t0, &t2);\
+ xor2(&t1, &t2);\
+ ;\
+ Mul_GF4_2(x0, x1, x2, x3, t1, t0, t2, t3);\
+
+
+#define Inv_GF256(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, s0, s1, s2, s3) \
+ copy2(&t3, &x4);\
+ copy2(&t2, &x5);\
+ copy2(&t1, &x1);\
+ copy2(&s1, &x7);\
+ copy2(&s0, &x0);\
+ ;\
+ xor2(&t3, &x6);\
+ xor2(&t2, &x7);\
+ xor2(&t1, &x3);\
+ xor2(&s1, &x6);\
+ xor2(&s0, &x2);\
+ ;\
+ copy2(&s2, &t3);\
+ copy2(&t0, &t2);\
+ copy2(&s3, &t3);\
+ ;\
+ or2(&t2, &t1);\
+ or2(&t3, &s0);\
+ xor2(&s3, &t0);\
+ and2(&s2, &s0);\
+ and2(&t0, &t1);\
+ xor2(&s0, &t1);\
+ and2(&s3, &s0);\
+
copy2(&s0, &x3);\ + xor2(&s0, &x2);\ + and2(&s1, &s0);\ + xor2(&t3, &s1);\ + xor2(&t2, &s1);\ + copy2(&s1, &x4);\ + xor2(&s1, &x5);\ + copy2(&s0, &x1);\ + copy2(&t1, &s1);\ + xor2(&s0, &x0);\ + or2(&t1, &s0);\ + and2(&s1, &s0);\ + xor2(&t0, &s1);\ + xor2(&t3, &s3);\ + xor2(&t2, &s2);\ + xor2(&t1, &s3);\ + xor2(&t0, &s2);\ + xor2(&t1, &s2);\ + copy2(&s0, &x7);\ + copy2(&s1, &x6);\ + copy2(&s2, &x5);\ + copy2(&s3, &x4);\ + and2(&s0, &x3);\ + and2(&s1, &x2);\ + and2(&s2, &x1);\ + or2(&s3, &x0);\ + xor2(&t3, &s0);\ + xor2(&t2, &s1);\ + xor2(&t1, &s2);\ + xor2(&t0, &s3);\ + ;\ + copy2(&s0, &t3);\ + xor2(&s0, &t2);\ + and2(&t3, &t1);\ + copy2(&s2, &t0);\ + xor2(&s2, &t3);\ + copy2(&s3, &s0);\ + and2(&s3, &s2);\ + xor2(&s3, &t2);\ + copy2(&s1, &t1);\ + xor2(&s1, &t0);\ + xor2(&t3, &t2);\ + and2(&s1, &t3);\ + xor2(&s1, &t0);\ + xor2(&t1, &s1);\ + copy2(&t2, &s2);\ + xor2(&t2, &s1);\ + and2(&t2, &t0);\ + xor2(&t1, &t2);\ + xor2(&s2, &t2);\ + and2(&s2, &s3);\ + xor2(&s2, &s0);\ + ;\ + Mul_GF16_2(x0, x1, x2, x3, x4, x5, x6, x7, s3, s2, s1, t1, s0, t0, t2, t3);\ + +#endif diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c new file mode 100644 index 0000000..14a28cc --- /dev/null +++ b/src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c @@ -0,0 +1,64 @@ +#include "common.h" + +uint32 load32_bigendian(const unsigned char *x) +{ + return + (uint32) (x[3]) \ + | (((uint32) (x[2])) << 8) \ + | (((uint32) (x[1])) << 16) \ + | (((uint32) (x[0])) << 24) + ; +} + +void store32_bigendian(unsigned char *x,uint32 u) +{ + x[3] = u; u >>= 8; + x[2] = u; u >>= 8; + x[1] = u; u >>= 8; + x[0] = u; +} + +uint32 load32_littleendian(const unsigned char *x) +{ + return + (uint32) (x[0]) \ + | (((uint32) (x[1])) << 8) \ + | (((uint32) (x[2])) << 16) \ + | (((uint32) (x[3])) << 24) + ; +} + +void store32_littleendian(unsigned char *x,uint32 u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; +} + + +uint64 load64_littleendian(const unsigned char *x) +{ + return + (uint64) (x[0]) \ + | (((uint64) (x[1])) << 8) \ + | (((uint64) (x[2])) << 16) \ + | (((uint64) (x[3])) << 24) + | (((uint64) (x[4])) << 32) + | (((uint64) (x[5])) << 40) + | (((uint64) (x[6])) << 48) + | (((uint64) (x[7])) << 56) + ; +} + +void store64_littleendian(unsigned char *x,uint64 u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; u >>= 8; + x[4] = u; u >>= 8; + x[5] = u; u >>= 8; + x[6] = u; u >>= 8; + x[7] = u; +} diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/consts.h b/src/libsodium/crypto_stream/aes128ctr/portable/consts.h new file mode 100644 index 0000000..4c50360 --- /dev/null +++ b/src/libsodium/crypto_stream/aes128ctr/portable/consts.h @@ -0,0 +1,28 @@ +#ifndef CONSTS_H +#define CONSTS_H + +#include "int128.h" + +#define ROTB crypto_stream_aes128ctr_portable_ROTB +#define M0 crypto_stream_aes128ctr_portable_M0 +#define EXPB0 crypto_stream_aes128ctr_portable_EXPB0 +#define SWAP32 crypto_stream_aes128ctr_portable_SWAP32 +#define M0SWAP crypto_stream_aes128ctr_portable_M0SWAP +#define SR crypto_stream_aes128ctr_portable_SR +#define SRM0 crypto_stream_aes128ctr_portable_SRM0 +#define BS0 crypto_stream_aes128ctr_portable_BS0 +#define BS1 crypto_stream_aes128ctr_portable_BS1 +#define BS2 crypto_stream_aes128ctr_portable_BS2 + +extern const unsigned char ROTB[16]; +extern const unsigned char M0[16]; +extern const unsigned char EXPB0[16]; +extern const unsigned char 
SWAP32[16];
+extern const unsigned char M0SWAP[16];
+extern const unsigned char SR[16];
+extern const unsigned char SRM0[16];
+extern const int128 BS0;
+extern const int128 BS1;
+extern const int128 BS2;
+
+#endif
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/consts_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/consts_aes128ctr.c
new file mode 100644
index 0000000..f8029b8
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/consts_aes128ctr.c
@@ -0,0 +1,14 @@
+#include "consts.h"
+
+const unsigned char ROTB[16] = {0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08};
+const unsigned char M0[16] = {0x0f, 0x0b, 0x07, 0x03, 0x0e, 0x0a, 0x06, 0x02, 0x0d, 0x09, 0x05, 0x01, 0x0c, 0x08, 0x04, 0x00};
+const unsigned char EXPB0[16] = {0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07, 0x0b, 0x0b, 0x0b, 0x0b, 0x0f, 0x0f, 0x0f, 0x0f};
+
+const unsigned char SWAP32[16] = {0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c};
+const unsigned char M0SWAP[16] = {0x0c, 0x08, 0x04, 0x00, 0x0d, 0x09, 0x05, 0x01, 0x0e, 0x0a, 0x06, 0x02, 0x0f, 0x0b, 0x07, 0x03};
+const unsigned char SR[16] = {0x01, 0x02, 0x03, 0x00, 0x06, 0x07, 0x04, 0x05, 0x0b, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0e, 0x0f};
+const unsigned char SRM0[16] = {0x0f, 0x0a, 0x05, 0x00, 0x0e, 0x09, 0x04, 0x03, 0x0d, 0x08, 0x07, 0x02, 0x0c, 0x0b, 0x06, 0x01};
+
+const int128 BS0 = {{0x5555555555555555ULL, 0x5555555555555555ULL}};
+const int128 BS1 = {{0x3333333333333333ULL, 0x3333333333333333ULL}};
+const int128 BS2 = {{0x0f0f0f0f0f0f0f0fULL, 0x0f0f0f0f0f0f0f0fULL}};
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/int128.h b/src/libsodium/crypto_stream/aes128ctr/portable/int128.h
new file mode 100644
index 0000000..3fd2111
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/int128.h
@@ -0,0 +1,56 @@
+#ifndef INT128_H
+#define INT128_H
+
+#include <stdint.h>
+
+#include "common.h"
+
+#ifdef __cplusplus
+# if __GNUC__
+# pragma GCC diagnostic ignored "-Wlong-long"
+# endif
+#endif
+
+typedef union {
+ uint64_t u64[2];
+ uint32_t u32[4];
+ uint8_t u8[16];
+} int128;
+
+#define xor2 crypto_stream_aes128ctr_portable_xor2
+void xor2(int128 *r, const int128 *x);
+
+#define and2 crypto_stream_aes128ctr_portable_and2
+void and2(int128 *r, const int128 *x);
+
+#define or2 crypto_stream_aes128ctr_portable_or2
+void or2(int128 *r, const int128 *x);
+
+#define copy2 crypto_stream_aes128ctr_portable_copy2
+void copy2(int128 *r, const int128 *x);
+
+#define shufb crypto_stream_aes128ctr_portable_shufb
+void shufb(int128 *r, const unsigned char *l);
+
+#define shufd crypto_stream_aes128ctr_portable_shufd
+void shufd(int128 *r, const int128 *x, const unsigned int c);
+
+#define rshift32_littleendian crypto_stream_aes128ctr_portable_rshift32_littleendian
+void rshift32_littleendian(int128 *r, const unsigned int n);
+
+#define rshift64_littleendian crypto_stream_aes128ctr_portable_rshift64_littleendian
+void rshift64_littleendian(int128 *r, const unsigned int n);
+
+#define lshift64_littleendian crypto_stream_aes128ctr_portable_lshift64_littleendian
+void lshift64_littleendian(int128 *r, const unsigned int n);
+
+#define toggle crypto_stream_aes128ctr_portable_toggle
+void toggle(int128 *r);
+
+#define xor_rcon crypto_stream_aes128ctr_portable_xor_rcon
+void xor_rcon(int128 *r);
+
+#define add_uint32_big crypto_stream_aes128ctr_portable_add_uint32_big
+void add_uint32_big(int128 *r, uint32 x);
+
+#endif
diff --git
a/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c new file mode 100644 index 0000000..703de39 --- /dev/null +++ b/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c @@ -0,0 +1,131 @@ + +#include "int128.h" +#include "common.h" + +void xor2(int128 *r, const int128 *x) +{ + r->u64[0] ^= x->u64[0]; + r->u64[1] ^= x->u64[1]; +} + +void and2(int128 *r, const int128 *x) +{ + r->u64[0] &= x->u64[0]; + r->u64[1] &= x->u64[1]; +} + +void or2(int128 *r, const int128 *x) +{ + r->u64[0] |= x->u64[0]; + r->u64[1] |= x->u64[1]; +} + +void copy2(int128 *r, const int128 *x) +{ + r->u64[0] = x->u64[0]; + r->u64[1] = x->u64[1]; +} + +void shufb(int128 *r, const unsigned char *l) +{ + int128 t; + uint8_t *ct; + uint8_t *cr; + + copy2(&t, r); + cr = r->u8; + ct = t.u8; + cr[0] = ct[l[0]]; + cr[1] = ct[l[1]]; + cr[2] = ct[l[2]]; + cr[3] = ct[l[3]]; + cr[4] = ct[l[4]]; + cr[5] = ct[l[5]]; + cr[6] = ct[l[6]]; + cr[7] = ct[l[7]]; + cr[8] = ct[l[8]]; + cr[9] = ct[l[9]]; + cr[10] = ct[l[10]]; + cr[11] = ct[l[11]]; + cr[12] = ct[l[12]]; + cr[13] = ct[l[13]]; + cr[14] = ct[l[14]]; + cr[15] = ct[l[15]]; +} + +void shufd(int128 *r, const int128 *x, const unsigned int c) +{ + int128 t; + + t.u32[0] = x->u32[c >> 0 & 3]; + t.u32[1] = x->u32[c >> 2 & 3]; + t.u32[2] = x->u32[c >> 4 & 3]; + t.u32[3] = x->u32[c >> 6 & 3]; + copy2(r, &t); +} + +void rshift32_littleendian(int128 *r, const unsigned int n) +{ + unsigned char *rp = (unsigned char *)r; + uint32 t; + t = load32_littleendian(rp); + t >>= n; + store32_littleendian(rp, t); + t = load32_littleendian(rp+4); + t >>= n; + store32_littleendian(rp+4, t); + t = load32_littleendian(rp+8); + t >>= n; + store32_littleendian(rp+8, t); + t = load32_littleendian(rp+12); + t >>= n; + store32_littleendian(rp+12, t); +} + +void rshift64_littleendian(int128 *r, const unsigned int n) +{ + unsigned char *rp = (unsigned char *)r; + uint64 t; + t = load64_littleendian(rp); + t >>= n; + store64_littleendian(rp, t); + t = load64_littleendian(rp+8); + t >>= n; + store64_littleendian(rp+8, t); +} + +void lshift64_littleendian(int128 *r, const unsigned int n) +{ + unsigned char *rp = (unsigned char *)r; + uint64 t; + t = load64_littleendian(rp); + t <<= n; + store64_littleendian(rp, t); + t = load64_littleendian(rp+8); + t <<= n; + store64_littleendian(rp+8, t); +} + +void toggle(int128 *r) +{ + r->u64[0] ^= 0xffffffffffffffffULL; + r->u64[1] ^= 0xffffffffffffffffULL; +} + +void xor_rcon(int128 *r) +{ + unsigned char *rp = (unsigned char *)r; + uint32 t; + t = load32_littleendian(rp+12); + t ^= 0xffffffff; + store32_littleendian(rp+12, t); +} + +void add_uint32_big(int128 *r, uint32 x) +{ + unsigned char *rp = (unsigned char *)r; + uint32 t; + t = load32_littleendian(rp+12); + t += x; + store32_littleendian(rp+12, t); +} diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c new file mode 100644 index 0000000..8f4ec72 --- /dev/null +++ b/src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c @@ -0,0 +1,28 @@ +#include "api.h" + +int crypto_stream( + unsigned char *out, + unsigned long long outlen, + const unsigned char *n, + const unsigned char *k + ) +{ + unsigned char d[crypto_stream_BEFORENMBYTES]; + crypto_stream_beforenm(d, k); + crypto_stream_afternm(out, outlen, n, d); + return 0; +} + +int crypto_stream_xor( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen, + 
const unsigned char *n, + const unsigned char *k + ) +{ + unsigned char d[crypto_stream_BEFORENMBYTES]; + crypto_stream_beforenm(d, k); + crypto_stream_xor_afternm(out, in, inlen, n, d); + return 0; +} diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/types.h b/src/libsodium/crypto_stream/aes128ctr/portable/types.h new file mode 100644 index 0000000..6aa502f --- /dev/null +++ b/src/libsodium/crypto_stream/aes128ctr/portable/types.h @@ -0,0 +1,10 @@ +#ifndef TYPES_H +#define TYPES_H + +#include "crypto_uint32.h" +typedef crypto_uint32 uint32; + +#include "crypto_uint64.h" +typedef crypto_uint64 uint64; + +#endif diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c new file mode 100644 index 0000000..139dbe5 --- /dev/null +++ b/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c @@ -0,0 +1,181 @@ +/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper + * Date: 2009-03-19 + * Public domain */ + +#include +#include "api.h" +#include "int128.h" +#include "common.h" +#include "consts.h" + +int crypto_stream_xor_afternm(unsigned char *out, const unsigned char *in, unsigned long long len, const unsigned char *nonce, const unsigned char *c) +{ + + int128 xmm0; + int128 xmm1; + int128 xmm2; + int128 xmm3; + int128 xmm4; + int128 xmm5; + int128 xmm6; + int128 xmm7; + + int128 xmm8; + int128 xmm9; + int128 xmm10; + int128 xmm11; + int128 xmm12; + int128 xmm13; + int128 xmm14; + int128 xmm15; + + int128 nonce_stack; + unsigned long long lensav; + unsigned char bl[128]; + unsigned char *blp; + unsigned char *np; + unsigned char b; + + uint32 tmp; + + /* Copy nonce on the stack */ + copy2(&nonce_stack, (const int128 *) (nonce + 0)); + np = (unsigned char *)&nonce_stack; + + enc_block: + + xmm0 = *(int128 *) (np + 0); + copy2(&xmm1, &xmm0); + shufb(&xmm1, SWAP32); + copy2(&xmm2, &xmm1); + copy2(&xmm3, &xmm1); + copy2(&xmm4, &xmm1); + copy2(&xmm5, &xmm1); + copy2(&xmm6, &xmm1); + copy2(&xmm7, &xmm1); + + add_uint32_big(&xmm1, 1); + add_uint32_big(&xmm2, 2); + add_uint32_big(&xmm3, 3); + add_uint32_big(&xmm4, 4); + add_uint32_big(&xmm5, 5); + add_uint32_big(&xmm6, 6); + add_uint32_big(&xmm7, 7); + + shufb(&xmm0, M0); + shufb(&xmm1, M0SWAP); + shufb(&xmm2, M0SWAP); + shufb(&xmm3, M0SWAP); + shufb(&xmm4, M0SWAP); + shufb(&xmm5, M0SWAP); + shufb(&xmm6, M0SWAP); + shufb(&xmm7, M0SWAP); + + bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8) + + aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, 
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + + bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0) + + if(len < 128) goto partial; + if(len == 128) goto full; + + tmp = load32_bigendian(np + 12); + tmp += 8; + store32_bigendian(np + 12, tmp); + + xor2(&xmm8, (const int128 *)(in + 0)); + xor2(&xmm9, (const int128 *)(in + 16)); + xor2(&xmm12, (const int128 *)(in + 32)); + xor2(&xmm14, (const int128 *)(in + 48)); + xor2(&xmm11, (const int128 *)(in + 64)); + xor2(&xmm15, (const int128 *)(in + 80)); + xor2(&xmm10, (const int128 *)(in + 96)); + xor2(&xmm13, (const int128 *)(in + 112)); + + *(int128 *) (out + 0) = xmm8; + *(int128 *) (out + 16) = xmm9; + *(int128 *) (out + 32) = xmm12; + *(int128 *) (out + 48) = xmm14; + *(int128 *) (out + 64) = xmm11; + *(int128 *) (out + 80) = xmm15; + *(int128 *) (out + 96) = xmm10; + *(int128 *) (out + 112) = xmm13; + + len -= 128; + in += 128; + out += 128; + + goto enc_block; + + partial: + + lensav = len; + len >>= 4; + + tmp = load32_bigendian(np + 12); + tmp += len; + store32_bigendian(np + 12, tmp); + + blp = bl; + *(int128 *)(blp + 0) = xmm8; + *(int128 *)(blp + 16) = xmm9; + *(int128 *)(blp + 32) = xmm12; + *(int128 *)(blp + 48) = xmm14; + *(int128 *)(blp + 64) = xmm11; + *(int128 *)(blp + 80) = xmm15; + *(int128 *)(blp + 96) = xmm10; + *(int128 *)(blp + 112) = xmm13; + + bytes: + + if(lensav == 0) goto end; + + b = blp[0]; /* clang false positive */ + b ^= *(const unsigned char *)(in + 0); + *(unsigned char *)(out + 0) = b; + + blp += 1; + in +=1; + out +=1; + lensav -= 1; + + goto bytes; + + full: + + tmp = load32_bigendian(np + 12); + tmp += 8; + store32_bigendian(np + 12, tmp); + + xor2(&xmm8, (const int128 *)(in + 0)); + xor2(&xmm9, (const int128 *)(in + 16)); + xor2(&xmm12, (const int128 *)(in + 32)); + xor2(&xmm14, (const int128 *)(in + 48)); + xor2(&xmm11, (const int128 *)(in + 64)); + xor2(&xmm15, (const int128 *)(in + 80)); + xor2(&xmm10, (const int128 *)(in + 96)); + xor2(&xmm13, (const int128 *)(in + 112)); + + *(int128 *) (out + 0) = xmm8; + *(int128 *) (out + 16) = xmm9; + *(int128 *) (out + 32) = xmm12; + *(int128 *) (out + 48) = xmm14; + *(int128 *) (out + 64) = xmm11; + *(int128 *) (out + 80) = xmm15; + *(int128 *) (out + 96) = xmm10; + *(int128 *) (out + 112) = xmm13; + + end: + return 0; + +} -- cgit v1.2.3