summaryrefslogtreecommitdiff
path: root/src/libsodium/crypto_stream
diff options
context:
space:
mode:
Diffstat (limited to 'src/libsodium/crypto_stream')
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/checksum1
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c159
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/api.h13
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c59
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/common.h788
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c64
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/consts.h28
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/consts_aes128ctr.c14
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/int128.h56
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c131
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c28
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/types.h10
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c181
-rw-r--r--src/libsodium/crypto_stream/aes128ctr/stream_aes128ctr_api.c16
-rw-r--r--src/libsodium/crypto_stream/aes256estream/hongjun/aes-table-be.h273
-rw-r--r--src/libsodium/crypto_stream/aes256estream/hongjun/aes-table-le.h274
-rw-r--r--src/libsodium/crypto_stream/aes256estream/hongjun/aes-table.h62
-rw-r--r--src/libsodium/crypto_stream/aes256estream/hongjun/aes256-ctr.c239
-rw-r--r--src/libsodium/crypto_stream/aes256estream/hongjun/aes256.h171
-rw-r--r--src/libsodium/crypto_stream/aes256estream/hongjun/api.h13
-rw-r--r--src/libsodium/crypto_stream/aes256estream/hongjun/ecrypt-sync.h27
-rw-r--r--src/libsodium/crypto_stream/aes256estream/stream_aes256estream_api.c16
-rw-r--r--src/libsodium/crypto_stream/chacha20/ref/api.h12
-rw-r--r--src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c275
-rw-r--r--src/libsodium/crypto_stream/chacha20/stream_chacha20_api.c36
-rw-r--r--src/libsodium/crypto_stream/crypto_stream.c36
-rw-r--r--src/libsodium/crypto_stream/salsa20/amd64_xmm6/api.h1
-rw-r--r--src/libsodium/crypto_stream/salsa20/amd64_xmm6/stream_salsa20_amd64_xmm6.S944
-rw-r--r--src/libsodium/crypto_stream/salsa20/checksum1
-rw-r--r--src/libsodium/crypto_stream/salsa20/ref/api.h5
-rw-r--r--src/libsodium/crypto_stream/salsa20/ref/stream_salsa20_ref.c61
-rw-r--r--src/libsodium/crypto_stream/salsa20/ref/xor_salsa20_ref.c69
-rw-r--r--src/libsodium/crypto_stream/salsa20/stream_salsa20_api.c19
-rw-r--r--src/libsodium/crypto_stream/salsa2012/checksum1
-rw-r--r--src/libsodium/crypto_stream/salsa2012/ref/api.h10
-rw-r--r--src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012.c51
-rw-r--r--src/libsodium/crypto_stream/salsa2012/ref/xor_salsa2012.c54
-rw-r--r--src/libsodium/crypto_stream/salsa2012/stream_salsa2012_api.c11
-rw-r--r--src/libsodium/crypto_stream/salsa208/checksum1
-rw-r--r--src/libsodium/crypto_stream/salsa208/ref/api.h9
-rw-r--r--src/libsodium/crypto_stream/salsa208/ref/stream_salsa208.c51
-rw-r--r--src/libsodium/crypto_stream/salsa208/ref/xor_salsa208.c54
-rw-r--r--src/libsodium/crypto_stream/salsa208/stream_salsa208_api.c11
-rw-r--r--src/libsodium/crypto_stream/try.c122
-rw-r--r--src/libsodium/crypto_stream/xsalsa20/checksum1
-rw-r--r--src/libsodium/crypto_stream/xsalsa20/ref/api.h10
-rw-r--r--src/libsodium/crypto_stream/xsalsa20/ref/stream_xsalsa20.c24
-rw-r--r--src/libsodium/crypto_stream/xsalsa20/ref/xor_xsalsa20.c25
-rw-r--r--src/libsodium/crypto_stream/xsalsa20/stream_xsalsa20_api.c11
49 files changed, 4528 insertions, 0 deletions
diff --git a/src/libsodium/crypto_stream/aes128ctr/checksum b/src/libsodium/crypto_stream/aes128ctr/checksum
new file mode 100644
index 0000000..9286543
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/checksum
@@ -0,0 +1 @@
+6e9966897837aae181e93261ae88fdf0
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c
new file mode 100644
index 0000000..a5a9a7a
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c
@@ -0,0 +1,159 @@
+/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
+ * Date: 2009-03-19
+ * Public domain */
+
+#include "api.h"
+#include "int128.h"
+#include "common.h"
+#include "consts.h"
+
+int crypto_stream_afternm(unsigned char *out, unsigned long long len, const unsigned char *nonce, const unsigned char *c)
+{
+ /* Writes len keystream bytes to out, 128 bytes per pass; nonce is read as one int128 and c is the round-key table handed to the round macros. Always returns 0. */
+ int128 xmm0;
+ int128 xmm1;
+ int128 xmm2;
+ int128 xmm3;
+ int128 xmm4;
+ int128 xmm5;
+ int128 xmm6;
+ int128 xmm7;
+ /* xmm0..xmm7 receive the eight input blocks; xmm8..xmm15 start as scratch and the two banks swap roles on every round. */
+ int128 xmm8;
+ int128 xmm9;
+ int128 xmm10;
+ int128 xmm11;
+ int128 xmm12;
+ int128 xmm13;
+ int128 xmm14;
+ int128 xmm15;
+ /* Local copy of the nonce block, plus staging used only for a trailing partial block. */
+ int128 nonce_stack;
+ unsigned long long lensav;
+ unsigned char bl[128];
+ unsigned char *blp;
+ unsigned char *np;
+ unsigned char b;
+
+ uint32 tmp;
+
+ /* Copy nonce on the stack */
+ copy2(&nonce_stack, (const int128 *) (nonce + 0));
+ np = (unsigned char *)&nonce_stack;
+
+ enc_block:
+ /* xmm0 is the current block; xmm1..xmm7 are SWAP32-shuffled copies to which add_uint32_big adds 1..7. */
+ xmm0 = *(int128 *) (np + 0);
+ copy2(&xmm1, &xmm0);
+ shufb(&xmm1, SWAP32);
+ copy2(&xmm2, &xmm1);
+ copy2(&xmm3, &xmm1);
+ copy2(&xmm4, &xmm1);
+ copy2(&xmm5, &xmm1);
+ copy2(&xmm6, &xmm1);
+ copy2(&xmm7, &xmm1);
+
+ add_uint32_big(&xmm1, 1);
+ add_uint32_big(&xmm2, 2);
+ add_uint32_big(&xmm3, 3);
+ add_uint32_big(&xmm4, 4);
+ add_uint32_big(&xmm5, 5);
+ add_uint32_big(&xmm6, 6);
+ add_uint32_big(&xmm7, 7);
+ /* Byte-shuffle all eight blocks (M0 for xmm0, M0SWAP for the others) before bitslicing. */
+ shufb(&xmm0, M0);
+ shufb(&xmm1, M0SWAP);
+ shufb(&xmm2, M0SWAP);
+ shufb(&xmm3, M0SWAP);
+ shufb(&xmm4, M0SWAP);
+ shufb(&xmm5, M0SWAP);
+ shufb(&xmm6, M0SWAP);
+ shufb(&xmm7, M0SWAP);
+
+ bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
+ /* Nine aesround passes and one lastround, all reading round keys from c. */
+ aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+ aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+ aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+ aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+ aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+
+ bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
+ /* Fewer than 128 bytes requested: stage the block and copy only len bytes. Exactly 128: store it and finish. */
+ if(len < 128) goto partial;
+ if(len == 128) goto full;
+ /* More than 128 bytes remain: add 8 to the big-endian 32-bit value in bytes 12..15 of the local nonce copy. */
+ tmp = load32_bigendian(np + 12);
+ tmp += 8;
+ store32_bigendian(np + 12, tmp);
+ /* Store order is xmm8, 9, 12, 14, 11, 15, 10, 13. NOTE(review): the int128 casts assume out is suitably aligned for int128 - confirm against int128.h. */
+ *(int128 *) (out + 0) = xmm8;
+ *(int128 *) (out + 16) = xmm9;
+ *(int128 *) (out + 32) = xmm12;
+ *(int128 *) (out + 48) = xmm14;
+ *(int128 *) (out + 64) = xmm11;
+ *(int128 *) (out + 80) = xmm15;
+ *(int128 *) (out + 96) = xmm10;
+ *(int128 *) (out + 112) = xmm13;
+
+ len -= 128;
+ out += 128;
+
+ goto enc_block;
+
+ partial:
+
+ lensav = len;
+ len >>= 4;
+ /* NOTE(review): this counter update lands in nonce_stack, which is not read again before the function returns. */
+ tmp = load32_bigendian(np + 12);
+ tmp += len;
+ store32_bigendian(np + 12, tmp);
+ /* Stage the whole 128-byte block in bl, same register order as the direct stores. */
+ blp = bl;
+ *(int128 *)(blp + 0) = xmm8;
+ *(int128 *)(blp + 16) = xmm9;
+ *(int128 *)(blp + 32) = xmm12;
+ *(int128 *)(blp + 48) = xmm14;
+ *(int128 *)(blp + 64) = xmm11;
+ *(int128 *)(blp + 80) = xmm15;
+ *(int128 *)(blp + 96) = xmm10;
+ *(int128 *)(blp + 112) = xmm13;
+
+ bytes:
+ /* Copy the remaining lensav bytes from bl to out, one byte per pass through this label. */
+ if(lensav == 0) goto end;
+
+ b = blp[0]; /* clang false positive */
+ *(unsigned char *)(out + 0) = b;
+
+ blp += 1;
+ out +=1;
+ lensav -= 1;
+
+ goto bytes;
+
+ full:
+ /* Final full block. NOTE(review): as in the partial path, the counter update below is not read again. */
+ tmp = load32_bigendian(np + 12);
+ tmp += 8;
+ store32_bigendian(np + 12, tmp);
+
+ *(int128 *) (out + 0) = xmm8;
+ *(int128 *) (out + 16) = xmm9;
+ *(int128 *) (out + 32) = xmm12;
+ *(int128 *) (out + 48) = xmm14;
+ *(int128 *) (out + 64) = xmm11;
+ *(int128 *) (out + 80) = xmm15;
+ *(int128 *) (out + 96) = xmm10;
+ *(int128 *) (out + 112) = xmm13;
+
+ end:
+ return 0;
+
+}
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/api.h b/src/libsodium/crypto_stream/aes128ctr/portable/api.h
new file mode 100644
index 0000000..3c53fb9
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/api.h
@@ -0,0 +1,13 @@
+
+#include "crypto_stream_aes128ctr.h"
+/* Map the generic crypto_stream_* names used by the portable sources onto their crypto_stream_aes128ctr_* counterparts. */
+#define crypto_stream crypto_stream_aes128ctr
+#define crypto_stream_xor crypto_stream_aes128ctr_xor
+#define crypto_stream_beforenm crypto_stream_aes128ctr_beforenm
+#define crypto_stream_afternm crypto_stream_aes128ctr_afternm
+#define crypto_stream_xor_afternm crypto_stream_aes128ctr_xor_afternm
+#define crypto_stream_KEYBYTES crypto_stream_aes128ctr_KEYBYTES
+#define crypto_stream_NONCEBYTES crypto_stream_aes128ctr_NONCEBYTES
+#define crypto_stream_BEFORENMBYTES crypto_stream_aes128ctr_BEFORENMBYTES
+#define crypto_stream_IMPLEMENTATION crypto_stream_aes128ctr_IMPLEMENTATION
+#define crypto_stream_VERSION crypto_stream_aes128ctr_VERSION
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c
new file mode 100644
index 0000000..f8623dd
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c
@@ -0,0 +1,59 @@
+/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
+ * Date: 2009-03-19
+ * Public domain */
+
+#include "api.h"
+#include "consts.h"
+#include "int128.h"
+#include "common.h"
+
+int crypto_stream_beforenm(unsigned char *c, const unsigned char *k)
+{
+
+ /*
+ * Expands the key k into the round-key table c, one 128-byte slot per
+ * step.
+ *
+ * bitslicekey0 reads one int128 from k and writes slot 0 (c + 0..127).
+ * keyexpbs1, the keyexpbs calls for i = 2..9 and keyexpbs10 then each
+ * read the previous slot and write the next one; the last slot is
+ * c + 1280..1407. The xor_rcon(...) statements passed to keyexpbs are
+ * pasted into the macro right after its sbox step.
+ * Always returns 0.
+ */
+
+ int128 xmm0;
+ int128 xmm1;
+ int128 xmm2;
+ int128 xmm3;
+ int128 xmm4;
+ int128 xmm5;
+ int128 xmm6;
+ int128 xmm7;
+ int128 xmm8;
+ int128 xmm9;
+ int128 xmm10;
+ int128 xmm11;
+ int128 xmm12;
+ int128 xmm13;
+ int128 xmm14;
+ int128 xmm15;
+ int128 t;
+ /* bitslicekey0 refers to xmm0..xmm7 and t by name, so those locals must exist under exactly these names. */
+ bitslicekey0(k, c)
+ /* The register order passed to each keyexpbs call differs from round to round; keep the argument lists exactly as written. */
+ keyexpbs1(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ keyexpbs(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm1);, 2,c)
+ keyexpbs(xmm0, xmm1, xmm3, xmm2, xmm6, xmm5, xmm4, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm6);, 3,c)
+ keyexpbs(xmm0, xmm1, xmm6, xmm4, xmm2, xmm7, xmm3, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 4,c)
+
+ keyexpbs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 5,c)
+ keyexpbs(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm5);, 6,c)
+ keyexpbs(xmm0, xmm1, xmm3, xmm2, xmm6, xmm5, xmm4, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 7,c)
+ keyexpbs(xmm0, xmm1, xmm6, xmm4, xmm2, xmm7, xmm3, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm7);, 8,c)
+
+ keyexpbs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm0); xor_rcon(&xmm1); xor_rcon(&xmm6); xor_rcon(&xmm3);, 9,c)
+ keyexpbs10(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+
+ return 0;
+}
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/common.h b/src/libsodium/crypto_stream/aes128ctr/portable/common.h
new file mode 100644
index 0000000..3923c02
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/common.h
@@ -0,0 +1,788 @@
+/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
+ Date: 2009-03-19
+ Public domain */
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "types.h"
+
+#define load32_bigendian crypto_stream_aes128ctr_portable_load32_bigendian /* each helper is renamed with a crypto_stream_aes128ctr_portable_ prefix */
+uint32 load32_bigendian(const unsigned char *x); /* reads x[0..3]; x[0] is the most significant byte */
+
+#define store32_bigendian crypto_stream_aes128ctr_portable_store32_bigendian
+void store32_bigendian(unsigned char *x,uint32 u); /* writes x[0..3]; x[0] gets the most significant byte */
+
+#define load32_littleendian crypto_stream_aes128ctr_portable_load32_littleendian
+uint32 load32_littleendian(const unsigned char *x); /* reads x[0..3]; x[0] is the least significant byte */
+
+#define store32_littleendian crypto_stream_aes128ctr_portable_store32_littleendian
+void store32_littleendian(unsigned char *x,uint32 u); /* writes x[0..3]; x[0] gets the least significant byte */
+
+#define load64_littleendian crypto_stream_aes128ctr_portable_load64_littleendian
+uint64 load64_littleendian(const unsigned char *x); /* reads x[0..7]; x[0] is the least significant byte */
+
+#define store64_littleendian crypto_stream_aes128ctr_portable_store64_littleendian
+void store64_littleendian(unsigned char *x,uint64 u); /* writes x[0..7]; x[0] gets the least significant byte */
+
+/* Macros required only for key expansion */
+/* keyexpbs1: reads slot 0 of bskey (bytes 0..127) and writes slot 1 (bytes 128..255). b0..b7 are updated in place; t0..t7 are scratch. Expands to a bare statement list. */
+#define keyexpbs1(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \
+ rotbyte(&b0);\
+ rotbyte(&b1);\
+ rotbyte(&b2);\
+ rotbyte(&b3);\
+ rotbyte(&b4);\
+ rotbyte(&b5);\
+ rotbyte(&b6);\
+ rotbyte(&b7);\
+ ; /* sbox on the rotated slices; t0..t7 serve as its temporaries */\
+ sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
+ ; /* xor_rcon is applied to b0 only, then every slice is shuffled with EXPB0 */\
+ xor_rcon(&b0);\
+ shufb(&b0, EXPB0);\
+ shufb(&b1, EXPB0);\
+ shufb(&b4, EXPB0);\
+ shufb(&b6, EXPB0);\
+ shufb(&b3, EXPB0);\
+ shufb(&b7, EXPB0);\
+ shufb(&b2, EXPB0);\
+ shufb(&b5, EXPB0);\
+ shufb(&b0, EXPB0); /* NOTE(review): b0 was already shuffled with EXPB0 above; this second call looks redundant - confirm */\
+ ; /* load the previous slot (bskey + 0..112) into t0..t7 */\
+ t0 = *(int128 *)(bskey + 0);\
+ t1 = *(int128 *)(bskey + 16);\
+ t2 = *(int128 *)(bskey + 32);\
+ t3 = *(int128 *)(bskey + 48);\
+ t4 = *(int128 *)(bskey + 64);\
+ t5 = *(int128 *)(bskey + 80);\
+ t6 = *(int128 *)(bskey + 96);\
+ t7 = *(int128 *)(bskey + 112);\
+ ; /* xor the previous slot into b (pairing b0,b1,b4,b6,b3,b7,b2,b5 with t0..t7), then three more times after rshift32_littleendian(t, 8) */\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ;\
+ rshift32_littleendian(&t0, 8);\
+ rshift32_littleendian(&t1, 8);\
+ rshift32_littleendian(&t2, 8);\
+ rshift32_littleendian(&t3, 8);\
+ rshift32_littleendian(&t4, 8);\
+ rshift32_littleendian(&t5, 8);\
+ rshift32_littleendian(&t6, 8);\
+ rshift32_littleendian(&t7, 8);\
+ ;\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ;\
+ rshift32_littleendian(&t0, 8);\
+ rshift32_littleendian(&t1, 8);\
+ rshift32_littleendian(&t2, 8);\
+ rshift32_littleendian(&t3, 8);\
+ rshift32_littleendian(&t4, 8);\
+ rshift32_littleendian(&t5, 8);\
+ rshift32_littleendian(&t6, 8);\
+ rshift32_littleendian(&t7, 8);\
+ ;\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ;\
+ rshift32_littleendian(&t0, 8);\
+ rshift32_littleendian(&t1, 8);\
+ rshift32_littleendian(&t2, 8);\
+ rshift32_littleendian(&t3, 8);\
+ rshift32_littleendian(&t4, 8);\
+ rshift32_littleendian(&t5, 8);\
+ rshift32_littleendian(&t6, 8);\
+ rshift32_littleendian(&t7, 8);\
+ ;\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ; /* store slot 1 in the order b0, b1, b4, b6, b3, b7, b2, b5 */\
+ *(int128 *)(bskey + 128) = b0;\
+ *(int128 *)(bskey + 144) = b1;\
+ *(int128 *)(bskey + 160) = b4;\
+ *(int128 *)(bskey + 176) = b6;\
+ *(int128 *)(bskey + 192) = b3;\
+ *(int128 *)(bskey + 208) = b7;\
+ *(int128 *)(bskey + 224) = b2;\
+ *(int128 *)(bskey + 240) = b5;\
+
+#define keyexpbs10(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) ; /* last expansion step: reads slot 9 (bskey + 9*128) and writes bskey + 1280..1407; b0..b7 updated in place, t0..t7 scratch */\
+ toggle(&b0);\
+ toggle(&b1);\
+ toggle(&b5);\
+ toggle(&b6);\
+ rotbyte(&b0);\
+ rotbyte(&b1);\
+ rotbyte(&b2);\
+ rotbyte(&b3);\
+ rotbyte(&b4);\
+ rotbyte(&b5);\
+ rotbyte(&b6);\
+ rotbyte(&b7);\
+ ; /* sbox on the toggled, rotated slices */\
+ sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
+ ; /* xor_rcon goes into b1, b4, b3 and b7 here; then every slice is shuffled with EXPB0 */\
+ xor_rcon(&b1);\
+ xor_rcon(&b4);\
+ xor_rcon(&b3);\
+ xor_rcon(&b7);\
+ shufb(&b0, EXPB0);\
+ shufb(&b1, EXPB0);\
+ shufb(&b4, EXPB0);\
+ shufb(&b6, EXPB0);\
+ shufb(&b3, EXPB0);\
+ shufb(&b7, EXPB0);\
+ shufb(&b2, EXPB0);\
+ shufb(&b5, EXPB0);\
+ ; /* load slot 9 into t0..t7 */\
+ t0 = *(int128 *)(bskey + 9 * 128 + 0);\
+ t1 = *(int128 *)(bskey + 9 * 128 + 16);\
+ t2 = *(int128 *)(bskey + 9 * 128 + 32);\
+ t3 = *(int128 *)(bskey + 9 * 128 + 48);\
+ t4 = *(int128 *)(bskey + 9 * 128 + 64);\
+ t5 = *(int128 *)(bskey + 9 * 128 + 80);\
+ t6 = *(int128 *)(bskey + 9 * 128 + 96);\
+ t7 = *(int128 *)(bskey + 9 * 128 + 112);\
+ ; /* the loaded copies of t0, t1, t5, t6 are toggled before use */\
+ toggle(&t0);\
+ toggle(&t1);\
+ toggle(&t5);\
+ toggle(&t6);\
+ ; /* xor the previous slot into b, then three more times after rshift32_littleendian(t, 8) */\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ;\
+ rshift32_littleendian(&t0, 8);\
+ rshift32_littleendian(&t1, 8);\
+ rshift32_littleendian(&t2, 8);\
+ rshift32_littleendian(&t3, 8);\
+ rshift32_littleendian(&t4, 8);\
+ rshift32_littleendian(&t5, 8);\
+ rshift32_littleendian(&t6, 8);\
+ rshift32_littleendian(&t7, 8);\
+ ;\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ;\
+ rshift32_littleendian(&t0, 8);\
+ rshift32_littleendian(&t1, 8);\
+ rshift32_littleendian(&t2, 8);\
+ rshift32_littleendian(&t3, 8);\
+ rshift32_littleendian(&t4, 8);\
+ rshift32_littleendian(&t5, 8);\
+ rshift32_littleendian(&t6, 8);\
+ rshift32_littleendian(&t7, 8);\
+ ;\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ;\
+ rshift32_littleendian(&t0, 8);\
+ rshift32_littleendian(&t1, 8);\
+ rshift32_littleendian(&t2, 8);\
+ rshift32_littleendian(&t3, 8);\
+ rshift32_littleendian(&t4, 8);\
+ rshift32_littleendian(&t5, 8);\
+ rshift32_littleendian(&t6, 8);\
+ rshift32_littleendian(&t7, 8);\
+ ;\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ; /* only this final step applies an extra M0 shuffle to every slice before storing */\
+ shufb(&b0, M0);\
+ shufb(&b1, M0);\
+ shufb(&b2, M0);\
+ shufb(&b3, M0);\
+ shufb(&b4, M0);\
+ shufb(&b5, M0);\
+ shufb(&b6, M0);\
+ shufb(&b7, M0);\
+ ; /* store in the order b0, b1, b4, b6, b3, b7, b2, b5 */\
+ *(int128 *)(bskey + 1280) = b0;\
+ *(int128 *)(bskey + 1296) = b1;\
+ *(int128 *)(bskey + 1312) = b4;\
+ *(int128 *)(bskey + 1328) = b6;\
+ *(int128 *)(bskey + 1344) = b3;\
+ *(int128 *)(bskey + 1360) = b7;\
+ *(int128 *)(bskey + 1376) = b2;\
+ *(int128 *)(bskey + 1392) = b5;\
+
+
+#define keyexpbs(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, rcon, i, bskey) /* generic expansion step: reads slot i-1 of bskey and writes slot i; rcon is a statement list pasted in after sbox; b0..b7 updated in place, t0..t7 scratch */ \
+ toggle(&b0);\
+ toggle(&b1);\
+ toggle(&b5);\
+ toggle(&b6);\
+ rotbyte(&b0);\
+ rotbyte(&b1);\
+ rotbyte(&b2);\
+ rotbyte(&b3);\
+ rotbyte(&b4);\
+ rotbyte(&b5);\
+ rotbyte(&b6);\
+ rotbyte(&b7);\
+ ; /* sbox on the toggled, rotated slices */\
+ sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
+ ; /* caller-supplied round-constant statements, then EXPB0 shuffle on every slice */\
+ rcon;\
+ shufb(&b0, EXPB0);\
+ shufb(&b1, EXPB0);\
+ shufb(&b4, EXPB0);\
+ shufb(&b6, EXPB0);\
+ shufb(&b3, EXPB0);\
+ shufb(&b7, EXPB0);\
+ shufb(&b2, EXPB0);\
+ shufb(&b5, EXPB0);\
+ ; /* load slot i-1 into t0..t7; i is used unparenthesised, so pass a plain constant */\
+ t0 = *(int128 *)(bskey + (i-1) * 128 + 0);\
+ t1 = *(int128 *)(bskey + (i-1) * 128 + 16);\
+ t2 = *(int128 *)(bskey + (i-1) * 128 + 32);\
+ t3 = *(int128 *)(bskey + (i-1) * 128 + 48);\
+ t4 = *(int128 *)(bskey + (i-1) * 128 + 64);\
+ t5 = *(int128 *)(bskey + (i-1) * 128 + 80);\
+ t6 = *(int128 *)(bskey + (i-1) * 128 + 96);\
+ t7 = *(int128 *)(bskey + (i-1) * 128 + 112);\
+ ; /* the loaded copies of t0, t1, t5, t6 are toggled before use */\
+ toggle(&t0);\
+ toggle(&t1);\
+ toggle(&t5);\
+ toggle(&t6);\
+ ; /* xor the previous slot into b, then three more times after rshift32_littleendian(t, 8) */\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ;\
+ rshift32_littleendian(&t0, 8);\
+ rshift32_littleendian(&t1, 8);\
+ rshift32_littleendian(&t2, 8);\
+ rshift32_littleendian(&t3, 8);\
+ rshift32_littleendian(&t4, 8);\
+ rshift32_littleendian(&t5, 8);\
+ rshift32_littleendian(&t6, 8);\
+ rshift32_littleendian(&t7, 8);\
+ ;\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ;\
+ rshift32_littleendian(&t0, 8);\
+ rshift32_littleendian(&t1, 8);\
+ rshift32_littleendian(&t2, 8);\
+ rshift32_littleendian(&t3, 8);\
+ rshift32_littleendian(&t4, 8);\
+ rshift32_littleendian(&t5, 8);\
+ rshift32_littleendian(&t6, 8);\
+ rshift32_littleendian(&t7, 8);\
+ ;\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ;\
+ rshift32_littleendian(&t0, 8);\
+ rshift32_littleendian(&t1, 8);\
+ rshift32_littleendian(&t2, 8);\
+ rshift32_littleendian(&t3, 8);\
+ rshift32_littleendian(&t4, 8);\
+ rshift32_littleendian(&t5, 8);\
+ rshift32_littleendian(&t6, 8);\
+ rshift32_littleendian(&t7, 8);\
+ ;\
+ xor2(&b0, &t0);\
+ xor2(&b1, &t1);\
+ xor2(&b4, &t2);\
+ xor2(&b6, &t3);\
+ xor2(&b3, &t4);\
+ xor2(&b7, &t5);\
+ xor2(&b2, &t6);\
+ xor2(&b5, &t7);\
+ ; /* store slot i in the order b0, b1, b4, b6, b3, b7, b2, b5 */\
+ *(int128 *)(bskey + i*128 + 0) = b0;\
+ *(int128 *)(bskey + i*128 + 16) = b1;\
+ *(int128 *)(bskey + i*128 + 32) = b4;\
+ *(int128 *)(bskey + i*128 + 48) = b6;\
+ *(int128 *)(bskey + i*128 + 64) = b3;\
+ *(int128 *)(bskey + i*128 + 80) = b7;\
+ *(int128 *)(bskey + i*128 + 96) = b2;\
+ *(int128 *)(bskey + i*128 + 112) = b5;\
+
+/* Macros used in multiple contexts */
+/* bitslicekey0: loads one int128 from key, replicates it into xmm0..xmm7, bitslices, and stores the result at bskey + 0..127. Uses the caller's locals xmm0..xmm7 and t by name. */
+#define bitslicekey0(key, bskey) \
+ xmm0 = *(const int128 *) (key + 0);\
+ shufb(&xmm0, M0);\
+ copy2(&xmm1, &xmm0);\
+ copy2(&xmm2, &xmm0);\
+ copy2(&xmm3, &xmm0);\
+ copy2(&xmm4, &xmm0);\
+ copy2(&xmm5, &xmm0);\
+ copy2(&xmm6, &xmm0);\
+ copy2(&xmm7, &xmm0);\
+ ; /* note the reversed register order handed to bitslice */\
+ bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\
+ ; /* unlike bitslicekey / bitslicekey10, no toggle is applied before storing */\
+ *(int128 *) (bskey + 0) = xmm0;\
+ *(int128 *) (bskey + 16) = xmm1;\
+ *(int128 *) (bskey + 32) = xmm2;\
+ *(int128 *) (bskey + 48) = xmm3;\
+ *(int128 *) (bskey + 64) = xmm4;\
+ *(int128 *) (bskey + 80) = xmm5;\
+ *(int128 *) (bskey + 96) = xmm6;\
+ *(int128 *) (bskey + 112) = xmm7;\
+
+
+#define bitslicekey10(key, bskey) /* bitslices the int128 at key into bskey + 1280..1407; uses the caller's xmm0..xmm7 and t by name. NOTE(review): no M0 shuffle here, unlike bitslicekey - confirm that is intended */ \
+ xmm0 = *(int128 *) (key + 0);\
+ copy2(&xmm1, &xmm0);\
+ copy2(&xmm2, &xmm0);\
+ copy2(&xmm3, &xmm0);\
+ copy2(&xmm4, &xmm0);\
+ copy2(&xmm5, &xmm0);\
+ copy2(&xmm6, &xmm0);\
+ copy2(&xmm7, &xmm0);\
+ ; /* copy2 takes pointers, as at every other call site; the original passed the int128 values themselves */\
+ bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\
+ ; /* toggle slices 6, 5, 1 and 0 before storing */\
+ toggle(&xmm6);\
+ toggle(&xmm5);\
+ toggle(&xmm1);\
+ toggle(&xmm0);\
+ ;\
+ *(int128 *) (bskey + 0 + 1280) = xmm0;\
+ *(int128 *) (bskey + 16 + 1280) = xmm1;\
+ *(int128 *) (bskey + 32 + 1280) = xmm2;\
+ *(int128 *) (bskey + 48 + 1280) = xmm3;\
+ *(int128 *) (bskey + 64 + 1280) = xmm4;\
+ *(int128 *) (bskey + 80 + 1280) = xmm5;\
+ *(int128 *) (bskey + 96 + 1280) = xmm6;\
+ *(int128 *) (bskey + 112 + 1280) = xmm7;\
+
+
+#define bitslicekey(i,key,bskey) /* bitslices the int128 at key into slot i of bskey (bskey + 128*i ..); uses the caller's xmm0..xmm7 and t by name; i is used unparenthesised */ \
+ xmm0 = *(int128 *) (key + 0);\
+ shufb(&xmm0, M0);\
+ copy2(&xmm1, &xmm0);\
+ copy2(&xmm2, &xmm0);\
+ copy2(&xmm3, &xmm0);\
+ copy2(&xmm4, &xmm0);\
+ copy2(&xmm5, &xmm0);\
+ copy2(&xmm6, &xmm0);\
+ copy2(&xmm7, &xmm0);\
+ ; /* note the reversed register order handed to bitslice */\
+ bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\
+ ; /* toggle slices 6, 5, 1 and 0 before storing */\
+ toggle(&xmm6);\
+ toggle(&xmm5);\
+ toggle(&xmm1);\
+ toggle(&xmm0);\
+ ;\
+ *(int128 *) (bskey + 0 + 128*i) = xmm0;\
+ *(int128 *) (bskey + 16 + 128*i) = xmm1;\
+ *(int128 *) (bskey + 32 + 128*i) = xmm2;\
+ *(int128 *) (bskey + 48 + 128*i) = xmm3;\
+ *(int128 *) (bskey + 64 + 128*i) = xmm4;\
+ *(int128 *) (bskey + 80 + 128*i) = xmm5;\
+ *(int128 *) (bskey + 96 + 128*i) = xmm6;\
+ *(int128 *) (bskey + 112 + 128*i) = xmm7;\
+
+
+#define bitslice(x0, x1, x2, x3, x4, x5, x6, x7, t) /* three layers of swapmove over x0..x7 (shift 1 with BS0, shift 2 with BS1, shift 4 with BS2); all eight are updated in place, t is scratch */ \
+ swapmove(x0, x1, 1, BS0, t);\
+ swapmove(x2, x3, 1, BS0, t);\
+ swapmove(x4, x5, 1, BS0, t);\
+ swapmove(x6, x7, 1, BS0, t);\
+ ; /* layer 2: partners are two registers apart */\
+ swapmove(x0, x2, 2, BS1, t);\
+ swapmove(x1, x3, 2, BS1, t);\
+ swapmove(x4, x6, 2, BS1, t);\
+ swapmove(x5, x7, 2, BS1, t);\
+ ; /* layer 3: partners are four registers apart */\
+ swapmove(x0, x4, 4, BS2, t);\
+ swapmove(x1, x5, 4, BS2, t);\
+ swapmove(x2, x6, 4, BS2, t);\
+ swapmove(x3, x7, 4, BS2, t);\
+
+
+#define swapmove(a, b, n, m, t) /* t = ((b >> n) ^ a) & m; a ^= t; b ^= t << n - assuming the 64-bit shift helpers shift their argument in place (TODO confirm in int128.h) */ \
+ copy2(&t, &b);\
+ rshift64_littleendian(&t, n);\
+ xor2(&t, &a);\
+ and2(&t, &m);\
+ xor2(&a, &t);\
+ lshift64_littleendian(&t, n);\
+ xor2(&b, &t);
+
+#define rotbyte(x) /* x is a pointer to int128; shuffles it with the ROTB mask */ \
+ shufb(x, ROTB) /* TODO: Make faster */
+
+
+/* Macros used for encryption (and decryption) */
+/* shiftrows: for each of x0..x7, xor in the matching 16 bytes of slot i-1 of bskey, then shuffle with mask M. i is used unparenthesised. */
+#define shiftrows(x0, x1, x2, x3, x4, x5, x6, x7, i, M, bskey) \
+ xor2(&x0, (const int128 *)(bskey + 128*(i-1) + 0));\
+ shufb(&x0, M);\
+ xor2(&x1, (const int128 *)(bskey + 128*(i-1) + 16));\
+ shufb(&x1, M);\
+ xor2(&x2, (const int128 *)(bskey + 128*(i-1) + 32));\
+ shufb(&x2, M);\
+ xor2(&x3, (const int128 *)(bskey + 128*(i-1) + 48));\
+ shufb(&x3, M);\
+ xor2(&x4, (const int128 *)(bskey + 128*(i-1) + 64));\
+ shufb(&x4, M);\
+ xor2(&x5, (const int128 *)(bskey + 128*(i-1) + 80));\
+ shufb(&x5, M);\
+ xor2(&x6, (const int128 *)(bskey + 128*(i-1) + 96));\
+ shufb(&x6, M);\
+ xor2(&x7, (const int128 *)(bskey + 128*(i-1) + 112));\
+ shufb(&x7, M);\
+
+
+#define mixcolumns(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, t7) /* combines x0..x7 with shufd-permuted copies of themselves; the result is left in t0..t7 and x0..x7 are clobbered */ \
+ shufd(&t0, &x0, 0x93);\
+ shufd(&t1, &x1, 0x93);\
+ shufd(&t2, &x2, 0x93);\
+ shufd(&t3, &x3, 0x93);\
+ shufd(&t4, &x4, 0x93);\
+ shufd(&t5, &x5, 0x93);\
+ shufd(&t6, &x6, 0x93);\
+ shufd(&t7, &x7, 0x93);\
+ ; /* x_k ^= t_k for every k */\
+ xor2(&x0, &t0);\
+ xor2(&x1, &t1);\
+ xor2(&x2, &t2);\
+ xor2(&x3, &t3);\
+ xor2(&x4, &t4);\
+ xor2(&x5, &t5);\
+ xor2(&x6, &t6);\
+ xor2(&x7, &t7);\
+ ; /* t_k ^= x_(k-1), with t0 taking x7; t1, t3 and t4 additionally take x7 */\
+ xor2(&t0, &x7);\
+ xor2(&t1, &x0);\
+ xor2(&t2, &x1);\
+ xor2(&t1, &x7);\
+ xor2(&t3, &x2);\
+ xor2(&t4, &x3);\
+ xor2(&t5, &x4);\
+ xor2(&t3, &x7);\
+ xor2(&t6, &x5);\
+ xor2(&t7, &x6);\
+ xor2(&t4, &x7);\
+ ; /* permute each x_k in place with shufd selector 0x4e */\
+ shufd(&x0, &x0, 0x4e);\
+ shufd(&x1, &x1, 0x4e);\
+ shufd(&x2, &x2, 0x4e);\
+ shufd(&x3, &x3, 0x4e);\
+ shufd(&x4, &x4, 0x4e);\
+ shufd(&x5, &x5, 0x4e);\
+ shufd(&x6, &x6, 0x4e);\
+ shufd(&x7, &x7, 0x4e);\
+ ; /* final accumulation into t0..t7 */\
+ xor2(&t0, &x0);\
+ xor2(&t1, &x1);\
+ xor2(&t2, &x2);\
+ xor2(&t3, &x3);\
+ xor2(&t4, &x4);\
+ xor2(&t5, &x5);\
+ xor2(&t6, &x6);\
+ xor2(&t7, &x7);\
+
+
+#define aesround(i, b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) /* one round: shiftrows (keyed from slot i-1, mask SR), sbox, mixcolumns; the round output ends up in t0..t7 */ \
+ shiftrows(b0, b1, b2, b3, b4, b5, b6, b7, i, SR, bskey);\
+ sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
+ mixcolumns(b0, b1, b4, b6, b3, b7, b2, b5, t0, t1, t2, t3, t4, t5, t6, t7); /* note the permuted slice order b0, b1, b4, b6, b3, b7, b2, b5 */\
+
+
+#define lastround(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) /* final round: shiftrows with i = 10 (slot 9, mask SRM0), sbox, then xor with slot 10; no mixcolumns, result stays in b0..b7 */ \
+ shiftrows(b0, b1, b2, b3, b4, b5, b6, b7, 10, SRM0, bskey);\
+ sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
+ xor2(&b0,(const int128 *)(bskey + 128*10)); /* slot 10 is applied in the order b0, b1, b4, b6, b3, b7, b2, b5 */\
+ xor2(&b1,(const int128 *)(bskey + 128*10+16));\
+ xor2(&b4,(const int128 *)(bskey + 128*10+32));\
+ xor2(&b6,(const int128 *)(bskey + 128*10+48));\
+ xor2(&b3,(const int128 *)(bskey + 128*10+64));\
+ xor2(&b7,(const int128 *)(bskey + 128*10+80));\
+ xor2(&b2,(const int128 *)(bskey + 128*10+96));\
+ xor2(&b5,(const int128 *)(bskey + 128*10+112));\
+
+
+#define sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, s0, s1, s2, s3) /* InBasisChange, Inv_GF256, OutBasisChange on b0..b7 in place; each stage receives the slices in a different order; t0..t3 and s0..s3 are scratch */ \
+ InBasisChange(b0, b1, b2, b3, b4, b5, b6, b7); \
+ Inv_GF256(b6, b5, b0, b3, b7, b1, b4, b2, t0, t1, t2, t3, s0, s1, s2, s3); \
+ OutBasisChange(b7, b1, b4, b2, b6, b5, b0, b3); \
+
+
+#define InBasisChange(b0, b1, b2, b3, b4, b5, b6, b7) /* fixed sequence of xor2 updates on b0..b7 in place (b0 is only read); statement order matters because later xors read earlier results */ \
+ xor2(&b5, &b6);\
+ xor2(&b2, &b1);\
+ xor2(&b5, &b0);\
+ xor2(&b6, &b2);\
+ xor2(&b3, &b0);\
+ ;\
+ xor2(&b6, &b3);\
+ xor2(&b3, &b7);\
+ xor2(&b3, &b4);\
+ xor2(&b7, &b5);\
+ xor2(&b3, &b1);\
+ ;\
+ xor2(&b4, &b5);\
+ xor2(&b2, &b7);\
+ xor2(&b1, &b5);\
+
+#define OutBasisChange(b0, b1, b2, b3, b4, b5, b6, b7) /* fixed sequence of xor2 updates on b0..b7 in place; statement order matters because later xors read earlier results */ \
+ xor2(&b0, &b6);\
+ xor2(&b1, &b4);\
+ xor2(&b2, &b0);\
+ xor2(&b4, &b6);\
+ xor2(&b6, &b1);\
+ ;\
+ xor2(&b1, &b5);\
+ xor2(&b5, &b3);\
+ xor2(&b2, &b5);\
+ xor2(&b3, &b7);\
+ xor2(&b7, &b5);\
+ ;\
+ xor2(&b4, &b7);\
+
+#define Mul_GF4(x0, x1, y0, y1, t0) /* overwrites (x0, x1) with a value computed from (x0, x1) and (y0, y1); y0, y1 are only read; t0 is scratch. Presumably a GF(4) product, going by the name */ \
+ copy2(&t0, &y0);\
+ xor2(&t0, &y1);\
+ and2(&t0, &x0);\
+ xor2(&x0, &x1);\
+ and2(&x0, &y1);\
+ and2(&x1, &y0);\
+ xor2(&x0, &x1);\
+ xor2(&x1, &t0);\
+
+#define Mul_GF4_N(x0, x1, y0, y1, t0) /* same first six steps as Mul_GF4; only the last two xors differ (x1 ^= x0, then x0 ^= t0). y0, y1 are only read; t0 is scratch */ \
+ copy2(&t0, &y0);\
+ xor2(&t0, &y1);\
+ and2(&t0, &x0);\
+ xor2(&x0, &x1);\
+ and2(&x0, &y1);\
+ and2(&x1, &y0);\
+ xor2(&x1, &x0);\
+ xor2(&x0, &t0);\
+
+#define Mul_GF4_2(x0, x1, x2, x3, y0, y1, t0, t1) /* applies the Mul_GF4 step sequence to both (x0, x1) and (x2, x3) with the shared operand (y0, y1); y0, y1 are only read; t0, t1 are scratch */ \
+ copy2(&t0, &y0);\
+ xor2(&t0, &y1);\
+ copy2(&t1, &t0);\
+ and2(&t0, &x0);\
+ and2(&t1, &x2);\
+ xor2(&x0, &x1);\
+ xor2(&x2, &x3);\
+ and2(&x0, &y1);\
+ and2(&x2, &y1);\
+ and2(&x1, &y0);\
+ and2(&x3, &y0);\
+ xor2(&x0, &x1);\
+ xor2(&x2, &x3);\
+ xor2(&x1, &t0);\
+ xor2(&x3, &t1);\
+
+#define Mul_GF16(x0, x1, x2, x3, y0, y1, y2, y3, t0, t1, t2, t3) /* overwrites x0..x3 using three Mul_GF4-style products; t0..t3 are scratch. Side effect: y0 and y1 are left xored with y2 and y3 */ \
+ copy2(&t0, &x0);\
+ copy2(&t1, &x1);\
+ Mul_GF4(x0, x1, y0, y1, t2);\
+ xor2(&t0, &x2);\
+ xor2(&t1, &x3);\
+ xor2(&y0, &y2);\
+ xor2(&y1, &y3);\
+ Mul_GF4_N(t0, t1, y0, y1, t2);\
+ Mul_GF4(x2, x3, y2, y3, t3);\
+ ; /* fold the middle product (t0, t1) into both halves */\
+ xor2(&x0, &t0);\
+ xor2(&x2, &t0);\
+ xor2(&x1, &t1);\
+ xor2(&x3, &t1);\
+
+#define Mul_GF16_2(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, t0, t1, t2, t3) /* overwrites x0..x3 and then x4..x7, both using the operand y0..y3; t0..t3 are scratch. y0, y1 are xored with y2, y3 twice, so they end with their entry values */ \
+ copy2(&t0, &x0);\
+ copy2(&t1, &x1);\
+ Mul_GF4(x0, x1, y0, y1, t2);\
+ xor2(&t0, &x2);\
+ xor2(&t1, &x3);\
+ xor2(&y0, &y2);\
+ xor2(&y1, &y3);\
+ Mul_GF4_N(t0, t1, y0, y1, t3);\
+ Mul_GF4(x2, x3, y2, y3, t2);\
+ ; /* fold the middle product into x0..x3 */\
+ xor2(&x0, &t0);\
+ xor2(&x2, &t0);\
+ xor2(&x1, &t1);\
+ xor2(&x3, &t1);\
+ ; /* second half: y0, y1 still hold y0^y2, y1^y3 for the Mul_GF4_N below */\
+ copy2(&t0, &x4);\
+ copy2(&t1, &x5);\
+ xor2(&t0, &x6);\
+ xor2(&t1, &x7);\
+ Mul_GF4_N(t0, t1, y0, y1, t3);\
+ Mul_GF4(x6, x7, y2, y3, t2);\
+ xor2(&y0, &y2);\
+ xor2(&y1, &y3);\
+ Mul_GF4(x4, x5, y0, y1, t3);\
+ ; /* fold the middle product into x4..x7 */\
+ xor2(&x4, &t0);\
+ xor2(&x6, &t0);\
+ xor2(&x5, &t1);\
+ xor2(&x7, &t1);\
+
+#define Inv_GF16(x0, x1, x2, x3, t0, t1, t2, t3) /* builds t0 and t1 from and/or/xor combinations of x0..x3, then overwrites x0..x3 via Mul_GF4_2 with (t1, t0) as the shared operand; t2, t3 are scratch. No use of this macro appears in the visible part of the file */ \
+ copy2(&t0, &x1);\
+ copy2(&t1, &x0);\
+ and2(&t0, &x3);\
+ or2(&t1, &x2);\
+ copy2(&t2, &x1);\
+ copy2(&t3, &x0);\
+ or2(&t2, &x2);\
+ or2(&t3, &x3);\
+ xor2(&t2, &t3);\
+ ;\
+ xor2(&t0, &t2);\
+ xor2(&t1, &t2);\
+ ;\
+ Mul_GF4_2(x0, x1, x2, x3, t1, t0, t2, t3);\
+
+
+#define Inv_GF256(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, s0, s1, s2, s3) /* overwrites x0..x7 in place; t0..t3 and s0..s3 are scratch. Presumably the GF(2^8) inversion of the S-box, going by the name. Statement order is significant throughout */ \
+ copy2(&t3, &x4);\
+ copy2(&t2, &x5);\
+ copy2(&t1, &x1);\
+ copy2(&s1, &x7);\
+ copy2(&s0, &x0);\
+ ;\
+ xor2(&t3, &x6);\
+ xor2(&t2, &x7);\
+ xor2(&t1, &x3);\
+ xor2(&s1, &x6);\
+ xor2(&s0, &x2);\
+ ;\
+ copy2(&s2, &t3);\
+ copy2(&t0, &t2);\
+ copy2(&s3, &t3);\
+ ; /* build t0..t3 from and/or/xor combinations of the inputs; x0..x7 are only read in this part */\
+ or2(&t2, &t1);\
+ or2(&t3, &s0);\
+ xor2(&s3, &t0);\
+ and2(&s2, &s0);\
+ and2(&t0, &t1);\
+ xor2(&s0, &t1);\
+ and2(&s3, &s0);\
+ copy2(&s0, &x3);\
+ xor2(&s0, &x2);\
+ and2(&s1, &s0);\
+ xor2(&t3, &s1);\
+ xor2(&t2, &s1);\
+ copy2(&s1, &x4);\
+ xor2(&s1, &x5);\
+ copy2(&s0, &x1);\
+ copy2(&t1, &s1);\
+ xor2(&s0, &x0);\
+ or2(&t1, &s0);\
+ and2(&s1, &s0);\
+ xor2(&t0, &s1);\
+ xor2(&t3, &s3);\
+ xor2(&t2, &s2);\
+ xor2(&t1, &s3);\
+ xor2(&t0, &s2);\
+ xor2(&t1, &s2);\
+ copy2(&s0, &x7);\
+ copy2(&s1, &x6);\
+ copy2(&s2, &x5);\
+ copy2(&s3, &x4);\
+ and2(&s0, &x3);\
+ and2(&s1, &x2);\
+ and2(&s2, &x1);\
+ or2(&s3, &x0);\
+ xor2(&t3, &s0);\
+ xor2(&t2, &s1);\
+ xor2(&t1, &s2);\
+ xor2(&t0, &s3);\
+ ; /* second stage works only on the temporaries and leaves the operand for the final multiply in s3, s2, s1, t1 */\
+ copy2(&s0, &t3);\
+ xor2(&s0, &t2);\
+ and2(&t3, &t1);\
+ copy2(&s2, &t0);\
+ xor2(&s2, &t3);\
+ copy2(&s3, &s0);\
+ and2(&s3, &s2);\
+ xor2(&s3, &t2);\
+ copy2(&s1, &t1);\
+ xor2(&s1, &t0);\
+ xor2(&t3, &t2);\
+ and2(&s1, &t3);\
+ xor2(&s1, &t0);\
+ xor2(&t1, &s1);\
+ copy2(&t2, &s2);\
+ xor2(&t2, &s1);\
+ and2(&t2, &t0);\
+ xor2(&t1, &t2);\
+ xor2(&s2, &t2);\
+ and2(&s2, &s3);\
+ xor2(&s2, &s0);\
+ ; /* x0..x7 are written only here: both halves are multiplied by (s3, s2, s1, t1), with s0, t0, t2, t3 as scratch */\
+ Mul_GF16_2(x0, x1, x2, x3, x4, x5, x6, x7, s3, s2, s1, t1, s0, t0, t2, t3);\
+
+#endif
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c
new file mode 100644
index 0000000..14a28cc
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c
@@ -0,0 +1,64 @@
+#include "common.h"
+
+/*
+ * Assemble a 32-bit value from the four bytes x[0..3], where x[0] supplies
+ * the most significant byte and x[3] the least significant one.
+ */
+uint32 load32_bigendian(const unsigned char *x)
+{
+    uint32 value = 0;
+    unsigned int i;
+
+    /* Shift the accumulated value up and append the next byte. */
+    for (i = 0; i < 4; i++) {
+        value = (value << 8) | (uint32) x[i];
+    }
+    return value;
+}
+
+/*
+ * Write the low 32 bits of u to x[0..3], most significant byte first
+ * (x[0] receives bits 31..24, x[3] receives bits 7..0).
+ */
+void store32_bigendian(unsigned char *x,uint32 u)
+{
+    x[0] = (unsigned char) (u >> 24);
+    x[1] = (unsigned char) (u >> 16);
+    x[2] = (unsigned char) (u >> 8);
+    x[3] = (unsigned char) u;
+}
+
+/*
+ * Assemble a 32-bit value from the four bytes x[0..3], where x[0] supplies
+ * the least significant byte and x[3] the most significant one.
+ */
+uint32 load32_littleendian(const unsigned char *x)
+{
+    uint32 value = 0;
+    int i;
+
+    /* Walk from the top byte down so each shift makes room for the next. */
+    for (i = 3; i >= 0; i--) {
+        value = (value << 8) | (uint32) x[i];
+    }
+    return value;
+}
+
+/*
+ * Write the low 32 bits of u to x[0..3], least significant byte first
+ * (x[0] receives bits 7..0, x[3] receives bits 31..24).
+ */
+void store32_littleendian(unsigned char *x,uint32 u)
+{
+    x[0] = (unsigned char) u;
+    x[1] = (unsigned char) (u >> 8);
+    x[2] = (unsigned char) (u >> 16);
+    x[3] = (unsigned char) (u >> 24);
+}
+
+
+/*
+ * Assemble a 64-bit value from the eight bytes x[0..7], where x[0] supplies
+ * the least significant byte and x[7] the most significant one.
+ */
+uint64 load64_littleendian(const unsigned char *x)
+{
+    uint64 value = 0;
+    int i;
+
+    /* Walk from the top byte down so each shift makes room for the next. */
+    for (i = 7; i >= 0; i--) {
+        value = (value << 8) | (uint64) x[i];
+    }
+    return value;
+}
+
+/*
+ * Write the low 64 bits of u to x[0..7], least significant byte first
+ * (x[0] receives bits 7..0, x[7] receives bits 63..56).
+ */
+void store64_littleendian(unsigned char *x,uint64 u)
+{
+    unsigned int i;
+
+    /* Emit the lowest byte, then move the next byte into the low position. */
+    for (i = 0; i < 8; i++) {
+        x[i] = (unsigned char) u;
+        u >>= 8;
+    }
+}
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/consts.h b/src/libsodium/crypto_stream/aes128ctr/portable/consts.h
new file mode 100644
index 0000000..4c50360
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/consts.h
@@ -0,0 +1,28 @@
+#ifndef CONSTS_H
+#define CONSTS_H
+
+/*
+ * Declarations of the constant tables that are defined in
+ * consts_aes128ctr.c.
+ */
+
+#include "int128.h"
+
+/*
+ * Each short name below is rewritten by the preprocessor to a symbol with
+ * the crypto_stream_aes128ctr_portable_ prefix, so the prefixed name is the
+ * one that ends up in the object files (presumably to avoid link-time
+ * clashes with other implementations -- confirm).
+ */
+#define ROTB crypto_stream_aes128ctr_portable_ROTB
+#define M0 crypto_stream_aes128ctr_portable_M0
+#define EXPB0 crypto_stream_aes128ctr_portable_EXPB0
+#define SWAP32 crypto_stream_aes128ctr_portable_SWAP32
+#define M0SWAP crypto_stream_aes128ctr_portable_M0SWAP
+#define SR crypto_stream_aes128ctr_portable_SR
+#define SRM0 crypto_stream_aes128ctr_portable_SRM0
+#define BS0 crypto_stream_aes128ctr_portable_BS0
+#define BS1 crypto_stream_aes128ctr_portable_BS1
+#define BS2 crypto_stream_aes128ctr_portable_BS2
+
+/*
+ * 16-entry byte tables. SWAP32, M0 and M0SWAP are passed as the index table
+ * argument of shufb() by xor_afternm_aes128ctr.c.
+ */
+extern const unsigned char ROTB[16];
+extern const unsigned char M0[16];
+extern const unsigned char EXPB0[16];
+extern const unsigned char SWAP32[16];
+extern const unsigned char M0SWAP[16];
+extern const unsigned char SR[16];
+extern const unsigned char SRM0[16];
+/* 128-bit repeating bit masks. */
+extern const int128 BS0;
+extern const int128 BS1;
+extern const int128 BS2;
+
+#endif
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/consts_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/consts_aes128ctr.c
new file mode 100644
index 0000000..f8029b8
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/consts_aes128ctr.c
@@ -0,0 +1,14 @@
+#include "consts.h"
+
+/*
+ * Byte tables. When one of them is given to shufb() as the index table, the
+ * result is out[i] = in[table[i]] for i = 0..15.
+ */
+
+/* NOTE(review): no user of ROTB is visible in this part of the diff; meaning not documented here. */
+const unsigned char ROTB[16] = {0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08};
+/* Byte permutation applied to the first counter block in the afternm code. */
+const unsigned char M0[16] = {0x0f, 0x0b, 0x07, 0x03, 0x0e, 0x0a, 0x06, 0x02, 0x0d, 0x09, 0x05, 0x01, 0x0c, 0x08, 0x04, 0x00};
+/* As a shufb table this would replicate byte 3 of each 4-byte group across that group (user not visible here). */
+const unsigned char EXPB0[16] = {0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07, 0x0b, 0x0b, 0x0b, 0x0b, 0x0f, 0x0f, 0x0f, 0x0f};
+
+/* Reverses the byte order inside each group of four bytes (entry i is i ^ 3). */
+const unsigned char SWAP32[16] = {0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c};
+/* M0SWAP[i] == SWAP32[M0[i]]: one shuffle equal to shufb(SWAP32) followed by shufb(M0). */
+const unsigned char M0SWAP[16] = {0x0c, 0x08, 0x04, 0x00, 0x0d, 0x09, 0x05, 0x01, 0x0e, 0x0a, 0x06, 0x02, 0x0f, 0x0b, 0x07, 0x03};
+/* Byte permutation that rotates each 4-byte group by a different amount (0x01,0x02,0x03,0x00 / ... / identity for the last group). */
+const unsigned char SR[16] = {0x01, 0x02, 0x03, 0x00, 0x06, 0x07, 0x04, 0x05, 0x0b, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0e, 0x0f};
+/* SRM0[i] == SR[M0[i]]: one shuffle equal to shufb(SR) followed by shufb(M0). */
+const unsigned char SRM0[16] = {0x0f, 0x0a, 0x05, 0x00, 0x0e, 0x09, 0x04, 0x03, 0x0d, 0x08, 0x07, 0x02, 0x0c, 0x0b, 0x06, 0x01};
+
+/* Repeating bit masks: BS0 selects every other bit, BS1 every other bit pair, BS2 the low nibble of each byte. */
+const int128 BS0 = {{0x5555555555555555ULL, 0x5555555555555555ULL}};
+const int128 BS1 = {{0x3333333333333333ULL, 0x3333333333333333ULL}};
+const int128 BS2 = {{0x0f0f0f0f0f0f0f0fULL, 0x0f0f0f0f0f0f0f0fULL}};
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/int128.h b/src/libsodium/crypto_stream/aes128ctr/portable/int128.h
new file mode 100644
index 0000000..3fd2111
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/int128.h
@@ -0,0 +1,56 @@
+#ifndef INT128_H
+#define INT128_H
+
+/*
+ * 128-bit value type and the helper operations on it that the portable
+ * aes128ctr code uses. The helpers are implemented in int128_aes128ctr.c.
+ */
+
+#include <stdint.h>
+
+#include "common.h"
+
+/*
+ * When built as C++ with a compiler that defines __GNUC__, silence the
+ * -Wlong-long diagnostic (presumably triggered by the ULL literals used by
+ * this implementation).
+ */
+#ifdef __cplusplus
+# if __GNUC__
+# pragma GCC diagnostic ignored "-Wlong-long"
+# endif
+#endif
+
+/*
+ * 16 bytes of storage, accessible as two 64-bit, four 32-bit or sixteen
+ * 8-bit unsigned members. How the u64/u32 views map onto the u8 view
+ * depends on the byte order of the host.
+ */
+typedef union {
+ uint64_t u64[2];
+ uint32_t u32[4];
+ uint8_t u8[16];
+} int128;
+
+/* Every function name is mapped to a crypto_stream_aes128ctr_portable_ prefixed symbol. */
+
+/* *r ^= *x over all 128 bits. */
+#define xor2 crypto_stream_aes128ctr_portable_xor2
+void xor2(int128 *r, const int128 *x);
+
+/* *r &= *x over all 128 bits. */
+#define and2 crypto_stream_aes128ctr_portable_and2
+void and2(int128 *r, const int128 *x);
+
+/* *r |= *x over all 128 bits. */
+#define or2 crypto_stream_aes128ctr_portable_or2
+void or2(int128 *r, const int128 *x);
+
+/* *r = *x. */
+#define copy2 crypto_stream_aes128ctr_portable_copy2
+void copy2(int128 *r, const int128 *x);
+
+/*
+ * In-place byte shuffle: new r->u8[i] = old r->u8[l[i]] for i = 0..15.
+ * l must point to 16 entries, each below 16 (entries are used as indices
+ * without any masking).
+ */
+#define shufb crypto_stream_aes128ctr_portable_shufb
+void shufb(int128 *r, const unsigned char *l);
+
+/*
+ * 32-bit lane shuffle: r->u32[i] = x->u32[(c >> (2 * i)) & 3] for i = 0..3.
+ * The result is built in a temporary, so r may be the same object as x.
+ */
+#define shufd crypto_stream_aes128ctr_portable_shufd
+void shufd(int128 *r, const int128 *x, const unsigned int c);
+
+/* Shift each of the four 32-bit words (read/written little-endian from the bytes of r) right by n bits. */
+#define rshift32_littleendian crypto_stream_aes128ctr_portable_rshift32_littleendian
+void rshift32_littleendian(int128 *r, const unsigned int n);
+
+/* Shift each of the two 64-bit words (read/written little-endian from the bytes of r) right by n bits. */
+#define rshift64_littleendian crypto_stream_aes128ctr_portable_rshift64_littleendian
+void rshift64_littleendian(int128 *r, const unsigned int n);
+
+/* Shift each of the two 64-bit words (read/written little-endian from the bytes of r) left by n bits. */
+#define lshift64_littleendian crypto_stream_aes128ctr_portable_lshift64_littleendian
+void lshift64_littleendian(int128 *r, const unsigned int n);
+
+/* Invert all 128 bits of *r. */
+#define toggle crypto_stream_aes128ctr_portable_toggle
+void toggle(int128 *r);
+
+/* XOR 0xffffffff into the little-endian 32-bit word stored at bytes 12..15 of *r. */
+#define xor_rcon crypto_stream_aes128ctr_portable_xor_rcon
+void xor_rcon(int128 *r);
+
+/*
+ * Add x to the 32-bit word stored at bytes 12..15 of *r. Note that the
+ * implementation reads and writes that word in little-endian byte order.
+ */
+#define add_uint32_big crypto_stream_aes128ctr_portable_add_uint32_big
+void add_uint32_big(int128 *r, uint32 x);
+
+#endif
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c
new file mode 100644
index 0000000..703de39
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c
@@ -0,0 +1,131 @@
+
+#include "int128.h"
+#include "common.h"
+
+/* XOR the 128-bit value *x into *r, one 64-bit half at a time. */
+void xor2(int128 *r, const int128 *x)
+{
+    unsigned int half;
+
+    for (half = 0; half < 2; half++) {
+        r->u64[half] = r->u64[half] ^ x->u64[half];
+    }
+}
+
+/* AND the 128-bit value *x into *r, one 64-bit half at a time. */
+void and2(int128 *r, const int128 *x)
+{
+    unsigned int half;
+
+    for (half = 0; half < 2; half++) {
+        r->u64[half] = r->u64[half] & x->u64[half];
+    }
+}
+
+/* OR the 128-bit value *x into *r, one 64-bit half at a time. */
+void or2(int128 *r, const int128 *x)
+{
+    unsigned int half;
+
+    for (half = 0; half < 2; half++) {
+        r->u64[half] = r->u64[half] | x->u64[half];
+    }
+}
+
+/* Copy the 128-bit value *x to *r through its two 64-bit halves. */
+void copy2(int128 *r, const int128 *x)
+{
+    unsigned int half;
+
+    for (half = 0; half < 2; half++) {
+        r->u64[half] = x->u64[half];
+    }
+}
+
+/*
+ * Shuffle the 16 bytes of *r in place according to the index table l:
+ * afterwards byte i of *r holds what byte l[i] held before the call.
+ *
+ * l must provide 16 entries and each entry must be below 16; the entries
+ * are used directly as indices into a 16-byte snapshot of the old value.
+ */
+void shufb(int128 *r, const unsigned char *l)
+{
+    int128 snapshot;
+    unsigned int i;
+
+    /* Work from a copy so that already-written bytes are never re-read. */
+    copy2(&snapshot, r);
+    for (i = 0; i < 16; i++) {
+        r->u8[i] = snapshot.u8[l[i]];
+    }
+}
+
+/*
+ * Shuffle 32-bit lanes: lane i of *r becomes lane ((c >> (2 * i)) & 3) of
+ * *x, for i = 0..3. The lanes are gathered into a temporary first, so the
+ * call also works when r and x refer to the same object.
+ */
+void shufd(int128 *r, const int128 *x, const unsigned int c)
+{
+    int128 picked;
+    unsigned int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        /* Two selector bits per destination lane, lowest bits first. */
+        picked.u32[lane] = x->u32[(c >> (2 * lane)) & 3];
+    }
+    copy2(r, &picked);
+}
+
+/*
+ * Shift each of the four 32-bit words of *r right by n bits. Every word is
+ * read from and written back to the bytes of *r in little-endian order.
+ * n is used directly as the shift count of a uint32 shift.
+ */
+void rshift32_littleendian(int128 *r, const unsigned int n)
+{
+    unsigned char *bytes = (unsigned char *)r;
+    unsigned int off;
+
+    for (off = 0; off < 16; off += 4) {
+        store32_littleendian(bytes + off, load32_littleendian(bytes + off) >> n);
+    }
+}
+
+/*
+ * Shift each of the two 64-bit words of *r right by n bits. Every word is
+ * read from and written back to the bytes of *r in little-endian order.
+ * n is used directly as the shift count of a uint64 shift.
+ */
+void rshift64_littleendian(int128 *r, const unsigned int n)
+{
+    unsigned char *bytes = (unsigned char *)r;
+    unsigned int off;
+
+    for (off = 0; off < 16; off += 8) {
+        store64_littleendian(bytes + off, load64_littleendian(bytes + off) >> n);
+    }
+}
+
+/*
+ * Shift each of the two 64-bit words of *r left by n bits. Every word is
+ * read from and written back to the bytes of *r in little-endian order.
+ * n is used directly as the shift count of a uint64 shift.
+ */
+void lshift64_littleendian(int128 *r, const unsigned int n)
+{
+    unsigned char *bytes = (unsigned char *)r;
+    unsigned int off;
+
+    for (off = 0; off < 16; off += 8) {
+        store64_littleendian(bytes + off, load64_littleendian(bytes + off) << n);
+    }
+}
+
+/* Flip every one of the 128 bits of *r. */
+void toggle(int128 *r)
+{
+    unsigned int half;
+
+    for (half = 0; half < 2; half++) {
+        r->u64[half] = ~r->u64[half];
+    }
+}
+
+/*
+ * XOR the constant 0xffffffff into the 32-bit word stored at bytes 12..15
+ * of *r (the word is read and written in little-endian byte order). The
+ * other twelve bytes are left untouched.
+ */
+void xor_rcon(int128 *r)
+{
+    unsigned char *last_word = (unsigned char *)r + 12;
+
+    store32_littleendian(last_word, load32_littleendian(last_word) ^ 0xffffffff);
+}
+
+/*
+ * Add x to the 32-bit word stored at bytes 12..15 of *r and store the four
+ * low-order bytes of the sum back; the other twelve bytes are untouched.
+ *
+ * Despite the "_big" in the name, the word is read and written in
+ * little-endian byte order here. The afternm code in this implementation
+ * byte-swaps each 32-bit group (shufb with SWAP32) before calling this.
+ */
+void add_uint32_big(int128 *r, uint32 x)
+{
+    unsigned char *last_word = (unsigned char *)r + 12;
+
+    store32_littleendian(last_word, load32_littleendian(last_word) + x);
+}
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c
new file mode 100644
index 0000000..8f4ec72
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c
@@ -0,0 +1,28 @@
+#include "api.h"
+
+/*
+ * One-shot wrapper: derives the intermediate buffer d from the key k with
+ * crypto_stream_beforenm(), then calls crypto_stream_afternm() to fill
+ * out[0..outlen-1] for nonce n.
+ *
+ * out    - destination buffer, outlen bytes
+ * outlen - number of bytes to produce
+ * n      - nonce, forwarded unchanged to crypto_stream_afternm()
+ * k      - key, forwarded unchanged to crypto_stream_beforenm()
+ *
+ * Always returns 0.
+ *
+ * d is computed from the key, so it is overwritten with zeros before the
+ * function returns instead of being left behind on the stack.
+ */
+int crypto_stream(
+        unsigned char *out,
+        unsigned long long outlen,
+        const unsigned char *n,
+        const unsigned char *k
+        )
+{
+    unsigned char d[crypto_stream_BEFORENMBYTES];
+    /* Writes through a volatile pointer cannot be dropped as dead stores. */
+    volatile unsigned char *wipe = d;
+    unsigned int i;
+
+    crypto_stream_beforenm(d, k);
+    crypto_stream_afternm(out, outlen, n, d);
+
+    for (i = 0; i < sizeof d; i++) {
+        wipe[i] = 0;
+    }
+    return 0;
+}
+
+/*
+ * One-shot wrapper: derives the intermediate buffer d from the key k with
+ * crypto_stream_beforenm(), then calls crypto_stream_xor_afternm() to
+ * combine in[0..inlen-1] with the stream for nonce n and write the result
+ * to out.
+ *
+ * out   - destination buffer, inlen bytes
+ * in    - source buffer, inlen bytes
+ * inlen - number of bytes to process
+ * n     - nonce, forwarded unchanged to crypto_stream_xor_afternm()
+ * k     - key, forwarded unchanged to crypto_stream_beforenm()
+ *
+ * Always returns 0.
+ *
+ * d is computed from the key, so it is overwritten with zeros before the
+ * function returns instead of being left behind on the stack.
+ */
+int crypto_stream_xor(
+        unsigned char *out,
+        const unsigned char *in,
+        unsigned long long inlen,
+        const unsigned char *n,
+        const unsigned char *k
+        )
+{
+    unsigned char d[crypto_stream_BEFORENMBYTES];
+    /* Writes through a volatile pointer cannot be dropped as dead stores. */
+    volatile unsigned char *wipe = d;
+    unsigned int i;
+
+    crypto_stream_beforenm(d, k);
+    crypto_stream_xor_afternm(out, in, inlen, n, d);
+
+    for (i = 0; i < sizeof d; i++) {
+        wipe[i] = 0;
+    }
+    return 0;
+}
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/types.h b/src/libsodium/crypto_stream/aes128ctr/portable/types.h
new file mode 100644
index 0000000..6aa502f
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/types.h
@@ -0,0 +1,10 @@
+#ifndef TYPES_H
+#define TYPES_H
+
+/*
+ * Short local aliases for the project's integer typedefs.
+ * NOTE(review): presumably unsigned types of exactly 32 and 64 bits --
+ * confirm in crypto_uint32.h / crypto_uint64.h.
+ */
+
+#include "crypto_uint32.h"
+typedef crypto_uint32 uint32;
+
+#include "crypto_uint64.h"
+typedef crypto_uint64 uint64;
+
+#endif
diff --git a/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c b/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c
new file mode 100644
index 0000000..139dbe5
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c
@@ -0,0 +1,181 @@
+/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
+ * Date: 2009-03-19
+ * Public domain */
+
+#include <stdio.h>
+#include "api.h"
+#include "int128.h"
+#include "common.h"
+#include "consts.h"
+
+/*
+ * XOR len bytes of in with generated stream bytes and write them to out.
+ *
+ * out   - destination, len bytes
+ * in    - source, len bytes
+ * len   - number of bytes to process
+ * nonce - 16 bytes; copied to a local int128 whose last four bytes are then
+ *         treated as a big-endian 32-bit counter
+ * c     - forwarded untouched to the aesround()/lastround() macros
+ *         (presumably the expanded key from crypto_stream_beforenm --
+ *         confirm against common.h / beforenm)
+ *
+ * Always returns 0.
+ *
+ * Each pass through enc_block produces eight 16-byte blocks (128 bytes).
+ * While more than 128 bytes remain the pass is repeated; exactly 128
+ * remaining bytes are handled by "full", fewer by "partial"/"bytes".
+ *
+ * NOTE(review): nonce, in and out are unsigned char pointers that are cast
+ * to int128 pointers and dereferenced; this assumes the callers supply
+ * buffers suitably aligned for int128 -- confirm. The same applies to the
+ * local bl[] array.
+ */
+int crypto_stream_xor_afternm(unsigned char *out, const unsigned char *in, unsigned long long len, const unsigned char *nonce, const unsigned char *c)
+{
+
+ int128 xmm0;
+ int128 xmm1;
+ int128 xmm2;
+ int128 xmm3;
+ int128 xmm4;
+ int128 xmm5;
+ int128 xmm6;
+ int128 xmm7;
+
+ int128 xmm8;
+ int128 xmm9;
+ int128 xmm10;
+ int128 xmm11;
+ int128 xmm12;
+ int128 xmm13;
+ int128 xmm14;
+ int128 xmm15;
+
+ int128 nonce_stack;
+ unsigned long long lensav;
+ unsigned char bl[128];
+ unsigned char *blp;
+ unsigned char *np;
+ unsigned char b;
+
+ uint32 tmp;
+
+ /* Copy nonce on the stack */
+ copy2(&nonce_stack, (const int128 *) (nonce + 0));
+ np = (unsigned char *)&nonce_stack;
+
+ enc_block:
+
+ /*
+  * Build eight input blocks from the current nonce copy: xmm0 is the copy
+  * itself, xmm1..xmm7 are the copy with 1..7 added to its last 32-bit word.
+  * SWAP32 reverses the bytes of each 32-bit group so that add_uint32_big()
+  * can do the addition with its little-endian load/store.
+  */
+ xmm0 = *(int128 *) (np + 0);
+ copy2(&xmm1, &xmm0);
+ shufb(&xmm1, SWAP32);
+ copy2(&xmm2, &xmm1);
+ copy2(&xmm3, &xmm1);
+ copy2(&xmm4, &xmm1);
+ copy2(&xmm5, &xmm1);
+ copy2(&xmm6, &xmm1);
+ copy2(&xmm7, &xmm1);
+
+ add_uint32_big(&xmm1, 1);
+ add_uint32_big(&xmm2, 2);
+ add_uint32_big(&xmm3, 3);
+ add_uint32_big(&xmm4, 4);
+ add_uint32_big(&xmm5, 5);
+ add_uint32_big(&xmm6, 6);
+ add_uint32_big(&xmm7, 7);
+
+ /* xmm0 was never byte-swapped, so it takes M0; the others take M0SWAP. */
+ shufb(&xmm0, M0);
+ shufb(&xmm1, M0SWAP);
+ shufb(&xmm2, M0SWAP);
+ shufb(&xmm3, M0SWAP);
+ shufb(&xmm4, M0SWAP);
+ shufb(&xmm5, M0SWAP);
+ shufb(&xmm6, M0SWAP);
+ shufb(&xmm7, M0SWAP);
+
+ /* bitslice, aesround and lastround are macros from common.h (not shown here). */
+ bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
+
+ /* Nine aesround steps alternating between the two register banks, then lastround. */
+ aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+ aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+ aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+ aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+ aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+ lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+
+ bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
+
+ /*
+  * The eight result blocks are used in the order
+  * xmm8, xmm9, xmm12, xmm14, xmm11, xmm15, xmm10, xmm13.
+  */
+ if(len < 128) goto partial;
+ if(len == 128) goto full;
+
+ /* More than 128 bytes left: advance the counter by the 8 blocks just used. */
+ tmp = load32_bigendian(np + 12);
+ tmp += 8;
+ store32_bigendian(np + 12, tmp);
+
+ xor2(&xmm8, (const int128 *)(in + 0));
+ xor2(&xmm9, (const int128 *)(in + 16));
+ xor2(&xmm12, (const int128 *)(in + 32));
+ xor2(&xmm14, (const int128 *)(in + 48));
+ xor2(&xmm11, (const int128 *)(in + 64));
+ xor2(&xmm15, (const int128 *)(in + 80));
+ xor2(&xmm10, (const int128 *)(in + 96));
+ xor2(&xmm13, (const int128 *)(in + 112));
+
+ *(int128 *) (out + 0) = xmm8;
+ *(int128 *) (out + 16) = xmm9;
+ *(int128 *) (out + 32) = xmm12;
+ *(int128 *) (out + 48) = xmm14;
+ *(int128 *) (out + 64) = xmm11;
+ *(int128 *) (out + 80) = xmm15;
+ *(int128 *) (out + 96) = xmm10;
+ *(int128 *) (out + 112) = xmm13;
+
+ len -= 128;
+ in += 128;
+ out += 128;
+
+ goto enc_block;
+
+ partial:
+
+ /* Fewer than 128 bytes left (possibly zero). */
+ lensav = len;
+ len >>= 4;
+
+ /*
+  * Counter advanced by the number of whole 16-byte blocks; the updated
+  * nonce copy is not read again before the function returns.
+  */
+ tmp = load32_bigendian(np + 12);
+ tmp += len;
+ store32_bigendian(np + 12, tmp);
+
+ /* Spill the eight result blocks to bl so they can be consumed bytewise. */
+ blp = bl;
+ *(int128 *)(blp + 0) = xmm8;
+ *(int128 *)(blp + 16) = xmm9;
+ *(int128 *)(blp + 32) = xmm12;
+ *(int128 *)(blp + 48) = xmm14;
+ *(int128 *)(blp + 64) = xmm11;
+ *(int128 *)(blp + 80) = xmm15;
+ *(int128 *)(blp + 96) = xmm10;
+ *(int128 *)(blp + 112) = xmm13;
+
+ bytes:
+
+ /* Byte loop: out[i] = in[i] ^ bl[i] for the lensav remaining bytes. */
+ if(lensav == 0) goto end;
+
+ b = blp[0]; /* clang false positive */
+ b ^= *(const unsigned char *)(in + 0);
+ *(unsigned char *)(out + 0) = b;
+
+ blp += 1;
+ in +=1;
+ out +=1;
+ lensav -= 1;
+
+ goto bytes;
+
+ full:
+
+ /* Exactly 128 bytes left: same steps as the looping case, then fall through to end. */
+ tmp = load32_bigendian(np + 12);
+ tmp += 8;
+ store32_bigendian(np + 12, tmp);
+
+ xor2(&xmm8, (const int128 *)(in + 0));
+ xor2(&xmm9, (const int128 *)(in + 16));
+ xor2(&xmm12, (const int128 *)(in + 32));
+ xor2(&xmm14, (const int128 *)(in + 48));
+ xor2(&xmm11, (const int128 *)(in + 64));
+ xor2(&xmm15, (const int128 *)(in + 80));
+ xor2(&xmm10, (const int128 *)(in + 96));
+ xor2(&xmm13, (const int128 *)(in + 112));
+
+ *(int128 *) (out + 0) = xmm8;
+ *(int128 *) (out + 16) = xmm9;
+ *(int128 *) (out + 32) = xmm12;
+ *(int128 *) (out + 48) = xmm14;
+ *(int128 *) (out + 64) = xmm11;
+ *(int128 *) (out + 80) = xmm15;
+ *(int128 *) (out + 96) = xmm10;
+ *(int128 *) (out + 112) = xmm13;
+
+ end:
+ return 0;
+
+}
diff --git a/src/libsodium/crypto_stream/aes128ctr/stream_aes128ctr_api.c b/src/libsodium/crypto_stream/aes128ctr/stream_aes128ctr_api.c
new file mode 100644
index 0000000..184ad3f
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes128ctr/stream_aes128ctr_api.c
@@ -0,0 +1,16 @@
+#include "crypto_stream_aes128ctr.h"
+
+/* Runtime accessor for the crypto_stream_aes128ctr_KEYBYTES constant. */
+size_t
+crypto_stream_aes128ctr_keybytes(void)
+{
+    const size_t key_bytes = crypto_stream_aes128ctr_KEYBYTES;
+
+    return key_bytes;
+}
+
+/* Runtime accessor for the crypto_stream_aes128ctr_NONCEBYTES constant. */
+size_t
+crypto_stream_aes128ctr_noncebytes(void)
+{
+    const size_t nonce_bytes = crypto_stream_aes128ctr_NONCEBYTES;
+
+    return nonce_bytes;
+}
+
+/* Runtime accessor for the crypto_stream_aes128ctr_BEFORENMBYTES constant. */
+size_t
+crypto_stream_aes128ctr_beforenmbytes(void)
+{
+    const size_t beforenm_bytes = crypto_stream_aes128ctr_BEFORENMBYTES;
+
+    return beforenm_bytes;
+}
diff --git a/src/libsodium/crypto_stream/aes256estream/hongjun/aes-table-be.h b/src/libsodium/crypto_stream/aes256estream/hongjun/aes-table-be.h
new file mode 100644
index 0000000..8a4a49c
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes256estream/hongjun/aes-table-be.h
@@ -0,0 +1,273 @@
+
+#ifndef __AES_TABLE_BE_H__
+#define __AES_TABLE_BE_H__
+
+ALIGN(64) static unsigned int T0[256] = {
+ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
+ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
+ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
+ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
+ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
+ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
+ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
+ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
+ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
+ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
+ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
+ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
+ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
+ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
+ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
+ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
+ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
+ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
+ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
+ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
+ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
+ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
+ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
+ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
+ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
+ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
+ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
+ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
+ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
+ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
+ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
+ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
+ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
+ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
+ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
+ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
+ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
+ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
+ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
+ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
+ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
+ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
+ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
+ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
+ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
+ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
+ 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
+ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
+ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
+ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
+ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
+ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
+ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
+ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
+ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
+ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
+ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
+ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
+ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
+ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
+ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
+ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
+ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
+ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
+};
+
+ALIGN(64) static unsigned int T1[256] = {
+ 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
+ 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
+ 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
+ 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
+ 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d,
+ 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
+ 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf,
+ 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
+ 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
+ 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
+ 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1,
+ 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
+ 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3,
+ 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
+ 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
+ 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
+ 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a,
+ 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
+ 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3,
+ 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
+ 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
+ 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
+ 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939,
+ 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
+ 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb,
+ 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
+ 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
+ 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
+ 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f,
+ 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
+ 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121,
+ 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
+ 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
+ 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
+ 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d,
+ 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
+ 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc,
+ 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
+ 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
+ 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
+ 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a,
+ 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
+ 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262,
+ 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
+ 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
+ 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
+ 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea,
+ 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
+ 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e,
+ 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
+ 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
+ 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
+ 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666,
+ 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
+ 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9,
+ 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
+ 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
+ 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
+ 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9,
+ 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
+ 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d,
+ 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
+ 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
+ 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616
+};
+
+ALIGN(64) static unsigned int T2[256] = {
+ 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
+ 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
+ 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
+ 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
+ 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d,
+ 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
+ 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af,
+ 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
+ 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
+ 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
+ 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1,
+ 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
+ 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3,
+ 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
+ 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
+ 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
+ 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a,
+ 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
+ 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3,
+ 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
+ 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
+ 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
+ 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239,
+ 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
+ 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb,
+ 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
+ 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
+ 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
+ 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f,
+ 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
+ 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221,
+ 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
+ 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
+ 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
+ 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d,
+ 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
+ 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc,
+ 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
+ 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
+ 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
+ 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a,
+ 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
+ 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462,
+ 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
+ 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
+ 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
+ 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea,
+ 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
+ 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e,
+ 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
+ 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
+ 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
+ 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66,
+ 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
+ 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9,
+ 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
+ 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
+ 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
+ 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9,
+ 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
+ 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d,
+ 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
+ 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
+ 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16
+};
+
+ALIGN(64) static unsigned int T3[256] = {
+ 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
+ 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
+ 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
+ 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
+ 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa,
+ 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
+ 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45,
+ 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
+ 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
+ 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
+ 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9,
+ 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
+ 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d,
+ 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
+ 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
+ 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
+ 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34,
+ 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
+ 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d,
+ 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
+ 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
+ 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
+ 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72,
+ 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
+ 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed,
+ 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
+ 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
+ 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
+ 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05,
+ 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
+ 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342,
+ 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
+ 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
+ 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
+ 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a,
+ 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
+ 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3,
+ 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
+ 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
+ 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
+ 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14,
+ 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
+ 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4,
+ 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
+ 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
+ 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
+ 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf,
+ 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
+ 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c,
+ 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
+ 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
+ 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
+ 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc,
+ 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
+ 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069,
+ 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
+ 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
+ 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
+ 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9,
+ 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
+ 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a,
+ 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
+ 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
+ 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c
+};
+
+#endif
diff --git a/src/libsodium/crypto_stream/aes256estream/hongjun/aes-table-le.h b/src/libsodium/crypto_stream/aes256estream/hongjun/aes-table-le.h
new file mode 100644
index 0000000..9d61039
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes256estream/hongjun/aes-table-le.h
@@ -0,0 +1,274 @@
+
+#ifndef __AES_TABLE_LE_H__
+#define __AES_TABLE_LE_H__
+
+ALIGN(64) static unsigned int T0[256] = {
+ 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
+ 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
+ 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56,
+ 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec,
+ 0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa,
+ 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
+ 0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45,
+ 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b,
+ 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c,
+ 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83,
+ 0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9,
+ 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
+ 0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d,
+ 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f,
+ 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df,
+ 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea,
+ 0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34,
+ 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
+ 0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d,
+ 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413,
+ 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1,
+ 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6,
+ 0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972,
+ 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
+ 0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed,
+ 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511,
+ 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe,
+ 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b,
+ 0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05,
+ 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
+ 0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142,
+ 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf,
+ 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3,
+ 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e,
+ 0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a,
+ 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
+ 0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3,
+ 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b,
+ 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428,
+ 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad,
+ 0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14,
+ 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
+ 0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4,
+ 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2,
+ 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda,
+ 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949,
+ 0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf,
+ 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
+ 0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c,
+ 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697,
+ 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e,
+ 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f,
+ 0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc,
+ 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
+ 0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969,
+ 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27,
+ 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122,
+ 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433,
+ 0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9,
+ 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
+ 0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a,
+ 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0,
+ 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e,
+ 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c
+};
+
+
+ALIGN(64) static unsigned int T1[256] = {
+ 0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d,
+ 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154,
+ 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d,
+ 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a,
+ 0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87,
+ 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b,
+ 0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea,
+ 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b,
+ 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a,
+ 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f,
+ 0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908,
+ 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f,
+ 0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e,
+ 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5,
+ 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d,
+ 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f,
+ 0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e,
+ 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb,
+ 0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce,
+ 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397,
+ 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c,
+ 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed,
+ 0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b,
+ 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a,
+ 0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16,
+ 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194,
+ 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81,
+ 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3,
+ 0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a,
+ 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104,
+ 0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263,
+ 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d,
+ 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f,
+ 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39,
+ 0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47,
+ 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695,
+ 0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f,
+ 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83,
+ 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c,
+ 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76,
+ 0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e,
+ 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4,
+ 0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6,
+ 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b,
+ 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7,
+ 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0,
+ 0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25,
+ 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018,
+ 0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72,
+ 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751,
+ 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21,
+ 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85,
+ 0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa,
+ 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12,
+ 0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0,
+ 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9,
+ 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233,
+ 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7,
+ 0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920,
+ 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a,
+ 0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17,
+ 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8,
+ 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11,
+ 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a
+};
+
+ALIGN(64) static unsigned int T2[256] = {
+ 0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b,
+ 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5,
+ 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b,
+ 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76,
+ 0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d,
+ 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0,
+ 0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf,
+ 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0,
+ 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26,
+ 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc,
+ 0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1,
+ 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15,
+ 0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3,
+ 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a,
+ 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2,
+ 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75,
+ 0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a,
+ 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0,
+ 0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3,
+ 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784,
+ 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced,
+ 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b,
+ 0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39,
+ 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf,
+ 0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb,
+ 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485,
+ 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f,
+ 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8,
+ 0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f,
+ 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5,
+ 0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321,
+ 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2,
+ 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec,
+ 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917,
+ 0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d,
+ 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573,
+ 0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc,
+ 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388,
+ 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14,
+ 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db,
+ 0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a,
+ 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c,
+ 0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662,
+ 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79,
+ 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d,
+ 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9,
+ 0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea,
+ 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808,
+ 0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e,
+ 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6,
+ 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f,
+ 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a,
+ 0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66,
+ 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e,
+ 0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9,
+ 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e,
+ 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311,
+ 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794,
+ 0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9,
+ 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf,
+ 0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d,
+ 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868,
+ 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f,
+ 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16
+};
+
+ALIGN(64) static unsigned int T3[256] = {
+ 0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b,
+ 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5,
+ 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b,
+ 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676,
+ 0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d,
+ 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0,
+ 0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf,
+ 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0,
+ 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626,
+ 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc,
+ 0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1,
+ 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515,
+ 0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3,
+ 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a,
+ 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2,
+ 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575,
+ 0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a,
+ 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0,
+ 0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3,
+ 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484,
+ 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded,
+ 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b,
+ 0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939,
+ 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf,
+ 0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb,
+ 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585,
+ 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f,
+ 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8,
+ 0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f,
+ 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5,
+ 0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121,
+ 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2,
+ 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec,
+ 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717,
+ 0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d,
+ 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373,
+ 0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc,
+ 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888,
+ 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414,
+ 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb,
+ 0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a,
+ 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c,
+ 0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262,
+ 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979,
+ 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d,
+ 0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9,
+ 0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea,
+ 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808,
+ 0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e,
+ 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6,
+ 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f,
+ 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a,
+ 0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666,
+ 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e,
+ 0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9,
+ 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e,
+ 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111,
+ 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494,
+ 0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9,
+ 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf,
+ 0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d,
+ 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868,
+ 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f,
+ 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616
+};
+
+#endif
diff --git a/src/libsodium/crypto_stream/aes256estream/hongjun/aes-table.h b/src/libsodium/crypto_stream/aes256estream/hongjun/aes-table.h
new file mode 100644
index 0000000..89839e5
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes256estream/hongjun/aes-table.h
@@ -0,0 +1,62 @@
+
+#ifndef __AES_TABLE_H__
+#define __AES_TABLE_H__
+
+#if defined(_MSC_VER)
+# define ALIGN(x) __declspec(align(x))
+#else
+# define ALIGN(x) __attribute__((aligned(x)))
+#endif
+
+#ifdef NATIVE_LITTLE_ENDIAN
+# include "aes-table-le.h"
+#elif defined(NATIVE_BIG_ENDIAN)
+# include "aes-table-be.h"
+#else
+# error Unsupported byte ordering
+#endif
+
+static const unsigned char Rcon[31] = /* AES key-schedule round constants: entry i (i >= 1) is x^(i-1) in GF(2^8); entry 0 is a placeholder */
+{
+    0x0, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20,
+    0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, /* 0xd8 = 2 * 0x6c; the former 0xc0 broke the doubling sequence */
+    0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc,
+    0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4,
+    0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91
+};
+
+
+ALIGN(64) static const unsigned char Sbox[256] = {
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
+ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
+ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
+ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
+ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
+ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
+ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
+ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
+ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
+ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
+ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
+#endif
diff --git a/src/libsodium/crypto_stream/aes256estream/hongjun/aes256-ctr.c b/src/libsodium/crypto_stream/aes256estream/hongjun/aes256-ctr.c
new file mode 100644
index 0000000..c35e310
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes256estream/hongjun/aes256-ctr.c
@@ -0,0 +1,239 @@
+/* aes-ctr.c */
+/* AES in CTR mode. */
+
+/* Hongjun Wu, January 2007*/
+
+
+/* ------------------------------------------------------------------------- */
+
+#include "api.h"
+#include "aes256.h"
+
+#include <string.h>
+
+/* ------------------------------------------------------------------------- */
+/* AES-256 key setup: expands the key bytes into Nr+1 round keys stored in ctx->round_key; keysize and ivsize are accepted but ignored. */
+static void
+ECRYPT_keysetup(ECRYPT_ctx* ctx, const u8* key, u32 keysize, u32 ivsize)
+{
+ unsigned int w[Nk*(Nr+1)], temp; /* expanded key words; the loops below write w[0..63] and read back w[0..59] */
+ int i, j;
+
+ (void) keysize;
+ (void) ivsize;
+ (void) sizeof(char[sizeof *ctx == crypto_stream_BEFORENMBYTES ? 1 : -1]); /* compile-time check: negative array size unless the context is exactly BEFORENMBYTES */
+ /* Load the key as Nk 32-bit words, least significant byte first. */
+ for( i = 0; i < Nk; i++ ) {
+ w[i] = key[(i << 2)];
+ w[i] |= key[(i << 2)+1] << 8;
+ w[i] |= key[(i << 2)+2] << 16;
+ w[i] |= (unsigned int) key[(i << 2)+3] << 24;
+ }
+
+ i = Nk;
+ /* Each pass of this loop derives the next Nk (8) words, w[i] .. w[i+7]. */
+ while( i < Nb*(Nr+1) ) {
+ temp = w[i-1];
+ /* Word 0 of the group: every byte goes through Sbox, the bytes are rotated by one position, and Rcon[i/Nk] is XORed in. */
+ temp = (unsigned int) Sbox[temp & 0xFF] << 24 ^
+ Sbox[(temp >> 8) & 0xFF] ^
+ (Sbox[(temp >> 16) & 0xFF] << 8 ) ^
+ (Sbox[(temp >> 24) & 0xFF] << 16) ^
+ Rcon[i/Nk];
+ w[i] = w[i-Nk] ^ temp;
+ i++;
+
+ temp = w[i-1];
+ w[i] = w[i-Nk] ^ temp;
+ i++;
+
+ temp = w[i-1];
+ w[i] = w[i-Nk] ^ temp;
+ i++;
+
+ temp = w[i-1];
+ w[i] = w[i-Nk] ^ temp;
+ i++;
+ /* Word 4 of the group: every byte goes through Sbox in place (no rotation, no Rcon). */
+ temp = w[i-1];
+ temp = Sbox[temp & 0xFF] ^
+ Sbox[(temp >> 8) & 0xFF] << 8 ^
+ (Sbox[(temp >> 16) & 0xFF] << 16) ^
+ ((unsigned int) Sbox[(temp >> 24) & 0xFF] << 24);
+ w[i] = w[i-Nk] ^ temp;
+ i++;
+
+ temp = w[i-1];
+ w[i] = w[i-Nk] ^ temp;
+ i++;
+
+ temp = w[i-1];
+ w[i] = w[i-Nk] ^ temp;
+ i++;
+
+ temp = w[i-1];
+ w[i] = w[i-Nk] ^ temp;
+ i++;
+ }
+ /* Copy the first Nb*(Nr+1) words into the context, four per round key, passing each through SWP32. */
+ for (i = 0; i <= Nr; i++) {
+ for (j = 0; j < Nb; j++) {
+ ctx->round_key[i][j] = SWP32(w[(i<<2)+j]);
+ }
+ }
+}
+
+/* ------------------------------------------------------------------------- */
+/* Load the nonce into ctx->counter; the array-size trick refuses to compile unless counter is exactly crypto_stream_NONCEBYTES long. */
+static void
+ECRYPT_ivsetup(ECRYPT_ctx* ctx, const u8* iv)
+{
+    (void) sizeof(char[crypto_stream_NONCEBYTES == (sizeof ctx->counter) ? 1 : -1]);
+    memcpy(ctx->counter, iv, sizeof ctx->counter);
+}
+
+/* ------------------------------------------------------------------------- */
+
+/* Cache the parts of rounds 1 and 2 that do not depend on the byte of counter word 0 selected by LEROT(x, 0); first_round / second_round add that byte's contribution per block. */
+static void
+partial_precompute_tworounds(ECRYPT_ctx* ctx)
+{
+ u32 x0,x1,x2,x3,y0,y1,y2,y3;
+ /* Initial AddRoundKey: counter block XOR round key 0. */
+ x0 = ctx->counter[0] ^ ctx->round_key[0][0];
+ x1 = ctx->counter[1] ^ ctx->round_key[0][1];
+ x2 = ctx->counter[2] ^ ctx->round_key[0][2];
+ x3 = ctx->counter[3] ^ ctx->round_key[0][3];
+ x0 &= SWP32(0xffffff00); /* zero the byte that LEROT(x0, 0) selects, on either byte order */
+ round(ctx,x0,x1,x2,x3,y0,y1,y2,y3,1);
+ ctx->first_round_output_x0 = y0 ^ T0[0]; /* cancel the T0[0] term produced by the zeroed byte; first_round XORs in the real T0 entry */
+ y0 = 0; /* y1..y3 do not use the zeroed byte; y0 is replaced by zero so round 2 can be cached without it */
+ round(ctx,y0,y1,y2,y3,x0,x1,x2,x3,2);
+ ctx->second_round_output[0] = x0 ^ T0[0]; /* cancel the table terms round 2 derived from the zero y0 ... */
+ ctx->second_round_output[1] = x1 ^ T3[0]; /* ... T3 for bit offset 24, */
+ ctx->second_round_output[2] = x2 ^ T2[0]; /* ... T2 for bit offset 16, */
+ ctx->second_round_output[3] = x3 ^ T1[0]; /* ... T1 for bit offset 8; second_round XORs the real entries back in */
+}
+
+/* ------------------------------------------------------------------------- */
+/* UNALIGNED_U32_READ(P, I): fetch the I-th 32-bit word starting at byte pointer P; a direct load by default, a memcpy when CPU_ALIGNED_ACCESS_REQUIRED is defined. */
+#ifndef CPU_ALIGNED_ACCESS_REQUIRED
+# define UNALIGNED_U32_READ(P, I) (((const u32 *)(const void *) (P))[(I)])
+#else
+static inline uint32_t
+UNALIGNED_U32_READ(const u8 * const p, const size_t i)
+{
+    uint32_t word;
+    (void) sizeof(int[sizeof(*p) == sizeof(char) ? 1 : -1]);
+    memcpy(&word, &p[i * sizeof word], sizeof word);
+    return word;
+}
+#endif
+
+/* ------------------------------------------------------------------------- */
+/* XOR msglen bytes of input with the AES-256 counter-mode keystream into output (callers may pass input == output); action is ignored. */
+static void
+ECRYPT_process_bytes(int action, ECRYPT_ctx* ctx, const u8* input, u8* output,
+                     unsigned long long msglen)
+{
+    __attribute__((aligned(32))) u8 keystream[16];
+    u32 i;
+    /* msglen is 64 bits wide: every caller passes an unsigned long long, which a u32 parameter silently truncated. */
+    (void) action;
+    memset(keystream, 0, sizeof keystream);
+    partial_precompute_tworounds(ctx);
+
+    for ( ; msglen >= 16; msglen -= 16, input += 16, output += 16) {
+        aes256_enc_block(ctx->counter, keystream, ctx);
+        /* aes256_enc_block omits the final round key, so it is XORed in here; memcpy keeps the store valid when output is not 4-byte aligned. */
+        for (i = 0; i < 4; i++) {
+            const u32 word = UNALIGNED_U32_READ(input, i) ^ ((u32*)keystream)[i] ^ ctx->round_key[Nr][i];
+            memcpy(output + 4 * i, &word, sizeof word);
+        }
+        /* Only counter word 0 is incremented (no carry into word 1), so the block counter wraps after 2^32 blocks. */
+        ctx->counter[0] = SWP32(SWP32(ctx->counter[0]) + 1);
+        /* The cached round values cover every counter byte except the lowest one: refresh them when that byte wraps to 0. */
+        if ((ctx->counter[0] & SWP32(0xff))== 0) {
+            partial_precompute_tworounds(ctx);
+        }
+    }
+    /* Trailing partial block: build one more keystream block and use only its first msglen bytes (the counter is not advanced). */
+    if (msglen > 0) {
+        aes256_enc_block(ctx->counter, keystream, ctx);
+        ((u32*)keystream)[0] ^= ctx->round_key[Nr][0];
+        ((u32*)keystream)[1] ^= ctx->round_key[Nr][1];
+        ((u32*)keystream)[2] ^= ctx->round_key[Nr][2];
+        ((u32*)keystream)[3] ^= ctx->round_key[Nr][3];
+
+        for (i = 0; i < msglen; i ++) {
+            output[i] = input[i] ^ keystream[i];
+        }
+    }
+}
+
+/* ------------------------------------------------------------------------- */
+
+#include "ecrypt-sync.h"
+
+int
+crypto_stream_beforenm(unsigned char *c, const unsigned char *k)
+{
+    /* Expand key k into the precomputed-state buffer c, viewed as an ECRYPT_ctx; always returns 0. */
+    const u32 key_bits = crypto_stream_KEYBYTES * 8;
+    const u32 nonce_bits = crypto_stream_NONCEBYTES * 8;
+    ECRYPT_keysetup((ECRYPT_ctx *) c, k, key_bits, nonce_bits);
+    return 0;
+}
+
+int
+crypto_stream_afternm(unsigned char *out, unsigned long long len,
+                      const unsigned char *nonce, const unsigned char *c)
+{
+    /* Emit len keystream bytes: zero out, then encrypt it in place. NOTE(review): the state behind c is written to (nonce, cached rounds) despite its const qualifier. */
+    ECRYPT_ctx * const state = (ECRYPT_ctx *) c;
+    unsigned long long pos = 0U;
+
+    ECRYPT_ivsetup(state, nonce);
+    while (pos < len) {
+        out[pos++] = 0U;
+    }
+    ECRYPT_encrypt_bytes(state, (u8 *) out, (u8 *) out, len);
+    return 0;
+}
+
+int
+crypto_stream_xor_afternm(unsigned char *out, const unsigned char *in,
+                          unsigned long long len, const unsigned char *nonce,
+                          const unsigned char *c)
+{
+    /* XOR len bytes of in with the keystream for (state c, nonce) into out; always returns 0. The state behind c is updated along the way. */
+    ECRYPT_ctx * const state = (ECRYPT_ctx *) c;
+    u8 * const dst = (u8 *) out;
+    ECRYPT_ivsetup(state, nonce);
+    ECRYPT_encrypt_bytes(state, (const u8 *) in, dst, len);
+    return 0;
+}
+
+int
+crypto_stream(unsigned char *out, unsigned long long outlen,
+              const unsigned char *n, const unsigned char *k)
+{
+    /* One-shot keystream: expand k into a stack-local state, then emit outlen bytes for nonce n; always returns 0. */
+    unsigned char state[crypto_stream_BEFORENMBYTES];
+
+    (void) crypto_stream_beforenm(state, k);
+    (void) crypto_stream_afternm(out, outlen, n, state);
+    return 0; /* NOTE(review): state still holds the expanded key when it goes out of scope */
+}
+
+int crypto_stream_xor(unsigned char *out, const unsigned char *in,
+                      unsigned long long inlen, const unsigned char *n,
+                      const unsigned char *k)
+{
+    /* One-shot encryption: expand k into a stack-local state, then XOR inlen bytes of in with the keystream for nonce n into out; always returns 0. */
+    unsigned char state[crypto_stream_BEFORENMBYTES];
+
+    (void) crypto_stream_beforenm(state, k);
+    (void) crypto_stream_xor_afternm(out, in, inlen, n, state);
+    return 0; /* NOTE(review): state still holds the expanded key when it goes out of scope */
+}
diff --git a/src/libsodium/crypto_stream/aes256estream/hongjun/aes256.h b/src/libsodium/crypto_stream/aes256estream/hongjun/aes256.h
new file mode 100644
index 0000000..d562b1d
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes256estream/hongjun/aes256.h
@@ -0,0 +1,171 @@
+/* aes256.h */
+/* Hongjun Wu, January 2007*/
+
+
+#include "ecrypt-sync.h"
+#include "aes-table.h"
+
+#include <stdio.h>
+
+#ifdef NATIVE_LITTLE_ENDIAN /* LEROT(X, S) extracts one byte of the 32-bit word X; SWP32(X) maps between native and little-endian word layout */
+# define LEROT(X, S) ((uint8_t) ((uint32_t)(X) >> (S))) /* the byte at bit offset S */
+# define SWP32(X) (X) /* native words need no conversion on this branch */
+#elif defined(NATIVE_BIG_ENDIAN)
+# define LEROT(X, S) ((uint8_t) ((uint32_t)(X) >> (24 - (S)))) /* the byte at bit offset 24 - S */
+# define SWP32(X) ((uint32_t)((((uint32_t)(X) & 0xff000000) >> 24) | /* reverse the order of the four bytes */ \
+ (((uint32_t)(X) & 0x00ff0000) >> 8) | \
+ (((uint32_t)(X) & 0x0000ff00) << 8) | \
+ (((uint32_t)(X) & 0x000000ff) << 24)))
+#else
+# error Unsupported byte ordering
+#endif
+
+#define first_round(ctx,x0,y0) { /* round 1, word 0 only: y0 = T0[LEROT(x0 ^ round_key[0][0], 0)] ^ the cached first_round_output_x0 */ \
+ u32 z0,t0,tem0; \
+ z0 = (x0) ^ ctx->round_key[0][0]; /* initial round-key addition for counter word 0 */ \
+ t0 = LEROT(z0, 0); /* the one byte left out by partial_precompute_tworounds */ \
+ tem0 = T0[t0]; \
+ (y0) = tem0 ^ ctx->first_round_output_x0; \
+}
+
+#define second_round(ctx,x0,y0,y1,y2,y3) { /* round 2: add the four table terms that depend on x0 to the cached second_round_output words, giving y0..y3 */ \
+ u32 t0,t7,t10,t13; \
+ u32 tem0,tem7,tem10,tem13; \
+ t0 = LEROT(x0, 0); /* byte at offset 0 -> T0 -> y0 */ \
+ tem0 = T0[t0]; \
+ (y0) = tem0 ^ ctx->second_round_output[0]; \
+ t7 = LEROT(x0, 24); /* byte at offset 24 -> T3 -> y1 */ \
+ tem7 = T3[t7]; \
+ (y1) = tem7 ^ ctx->second_round_output[1]; \
+ t10 = LEROT(x0, 16); /* byte at offset 16 -> T2 -> y2 */ \
+ tem10 = T2[t10]; \
+ (y2) = tem10 ^ ctx->second_round_output[2]; \
+ t13 = LEROT(x0, 8); /* byte at offset 8 -> T1 -> y3 */ \
+ tem13 = T1[t13];\
+ (y3) = tem13 ^ ctx->second_round_output[3]; \
+}
+
+#define round(ctx,x0,x1,x2,x3,y0,y1,y2,y3,r) { /* one full table-driven round: state words x0..x3 in, y0..y3 out, using round key r */ \
+ u32 t0,t1,t2,t3; \
+ u32 t4,t5,t6,t7; \
+ u32 t8,t9,t10,t11; \
+ u32 t12,t13,t14,t15;\
+ u32 tem0,tem1,tem2,tem3; \
+ u32 tem4,tem5,tem6,tem7; \
+ u32 tem8,tem9,tem10,tem11; \
+ u32 tem12,tem13,tem14,tem15;\
+ /* y0 = T0[x0 @0] ^ T1[x1 @8] ^ T2[x2 @16] ^ T3[x3 @24] ^ round_key[r][0]  (@n = LEROT bit offset) */ \
+ t0 = LEROT(x0, 0); \
+ tem0 = T0[t0]; \
+ t1 = LEROT(x1, 8); \
+ tem1 = tem0 ^ T1[t1]; \
+ t2 = LEROT(x2, 16); \
+ tem2 = tem1 ^ T2[t2]; \
+ t3 = LEROT(x3, 24); \
+ tem3 = tem2 ^ T3[t3]; \
+ (y0) = tem3 ^ ctx->round_key[r][0]; \
+ /* y1 = T0[x1 @0] ^ T1[x2 @8] ^ T2[x3 @16] ^ T3[x0 @24] ^ round_key[r][1] */ \
+ t4 = LEROT(x1, 0); \
+ tem4 = T0[t4]; \
+ t5 = LEROT(x2, 8); \
+ tem5 = tem4 ^ T1[t5]; \
+ t6 = LEROT(x3, 16); \
+ tem6 = tem5 ^ T2[t6]; \
+ t7 = LEROT(x0, 24); \
+ tem7 = tem6 ^ T3[t7]; \
+ (y1) = tem7 ^ ctx->round_key[r][1]; \
+ /* y2 = T0[x2 @0] ^ T1[x3 @8] ^ T2[x0 @16] ^ T3[x1 @24] ^ round_key[r][2] */ \
+ t8 = LEROT(x2, 0); \
+ tem8 = T0[t8]; \
+ t9 = LEROT(x3, 8); \
+ tem9 = tem8 ^ T1[t9]; \
+ t10 = LEROT(x0, 16); \
+ tem10 = tem9 ^ T2[t10]; \
+ t11 = LEROT(x1, 24); \
+ tem11 = tem10 ^ T3[t11];\
+ (y2) = tem11 ^ ctx->round_key[r][2]; \
+ /* y3 = T0[x3 @0] ^ T1[x0 @8] ^ T2[x1 @16] ^ T3[x2 @24] ^ round_key[r][3] */ \
+ t12 = LEROT(x3, 0); \
+ tem12 = T0[t12]; \
+ t13 = LEROT(x0, 8); \
+ tem13 = tem12 ^ T1[t13];\
+ t14 = LEROT(x1, 16); \
+ tem14 = tem13 ^ T2[t14];\
+ t15 = LEROT(x2, 24); \
+ tem15 = tem14 ^ T3[t15];\
+ (y3) = tem15 ^ ctx->round_key[r][3]; \
+}
+
+/* 22.14 cycles/byte*/
+#define last_round(ctx,x0,x1,x2,x3,output,r) { /* final round without its round key: output[4k+j] = Sbox[byte j of x((k+j) mod 4)]; ctx and r are not used here */ \
+ u32 t0,t1,t2,t3; \
+ u32 t4,t5,t6,t7; \
+ u32 t8,t9,t10,t11; \
+ u32 t12,t13,t14,t15;\
+ /* the four bytes of x0 land in output[0], [7], [10], [13] */ \
+ t0 = LEROT(x0, 0); \
+ output[0] = Sbox[t0]; \
+ t7 = LEROT(x0, 24); \
+ output[7] = Sbox[t7]; \
+ t10 = LEROT(x0, 16); \
+ output[10] = Sbox[t10]; \
+ t13 = LEROT(x0, 8); \
+ output[13] = Sbox[t13]; \
+ /* the four bytes of x1 land in output[1], [4], [11], [14] */ \
+ t1 = LEROT(x1, 8); \
+ output[1] = Sbox[t1]; \
+ t4 = LEROT(x1, 0); \
+ output[4] = Sbox[t4]; \
+ t11 = LEROT(x1, 24); \
+ output[11] = Sbox[t11]; \
+ t14 = LEROT(x1, 16); \
+ output[14] = Sbox[t14]; \
+ /* the four bytes of x2 land in output[2], [5], [8], [15] */ \
+ t2 = LEROT(x2, 16); \
+ output[2] = Sbox[t2]; \
+ t5 = LEROT(x2, 8); \
+ output[5] = Sbox[t5]; \
+ t8 = LEROT(x2, 0); \
+ output[8] = Sbox[t8]; \
+ t15 = LEROT(x2, 24); \
+ output[15] = Sbox[t15]; \
+ /* the four bytes of x3 land in output[3], [6], [9], [12] */ \
+ t3 = LEROT(x3, 24); \
+ output[3] = Sbox[t3]; \
+ t6 = LEROT(x3, 16); \
+ output[6] = Sbox[t6]; \
+ t9 = LEROT(x3, 8); \
+ output[9] = Sbox[t9]; \
+ t12 = LEROT(x3, 0); \
+ output[12] = Sbox[t12]; \
+}
+
+#define aes256_enc_block(x,output,ctx) { /* run all 14 rounds on counter block x and store 16 bytes in output; the last round key is NOT applied here, the caller XORs in round_key[Nr] */ \
+ u32 y0; /* round 1 result, word 0 only */ \
+ u32 z0,z1,z2,z3; /* state after round 2 */ \
+ u32 a0,a1,a2,a3; /* a..k: state after rounds 3..13 */ \
+ u32 b0,b1,b2,b3;\
+ u32 c0,c1,c2,c3;\
+ u32 d0,d1,d2,d3;\
+ u32 e0,e1,e2,e3;\
+ u32 f0,f1,f2,f3;\
+ u32 g0,g1,g2,g3;\
+ u32 h0,h1,h2,h3;\
+ u32 i0,i1,i2,i3;\
+ u32 j0,j1,j2,j3;\
+ u32 k0,k1,k2,k3;\
+ first_round(ctx,x[0],y0); /* only x[0] is read; the other counter words enter through the values cached by partial_precompute_tworounds */ \
+ second_round(ctx,y0,z0,z1,z2,z3);\
+ round(ctx,z0,z1,z2,z3,a0,a1,a2,a3,3);\
+ round(ctx,a0,a1,a2,a3,b0,b1,b2,b3,4);\
+ round(ctx,b0,b1,b2,b3,c0,c1,c2,c3,5);\
+ round(ctx,c0,c1,c2,c3,d0,d1,d2,d3,6);\
+ round(ctx,d0,d1,d2,d3,e0,e1,e2,e3,7);\
+ round(ctx,e0,e1,e2,e3,f0,f1,f2,f3,8);\
+ round(ctx,f0,f1,f2,f3,g0,g1,g2,g3,9);\
+ round(ctx,g0,g1,g2,g3,h0,h1,h2,h3,10);\
+ round(ctx,h0,h1,h2,h3,i0,i1,i2,i3,11);\
+ round(ctx,i0,i1,i2,i3,j0,j1,j2,j3,12);\
+ round(ctx,j0,j1,j2,j3,k0,k1,k2,k3,13);\
+ last_round(ctx,k0,k1,k2,k3,(output),14); /* byte substitution and reordering only; last_round ignores the 14 */ \
+}
diff --git a/src/libsodium/crypto_stream/aes256estream/hongjun/api.h b/src/libsodium/crypto_stream/aes256estream/hongjun/api.h
new file mode 100644
index 0000000..017babe
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes256estream/hongjun/api.h
@@ -0,0 +1,13 @@
+
+#include "crypto_stream_aes256estream.h"
+
+#define crypto_stream crypto_stream_aes256estream
+#define crypto_stream_xor crypto_stream_aes256estream_xor
+#define crypto_stream_beforenm crypto_stream_aes256estream_beforenm
+#define crypto_stream_afternm crypto_stream_aes256estream_afternm
+#define crypto_stream_xor_afternm crypto_stream_aes256estream_xor_afternm
+#define crypto_stream_KEYBYTES crypto_stream_aes256estream_KEYBYTES
+#define crypto_stream_NONCEBYTES crypto_stream_aes256estream_NONCEBYTES
+#define crypto_stream_BEFORENMBYTES crypto_stream_aes256estream_BEFORENMBYTES
+#define crypto_stream_IMPLEMENTATION crypto_stream_aes256estream_IMPLEMENTATION
+#define crypto_stream_VERSION crypto_stream_aes256estream_VERSION
diff --git a/src/libsodium/crypto_stream/aes256estream/hongjun/ecrypt-sync.h b/src/libsodium/crypto_stream/aes256estream/hongjun/ecrypt-sync.h
new file mode 100644
index 0000000..23f2aee
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes256estream/hongjun/ecrypt-sync.h
@@ -0,0 +1,27 @@
+
+#ifndef __ECRYPT_SYNC__
+#define __ECRYPT_SYNC__
+
+#include <stdint.h>
+
+typedef uint8_t u8;
+typedef uint32_t u32;
+
+#define Nr 14 /* number of rounds; the context holds Nr+1 round keys */
+#define Nk 8 /* number of 32-bit words read from the key by ECRYPT_keysetup */
+#define Nb 4 /* number of 32-bit words per block / per round key */
+
+#pragma pack(push, 1)
+typedef struct ECRYPT_ctx /* cipher state; packed, and ECRYPT_keysetup statically asserts that its size equals crypto_stream_BEFORENMBYTES */
+{
+ u32 round_key[Nr+1][4]; /* expanded key, written by ECRYPT_keysetup (each word passed through SWP32) */
+ u32 counter[4]; /* counter block: nonce copied in by ECRYPT_ivsetup; word 0 is incremented once per 16-byte block */
+ u32 first_round_output_x0; /* cached part of round 1, word 0 (see partial_precompute_tworounds) */
+ u32 second_round_output[4]; /* cached part of round 2 (see partial_precompute_tworounds) */
+} ECRYPT_ctx;
+#pragma pack(pop)
+
+#define ECRYPT_encrypt_bytes(ctx, plaintext, ciphertext, msglen) /* forwards to ECRYPT_process_bytes with action 0 (the action argument is ignored there) */ \
+ ECRYPT_process_bytes(0, ctx, plaintext, ciphertext, msglen)
+
+#endif
diff --git a/src/libsodium/crypto_stream/aes256estream/stream_aes256estream_api.c b/src/libsodium/crypto_stream/aes256estream/stream_aes256estream_api.c
new file mode 100644
index 0000000..2d3d1cb
--- /dev/null
+++ b/src/libsodium/crypto_stream/aes256estream/stream_aes256estream_api.c
@@ -0,0 +1,16 @@
+#include "crypto_stream_aes256estream.h"
+
+size_t
+crypto_stream_aes256estream_keybytes(void) { /* run-time accessor for the compile-time constant crypto_stream_aes256estream_KEYBYTES */
+ return crypto_stream_aes256estream_KEYBYTES;
+}
+
+size_t
+crypto_stream_aes256estream_noncebytes(void) { /* run-time accessor for the compile-time constant crypto_stream_aes256estream_NONCEBYTES */
+ return crypto_stream_aes256estream_NONCEBYTES;
+}
+
+size_t
+crypto_stream_aes256estream_beforenmbytes(void) { /* run-time accessor for the compile-time constant crypto_stream_aes256estream_BEFORENMBYTES */
+ return crypto_stream_aes256estream_BEFORENMBYTES;
+}
diff --git a/src/libsodium/crypto_stream/chacha20/ref/api.h b/src/libsodium/crypto_stream/chacha20/ref/api.h
new file mode 100644
index 0000000..3d85867
--- /dev/null
+++ b/src/libsodium/crypto_stream/chacha20/ref/api.h
@@ -0,0 +1,12 @@
+
+#include "crypto_stream_chacha20.h"
+
+int
+crypto_stream_chacha20_ref(unsigned char *c, unsigned long long clen,
+ const unsigned char *n, const unsigned char *k);
+
+int
+crypto_stream_chacha20_ref_xor_ic(unsigned char *c, const unsigned char *m,
+ unsigned long long mlen,
+ const unsigned char *n, uint64_t ic,
+ const unsigned char *k);
diff --git a/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c b/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c
new file mode 100644
index 0000000..7c7d1a5
--- /dev/null
+++ b/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c
@@ -0,0 +1,275 @@
+
+/* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */
+
+/*
+ chacha-merged.c version 20080118
+ D. J. Bernstein
+ Public domain.
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "api.h"
+#include "crypto_stream_chacha20.h"
+#include "utils.h"
+
+/*
+ * ChaCha state: sixteen 32-bit words.
+ * As filled in by chacha_keysetup()/chacha_ivsetup() in this file:
+ * input[0..3] constant, input[4..11] key, input[12..13] block counter,
+ * input[14..15] nonce.
+ */
+struct chacha_ctx {
+ uint32_t input[16];
+};
+
+typedef uint8_t u8;
+typedef uint32_t u32;
+
+typedef struct chacha_ctx chacha_ctx;
+
+/* Append the unsigned suffix to an integer literal. */
+#define U8C(v) (v##U)
+#define U32C(v) (v##U)
+
+/* Convert to u8 / u32 and mask to the low 8 / 32 bits. */
+#define U8V(v) ((u8)(v) & U8C(0xFF))
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+/*
+ * Rotate a 32-bit value left by n bits.
+ * Evaluates v twice; n must be in 1..31 (n == 0 would shift right by 32).
+ */
+#define ROTL32(v, n) \
+ (U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+/* Assemble a u32 from four bytes at p, least significant byte first. */
+#define U8TO32_LITTLE(p) \
+ (((u32)((p)[0]) ) | \
+ ((u32)((p)[1]) << 8) | \
+ ((u32)((p)[2]) << 16) | \
+ ((u32)((p)[3]) << 24))
+
+/* Store v into four bytes at p, least significant byte first. Evaluates p and v four times. */
+#define U32TO8_LITTLE(p, v) \
+ do { \
+ (p)[0] = U8V((v) ); \
+ (p)[1] = U8V((v) >> 8); \
+ (p)[2] = U8V((v) >> 16); \
+ (p)[3] = U8V((v) >> 24); \
+ } while (0)
+
+/* 32-bit building blocks used by QUARTERROUND; PLUS wraps modulo 2^32 via U32V. */
+#define ROTATE(v,c) (ROTL32(v,c))
+#define XOR(v,w) ((v) ^ (w))
+#define PLUS(v,w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v),1))
+
+/*
+ * One add/xor/rotate quarter round on four state words, with rotation
+ * amounts 16, 12, 8 and 7. Expands to several statements and is NOT wrapped
+ * in do { } while (0): the arguments must be plain lvalues, and the macro
+ * must not be used as the unbraced body of an if/for.
+ */
+#define QUARTERROUND(a,b,c,d) \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
+
+/* The 16 ASCII bytes "expand 32-byte k"; chacha_keysetup() loads them into input[0..3]. */
+static const unsigned char sigma[16] = {
+ 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
+};
+
+/*
+ * Load the constant and a 32-byte key into the state.
+ *   input[0..3]  <- sigma, as four little-endian words
+ *   input[4..7]  <- k[0..15]
+ *   input[8..11] <- k[16..31]
+ * input[12..15] are left untouched (see chacha_ivsetup).
+ */
+static void
+chacha_keysetup(chacha_ctx *x, const u8 *k)
+{
+    unsigned int w;
+
+    for (w = 0; w < 4; w++) {
+        x->input[w]     = U8TO32_LITTLE(sigma + 4 * w);
+        x->input[4 + w] = U8TO32_LITTLE(k + 4 * w);
+        x->input[8 + w] = U8TO32_LITTLE(k + 16 + 4 * w);
+    }
+}
+
+/*
+ * Load the block counter and nonce into the state.
+ *   input[12..13] <- 8-byte little-endian counter, or 0 when counter is NULL
+ *   input[14..15] <- 8-byte nonce iv
+ */
+static void
+chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
+{
+    if (counter != NULL) {
+        x->input[12] = U8TO32_LITTLE(counter + 0);
+        x->input[13] = U8TO32_LITTLE(counter + 4);
+    } else {
+        x->input[12] = 0;
+        x->input[13] = 0;
+    }
+    x->input[14] = U8TO32_LITTLE(iv + 0);
+    x->input[15] = U8TO32_LITTLE(iv + 4);
+}
+
+/*
+ * XOR `bytes` bytes of m with the ChaCha20 keystream and store them in c.
+ *
+ * x     - state prepared by chacha_keysetup()/chacha_ivsetup(); on return
+ *         input[12..13] hold the counter for the next unused block.
+ * m, c  - input and output buffers of `bytes` bytes (the caller in this
+ *         file passes m == c for in-place keystream generation).
+ * bytes - number of bytes to process; 0 is a no-op.
+ *
+ * Data is processed in 64-byte blocks. A trailing partial block is staged
+ * through the local buffer tmp so that the full-block load/store code can
+ * be reused; only the first `bytes` bytes are copied back to the caller.
+ */
+static void
+chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, unsigned long long bytes)
+{
+    u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+    u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+    u8 *ctarget = NULL;
+    u8 tmp[64];
+    unsigned long long i;
+
+    if (!bytes) {
+        return;
+    }
+    j0 = x->input[0];
+    j1 = x->input[1];
+    j2 = x->input[2];
+    j3 = x->input[3];
+    j4 = x->input[4];
+    j5 = x->input[5];
+    j6 = x->input[6];
+    j7 = x->input[7];
+    j8 = x->input[8];
+    j9 = x->input[9];
+    j10 = x->input[10];
+    j11 = x->input[11];
+    j12 = x->input[12];
+    j13 = x->input[13];
+    j14 = x->input[14];
+    j15 = x->input[15];
+
+    for (;;) {
+        if (bytes < 64) {
+            /*
+             * Final partial block: stage it in tmp. Zero tmp first, because
+             * the block code below reads all 64 bytes through m and would
+             * otherwise read indeterminate stack contents past `bytes`.
+             */
+            memset(tmp, 0, sizeof tmp);
+            for (i = 0; i < bytes; ++i) {
+                tmp[i] = m[i];
+            }
+            m = tmp;
+            ctarget = c;
+            c = tmp;
+        }
+        x0 = j0;
+        x1 = j1;
+        x2 = j2;
+        x3 = j3;
+        x4 = j4;
+        x5 = j5;
+        x6 = j6;
+        x7 = j7;
+        x8 = j8;
+        x9 = j9;
+        x10 = j10;
+        x11 = j11;
+        x12 = j12;
+        x13 = j13;
+        x14 = j14;
+        x15 = j15;
+        /* 20 rounds: each pass is one column round followed by one diagonal round */
+        for (i = 20; i > 0; i -= 2) {
+            QUARTERROUND(x0, x4, x8, x12)
+            QUARTERROUND(x1, x5, x9, x13)
+            QUARTERROUND(x2, x6, x10, x14)
+            QUARTERROUND(x3, x7, x11, x15)
+            QUARTERROUND(x0, x5, x10, x15)
+            QUARTERROUND(x1, x6, x11, x12)
+            QUARTERROUND(x2, x7, x8, x13)
+            QUARTERROUND(x3, x4, x9, x14)
+        }
+        /* feed-forward: add the input state to the permuted state */
+        x0 = PLUS(x0, j0);
+        x1 = PLUS(x1, j1);
+        x2 = PLUS(x2, j2);
+        x3 = PLUS(x3, j3);
+        x4 = PLUS(x4, j4);
+        x5 = PLUS(x5, j5);
+        x6 = PLUS(x6, j6);
+        x7 = PLUS(x7, j7);
+        x8 = PLUS(x8, j8);
+        x9 = PLUS(x9, j9);
+        x10 = PLUS(x10, j10);
+        x11 = PLUS(x11, j11);
+        x12 = PLUS(x12, j12);
+        x13 = PLUS(x13, j13);
+        x14 = PLUS(x14, j14);
+        x15 = PLUS(x15, j15);
+
+        /* XOR the keystream block with 64 bytes of input */
+        x0 = XOR(x0, U8TO32_LITTLE(m + 0));
+        x1 = XOR(x1, U8TO32_LITTLE(m + 4));
+        x2 = XOR(x2, U8TO32_LITTLE(m + 8));
+        x3 = XOR(x3, U8TO32_LITTLE(m + 12));
+        x4 = XOR(x4, U8TO32_LITTLE(m + 16));
+        x5 = XOR(x5, U8TO32_LITTLE(m + 20));
+        x6 = XOR(x6, U8TO32_LITTLE(m + 24));
+        x7 = XOR(x7, U8TO32_LITTLE(m + 28));
+        x8 = XOR(x8, U8TO32_LITTLE(m + 32));
+        x9 = XOR(x9, U8TO32_LITTLE(m + 36));
+        x10 = XOR(x10, U8TO32_LITTLE(m + 40));
+        x11 = XOR(x11, U8TO32_LITTLE(m + 44));
+        x12 = XOR(x12, U8TO32_LITTLE(m + 48));
+        x13 = XOR(x13, U8TO32_LITTLE(m + 52));
+        x14 = XOR(x14, U8TO32_LITTLE(m + 56));
+        x15 = XOR(x15, U8TO32_LITTLE(m + 60));
+
+        /* 64-bit block counter held in j12 (low word) and j13 (high word) */
+        j12 = PLUSONE(j12);
+        if (!j12) {
+            j13 = PLUSONE(j13);
+            /* stopping at 2^70 bytes per nonce is user's responsibility */
+        }
+
+        U32TO8_LITTLE(c + 0, x0);
+        U32TO8_LITTLE(c + 4, x1);
+        U32TO8_LITTLE(c + 8, x2);
+        U32TO8_LITTLE(c + 12, x3);
+        U32TO8_LITTLE(c + 16, x4);
+        U32TO8_LITTLE(c + 20, x5);
+        U32TO8_LITTLE(c + 24, x6);
+        U32TO8_LITTLE(c + 28, x7);
+        U32TO8_LITTLE(c + 32, x8);
+        U32TO8_LITTLE(c + 36, x9);
+        U32TO8_LITTLE(c + 40, x10);
+        U32TO8_LITTLE(c + 44, x11);
+        U32TO8_LITTLE(c + 48, x12);
+        U32TO8_LITTLE(c + 52, x13);
+        U32TO8_LITTLE(c + 56, x14);
+        U32TO8_LITTLE(c + 60, x15);
+
+        if (bytes <= 64) {
+            if (bytes < 64) {
+                /* partial block: hand back only the bytes the caller asked for */
+                for (i = 0; i < bytes; ++i) {
+                    ctarget[i] = c[i];
+                }
+            }
+            /* persist the advanced counter for any follow-up call on this state */
+            x->input[12] = j12;
+            x->input[13] = j13;
+            return;
+        }
+        bytes -= 64;
+        c += 64;
+        m += 64;
+    }
+}
+
+/*
+ * Fill c with clen bytes of ChaCha20 keystream for nonce n and key k,
+ * starting at block counter 0. The buffer is zeroed and then "encrypted"
+ * in place, which leaves the raw keystream in c. Always returns 0.
+ */
+int
+crypto_stream_chacha20_ref(unsigned char *c, unsigned long long clen,
+                           const unsigned char *n, const unsigned char *k)
+{
+    struct chacha_ctx ctx;
+
+    /* compile-time check: the array size is negative unless KEYBYTES == 32 */
+    (void) sizeof(int[crypto_stream_chacha20_KEYBYTES == 256 / 8 ? 1 : -1]);
+
+    if (clen != 0) {
+        chacha_keysetup(&ctx, k);
+        chacha_ivsetup(&ctx, n, NULL);
+        memset(c, 0, clen);
+        chacha_encrypt_bytes(&ctx, c, c, clen);
+        /* the state holds the key: wipe it before returning */
+        sodium_memzero(&ctx, sizeof ctx);
+    }
+    return 0;
+}
+
+/*
+ * c = m XOR ChaCha20 keystream for nonce n and key k, with the keystream
+ * starting at 64-byte block number ic. Always returns 0; mlen == 0 is a no-op.
+ */
+int
+crypto_stream_chacha20_ref_xor_ic(unsigned char *c, const unsigned char *m,
+                                  unsigned long long mlen,
+                                  const unsigned char *n, uint64_t ic,
+                                  const unsigned char *k)
+{
+    struct chacha_ctx ctx;
+    uint8_t ic_bytes[8];
+    unsigned int b;
+
+    if (mlen == 0) {
+        return 0;
+    }
+    /* serialise the 64-bit counter little-endian, the format chacha_ivsetup() reads */
+    for (b = 0; b < 8; b++) {
+        ic_bytes[b] = (uint8_t) (ic >> (8 * b));
+    }
+    chacha_keysetup(&ctx, k);
+    chacha_ivsetup(&ctx, n, ic_bytes);
+    chacha_encrypt_bytes(&ctx, m, c, mlen);
+    sodium_memzero(&ctx, sizeof ctx);
+    sodium_memzero(ic_bytes, sizeof ic_bytes);
+
+    return 0;
+}
diff --git a/src/libsodium/crypto_stream/chacha20/stream_chacha20_api.c b/src/libsodium/crypto_stream/chacha20/stream_chacha20_api.c
new file mode 100644
index 0000000..412cdfa
--- /dev/null
+++ b/src/libsodium/crypto_stream/chacha20/stream_chacha20_api.c
@@ -0,0 +1,36 @@
+#include "crypto_stream_chacha20.h"
+#include "ref/api.h"
+
+/* Runtime accessor for the crypto_stream_chacha20_KEYBYTES constant. */
+size_t
+crypto_stream_chacha20_keybytes(void)
+{
+    const size_t key_len = crypto_stream_chacha20_KEYBYTES;
+
+    return key_len;
+}
+
+/* Runtime accessor for the crypto_stream_chacha20_NONCEBYTES constant. */
+size_t
+crypto_stream_chacha20_noncebytes(void)
+{
+    const size_t nonce_len = crypto_stream_chacha20_NONCEBYTES;
+
+    return nonce_len;
+}
+
+/* Public keystream entry point; delegates to the reference implementation. */
+int
+crypto_stream_chacha20(unsigned char *c, unsigned long long clen,
+                       const unsigned char *n, const unsigned char *k)
+{
+    const int rc = crypto_stream_chacha20_ref(c, clen, n, k);
+
+    return rc;
+}
+
+/* Public XOR entry point with an explicit initial block counter; delegates to the reference implementation. */
+int
+crypto_stream_chacha20_xor_ic(unsigned char *c, const unsigned char *m,
+                              unsigned long long mlen,
+                              const unsigned char *n, uint64_t ic,
+                              const unsigned char *k)
+{
+    const int rc = crypto_stream_chacha20_ref_xor_ic(c, m, mlen, n, ic, k);
+
+    return rc;
+}
+
+/* Public XOR entry point; same as the _xor_ic variant with the initial block counter fixed at 0. */
+int
+crypto_stream_chacha20_xor(unsigned char *c, const unsigned char *m,
+                           unsigned long long mlen, const unsigned char *n,
+                           const unsigned char *k)
+{
+    const int rc = crypto_stream_chacha20_ref_xor_ic(c, m, mlen, n, 0U, k);
+
+    return rc;
+}
diff --git a/src/libsodium/crypto_stream/crypto_stream.c b/src/libsodium/crypto_stream/crypto_stream.c
new file mode 100644
index 0000000..50a9c1c
--- /dev/null
+++ b/src/libsodium/crypto_stream/crypto_stream.c
@@ -0,0 +1,36 @@
+
+#include "crypto_stream.h"
+
+/* Runtime accessor for the crypto_stream_KEYBYTES constant. */
+size_t
+crypto_stream_keybytes(void)
+{
+    const size_t key_len = crypto_stream_KEYBYTES;
+
+    return key_len;
+}
+
+/* Runtime accessor for the crypto_stream_NONCEBYTES constant. */
+size_t
+crypto_stream_noncebytes(void)
+{
+    const size_t nonce_len = crypto_stream_NONCEBYTES;
+
+    return nonce_len;
+}
+
+/* Runtime accessor for the crypto_stream_PRIMITIVE string. */
+const char *
+crypto_stream_primitive(void)
+{
+    const char *primitive_name = crypto_stream_PRIMITIVE;
+
+    return primitive_name;
+}
+
+/* Default keystream generator: forwards unchanged to crypto_stream_xsalsa20(). */
+int
+crypto_stream(unsigned char *c, unsigned long long clen,
+              const unsigned char *n, const unsigned char *k)
+{
+    const int rc = crypto_stream_xsalsa20(c, clen, n, k);
+
+    return rc;
+}
+
+
+/* Default stream XOR: forwards unchanged to crypto_stream_xsalsa20_xor(). */
+int
+crypto_stream_xor(unsigned char *c, const unsigned char *m,
+                  unsigned long long mlen, const unsigned char *n,
+                  const unsigned char *k)
+{
+    const int rc = crypto_stream_xsalsa20_xor(c, m, mlen, n, k);
+
+    return rc;
+}
diff --git a/src/libsodium/crypto_stream/salsa20/amd64_xmm6/api.h b/src/libsodium/crypto_stream/salsa20/amd64_xmm6/api.h
new file mode 100644
index 0000000..037fb59
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa20/amd64_xmm6/api.h
@@ -0,0 +1 @@
+#include "crypto_stream_salsa20.h"
diff --git a/src/libsodium/crypto_stream/salsa20/amd64_xmm6/stream_salsa20_amd64_xmm6.S b/src/libsodium/crypto_stream/salsa20/amd64_xmm6/stream_salsa20_amd64_xmm6.S
new file mode 100644
index 0000000..f241568
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa20/amd64_xmm6/stream_salsa20_amd64_xmm6.S
@@ -0,0 +1,944 @@
+#if defined(__amd64) || defined(__amd64__) || defined(__x86_64__)
+
+.text
+.p2align 5
+
+.globl crypto_stream_salsa20
+.globl _crypto_stream_salsa20
+#ifdef __ELF__
+.type crypto_stream_salsa20, @function
+.type _crypto_stream_salsa20, @function
+#endif
+/*
+ * crypto_stream_salsa20 entry point.
+ * Registers on entry, as consumed below: %rdi = output pointer,
+ * %rsi = output length, %rdx and %rcx = two pointers that ._start reads
+ * through %rdx and %r10 (presumably nonce and key, per the C prototype --
+ * confirm against crypto_stream_salsa20.h).
+ * Zero-fills the output, sets the 64-bit block counter slot 472(%rsp) to 0
+ * and jumps into the body shared with crypto_stream_salsa20_xor_ic, using
+ * the zeroed output buffer as the input as well.
+ */
+crypto_stream_salsa20:
+_crypto_stream_salsa20:
+mov %rsp,%r11
+and $31,%r11
+add $512,%r11 /* %r11 = (%rsp mod 32) + 512: frame size that leaves %rsp 32-byte aligned */
+sub %r11,%rsp
+movq %r11,416(%rsp) /* frame size, reloaded by the epilogue at ._done */
+movq %r12,424(%rsp)
+movq %r13,432(%rsp)
+movq %r14,440(%rsp)
+movq %r15,448(%rsp)
+movq %rbx,456(%rsp)
+movq %rbp,464(%rsp)
+mov %rsi,%r9 /* %r9 = remaining byte count */
+mov %rdi,%rdi
+mov %rdi,%rsi /* input pointer = output pointer */
+mov %rdx,%rdx
+mov %rcx,%r10
+cmp $0,%r9
+jbe ._done /* nothing to do for length 0 */
+mov $0,%rax
+mov %r9,%rcx
+rep stosb /* write %r9 zero bytes at (%rdi) */
+sub %r9,%rdi /* rep stosb advanced %rdi: rewind to the start of the buffer */
+movq $0,472(%rsp) /* block counter starts at 0 */
+jmp ._start
+
+.text
+.p2align 5
+
+.globl crypto_stream_salsa20_xor_ic
+.globl _crypto_stream_salsa20_xor_ic
+#ifdef __ELF__
+.type crypto_stream_salsa20_xor_ic, @function
+.type _crypto_stream_salsa20_xor_ic, @function
+#endif
+/*
+ * crypto_stream_salsa20_xor_ic entry point.
+ * Registers on entry, as consumed below: %rdi = output pointer,
+ * %rsi = input pointer, %rdx = length, %rcx = pointer moved to %rdx,
+ * %r8 = 64-bit initial block counter, %r9 = pointer moved to %r10.
+ * ._start reads the %rdx/%r10 pointers (presumably nonce and key, per the
+ * C prototype -- confirm against crypto_stream_salsa20.h).
+ * Falls through into ._start.
+ */
+crypto_stream_salsa20_xor_ic:
+_crypto_stream_salsa20_xor_ic:
+
+mov %rsp,%r11
+and $31,%r11
+add $512,%r11 /* %r11 = (%rsp mod 32) + 512: frame size that leaves %rsp 32-byte aligned */
+sub %r11,%rsp
+movq %r11,416(%rsp) /* frame size, reloaded by the epilogue at ._done */
+movq %r12,424(%rsp)
+movq %r13,432(%rsp)
+movq %r14,440(%rsp)
+movq %r15,448(%rsp)
+movq %rbx,456(%rsp)
+movq %rbp,464(%rsp)
+mov %rdi,%rdi
+mov %rsi,%rsi
+mov %r9,%r10
+movq %r8,472(%rsp) /* caller-supplied initial block counter */
+mov %rdx,%r9 /* %r9 = remaining byte count */
+mov %rcx,%rdx
+cmp $0,%r9
+jbe ._done /* nothing to do for length 0 */
+
+/*
+ * Shared body. Builds the 16-word Salsa20 input state in 64..127(%rsp)
+ * from the buffer at %r10 (32 bytes read), the buffer at %rdx (8 bytes
+ * read), the 64-bit block counter in 472(%rsp) and four constants.
+ * The words are stored in a permuted order, not in natural order.
+ * Inputs shorter than 256 bytes go to the one-block-at-a-time path;
+ * otherwise each state word except the two counter words is broadcast
+ * to all four lanes of a 16-byte slot in 128..351(%rsp) for the
+ * four-blocks-at-a-time path.
+ */
+._start:
+movl 20(%r10),%ecx
+movl 0(%r10),%r8d
+movl 0(%rdx),%eax
+movl 16(%r10),%r11d
+movl %ecx,64(%rsp)
+movl %r8d,4+64(%rsp)
+movl %eax,8+64(%rsp)
+movl %r11d,12+64(%rsp)
+movl 24(%r10),%r8d
+movl 4(%r10),%eax
+movl 4(%rdx),%edx
+movq 472(%rsp),%rcx
+movl %ecx,80(%rsp) /* low 32 bits of the block counter */
+movl %r8d,4+80(%rsp)
+movl %eax,8+80(%rsp)
+movl %edx,12+80(%rsp)
+movl 12(%r10),%edx
+shr $32,%rcx
+movl 28(%r10),%r8d
+movl 8(%r10),%eax
+movl %edx,96(%rsp)
+movl %ecx,4+96(%rsp) /* high 32 bits of the block counter */
+movl %r8d,8+96(%rsp)
+movl %eax,12+96(%rsp)
+mov $1634760805,%rdx /* 0x61707865 = "expa" */
+mov $857760878,%rcx /* 0x3320646e = "nd 3" */
+mov $2036477234,%r8 /* 0x79622d32 = "2-by" */
+mov $1797285236,%rax /* 0x6b206574 = "te k" */
+movl %edx,112(%rsp)
+movl %ecx,4+112(%rsp)
+movl %r8d,8+112(%rsp)
+movl %eax,12+112(%rsp)
+cmp $256,%r9
+jb ._bytesbetween1and255
+movdqa 112(%rsp),%xmm0
+pshufd $0x55,%xmm0,%xmm1
+pshufd $0xaa,%xmm0,%xmm2
+pshufd $0xff,%xmm0,%xmm3
+pshufd $0x00,%xmm0,%xmm0
+movdqa %xmm1,128(%rsp)
+movdqa %xmm2,144(%rsp)
+movdqa %xmm3,160(%rsp)
+movdqa %xmm0,176(%rsp)
+movdqa 64(%rsp),%xmm0
+pshufd $0xaa,%xmm0,%xmm1
+pshufd $0xff,%xmm0,%xmm2
+pshufd $0x00,%xmm0,%xmm3
+pshufd $0x55,%xmm0,%xmm0
+movdqa %xmm1,192(%rsp)
+movdqa %xmm2,208(%rsp)
+movdqa %xmm3,224(%rsp)
+movdqa %xmm0,240(%rsp)
+movdqa 80(%rsp),%xmm0 /* lane 0 (counter low) is not broadcast: it differs per block */
+pshufd $0xff,%xmm0,%xmm1
+pshufd $0x55,%xmm0,%xmm2
+pshufd $0xaa,%xmm0,%xmm0
+movdqa %xmm1,256(%rsp)
+movdqa %xmm2,272(%rsp)
+movdqa %xmm0,288(%rsp)
+movdqa 96(%rsp),%xmm0 /* lane 1 (counter high) is not broadcast: it differs per block */
+pshufd $0x00,%xmm0,%xmm1
+pshufd $0xaa,%xmm0,%xmm2
+pshufd $0xff,%xmm0,%xmm0
+movdqa %xmm1,304(%rsp)
+movdqa %xmm2,320(%rsp)
+movdqa %xmm0,336(%rsp)
+
+/*
+ * Start of one 256-byte (four-block) iteration.
+ * Writes four consecutive block-counter values into the four lanes of
+ * 352(%rsp) (low halves) and 368(%rsp) (high halves), then stores the
+ * counter advanced by 4 back to 472(%rsp) and to the single-block state
+ * words at 80(%rsp) / 4+96(%rsp). Saves the remaining length in
+ * 480(%rsp) because %r9 is reused as scratch later, sets the round
+ * counter %rdx to 20 and loads the 16 broadcast state vectors.
+ */
+._bytesatleast256:
+movq 472(%rsp),%rdx
+mov %rdx,%rcx
+shr $32,%rcx
+movl %edx,352(%rsp)
+movl %ecx,368(%rsp)
+add $1,%rdx
+mov %rdx,%rcx
+shr $32,%rcx
+movl %edx,4+352(%rsp)
+movl %ecx,4+368(%rsp)
+add $1,%rdx
+mov %rdx,%rcx
+shr $32,%rcx
+movl %edx,8+352(%rsp)
+movl %ecx,8+368(%rsp)
+add $1,%rdx
+mov %rdx,%rcx
+shr $32,%rcx
+movl %edx,12+352(%rsp)
+movl %ecx,12+368(%rsp)
+add $1,%rdx
+mov %rdx,%rcx
+shr $32,%rcx
+movl %edx,80(%rsp)
+movl %ecx,4+96(%rsp)
+movq %rdx,472(%rsp)
+movq %r9,480(%rsp)
+mov $20,%rdx
+movdqa 128(%rsp),%xmm0
+movdqa 144(%rsp),%xmm1
+movdqa 160(%rsp),%xmm2
+movdqa 320(%rsp),%xmm3
+movdqa 336(%rsp),%xmm4
+movdqa 192(%rsp),%xmm5
+movdqa 208(%rsp),%xmm6
+movdqa 240(%rsp),%xmm7
+movdqa 256(%rsp),%xmm8
+movdqa 272(%rsp),%xmm9
+movdqa 288(%rsp),%xmm10
+movdqa 368(%rsp),%xmm11
+movdqa 176(%rsp),%xmm12
+movdqa 224(%rsp),%xmm13
+movdqa 304(%rsp),%xmm14
+movdqa 352(%rsp),%xmm15
+
+/*
+ * Round loop of the four-block path. Each xmm register holds one state
+ * word for four blocks (one block per 32-bit lane). %rdx starts at 20
+ * and drops by 2 per pass, so one pass presumably covers two Salsa20
+ * rounds. Each step is "sum two words, rotate left by 7, 9, 13 or 18,
+ * XOR into a third word"; the rotate is done as pslld k / psrld (32-k)
+ * with both halves XORed into the destination.
+ * All 16 xmm registers hold state, so two state words at a time are
+ * parked in the spill slots 384(%rsp) and 400(%rsp) to free scratch
+ * registers.
+ */
+._mainloop1:
+movdqa %xmm1,384(%rsp)
+movdqa %xmm2,400(%rsp)
+movdqa %xmm13,%xmm1
+paddd %xmm12,%xmm1
+movdqa %xmm1,%xmm2
+pslld $7,%xmm1
+pxor %xmm1,%xmm14
+psrld $25,%xmm2
+pxor %xmm2,%xmm14
+movdqa %xmm7,%xmm1
+paddd %xmm0,%xmm1
+movdqa %xmm1,%xmm2
+pslld $7,%xmm1
+pxor %xmm1,%xmm11
+psrld $25,%xmm2
+pxor %xmm2,%xmm11
+movdqa %xmm12,%xmm1
+paddd %xmm14,%xmm1
+movdqa %xmm1,%xmm2
+pslld $9,%xmm1
+pxor %xmm1,%xmm15
+psrld $23,%xmm2
+pxor %xmm2,%xmm15
+movdqa %xmm0,%xmm1
+paddd %xmm11,%xmm1
+movdqa %xmm1,%xmm2
+pslld $9,%xmm1
+pxor %xmm1,%xmm9
+psrld $23,%xmm2
+pxor %xmm2,%xmm9
+movdqa %xmm14,%xmm1
+paddd %xmm15,%xmm1
+movdqa %xmm1,%xmm2
+pslld $13,%xmm1
+pxor %xmm1,%xmm13
+psrld $19,%xmm2
+pxor %xmm2,%xmm13
+movdqa %xmm11,%xmm1
+paddd %xmm9,%xmm1
+movdqa %xmm1,%xmm2
+pslld $13,%xmm1
+pxor %xmm1,%xmm7
+psrld $19,%xmm2
+pxor %xmm2,%xmm7
+movdqa %xmm15,%xmm1
+paddd %xmm13,%xmm1
+movdqa %xmm1,%xmm2
+pslld $18,%xmm1
+pxor %xmm1,%xmm12
+psrld $14,%xmm2
+pxor %xmm2,%xmm12
+movdqa 384(%rsp),%xmm1
+movdqa %xmm12,384(%rsp)
+movdqa %xmm9,%xmm2
+paddd %xmm7,%xmm2
+movdqa %xmm2,%xmm12
+pslld $18,%xmm2
+pxor %xmm2,%xmm0
+psrld $14,%xmm12
+pxor %xmm12,%xmm0
+movdqa %xmm5,%xmm2
+paddd %xmm1,%xmm2
+movdqa %xmm2,%xmm12
+pslld $7,%xmm2
+pxor %xmm2,%xmm3
+psrld $25,%xmm12
+pxor %xmm12,%xmm3
+movdqa 400(%rsp),%xmm2
+movdqa %xmm0,400(%rsp)
+movdqa %xmm6,%xmm0
+paddd %xmm2,%xmm0
+movdqa %xmm0,%xmm12
+pslld $7,%xmm0
+pxor %xmm0,%xmm4
+psrld $25,%xmm12
+pxor %xmm12,%xmm4
+movdqa %xmm1,%xmm0
+paddd %xmm3,%xmm0
+movdqa %xmm0,%xmm12
+pslld $9,%xmm0
+pxor %xmm0,%xmm10
+psrld $23,%xmm12
+pxor %xmm12,%xmm10
+movdqa %xmm2,%xmm0
+paddd %xmm4,%xmm0
+movdqa %xmm0,%xmm12
+pslld $9,%xmm0
+pxor %xmm0,%xmm8
+psrld $23,%xmm12
+pxor %xmm12,%xmm8
+movdqa %xmm3,%xmm0
+paddd %xmm10,%xmm0
+movdqa %xmm0,%xmm12
+pslld $13,%xmm0
+pxor %xmm0,%xmm5
+psrld $19,%xmm12
+pxor %xmm12,%xmm5
+movdqa %xmm4,%xmm0
+paddd %xmm8,%xmm0
+movdqa %xmm0,%xmm12
+pslld $13,%xmm0
+pxor %xmm0,%xmm6
+psrld $19,%xmm12
+pxor %xmm12,%xmm6
+movdqa %xmm10,%xmm0
+paddd %xmm5,%xmm0
+movdqa %xmm0,%xmm12
+pslld $18,%xmm0
+pxor %xmm0,%xmm1
+psrld $14,%xmm12
+pxor %xmm12,%xmm1
+movdqa 384(%rsp),%xmm0
+movdqa %xmm1,384(%rsp)
+movdqa %xmm4,%xmm1
+paddd %xmm0,%xmm1
+movdqa %xmm1,%xmm12
+pslld $7,%xmm1
+pxor %xmm1,%xmm7
+psrld $25,%xmm12
+pxor %xmm12,%xmm7
+movdqa %xmm8,%xmm1
+paddd %xmm6,%xmm1
+movdqa %xmm1,%xmm12
+pslld $18,%xmm1
+pxor %xmm1,%xmm2
+psrld $14,%xmm12
+pxor %xmm12,%xmm2
+movdqa 400(%rsp),%xmm12
+movdqa %xmm2,400(%rsp)
+movdqa %xmm14,%xmm1
+paddd %xmm12,%xmm1
+movdqa %xmm1,%xmm2
+pslld $7,%xmm1
+pxor %xmm1,%xmm5
+psrld $25,%xmm2
+pxor %xmm2,%xmm5
+movdqa %xmm0,%xmm1
+paddd %xmm7,%xmm1
+movdqa %xmm1,%xmm2
+pslld $9,%xmm1
+pxor %xmm1,%xmm10
+psrld $23,%xmm2
+pxor %xmm2,%xmm10
+movdqa %xmm12,%xmm1
+paddd %xmm5,%xmm1
+movdqa %xmm1,%xmm2
+pslld $9,%xmm1
+pxor %xmm1,%xmm8
+psrld $23,%xmm2
+pxor %xmm2,%xmm8
+movdqa %xmm7,%xmm1
+paddd %xmm10,%xmm1
+movdqa %xmm1,%xmm2
+pslld $13,%xmm1
+pxor %xmm1,%xmm4
+psrld $19,%xmm2
+pxor %xmm2,%xmm4
+movdqa %xmm5,%xmm1
+paddd %xmm8,%xmm1
+movdqa %xmm1,%xmm2
+pslld $13,%xmm1
+pxor %xmm1,%xmm14
+psrld $19,%xmm2
+pxor %xmm2,%xmm14
+movdqa %xmm10,%xmm1
+paddd %xmm4,%xmm1
+movdqa %xmm1,%xmm2
+pslld $18,%xmm1
+pxor %xmm1,%xmm0
+psrld $14,%xmm2
+pxor %xmm2,%xmm0
+movdqa 384(%rsp),%xmm1
+movdqa %xmm0,384(%rsp)
+movdqa %xmm8,%xmm0
+paddd %xmm14,%xmm0
+movdqa %xmm0,%xmm2
+pslld $18,%xmm0
+pxor %xmm0,%xmm12
+psrld $14,%xmm2
+pxor %xmm2,%xmm12
+movdqa %xmm11,%xmm0
+paddd %xmm1,%xmm0
+movdqa %xmm0,%xmm2
+pslld $7,%xmm0
+pxor %xmm0,%xmm6
+psrld $25,%xmm2
+pxor %xmm2,%xmm6
+movdqa 400(%rsp),%xmm2
+movdqa %xmm12,400(%rsp)
+movdqa %xmm3,%xmm0
+paddd %xmm2,%xmm0
+movdqa %xmm0,%xmm12
+pslld $7,%xmm0
+pxor %xmm0,%xmm13
+psrld $25,%xmm12
+pxor %xmm12,%xmm13
+movdqa %xmm1,%xmm0
+paddd %xmm6,%xmm0
+movdqa %xmm0,%xmm12
+pslld $9,%xmm0
+pxor %xmm0,%xmm15
+psrld $23,%xmm12
+pxor %xmm12,%xmm15
+movdqa %xmm2,%xmm0
+paddd %xmm13,%xmm0
+movdqa %xmm0,%xmm12
+pslld $9,%xmm0
+pxor %xmm0,%xmm9
+psrld $23,%xmm12
+pxor %xmm12,%xmm9
+movdqa %xmm6,%xmm0
+paddd %xmm15,%xmm0
+movdqa %xmm0,%xmm12
+pslld $13,%xmm0
+pxor %xmm0,%xmm11
+psrld $19,%xmm12
+pxor %xmm12,%xmm11
+movdqa %xmm13,%xmm0
+paddd %xmm9,%xmm0
+movdqa %xmm0,%xmm12
+pslld $13,%xmm0
+pxor %xmm0,%xmm3
+psrld $19,%xmm12
+pxor %xmm12,%xmm3
+movdqa %xmm15,%xmm0
+paddd %xmm11,%xmm0
+movdqa %xmm0,%xmm12
+pslld $18,%xmm0
+pxor %xmm0,%xmm1
+psrld $14,%xmm12
+pxor %xmm12,%xmm1
+movdqa %xmm9,%xmm0
+paddd %xmm3,%xmm0
+movdqa %xmm0,%xmm12
+pslld $18,%xmm0
+pxor %xmm0,%xmm2
+psrld $14,%xmm12
+pxor %xmm12,%xmm2
+movdqa 384(%rsp),%xmm12
+movdqa 400(%rsp),%xmm0
+sub $2,%rdx
+ja ._mainloop1
+/*
+ * Output stage of the four-block path.
+ * Adds the saved input words (broadcast slots, plus the per-block counter
+ * vectors at 352/368(%rsp)) back onto the round output. Four registers at
+ * a time, it then moves lane 0 to a general register, XORs it with the
+ * input at (%rsi) and stores it at (%rdi); pshufd $0x39 rotates the next
+ * lane into position. The four lanes of a register go to the same offset
+ * in four consecutive 64-byte blocks (+0, +64, +128, +192).
+ * Afterwards the remaining length is reloaded from 480(%rsp), both
+ * pointers advance by 256, and the code loops while at least 256 bytes
+ * remain or exits when none remain.
+ */
+paddd 176(%rsp),%xmm12
+paddd 240(%rsp),%xmm7
+paddd 288(%rsp),%xmm10
+paddd 336(%rsp),%xmm4
+movd %xmm12,%rdx
+movd %xmm7,%rcx
+movd %xmm10,%r8
+movd %xmm4,%r9
+pshufd $0x39,%xmm12,%xmm12
+pshufd $0x39,%xmm7,%xmm7
+pshufd $0x39,%xmm10,%xmm10
+pshufd $0x39,%xmm4,%xmm4
+xorl 0(%rsi),%edx
+xorl 4(%rsi),%ecx
+xorl 8(%rsi),%r8d
+xorl 12(%rsi),%r9d
+movl %edx,0(%rdi)
+movl %ecx,4(%rdi)
+movl %r8d,8(%rdi)
+movl %r9d,12(%rdi)
+movd %xmm12,%rdx
+movd %xmm7,%rcx
+movd %xmm10,%r8
+movd %xmm4,%r9
+pshufd $0x39,%xmm12,%xmm12
+pshufd $0x39,%xmm7,%xmm7
+pshufd $0x39,%xmm10,%xmm10
+pshufd $0x39,%xmm4,%xmm4
+xorl 64(%rsi),%edx
+xorl 68(%rsi),%ecx
+xorl 72(%rsi),%r8d
+xorl 76(%rsi),%r9d
+movl %edx,64(%rdi)
+movl %ecx,68(%rdi)
+movl %r8d,72(%rdi)
+movl %r9d,76(%rdi)
+movd %xmm12,%rdx
+movd %xmm7,%rcx
+movd %xmm10,%r8
+movd %xmm4,%r9
+pshufd $0x39,%xmm12,%xmm12
+pshufd $0x39,%xmm7,%xmm7
+pshufd $0x39,%xmm10,%xmm10
+pshufd $0x39,%xmm4,%xmm4
+xorl 128(%rsi),%edx
+xorl 132(%rsi),%ecx
+xorl 136(%rsi),%r8d
+xorl 140(%rsi),%r9d
+movl %edx,128(%rdi)
+movl %ecx,132(%rdi)
+movl %r8d,136(%rdi)
+movl %r9d,140(%rdi)
+movd %xmm12,%rdx
+movd %xmm7,%rcx
+movd %xmm10,%r8
+movd %xmm4,%r9
+xorl 192(%rsi),%edx
+xorl 196(%rsi),%ecx
+xorl 200(%rsi),%r8d
+xorl 204(%rsi),%r9d
+movl %edx,192(%rdi)
+movl %ecx,196(%rdi)
+movl %r8d,200(%rdi)
+movl %r9d,204(%rdi)
+paddd 304(%rsp),%xmm14
+paddd 128(%rsp),%xmm0
+paddd 192(%rsp),%xmm5
+paddd 256(%rsp),%xmm8
+movd %xmm14,%rdx
+movd %xmm0,%rcx
+movd %xmm5,%r8
+movd %xmm8,%r9
+pshufd $0x39,%xmm14,%xmm14
+pshufd $0x39,%xmm0,%xmm0
+pshufd $0x39,%xmm5,%xmm5
+pshufd $0x39,%xmm8,%xmm8
+xorl 16(%rsi),%edx
+xorl 20(%rsi),%ecx
+xorl 24(%rsi),%r8d
+xorl 28(%rsi),%r9d
+movl %edx,16(%rdi)
+movl %ecx,20(%rdi)
+movl %r8d,24(%rdi)
+movl %r9d,28(%rdi)
+movd %xmm14,%rdx
+movd %xmm0,%rcx
+movd %xmm5,%r8
+movd %xmm8,%r9
+pshufd $0x39,%xmm14,%xmm14
+pshufd $0x39,%xmm0,%xmm0
+pshufd $0x39,%xmm5,%xmm5
+pshufd $0x39,%xmm8,%xmm8
+xorl 80(%rsi),%edx
+xorl 84(%rsi),%ecx
+xorl 88(%rsi),%r8d
+xorl 92(%rsi),%r9d
+movl %edx,80(%rdi)
+movl %ecx,84(%rdi)
+movl %r8d,88(%rdi)
+movl %r9d,92(%rdi)
+movd %xmm14,%rdx
+movd %xmm0,%rcx
+movd %xmm5,%r8
+movd %xmm8,%r9
+pshufd $0x39,%xmm14,%xmm14
+pshufd $0x39,%xmm0,%xmm0
+pshufd $0x39,%xmm5,%xmm5
+pshufd $0x39,%xmm8,%xmm8
+xorl 144(%rsi),%edx
+xorl 148(%rsi),%ecx
+xorl 152(%rsi),%r8d
+xorl 156(%rsi),%r9d
+movl %edx,144(%rdi)
+movl %ecx,148(%rdi)
+movl %r8d,152(%rdi)
+movl %r9d,156(%rdi)
+movd %xmm14,%rdx
+movd %xmm0,%rcx
+movd %xmm5,%r8
+movd %xmm8,%r9
+xorl 208(%rsi),%edx
+xorl 212(%rsi),%ecx
+xorl 216(%rsi),%r8d
+xorl 220(%rsi),%r9d
+movl %edx,208(%rdi)
+movl %ecx,212(%rdi)
+movl %r8d,216(%rdi)
+movl %r9d,220(%rdi)
+paddd 352(%rsp),%xmm15
+paddd 368(%rsp),%xmm11
+paddd 144(%rsp),%xmm1
+paddd 208(%rsp),%xmm6
+movd %xmm15,%rdx
+movd %xmm11,%rcx
+movd %xmm1,%r8
+movd %xmm6,%r9
+pshufd $0x39,%xmm15,%xmm15
+pshufd $0x39,%xmm11,%xmm11
+pshufd $0x39,%xmm1,%xmm1
+pshufd $0x39,%xmm6,%xmm6
+xorl 32(%rsi),%edx
+xorl 36(%rsi),%ecx
+xorl 40(%rsi),%r8d
+xorl 44(%rsi),%r9d
+movl %edx,32(%rdi)
+movl %ecx,36(%rdi)
+movl %r8d,40(%rdi)
+movl %r9d,44(%rdi)
+movd %xmm15,%rdx
+movd %xmm11,%rcx
+movd %xmm1,%r8
+movd %xmm6,%r9
+pshufd $0x39,%xmm15,%xmm15
+pshufd $0x39,%xmm11,%xmm11
+pshufd $0x39,%xmm1,%xmm1
+pshufd $0x39,%xmm6,%xmm6
+xorl 96(%rsi),%edx
+xorl 100(%rsi),%ecx
+xorl 104(%rsi),%r8d
+xorl 108(%rsi),%r9d
+movl %edx,96(%rdi)
+movl %ecx,100(%rdi)
+movl %r8d,104(%rdi)
+movl %r9d,108(%rdi)
+movd %xmm15,%rdx
+movd %xmm11,%rcx
+movd %xmm1,%r8
+movd %xmm6,%r9
+pshufd $0x39,%xmm15,%xmm15
+pshufd $0x39,%xmm11,%xmm11
+pshufd $0x39,%xmm1,%xmm1
+pshufd $0x39,%xmm6,%xmm6
+xorl 160(%rsi),%edx
+xorl 164(%rsi),%ecx
+xorl 168(%rsi),%r8d
+xorl 172(%rsi),%r9d
+movl %edx,160(%rdi)
+movl %ecx,164(%rdi)
+movl %r8d,168(%rdi)
+movl %r9d,172(%rdi)
+movd %xmm15,%rdx
+movd %xmm11,%rcx
+movd %xmm1,%r8
+movd %xmm6,%r9
+xorl 224(%rsi),%edx
+xorl 228(%rsi),%ecx
+xorl 232(%rsi),%r8d
+xorl 236(%rsi),%r9d
+movl %edx,224(%rdi)
+movl %ecx,228(%rdi)
+movl %r8d,232(%rdi)
+movl %r9d,236(%rdi)
+paddd 224(%rsp),%xmm13
+paddd 272(%rsp),%xmm9
+paddd 320(%rsp),%xmm3
+paddd 160(%rsp),%xmm2
+movd %xmm13,%rdx
+movd %xmm9,%rcx
+movd %xmm3,%r8
+movd %xmm2,%r9
+pshufd $0x39,%xmm13,%xmm13
+pshufd $0x39,%xmm9,%xmm9
+pshufd $0x39,%xmm3,%xmm3
+pshufd $0x39,%xmm2,%xmm2
+xorl 48(%rsi),%edx
+xorl 52(%rsi),%ecx
+xorl 56(%rsi),%r8d
+xorl 60(%rsi),%r9d
+movl %edx,48(%rdi)
+movl %ecx,52(%rdi)
+movl %r8d,56(%rdi)
+movl %r9d,60(%rdi)
+movd %xmm13,%rdx
+movd %xmm9,%rcx
+movd %xmm3,%r8
+movd %xmm2,%r9
+pshufd $0x39,%xmm13,%xmm13
+pshufd $0x39,%xmm9,%xmm9
+pshufd $0x39,%xmm3,%xmm3
+pshufd $0x39,%xmm2,%xmm2
+xorl 112(%rsi),%edx
+xorl 116(%rsi),%ecx
+xorl 120(%rsi),%r8d
+xorl 124(%rsi),%r9d
+movl %edx,112(%rdi)
+movl %ecx,116(%rdi)
+movl %r8d,120(%rdi)
+movl %r9d,124(%rdi)
+movd %xmm13,%rdx
+movd %xmm9,%rcx
+movd %xmm3,%r8
+movd %xmm2,%r9
+pshufd $0x39,%xmm13,%xmm13
+pshufd $0x39,%xmm9,%xmm9
+pshufd $0x39,%xmm3,%xmm3
+pshufd $0x39,%xmm2,%xmm2
+xorl 176(%rsi),%edx
+xorl 180(%rsi),%ecx
+xorl 184(%rsi),%r8d
+xorl 188(%rsi),%r9d
+movl %edx,176(%rdi)
+movl %ecx,180(%rdi)
+movl %r8d,184(%rdi)
+movl %r9d,188(%rdi)
+movd %xmm13,%rdx
+movd %xmm9,%rcx
+movd %xmm3,%r8
+movd %xmm2,%r9
+xorl 240(%rsi),%edx
+xorl 244(%rsi),%ecx
+xorl 248(%rsi),%r8d
+xorl 252(%rsi),%r9d
+movl %edx,240(%rdi)
+movl %ecx,244(%rdi)
+movl %r8d,248(%rdi)
+movl %r9d,252(%rdi)
+movq 480(%rsp),%r9
+sub $256,%r9
+add $256,%rsi
+add $256,%rdi
+cmp $256,%r9
+jae ._bytesatleast256
+cmp $0,%r9
+jbe ._done
+
+/*
+ * One-block-at-a-time path for the last 1..255 bytes.
+ * If fewer than 64 bytes remain, the real destination is parked in %rdx,
+ * the remaining input is copied to the scratch area at 0(%rsp), and both
+ * %rsi and %rdi are pointed at that scratch area so a full 64-byte block
+ * can be processed. Only the first %r9 bytes of the scratch area are
+ * written here.
+ */
+._bytesbetween1and255:
+cmp $64,%r9
+jae ._nocopy
+mov %rdi,%rdx
+leaq 0(%rsp),%rdi
+mov %r9,%rcx
+rep movsb
+leaq 0(%rsp),%rdi
+leaq 0(%rsp),%rsi
+
+/*
+ * Save the remaining length (%r9 is reused as scratch below), load the
+ * four state rows from 64..127(%rsp) and set the round counter to 20.
+ */
+._nocopy:
+movq %r9,480(%rsp)
+movdqa 112(%rsp),%xmm0
+movdqa 64(%rsp),%xmm1
+movdqa 80(%rsp),%xmm2
+movdqa 96(%rsp),%xmm3
+movdqa %xmm1,%xmm4
+mov $20,%rcx
+
+/*
+ * Round loop of the single-block path. The state lives in %xmm0..%xmm3
+ * (four words each); %xmm4..%xmm6 are scratch. %rcx starts at 20 and
+ * drops by 4 per pass, so one pass presumably covers four Salsa20 rounds.
+ * Rotations by 7, 9, 13 and 18 are built from pslld/psrld pairs, and
+ * pshufd $0x93 / $0x4e / $0x39 rotate the lanes of a row between steps.
+ * The flags set by "sub $4,%rcx" are consumed by the "ja" at the bottom;
+ * the SSE instructions in between do not modify the flags.
+ */
+._mainloop2:
+paddd %xmm0,%xmm4
+movdqa %xmm0,%xmm5
+movdqa %xmm4,%xmm6
+pslld $7,%xmm4
+psrld $25,%xmm6
+pxor %xmm4,%xmm3
+pxor %xmm6,%xmm3
+paddd %xmm3,%xmm5
+movdqa %xmm3,%xmm4
+movdqa %xmm5,%xmm6
+pslld $9,%xmm5
+psrld $23,%xmm6
+pxor %xmm5,%xmm2
+pshufd $0x93,%xmm3,%xmm3
+pxor %xmm6,%xmm2
+paddd %xmm2,%xmm4
+movdqa %xmm2,%xmm5
+movdqa %xmm4,%xmm6
+pslld $13,%xmm4
+psrld $19,%xmm6
+pxor %xmm4,%xmm1
+pshufd $0x4e,%xmm2,%xmm2
+pxor %xmm6,%xmm1
+paddd %xmm1,%xmm5
+movdqa %xmm3,%xmm4
+movdqa %xmm5,%xmm6
+pslld $18,%xmm5
+psrld $14,%xmm6
+pxor %xmm5,%xmm0
+pshufd $0x39,%xmm1,%xmm1
+pxor %xmm6,%xmm0
+paddd %xmm0,%xmm4
+movdqa %xmm0,%xmm5
+movdqa %xmm4,%xmm6
+pslld $7,%xmm4
+psrld $25,%xmm6
+pxor %xmm4,%xmm1
+pxor %xmm6,%xmm1
+paddd %xmm1,%xmm5
+movdqa %xmm1,%xmm4
+movdqa %xmm5,%xmm6
+pslld $9,%xmm5
+psrld $23,%xmm6
+pxor %xmm5,%xmm2
+pshufd $0x93,%xmm1,%xmm1
+pxor %xmm6,%xmm2
+paddd %xmm2,%xmm4
+movdqa %xmm2,%xmm5
+movdqa %xmm4,%xmm6
+pslld $13,%xmm4
+psrld $19,%xmm6
+pxor %xmm4,%xmm3
+pshufd $0x4e,%xmm2,%xmm2
+pxor %xmm6,%xmm3
+paddd %xmm3,%xmm5
+movdqa %xmm1,%xmm4
+movdqa %xmm5,%xmm6
+pslld $18,%xmm5
+psrld $14,%xmm6
+pxor %xmm5,%xmm0
+pshufd $0x39,%xmm3,%xmm3
+pxor %xmm6,%xmm0
+paddd %xmm0,%xmm4
+movdqa %xmm0,%xmm5
+movdqa %xmm4,%xmm6
+pslld $7,%xmm4
+psrld $25,%xmm6
+pxor %xmm4,%xmm3
+pxor %xmm6,%xmm3
+paddd %xmm3,%xmm5
+movdqa %xmm3,%xmm4
+movdqa %xmm5,%xmm6
+pslld $9,%xmm5
+psrld $23,%xmm6
+pxor %xmm5,%xmm2
+pshufd $0x93,%xmm3,%xmm3
+pxor %xmm6,%xmm2
+paddd %xmm2,%xmm4
+movdqa %xmm2,%xmm5
+movdqa %xmm4,%xmm6
+pslld $13,%xmm4
+psrld $19,%xmm6
+pxor %xmm4,%xmm1
+pshufd $0x4e,%xmm2,%xmm2
+pxor %xmm6,%xmm1
+paddd %xmm1,%xmm5
+movdqa %xmm3,%xmm4
+movdqa %xmm5,%xmm6
+pslld $18,%xmm5
+psrld $14,%xmm6
+pxor %xmm5,%xmm0
+pshufd $0x39,%xmm1,%xmm1
+pxor %xmm6,%xmm0
+paddd %xmm0,%xmm4
+movdqa %xmm0,%xmm5
+movdqa %xmm4,%xmm6
+pslld $7,%xmm4
+psrld $25,%xmm6
+pxor %xmm4,%xmm1
+pxor %xmm6,%xmm1
+paddd %xmm1,%xmm5
+movdqa %xmm1,%xmm4
+movdqa %xmm5,%xmm6
+pslld $9,%xmm5
+psrld $23,%xmm6
+pxor %xmm5,%xmm2
+pshufd $0x93,%xmm1,%xmm1
+pxor %xmm6,%xmm2
+paddd %xmm2,%xmm4
+movdqa %xmm2,%xmm5
+movdqa %xmm4,%xmm6
+pslld $13,%xmm4
+psrld $19,%xmm6
+pxor %xmm4,%xmm3
+pshufd $0x4e,%xmm2,%xmm2
+pxor %xmm6,%xmm3
+sub $4,%rcx
+paddd %xmm3,%xmm5
+movdqa %xmm1,%xmm4
+movdqa %xmm5,%xmm6
+pslld $18,%xmm5
+pxor %xmm7,%xmm7
+psrld $14,%xmm6
+pxor %xmm5,%xmm0
+pshufd $0x39,%xmm3,%xmm3
+pxor %xmm6,%xmm0
+ja ._mainloop2
+/*
+ * Output stage of the single-block path.
+ * Adds the saved input rows back onto the round output, then extracts
+ * lane 0 of each row (pshufd $0x39 rotates the next lane into place),
+ * XORs it with the input at (%rsi) and stores it at (%rdi). The byte
+ * offsets undo the permuted state layout set up at ._start.
+ * Afterwards the 64-bit block counter in 472(%rsp) is incremented and
+ * mirrored into the state words at 80(%rsp) / 4+96(%rsp). Then:
+ *   more than 64 bytes left -> ._bytesatleast65 (advance and loop)
+ *   exactly 64 bytes left   -> done
+ *   fewer than 64 left      -> copy %r9 bytes from the scratch block at
+ *                              %rdi to the real destination saved in %rdx
+ */
+paddd 112(%rsp),%xmm0
+paddd 64(%rsp),%xmm1
+paddd 80(%rsp),%xmm2
+paddd 96(%rsp),%xmm3
+movd %xmm0,%rcx
+movd %xmm1,%r8
+movd %xmm2,%r9
+movd %xmm3,%rax
+pshufd $0x39,%xmm0,%xmm0
+pshufd $0x39,%xmm1,%xmm1
+pshufd $0x39,%xmm2,%xmm2
+pshufd $0x39,%xmm3,%xmm3
+xorl 0(%rsi),%ecx
+xorl 48(%rsi),%r8d
+xorl 32(%rsi),%r9d
+xorl 16(%rsi),%eax
+movl %ecx,0(%rdi)
+movl %r8d,48(%rdi)
+movl %r9d,32(%rdi)
+movl %eax,16(%rdi)
+movd %xmm0,%rcx
+movd %xmm1,%r8
+movd %xmm2,%r9
+movd %xmm3,%rax
+pshufd $0x39,%xmm0,%xmm0
+pshufd $0x39,%xmm1,%xmm1
+pshufd $0x39,%xmm2,%xmm2
+pshufd $0x39,%xmm3,%xmm3
+xorl 20(%rsi),%ecx
+xorl 4(%rsi),%r8d
+xorl 52(%rsi),%r9d
+xorl 36(%rsi),%eax
+movl %ecx,20(%rdi)
+movl %r8d,4(%rdi)
+movl %r9d,52(%rdi)
+movl %eax,36(%rdi)
+movd %xmm0,%rcx
+movd %xmm1,%r8
+movd %xmm2,%r9
+movd %xmm3,%rax
+pshufd $0x39,%xmm0,%xmm0
+pshufd $0x39,%xmm1,%xmm1
+pshufd $0x39,%xmm2,%xmm2
+pshufd $0x39,%xmm3,%xmm3
+xorl 40(%rsi),%ecx
+xorl 24(%rsi),%r8d
+xorl 8(%rsi),%r9d
+xorl 56(%rsi),%eax
+movl %ecx,40(%rdi)
+movl %r8d,24(%rdi)
+movl %r9d,8(%rdi)
+movl %eax,56(%rdi)
+movd %xmm0,%rcx
+movd %xmm1,%r8
+movd %xmm2,%r9
+movd %xmm3,%rax
+xorl 60(%rsi),%ecx
+xorl 44(%rsi),%r8d
+xorl 28(%rsi),%r9d
+xorl 12(%rsi),%eax
+movl %ecx,60(%rdi)
+movl %r8d,44(%rdi)
+movl %r9d,28(%rdi)
+movl %eax,12(%rdi)
+movq 480(%rsp),%r9
+movq 472(%rsp),%rcx
+add $1,%rcx
+mov %rcx,%r8
+shr $32,%r8
+movl %ecx,80(%rsp)
+movl %r8d,4+96(%rsp)
+movq %rcx,472(%rsp)
+cmp $64,%r9
+ja ._bytesatleast65
+jae ._bytesatleast64
+mov %rdi,%rsi
+mov %rdx,%rdi
+mov %r9,%rcx
+rep movsb
+
+/*
+ * Common epilogue: restore the registers saved by the prologue, release
+ * the frame (its size was stored at 416(%rsp)) and return 0 in %rax.
+ * NOTE(review): the stack frame, which held the key-derived state, is not
+ * cleared before returning.
+ */
+._bytesatleast64:
+._done:
+movq 416(%rsp),%r11
+movq 424(%rsp),%r12
+movq 432(%rsp),%r13
+movq 440(%rsp),%r14
+movq 448(%rsp),%r15
+movq 456(%rsp),%rbx
+movq 464(%rsp),%rbp
+add %r11,%rsp
+xor %rax,%rax
+mov %rsi,%rdx
+ret
+
+/* More than one block left: consume 64 bytes and process the next block. */
+._bytesatleast65:
+sub $64,%r9
+add $64,%rdi
+add $64,%rsi
+jmp ._bytesbetween1and255
+
+#endif
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/src/libsodium/crypto_stream/salsa20/checksum b/src/libsodium/crypto_stream/salsa20/checksum
new file mode 100644
index 0000000..78ff05f
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa20/checksum
@@ -0,0 +1 @@
+44a3966eabcd3a2b13faca2150e38f2b7e6bac187d626618f50a9f875158ae78
diff --git a/src/libsodium/crypto_stream/salsa20/ref/api.h b/src/libsodium/crypto_stream/salsa20/ref/api.h
new file mode 100644
index 0000000..3616ea7
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa20/ref/api.h
@@ -0,0 +1,5 @@
+
+#include "crypto_stream_salsa20.h"
+
+/*
+ * The reference sources define functions under the generic names below;
+ * these macros rename them to the salsa20-specific symbols at compile time.
+ */
+#define crypto_stream crypto_stream_salsa20
+#define crypto_stream_xor crypto_stream_salsa20_xor
diff --git a/src/libsodium/crypto_stream/salsa20/ref/stream_salsa20_ref.c b/src/libsodium/crypto_stream/salsa20/ref/stream_salsa20_ref.c
new file mode 100644
index 0000000..6d3eacb
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa20/ref/stream_salsa20_ref.c
@@ -0,0 +1,61 @@
+/*
+version 20140420
+D. J. Bernstein
+Public domain.
+*/
+
+#include "api.h"
+#include "crypto_core_salsa20.h"
+#include "utils.h"
+
+#ifndef HAVE_AMD64_ASM
+
+typedef unsigned int uint32;
+
+/* The 16 ASCII bytes "expand 32-byte k", passed as the constant argument to crypto_core_salsa20(). */
+static const unsigned char sigma[16] = {
+ 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
+};
+
+/*
+ * Salsa20 keystream generator (crypto_stream is renamed to
+ * crypto_stream_salsa20 by api.h).
+ * Writes clen keystream bytes to c for the 8-byte nonce n and 32-byte key k,
+ * starting at block counter 0. Always returns 0.
+ */
+int crypto_stream(
+    unsigned char *c, unsigned long long clen,
+    const unsigned char *n,
+    const unsigned char *k
+)
+{
+    unsigned char in[16];    /* nonce (8 bytes) followed by the little-endian block counter (8 bytes) */
+    unsigned char block[64]; /* staging area for a trailing partial block */
+    unsigned char kcopy[32]; /* private key copy; presumably so writes through c cannot disturb the key -- confirm */
+    unsigned long long i;
+    unsigned int carry;
+
+    if (clen == 0) {
+        return 0;
+    }
+    for (i = 0; i < 32; ++i) {
+        kcopy[i] = k[i];
+    }
+    for (i = 0; i < 16; ++i) {
+        in[i] = (i < 8) ? n[i] : 0;
+    }
+
+    /* full blocks are generated straight into the output buffer */
+    for (; clen >= 64; clen -= 64, c += 64) {
+        crypto_core_salsa20(c, in, kcopy, sigma);
+
+        /* add 1 to the 64-bit little-endian counter in in[8..15] */
+        carry = 1;
+        for (i = 8; i < 16; ++i) {
+            carry += (unsigned int) in[i];
+            in[i] = carry;
+            carry >>= 8;
+        }
+    }
+
+    if (clen != 0) {
+        crypto_core_salsa20(block, in, kcopy, sigma);
+        for (i = 0; i < clen; ++i) {
+            c[i] = block[i];
+        }
+    }
+    sodium_memzero(block, sizeof block);
+    sodium_memzero(kcopy, sizeof kcopy);
+
+    return 0;
+}
+
+#endif
diff --git a/src/libsodium/crypto_stream/salsa20/ref/xor_salsa20_ref.c b/src/libsodium/crypto_stream/salsa20/ref/xor_salsa20_ref.c
new file mode 100644
index 0000000..19cd79e
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa20/ref/xor_salsa20_ref.c
@@ -0,0 +1,69 @@
+/*
+version 20140420
+D. J. Bernstein
+Public domain.
+*/
+
+#include <stdint.h>
+
+#include "api.h"
+#include "crypto_core_salsa20.h"
+#include "utils.h"
+
+#ifndef HAVE_AMD64_ASM
+
+typedef unsigned int uint32;
+
+/* The 16 ASCII bytes "expand 32-byte k", passed as the constant argument to crypto_core_salsa20(). */
+static const unsigned char sigma[16] = {
+ 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
+};
+
+/*
+ * c = m XOR Salsa20 keystream for the 8-byte nonce n and 32-byte key k,
+ * with the keystream starting at 64-byte block number ic.
+ * Always returns 0; mlen == 0 is a no-op.
+ */
+int crypto_stream_salsa20_xor_ic(
+    unsigned char *c,
+    const unsigned char *m, unsigned long long mlen,
+    const unsigned char *n, uint64_t ic,
+    const unsigned char *k
+)
+{
+    unsigned char in[16];    /* nonce (8 bytes) followed by the little-endian block counter (8 bytes) */
+    unsigned char block[64]; /* current keystream block */
+    unsigned char kcopy[32]; /* private key copy; presumably so writes through c cannot disturb the key -- confirm */
+    unsigned long long i;
+    unsigned int carry;
+
+    if (mlen == 0) {
+        return 0;
+    }
+    for (i = 0; i < 32; ++i) {
+        kcopy[i] = k[i];
+    }
+    for (i = 0; i < 8; ++i) {
+        in[i] = n[i];
+        /* byte i of the initial counter, least significant first */
+        in[8 + i] = (unsigned char) ((ic >> (8 * i)) & 0xff);
+    }
+
+    for (; mlen >= 64; mlen -= 64, c += 64, m += 64) {
+        crypto_core_salsa20(block, in, kcopy, sigma);
+        for (i = 0; i < 64; ++i) {
+            c[i] = m[i] ^ block[i];
+        }
+
+        /* add 1 to the 64-bit little-endian counter in in[8..15] */
+        carry = 1;
+        for (i = 8; i < 16; ++i) {
+            carry += (unsigned int) in[i];
+            in[i] = carry;
+            carry >>= 8;
+        }
+    }
+
+    if (mlen != 0) {
+        crypto_core_salsa20(block, in, kcopy, sigma);
+        for (i = 0; i < mlen; ++i) {
+            c[i] = m[i] ^ block[i];
+        }
+    }
+    sodium_memzero(block, sizeof block);
+    sodium_memzero(kcopy, sizeof kcopy);
+
+    return 0;
+}
+
+#endif
diff --git a/src/libsodium/crypto_stream/salsa20/stream_salsa20_api.c b/src/libsodium/crypto_stream/salsa20/stream_salsa20_api.c
new file mode 100644
index 0000000..3bc0580
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa20/stream_salsa20_api.c
@@ -0,0 +1,19 @@
+#include "crypto_stream_salsa20.h"
+
+/* Runtime accessor for the crypto_stream_salsa20_KEYBYTES constant. */
+size_t
+crypto_stream_salsa20_keybytes(void)
+{
+    const size_t key_len = crypto_stream_salsa20_KEYBYTES;
+
+    return key_len;
+}
+
+/* Runtime accessor for the crypto_stream_salsa20_NONCEBYTES constant. */
+size_t
+crypto_stream_salsa20_noncebytes(void)
+{
+    const size_t nonce_len = crypto_stream_salsa20_NONCEBYTES;
+
+    return nonce_len;
+}
+
+/* Stream XOR starting at block 0: the _xor_ic variant with the initial counter fixed at 0. */
+int
+crypto_stream_salsa20_xor(unsigned char *c, const unsigned char *m,
+                          unsigned long long mlen, const unsigned char *n,
+                          const unsigned char *k)
+{
+    const int rc = crypto_stream_salsa20_xor_ic(c, m, mlen, n, 0U, k);
+
+    return rc;
+}
diff --git a/src/libsodium/crypto_stream/salsa2012/checksum b/src/libsodium/crypto_stream/salsa2012/checksum
new file mode 100644
index 0000000..f801d9e
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa2012/checksum
@@ -0,0 +1 @@
+ecc758f200061c3cc770b25797da73583548d4f90f69a967fbbe1a6d94d1705c
diff --git a/src/libsodium/crypto_stream/salsa2012/ref/api.h b/src/libsodium/crypto_stream/salsa2012/ref/api.h
new file mode 100644
index 0000000..0efe8b8
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa2012/ref/api.h
@@ -0,0 +1,10 @@
+
+#include "crypto_stream_salsa2012.h"
+
+/* Map the generic crypto_stream* names onto their salsa2012-specific
+ * counterparts, so sources that include this header can be written against
+ * the generic names. */
+#define crypto_stream crypto_stream_salsa2012
+#define crypto_stream_xor crypto_stream_salsa2012_xor
+#define crypto_stream_KEYBYTES crypto_stream_salsa2012_KEYBYTES
+#define crypto_stream_NONCEBYTES crypto_stream_salsa2012_NONCEBYTES
+#define crypto_stream_IMPLEMENTATION crypto_stream_salsa2012_IMPLEMENTATION
+#define crypto_stream_VERSION crypto_stream_salsa2012_VERSION
+
diff --git a/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012.c b/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012.c
new file mode 100644
index 0000000..793adaa
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012.c
@@ -0,0 +1,51 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "api.h"
+#include "crypto_core_salsa2012.h"
+#include "utils.h"
+
+typedef unsigned int uint32;
+
+/* 16-byte constant spelling "expand 32-byte k"; passed as the last argument
+ * of every crypto_core_salsa2012() call. */
+static const unsigned char sigma[16] = {
+  'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
+};
+
+/*
+ * Write clen bytes of keystream to c.
+ *
+ * Each 64-byte keystream block is the output of crypto_core_salsa2012() on
+ * a 16-byte input (the 8 bytes at n followed by a 64-bit little-endian block
+ * counter starting at 0), with k as the key argument.  Full blocks are
+ * generated directly into c; a trailing partial block goes through a local
+ * buffer.
+ *
+ * Always returns 0.  When clen is 0 the function returns before touching
+ * any of its pointer arguments.
+ */
+int crypto_stream(
+  unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  unsigned long long i;
+  unsigned int u;
+
+  if (!clen) {
+    return 0;
+  }
+
+  for (i = 0;i < 8;++i) {
+    in[i] = n[i];
+  }
+  for (i = 8;i < 16;++i) {
+    in[i] = 0;
+  }
+
+  while (clen >= 64) {
+    crypto_core_salsa2012(c,in,k,sigma);
+
+    /* Add one to the counter in in[8..15], rippling the carry upwards. */
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = (unsigned char) u;
+      u >>= 8;
+    }
+
+    clen -= 64;
+    c += 64;
+  }
+
+  if (clen) {
+    crypto_core_salsa2012(block,in,k,sigma);
+    for (i = 0;i < clen;++i) {
+      c[i] = block[i];
+    }
+    /* block still holds the 64 - clen keystream bytes that were NOT handed
+     * to the caller; do not leave them behind on the stack. */
+    sodium_memzero(block, sizeof block);
+  }
+  return 0;
+}
diff --git a/src/libsodium/crypto_stream/salsa2012/ref/xor_salsa2012.c b/src/libsodium/crypto_stream/salsa2012/ref/xor_salsa2012.c
new file mode 100644
index 0000000..5970ca4
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa2012/ref/xor_salsa2012.c
@@ -0,0 +1,54 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "api.h"
+#include "crypto_core_salsa2012.h"
+#include "utils.h"
+
+typedef unsigned int uint32;
+
+/* 16-byte constant spelling "expand 32-byte k"; passed as the last argument
+ * of every crypto_core_salsa2012() call. */
+static const unsigned char sigma[16] = {
+  'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
+};
+
+/*
+ * XOR mlen bytes of m with a keystream and store the result in c.
+ *
+ * Each 64-byte keystream block is the output of crypto_core_salsa2012() on
+ * a 16-byte input (the 8 bytes at n followed by a 64-bit little-endian block
+ * counter starting at 0), with k as the key argument.  The local keystream
+ * buffer is cleared with sodium_memzero() before returning.
+ *
+ * Always returns 0.  When mlen is 0 the function returns before touching
+ * any of its pointer arguments.
+ */
+int crypto_stream_xor(
+  unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  unsigned long long i;
+  unsigned int u;
+
+  if (!mlen) {
+    return 0;
+  }
+
+  for (i = 0;i < 8;++i) {
+    in[i] = n[i];
+  }
+  for (i = 8;i < 16;++i) {
+    in[i] = 0;
+  }
+
+  while (mlen >= 64) {
+    crypto_core_salsa2012(block,in,k,sigma);
+    for (i = 0;i < 64;++i) {
+      c[i] = m[i] ^ block[i];
+    }
+
+    /* Add one to the counter in in[8..15], rippling the carry upwards. */
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = (unsigned char) u;
+      u >>= 8;
+    }
+
+    mlen -= 64;
+    c += 64;
+    m += 64;
+  }
+
+  if (mlen) {
+    crypto_core_salsa2012(block,in,k,sigma);
+    for (i = 0;i < mlen;++i) {
+      c[i] = m[i] ^ block[i];
+    }
+  }
+  /* block holds raw keystream (which would decrypt the data just processed);
+   * do not leave it behind on the stack. */
+  sodium_memzero(block, sizeof block);
+  return 0;
+}
diff --git a/src/libsodium/crypto_stream/salsa2012/stream_salsa2012_api.c b/src/libsodium/crypto_stream/salsa2012/stream_salsa2012_api.c
new file mode 100644
index 0000000..3b5685f
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa2012/stream_salsa2012_api.c
@@ -0,0 +1,11 @@
+#include "crypto_stream_salsa2012.h"
+
+/* Function form of the crypto_stream_salsa2012_KEYBYTES constant: returns
+ * its value as a size_t. */
+size_t
+crypto_stream_salsa2012_keybytes(void) {
+ return crypto_stream_salsa2012_KEYBYTES;
+}
+
+/* Function form of the crypto_stream_salsa2012_NONCEBYTES constant: returns
+ * its value as a size_t. */
+size_t
+crypto_stream_salsa2012_noncebytes(void) {
+ return crypto_stream_salsa2012_NONCEBYTES;
+}
diff --git a/src/libsodium/crypto_stream/salsa208/checksum b/src/libsodium/crypto_stream/salsa208/checksum
new file mode 100644
index 0000000..c87364e
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa208/checksum
@@ -0,0 +1 @@
+05f32b0647417aaa446b0b3127318133cf9af32b771869eab267000bf02710cd
diff --git a/src/libsodium/crypto_stream/salsa208/ref/api.h b/src/libsodium/crypto_stream/salsa208/ref/api.h
new file mode 100644
index 0000000..14b4a77
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa208/ref/api.h
@@ -0,0 +1,9 @@
+
+#include "crypto_stream_salsa208.h"
+
+/* Map the generic crypto_stream* names onto their salsa208-specific
+ * counterparts, so sources that include this header can be written against
+ * the generic names. */
+#define crypto_stream crypto_stream_salsa208
+#define crypto_stream_xor crypto_stream_salsa208_xor
+#define crypto_stream_KEYBYTES crypto_stream_salsa208_KEYBYTES
+#define crypto_stream_NONCEBYTES crypto_stream_salsa208_NONCEBYTES
+#define crypto_stream_IMPLEMENTATION crypto_stream_salsa208_IMPLEMENTATION
+#define crypto_stream_VERSION crypto_stream_salsa208_VERSION
diff --git a/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208.c b/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208.c
new file mode 100644
index 0000000..0889002
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208.c
@@ -0,0 +1,51 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "api.h"
+#include "crypto_core_salsa208.h"
+#include "utils.h"
+
+typedef unsigned int uint32;
+
+/* 16-byte constant spelling "expand 32-byte k"; passed as the last argument
+ * of every crypto_core_salsa208() call. */
+static const unsigned char sigma[16] = {
+  'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
+};
+
+/*
+ * Write clen bytes of keystream to c.
+ *
+ * Each 64-byte keystream block is the output of crypto_core_salsa208() on a
+ * 16-byte input (the 8 bytes at n followed by a 64-bit little-endian block
+ * counter starting at 0), with k as the key argument.  Full blocks are
+ * generated directly into c; a trailing partial block goes through a local
+ * buffer.
+ *
+ * Always returns 0.  When clen is 0 the function returns before touching
+ * any of its pointer arguments.
+ */
+int crypto_stream(
+  unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  unsigned long long i;
+  unsigned int u;
+
+  if (!clen) {
+    return 0;
+  }
+
+  for (i = 0;i < 8;++i) {
+    in[i] = n[i];
+  }
+  for (i = 8;i < 16;++i) {
+    in[i] = 0;
+  }
+
+  while (clen >= 64) {
+    crypto_core_salsa208(c,in,k,sigma);
+
+    /* Add one to the counter in in[8..15], rippling the carry upwards. */
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = (unsigned char) u;
+      u >>= 8;
+    }
+
+    clen -= 64;
+    c += 64;
+  }
+
+  if (clen) {
+    crypto_core_salsa208(block,in,k,sigma);
+    for (i = 0;i < clen;++i) {
+      c[i] = block[i];
+    }
+    /* block still holds the 64 - clen keystream bytes that were NOT handed
+     * to the caller; do not leave them behind on the stack. */
+    sodium_memzero(block, sizeof block);
+  }
+  return 0;
+}
diff --git a/src/libsodium/crypto_stream/salsa208/ref/xor_salsa208.c b/src/libsodium/crypto_stream/salsa208/ref/xor_salsa208.c
new file mode 100644
index 0000000..9f6dac5
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa208/ref/xor_salsa208.c
@@ -0,0 +1,54 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "api.h"
+#include "crypto_core_salsa208.h"
+#include "utils.h"
+
+typedef unsigned int uint32;
+
+/* 16-byte constant spelling "expand 32-byte k"; passed as the last argument
+ * of every crypto_core_salsa208() call. */
+static const unsigned char sigma[16] = {
+  'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
+};
+
+/*
+ * XOR mlen bytes of m with a keystream and store the result in c.
+ *
+ * Each 64-byte keystream block is the output of crypto_core_salsa208() on a
+ * 16-byte input (the 8 bytes at n followed by a 64-bit little-endian block
+ * counter starting at 0), with k as the key argument.  The local keystream
+ * buffer is cleared with sodium_memzero() before returning.
+ *
+ * Always returns 0.  When mlen is 0 the function returns before touching
+ * any of its pointer arguments.
+ */
+int crypto_stream_xor(
+  unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  unsigned long long i;
+  unsigned int u;
+
+  if (!mlen) {
+    return 0;
+  }
+
+  for (i = 0;i < 8;++i) {
+    in[i] = n[i];
+  }
+  for (i = 8;i < 16;++i) {
+    in[i] = 0;
+  }
+
+  while (mlen >= 64) {
+    crypto_core_salsa208(block,in,k,sigma);
+    for (i = 0;i < 64;++i) {
+      c[i] = m[i] ^ block[i];
+    }
+
+    /* Add one to the counter in in[8..15], rippling the carry upwards. */
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = (unsigned char) u;
+      u >>= 8;
+    }
+
+    mlen -= 64;
+    c += 64;
+    m += 64;
+  }
+
+  if (mlen) {
+    crypto_core_salsa208(block,in,k,sigma);
+    for (i = 0;i < mlen;++i) {
+      c[i] = m[i] ^ block[i];
+    }
+  }
+  /* block holds raw keystream (which would decrypt the data just processed);
+   * do not leave it behind on the stack. */
+  sodium_memzero(block, sizeof block);
+  return 0;
+}
diff --git a/src/libsodium/crypto_stream/salsa208/stream_salsa208_api.c b/src/libsodium/crypto_stream/salsa208/stream_salsa208_api.c
new file mode 100644
index 0000000..640a8b2
--- /dev/null
+++ b/src/libsodium/crypto_stream/salsa208/stream_salsa208_api.c
@@ -0,0 +1,11 @@
+#include "crypto_stream_salsa208.h"
+
+/* Function form of the crypto_stream_salsa208_KEYBYTES constant: returns
+ * its value as a size_t. */
+size_t
+crypto_stream_salsa208_keybytes(void) {
+ return crypto_stream_salsa208_KEYBYTES;
+}
+
+/* Function form of the crypto_stream_salsa208_NONCEBYTES constant: returns
+ * its value as a size_t. */
+size_t
+crypto_stream_salsa208_noncebytes(void) {
+ return crypto_stream_salsa208_NONCEBYTES;
+}
diff --git a/src/libsodium/crypto_stream/try.c b/src/libsodium/crypto_stream/try.c
new file mode 100644
index 0000000..61bf8ab
--- /dev/null
+++ b/src/libsodium/crypto_stream/try.c
@@ -0,0 +1,122 @@
+/*
+ * crypto_stream/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <stdlib.h>
+#include "crypto_stream.h"
+#include "utils.h"
+#include "windows/windows-quirks.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+/* Implementation identifier string of the primitive under test, exported
+ * under a fixed name (presumably read by the surrounding harness, which is
+ * not part of this file). */
+const char *primitiveimplementation = crypto_stream_IMPLEMENTATION;
+
+/* Size passed to alignedcalloc() for each message/ciphertext/stream buffer. */
+#define MAXTEST_BYTES 10000
+/* checksum_compute() exercises every length from 0 to CHECKSUM_BYTES - 1. */
+#define CHECKSUM_BYTES 4096
+/* Message length used by doit(). */
+#define TUNE_BYTES 1536
+
+/* Working buffers: key, nonce, message, ciphertext and raw keystream. */
+static unsigned char *k;
+static unsigned char *n;
+static unsigned char *m;
+static unsigned char *c;
+static unsigned char *s;
+/* Snapshots of the buffers above; checksum_compute() compares against them
+ * to detect unexpected writes. */
+static unsigned char *k2;
+static unsigned char *n2;
+static unsigned char *m2;
+static unsigned char *c2;
+static unsigned char *s2;
+
+/* Intentionally empty.  Presumably a hook expected by the surrounding
+ * harness before allocate() -- TODO confirm against the caller. */
+void preallocate(void)
+{
+}
+
+/* Obtains all ten test buffers from alignedcalloc(): key- and nonce-sized
+ * buffers for k/k2 and n/n2, MAXTEST_BYTES for each of m, c, s and their
+ * snapshots.  The returned pointers are not checked here. */
+void allocate(void)
+{
+ k = alignedcalloc(crypto_stream_KEYBYTES);
+ n = alignedcalloc(crypto_stream_NONCEBYTES);
+ m = alignedcalloc(MAXTEST_BYTES);
+ c = alignedcalloc(MAXTEST_BYTES);
+ s = alignedcalloc(MAXTEST_BYTES);
+ k2 = alignedcalloc(crypto_stream_KEYBYTES);
+ n2 = alignedcalloc(crypto_stream_NONCEBYTES);
+ m2 = alignedcalloc(MAXTEST_BYTES);
+ c2 = alignedcalloc(MAXTEST_BYTES);
+ s2 = alignedcalloc(MAXTEST_BYTES);
+}
+
+/* Intentionally empty.  Presumably a hook expected by the surrounding
+ * harness before doit() -- TODO confirm against the caller. */
+void predoit(void)
+{
+}
+
+/* One crypto_stream_xor() call over the first TUNE_BYTES bytes of m, writing
+ * to c with the current n and k; the return value is ignored. */
+void doit(void)
+{
+ crypto_stream_xor(c,m,TUNE_BYTES,n,k);
+}
+
+/* Output of checksum_compute(): sized for two hex digits per key byte plus
+ * one extra byte (presumably the terminating NUL written by
+ * sodium_bin2hex()). */
+char checksum[crypto_stream_KEYBYTES * 2 + 1];
+
+/*
+ * Self-test for crypto_stream() and crypto_stream_xor().
+ *
+ * For every length i in [0, CHECKSUM_BYTES) it:
+ *   - fills the 16 bytes before and after each buffer with rand() values
+ *     and snapshots all buffers,
+ *   - runs both functions and checks that inputs and the guard bytes around
+ *     the outputs are unchanged,
+ *   - checks that crypto_stream_xor() output equals message XOR
+ *     crypto_stream() output,
+ *   - folds the ciphertext into k and the message into n so that each
+ *     iteration depends on all previous ones.
+ *
+ * Returns a static error string describing the first failed check, or 0
+ * after hex-encoding the final k into checksum[].
+ *
+ * NOTE(review): the negative and past-the-end indexing assumes that
+ * alignedcalloc() returns pointers with at least 16 accessible bytes on each
+ * side of the requested region -- not visible here, confirm against its
+ * definition.  The order of the rand() calls and buffer updates determines
+ * the final checksum; do not reorder statements.
+ */
+const char *checksum_compute(void)
+{
+ long long i;
+ long long j;
+
+ for (i = 0;i < CHECKSUM_BYTES;++i) {
+ long long mlen = i;
+ long long clen = i;
+ long long slen = i;
+ long long klen = crypto_stream_KEYBYTES;
+ long long nlen = crypto_stream_NONCEBYTES;
+ /* Randomise the 16 guard bytes in front of every buffer... */
+ for (j = -16;j < 0;++j) m[j] = rand();
+ for (j = -16;j < 0;++j) c[j] = rand();
+ for (j = -16;j < 0;++j) s[j] = rand();
+ for (j = -16;j < 0;++j) n[j] = rand();
+ for (j = -16;j < 0;++j) k[j] = rand();
+ /* ...and the 16 guard bytes right after the used part of each buffer. */
+ for (j = mlen;j < mlen + 16;++j) m[j] = rand();
+ for (j = clen;j < clen + 16;++j) c[j] = rand();
+ for (j = slen;j < slen + 16;++j) s[j] = rand();
+ for (j = nlen;j < nlen + 16;++j) n[j] = rand();
+ for (j = klen;j < klen + 16;++j) k[j] = rand();
+ /* Snapshot everything, guards included, for the comparisons below. */
+ for (j = -16;j < mlen + 16;++j) m2[j] = m[j];
+ for (j = -16;j < clen + 16;++j) c2[j] = c[j];
+ for (j = -16;j < slen + 16;++j) s2[j] = s[j];
+ for (j = -16;j < nlen + 16;++j) n2[j] = n[j];
+ for (j = -16;j < klen + 16;++j) k2[j] = k[j];
+
+ crypto_stream_xor(c,m,mlen,n,k);
+
+ /* Inputs and the unrelated s buffer must be untouched; c may only have
+ * changed inside [0, clen). */
+ for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_stream_xor overwrites m";
+ for (j = -16;j < slen + 16;++j) if (s[j] != s2[j]) return "crypto_stream_xor overwrites s";
+ for (j = -16;j < nlen + 16;++j) if (n[j] != n2[j]) return "crypto_stream_xor overwrites n";
+ for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_stream_xor overwrites k";
+ for (j = -16;j < 0;++j) if (c[j] != c2[j]) return "crypto_stream_xor writes before output";
+ for (j = clen;j < clen + 16;++j) if (c[j] != c2[j]) return "crypto_stream_xor writes after output";
+
+ /* Refresh the c snapshot so the next round of checks sees the ciphertext. */
+ for (j = -16;j < clen + 16;++j) c2[j] = c[j];
+
+ crypto_stream(s,slen,n,k);
+
+ /* Same checks for crypto_stream(): only s[0, slen) may have changed. */
+ for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_stream overwrites m";
+ for (j = -16;j < clen + 16;++j) if (c[j] != c2[j]) return "crypto_stream overwrites c";
+ for (j = -16;j < nlen + 16;++j) if (n[j] != n2[j]) return "crypto_stream overwrites n";
+ for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_stream overwrites k";
+ for (j = -16;j < 0;++j) if (s[j] != s2[j]) return "crypto_stream writes before output";
+ for (j = slen;j < slen + 16;++j) if (s[j] != s2[j]) return "crypto_stream writes after output";
+
+ /* The xor variant must equal message XOR raw keystream. */
+ for (j = 0;j < mlen;++j)
+ if ((s[j] ^ m[j]) != c[j]) return "crypto_stream_xor does not match crypto_stream";
+
+ /* Fold the ciphertext into the key, then repeat the consistency check in
+ * the other direction (c as input, m as output) under the new key. */
+ for (j = 0;j < clen;++j) k[j % klen] ^= c[j];
+ crypto_stream_xor(m,c,clen,n,k);
+ crypto_stream(s,slen,n,k);
+ for (j = 0;j < mlen;++j)
+ if ((s[j] ^ m[j]) != c[j]) return "crypto_stream_xor does not match crypto_stream";
+ /* Fold the new message into the nonce for the next iteration, and zero
+ * the byte that becomes the last message byte when the length grows. */
+ for (j = 0;j < mlen;++j) n[j % nlen] ^= m[j];
+ m[mlen] = 0;
+ }
+
+ /* The accumulated key is the checksum. */
+ sodium_bin2hex(checksum, sizeof checksum, k, crypto_stream_KEYBYTES);
+
+ return 0;
+}
diff --git a/src/libsodium/crypto_stream/xsalsa20/checksum b/src/libsodium/crypto_stream/xsalsa20/checksum
new file mode 100644
index 0000000..cae64c0
--- /dev/null
+++ b/src/libsodium/crypto_stream/xsalsa20/checksum
@@ -0,0 +1 @@
+201bc58a96adcb6ed339ca33c188af8ca04a4ce68be1e0953309ee09a0cf8e7a
diff --git a/src/libsodium/crypto_stream/xsalsa20/ref/api.h b/src/libsodium/crypto_stream/xsalsa20/ref/api.h
new file mode 100644
index 0000000..58915f3
--- /dev/null
+++ b/src/libsodium/crypto_stream/xsalsa20/ref/api.h
@@ -0,0 +1,10 @@
+
+#include "crypto_stream_xsalsa20.h"
+
+/* Map the generic crypto_stream* names onto their xsalsa20-specific
+ * counterparts, so sources that include this header can be written against
+ * the generic names. */
+#define crypto_stream crypto_stream_xsalsa20
+#define crypto_stream_xor crypto_stream_xsalsa20_xor
+#define crypto_stream_KEYBYTES crypto_stream_xsalsa20_KEYBYTES
+#define crypto_stream_NONCEBYTES crypto_stream_xsalsa20_NONCEBYTES
+#define crypto_stream_IMPLEMENTATION crypto_stream_xsalsa20_IMPLEMENTATION
+#define crypto_stream_VERSION crypto_stream_xsalsa20_VERSION
+
diff --git a/src/libsodium/crypto_stream/xsalsa20/ref/stream_xsalsa20.c b/src/libsodium/crypto_stream/xsalsa20/ref/stream_xsalsa20.c
new file mode 100644
index 0000000..50f8788
--- /dev/null
+++ b/src/libsodium/crypto_stream/xsalsa20/ref/stream_xsalsa20.c
@@ -0,0 +1,24 @@
+/*
+version 20080914
+D. J. Bernstein
+Public domain.
+*/
+
+#include "api.h"
+#include "crypto_core_hsalsa20.h"
+#include "crypto_stream_salsa20.h"
+#include "utils.h"
+
+/* 16-byte constant spelling "expand 32-byte k"; passed as the last argument
+ * of crypto_core_hsalsa20(). */
+static const unsigned char sigma[16] = {
+  'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
+};
+
+/*
+ * Write clen bytes of keystream to c.
+ *
+ * A 32-byte subkey is first derived with crypto_core_hsalsa20() from the
+ * nonce bytes at n and the key k; crypto_stream_salsa20() is then run with
+ * that subkey and the nonce bytes starting at n + 16.  The subkey is wiped
+ * with sodium_memzero() before returning.
+ *
+ * Returns whatever crypto_stream_salsa20() returns.
+ */
+int crypto_stream(
+  unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char subkey[32];
+  int ret;
+
+  crypto_core_hsalsa20(subkey,n,k,sigma);
+  ret = crypto_stream_salsa20(c,clen,n + 16,subkey);
+  /* subkey is key material: do not leave it behind on the stack. */
+  sodium_memzero(subkey, sizeof subkey);
+  return ret;
+}
diff --git a/src/libsodium/crypto_stream/xsalsa20/ref/xor_xsalsa20.c b/src/libsodium/crypto_stream/xsalsa20/ref/xor_xsalsa20.c
new file mode 100644
index 0000000..14cce18
--- /dev/null
+++ b/src/libsodium/crypto_stream/xsalsa20/ref/xor_xsalsa20.c
@@ -0,0 +1,25 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "api.h"
+#include "crypto_core_hsalsa20.h"
+#include "crypto_stream_salsa20.h"
+#include "utils.h"
+
+/* 16-byte constant spelling "expand 32-byte k"; passed as the last argument
+ * of crypto_core_hsalsa20(). */
+static const unsigned char sigma[16] = {
+  'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
+};
+
+/*
+ * XOR mlen bytes of m with a keystream and store the result in c.
+ *
+ * A 32-byte subkey is first derived with crypto_core_hsalsa20() from the
+ * nonce bytes at n and the key k; crypto_stream_salsa20_xor() is then run
+ * with that subkey and the nonce bytes starting at n + 16.  The subkey is
+ * wiped with sodium_memzero() before returning.
+ *
+ * Returns whatever crypto_stream_salsa20_xor() returns.
+ */
+int crypto_stream_xor(
+  unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char subkey[32];
+  int ret;
+
+  crypto_core_hsalsa20(subkey,n,k,sigma);
+  ret = crypto_stream_salsa20_xor(c,m,mlen,n + 16,subkey);
+  /* subkey is key material: do not leave it behind on the stack. */
+  sodium_memzero(subkey, sizeof subkey);
+  return ret;
+}
diff --git a/src/libsodium/crypto_stream/xsalsa20/stream_xsalsa20_api.c b/src/libsodium/crypto_stream/xsalsa20/stream_xsalsa20_api.c
new file mode 100644
index 0000000..256084e
--- /dev/null
+++ b/src/libsodium/crypto_stream/xsalsa20/stream_xsalsa20_api.c
@@ -0,0 +1,11 @@
+#include "crypto_stream_xsalsa20.h"
+
+/* Function form of the crypto_stream_xsalsa20_KEYBYTES constant: returns
+ * its value as a size_t. */
+size_t
+crypto_stream_xsalsa20_keybytes(void) {
+ return crypto_stream_xsalsa20_KEYBYTES;
+}
+
+/* Function form of the crypto_stream_xsalsa20_NONCEBYTES constant: returns
+ * its value as a size_t. */
+size_t
+crypto_stream_xsalsa20_noncebytes(void) {
+ return crypto_stream_xsalsa20_NONCEBYTES;
+}