From 184b064b669a41cba6f42dfc980551009e106710 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Parm=C3=A9nides=20GV?= Date: Tue, 23 Sep 2014 15:57:33 +0200 Subject: Check if there is a release signingConfig. F-Droid strips the signingConfigs section from build.gradle, so buildTypes mustn't check if there is a release signing configuration. --- app/build.gradle | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/build.gradle b/app/build.gradle index 822c7124..78f2f791 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -24,7 +24,8 @@ android { buildTypes { release { //runProguard true - signingConfig signingConfigs.release.isSigningReady() ? signingConfigs.release : signingConfigs.debug + if(signingConfigs.contains(release)) + signingConfig signingConfigs.release.isSigningReady() ? signingConfigs.release : signingConfigs.debug } } -- cgit v1.2.3 From eb4e6620cf4d183c38aec826ee67d9dab6775390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Parm=C3=A9nides=20GV?= Date: Tue, 23 Sep 2014 16:40:53 +0200 Subject: Without breakpad, removed binaries. --- app/jni/Android.mk | 4 ++-- app/src/main/assets/minivpn.armeabi | Bin 5216 -> 0 bytes app/src/main/assets/minivpn.armeabi-v7a | Bin 5228 -> 0 bytes app/src/main/assets/minivpn.mips | Bin 5164 -> 0 bytes app/src/main/assets/minivpn.x86 | Bin 5268 -> 0 bytes 5 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 app/src/main/assets/minivpn.armeabi delete mode 100644 app/src/main/assets/minivpn.armeabi-v7a delete mode 100644 app/src/main/assets/minivpn.mips delete mode 100644 app/src/main/assets/minivpn.x86 diff --git a/app/jni/Android.mk b/app/jni/Android.mk index 33d0bc76..0c906f97 100644 --- a/app/jni/Android.mk +++ b/app/jni/Android.mk @@ -5,7 +5,7 @@ JNI_DIR := $(call my-dir) #WITH_POLAR=1 #WITH_OPENVPN3=1 # Build openvpn with polar (OpenVPN3 core is always build with polar) -#WITH_BREAKPAD=0 +WITH_BREAKPAD=0 include lzo/Android.mk @@ -13,7 +13,7 @@ include snappy/Android.mk include openssl/Android.mk -ifneq ($(USE_BREAKPAD),0) +ifneq ($(WITH_BREAKPAD),0) ifneq ($(TARGET_ARCH),mips) WITH_BREAKPAD=1 include google-breakpad/android/google_breakpad/Android.mk diff --git a/app/src/main/assets/minivpn.armeabi b/app/src/main/assets/minivpn.armeabi deleted file mode 100644 index 7018dbc9..00000000 Binary files a/app/src/main/assets/minivpn.armeabi and /dev/null differ diff --git a/app/src/main/assets/minivpn.armeabi-v7a b/app/src/main/assets/minivpn.armeabi-v7a deleted file mode 100644 index a8e01017..00000000 Binary files a/app/src/main/assets/minivpn.armeabi-v7a and /dev/null differ diff --git a/app/src/main/assets/minivpn.mips b/app/src/main/assets/minivpn.mips deleted file mode 100644 index c44e56c5..00000000 Binary files a/app/src/main/assets/minivpn.mips and /dev/null differ diff --git a/app/src/main/assets/minivpn.x86 b/app/src/main/assets/minivpn.x86 deleted file mode 100644 index 1a6bf464..00000000 Binary files a/app/src/main/assets/minivpn.x86 and /dev/null differ -- cgit v1.2.3 From 957f1007e584a826008950486638f911ecffff55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Parm=C3=A9nides=20GV?= Date: Tue, 23 Sep 2014 17:02:22 +0200 Subject: More binaries removed. --- app/ovpnlibs/assets/nopievpn.armeabi | Bin 5248 -> 0 bytes app/ovpnlibs/assets/nopievpn.armeabi-v7a | Bin 5256 -> 0 bytes app/ovpnlibs/assets/nopievpn.mips | Bin 5244 -> 0 bytes app/ovpnlibs/assets/nopievpn.x86 | Bin 5264 -> 0 bytes app/ovpnlibs/assets/pievpn.armeabi | Bin 5248 -> 0 bytes app/ovpnlibs/assets/pievpn.armeabi-v7a | Bin 5256 -> 0 bytes app/ovpnlibs/assets/pievpn.mips | Bin 5180 -> 0 bytes app/ovpnlibs/assets/pievpn.x86 | Bin 5264 -> 0 bytes 8 files changed, 0 insertions(+), 0 deletions(-) delete mode 100755 app/ovpnlibs/assets/nopievpn.armeabi delete mode 100755 app/ovpnlibs/assets/nopievpn.armeabi-v7a delete mode 100755 app/ovpnlibs/assets/nopievpn.mips delete mode 100755 app/ovpnlibs/assets/nopievpn.x86 delete mode 100755 app/ovpnlibs/assets/pievpn.armeabi delete mode 100755 app/ovpnlibs/assets/pievpn.armeabi-v7a delete mode 100755 app/ovpnlibs/assets/pievpn.mips delete mode 100755 app/ovpnlibs/assets/pievpn.x86 diff --git a/app/ovpnlibs/assets/nopievpn.armeabi b/app/ovpnlibs/assets/nopievpn.armeabi deleted file mode 100755 index 908dceab..00000000 Binary files a/app/ovpnlibs/assets/nopievpn.armeabi and /dev/null differ diff --git a/app/ovpnlibs/assets/nopievpn.armeabi-v7a b/app/ovpnlibs/assets/nopievpn.armeabi-v7a deleted file mode 100755 index bba1ea22..00000000 Binary files a/app/ovpnlibs/assets/nopievpn.armeabi-v7a and /dev/null differ diff --git a/app/ovpnlibs/assets/nopievpn.mips b/app/ovpnlibs/assets/nopievpn.mips deleted file mode 100755 index e30fde3e..00000000 Binary files a/app/ovpnlibs/assets/nopievpn.mips and /dev/null differ diff --git a/app/ovpnlibs/assets/nopievpn.x86 b/app/ovpnlibs/assets/nopievpn.x86 deleted file mode 100755 index 8a4b7d13..00000000 Binary files a/app/ovpnlibs/assets/nopievpn.x86 and /dev/null differ diff --git a/app/ovpnlibs/assets/pievpn.armeabi b/app/ovpnlibs/assets/pievpn.armeabi deleted file mode 100755 index 8849f862..00000000 Binary files a/app/ovpnlibs/assets/pievpn.armeabi and /dev/null differ diff --git a/app/ovpnlibs/assets/pievpn.armeabi-v7a b/app/ovpnlibs/assets/pievpn.armeabi-v7a deleted file mode 100755 index fce4d077..00000000 Binary files a/app/ovpnlibs/assets/pievpn.armeabi-v7a and /dev/null differ diff --git a/app/ovpnlibs/assets/pievpn.mips b/app/ovpnlibs/assets/pievpn.mips deleted file mode 100755 index 6e4ba6a4..00000000 Binary files a/app/ovpnlibs/assets/pievpn.mips and /dev/null differ diff --git a/app/ovpnlibs/assets/pievpn.x86 b/app/ovpnlibs/assets/pievpn.x86 deleted file mode 100755 index df9b45eb..00000000 Binary files a/app/ovpnlibs/assets/pievpn.x86 and /dev/null differ -- cgit v1.2.3 From 248ce016ac2f33df9673e90277733abfd854f27d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Parm=C3=A9nides=20GV?= Date: Tue, 23 Sep 2014 17:09:16 +0200 Subject: Don't ignore config-version.h --- .gitignore | 1 - app/openvpn/.gitignore | 1 - app/openvpn/config-version.h | 2 ++ 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 app/openvpn/config-version.h diff --git a/.gitignore b/.gitignore index b5067c60..fa1293d0 100644 --- a/.gitignore +++ b/.gitignore @@ -52,7 +52,6 @@ bitmask_android/assets/minivpn.armeabi-v7a bitmask_android/assets/minivpn.mips bitmask_android/assets/minivpn.x86 openvpn/a.out.dSYM -openvpn/config-version.h openvpn/config.status Makefile openvpn/libtool diff --git a/app/openvpn/.gitignore b/app/openvpn/.gitignore index 538c0208..94b3b9a1 100644 --- a/app/openvpn/.gitignore +++ b/app/openvpn/.gitignore @@ -53,7 +53,6 @@ distro/rpm/openvpn.spec tests/t_client.sh tests/t_client-*-20??????-??????/ src/openvpn/openvpn -config-version.h nbproject test-driver compile diff --git a/app/openvpn/config-version.h b/app/openvpn/config-version.h new file mode 100644 index 00000000..762b9dc6 --- /dev/null +++ b/app/openvpn/config-version.h @@ -0,0 +1,2 @@ +#define CONFIGURE_GIT_REVISION "icsopenvpn_618-e63b88d330782d14" +#define CONFIGURE_GIT_FLAGS "" -- cgit v1.2.3 From d0e7ba3029b2fd42582413aa95773fe7dbdede90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Parm=C3=A9nides=20GV?= Date: Tue, 23 Sep 2014 18:10:57 +0200 Subject: Updated native subprojects from ics-openvpn. --- app/jni/Android.mk | 13 +- app/jni/Application.mk | 2 +- app/lzo/B/00README.TXT | 102 +- app/lzo/B/clean.bat | 8 +- app/lzo/B/done.bat | 4 +- app/lzo/B/dos16/bc.bat | 78 +- app/lzo/B/dos16/bc.rsp | 134 +- app/lzo/B/dos16/bc_286.bat | 84 +- app/lzo/B/dos16/bc_pp.bat | 84 +- app/lzo/B/dos16/dm.bat | 78 +- app/lzo/B/dos16/mc.bat | 104 +- app/lzo/B/dos16/mc_qc.bat | 104 +- app/lzo/B/dos16/qc.bat | 80 +- app/lzo/B/dos16/sc.bat | 106 +- app/lzo/B/dos16/tc.bat | 78 +- app/lzo/B/dos16/vc.bat | 78 +- app/lzo/B/dos16/vc_qc.bat | 78 +- app/lzo/B/dos16/wc.bat | 78 +- app/lzo/B/dos16/wc.rsp | 134 +- app/lzo/B/dos32/bc_pp.bat | 84 +- app/lzo/B/dos32/dj2.bat | 90 +- app/lzo/B/dos32/dj2.opt | 12 +- app/lzo/B/dos32/dm.bat | 86 +- app/lzo/B/dos32/emx.bat | 90 +- app/lzo/B/dos32/highc.bat | 78 +- app/lzo/B/dos32/highc.rsp | 146 +- app/lzo/B/dos32/ndp.bat | 80 +- app/lzo/B/dos32/ndp.rsp | 134 +- app/lzo/B/dos32/sc.bat | 102 +- app/lzo/B/dos32/wc.bat | 78 +- app/lzo/B/dos32/zc.bat | 102 +- app/lzo/B/os2/emx.bat | 90 +- app/lzo/B/os2/wc.bat | 84 +- app/lzo/B/os2/zc.bat | 102 +- app/lzo/B/os2_16/mc.bat | 86 +- app/lzo/B/os2_16/wc.bat | 84 +- app/lzo/B/prepare.bat | 20 +- app/lzo/B/src.rsp | 134 +- app/lzo/B/unset.bat | 20 +- app/lzo/B/win16/bc.bat | 84 +- app/lzo/B/win16/dm.bat | 86 +- app/lzo/B/win16/mc.bat | 112 +- app/lzo/B/win16/sc.bat | 106 +- app/lzo/B/win16/vc.bat | 86 +- app/lzo/B/win16/wc.bat | 84 +- app/lzo/B/win32/bc.bat | 84 +- app/lzo/B/win32/bc.rsp | 146 +- app/lzo/B/win32/cygwin.bat | 90 +- app/lzo/B/win32/cygwin.rsp | 146 +- app/lzo/B/win32/dm.bat | 84 +- app/lzo/B/win32/ic.bat | 84 +- app/lzo/B/win32/lccwin32.bat | 118 +- app/lzo/B/win32/mingw.bat | 90 +- app/lzo/B/win32/mwerks.bat | 84 +- app/lzo/B/win32/pellesc.bat | 84 +- app/lzo/B/win32/pgi.bat | 86 +- app/lzo/B/win32/pw32.bat | 90 +- app/lzo/B/win32/rsxnt.bat | 90 +- app/lzo/B/win32/sc.bat | 112 +- app/lzo/B/win32/vc.bat | 84 +- app/lzo/B/win32/vc.rsp | 146 +- app/lzo/B/win32/vc_dll.bat | 84 +- app/lzo/B/win32/vc_dll.def | 14 +- app/lzo/B/win32/wc.bat | 78 +- app/lzo/B/win32/wc.rsp | 146 +- app/lzo/B/win64/ic.bat | 84 +- app/lzo/B/win64/ic_dll.bat | 84 +- app/lzo/B/win64/vc.bat | 84 +- app/lzo/B/win64/vc.rsp | 134 +- app/lzo/B/win64/vc_dll.bat | 84 +- app/lzo/B/win64/vc_dll.def | 2 +- app/lzo/autoconf/shtool | 12 +- app/openssl/Apps-config-host.mk | 2 +- app/openssl/Apps-config-target.mk | 2 +- app/openssl/Apps.mk | 9 +- app/openssl/Crypto-config-host.mk | 28 +- app/openssl/Crypto-config-target.mk | 28 +- app/openssl/Crypto-config-trusty.mk | 2 +- app/openssl/Crypto.mk | 29 +- app/openssl/Ssl-config-host.mk | 2 +- app/openssl/Ssl-config-target.mk | 2 +- app/openssl/Ssl.mk | 14 +- app/openssl/apps/enc.c | 6 + app/openssl/apps/ocsp.c | 22 +- app/openssl/apps/req.c | 15 +- app/openssl/apps/s_cb.c | 4 + app/openssl/apps/s_socket.c | 5 +- app/openssl/apps/smime.c | 4 +- app/openssl/build-config-32.mk | 4 +- app/openssl/build-config-64.mk | 4 +- app/openssl/build-config-trusty.mk | 2 +- app/openssl/check-all-builds.sh | 16 +- app/openssl/crypto/aes/asm/aes-armv4.pl | 139 +- app/openssl/crypto/aes/asm/aes-armv4.s | 160 +- app/openssl/crypto/aes/asm/aesv8-armx-64.S | 761 ++++ app/openssl/crypto/aes/asm/aesv8-armx.S | 767 ++++ app/openssl/crypto/aes/asm/aesv8-armx.pl | 980 +++++ app/openssl/crypto/arm64cpuid.S | 46 + app/openssl/crypto/arm_arch.h | 19 +- app/openssl/crypto/armcap.c | 85 +- app/openssl/crypto/armv4cpuid.S | 82 +- app/openssl/crypto/asn1/a_strnid.c | 2 +- app/openssl/crypto/bio/bio.h | 3 + app/openssl/crypto/bio/bss_dgram.c | 9 +- app/openssl/crypto/bn/asm/armv4-gf2m.S | 106 +- app/openssl/crypto/bn/asm/armv4-gf2m.pl | 139 +- app/openssl/crypto/bn/asm/armv4-mont.pl | 483 ++- app/openssl/crypto/bn/asm/armv4-mont.s | 444 ++- app/openssl/crypto/bn/asm/mips3.S | 2201 ++++++++++++ app/openssl/crypto/bn/asm/pa-risc2.S | 1618 +++++++++ app/openssl/crypto/bn/asm/pa-risc2W.S | 1605 +++++++++ app/openssl/crypto/bn/bn_mont.c | 46 +- app/openssl/crypto/cms/cms_env.c | 2 + app/openssl/crypto/cms/cms_sd.c | 4 +- app/openssl/crypto/cms/cms_smime.c | 5 +- app/openssl/crypto/dso/dso_dlfcn.c | 2 +- app/openssl/crypto/ec/ec_ameth.c | 1 + app/openssl/crypto/ec/ec_asn1.c | 7 +- app/openssl/crypto/ec/ec_lcl.h | 2 +- app/openssl/crypto/evp/bio_b64.c | 1 + app/openssl/crypto/evp/e_aes.c | 170 +- app/openssl/crypto/evp/encode.c | 1 + app/openssl/crypto/evp/p_lib.c | 2 +- app/openssl/crypto/modes/asm/ghash-armv4.S | 248 +- app/openssl/crypto/modes/asm/ghash-armv4.pl | 229 +- app/openssl/crypto/modes/asm/ghashv8-armx-64.S | 115 + app/openssl/crypto/modes/asm/ghashv8-armx.S | 116 + app/openssl/crypto/modes/asm/ghashv8-armx.pl | 240 ++ app/openssl/crypto/modes/gcm128.c | 27 +- app/openssl/crypto/opensslconf-32.h | 6 + app/openssl/crypto/opensslconf-64.h | 6 + app/openssl/crypto/opensslconf-static-32.h | 6 + app/openssl/crypto/opensslconf-static-64.h | 6 + app/openssl/crypto/opensslv.h | 6 +- app/openssl/crypto/pkcs12/p12_crt.c | 8 + app/openssl/crypto/pkcs12/p12_kiss.c | 2 +- app/openssl/crypto/pkcs7/pk7_doit.c | 6 + app/openssl/crypto/pkcs7/pkcs7.h | 1 + app/openssl/crypto/pkcs7/pkcs7err.c | 3 +- app/openssl/crypto/rsa/rsa_ameth.c | 2 +- app/openssl/crypto/sha/asm/sha1-armv4-large.pl | 446 ++- app/openssl/crypto/sha/asm/sha1-armv4-large.s | 1008 +++++- app/openssl/crypto/sha/asm/sha1-armv8.S | 1211 +++++++ app/openssl/crypto/sha/asm/sha1-armv8.pl | 333 ++ app/openssl/crypto/sha/asm/sha256-armv4.pl | 585 ++- app/openssl/crypto/sha/asm/sha256-armv4.s | 3729 +++++++++++++------- app/openssl/crypto/sha/asm/sha256-armv8.S | 1141 ++++++ app/openssl/crypto/sha/asm/sha512-armv4.pl | 3 +- app/openssl/crypto/sha/asm/sha512-armv4.s | 2 +- app/openssl/crypto/sha/asm/sha512-armv8.S | 1021 ++++++ app/openssl/crypto/sha/asm/sha512-armv8.pl | 414 +++ app/openssl/crypto/srp/srp_vfy.c | 3 + app/openssl/crypto/x509v3/v3_purp.c | 6 +- app/openssl/import_openssl.sh | 90 +- app/openssl/include/openssl/bio.h | 3 + app/openssl/include/openssl/opensslconf-32.h | 6 + app/openssl/include/openssl/opensslconf-64.h | 6 + .../include/openssl/opensslconf-static-32.h | 6 + .../include/openssl/opensslconf-static-64.h | 6 + app/openssl/include/openssl/opensslv.h | 6 +- app/openssl/include/openssl/pkcs7.h | 1 + app/openssl/include/openssl/ssl.h | 37 +- app/openssl/include/openssl/ssl3.h | 10 +- app/openssl/include/openssl/tls1.h | 15 +- app/openssl/openssl.config | 136 +- app/openssl/openssl.version | 2 +- app/openssl/patches/README | 13 + app/openssl/ssl/bio_ssl.c | 8 + app/openssl/ssl/d1_both.c | 15 +- app/openssl/ssl/d1_lib.c | 9 +- app/openssl/ssl/d1_pkt.c | 19 +- app/openssl/ssl/d1_srvr.c | 1 + app/openssl/ssl/s3_both.c | 2 +- app/openssl/ssl/s3_clnt.c | 70 +- app/openssl/ssl/s3_enc.c | 2 +- app/openssl/ssl/s3_lib.c | 31 +- app/openssl/ssl/s3_pkt.c | 42 +- app/openssl/ssl/s3_srvr.c | 65 +- app/openssl/ssl/ssl.h | 37 +- app/openssl/ssl/ssl3.h | 10 +- app/openssl/ssl/ssl_asn1.c | 29 +- app/openssl/ssl/ssl_err.c | 3 +- app/openssl/ssl/ssl_lib.c | 39 +- app/openssl/ssl/ssl_locl.h | 3 + app/openssl/ssl/ssl_sess.c | 11 + app/openssl/ssl/t1_enc.c | 99 +- app/openssl/ssl/t1_lib.c | 105 +- app/openssl/ssl/tls1.h | 15 +- app/openvpn/doc/android.txt | 11 +- app/openvpn/doc/openvpn.8 | 7 - app/openvpn/openvpn.sln | 76 +- app/openvpn/sample/sample-keys/pkcs12.p12 | Bin 4756 -> 2685 bytes app/openvpn/src/compat/compat.vcxproj | 172 +- app/openvpn/src/compat/compat.vcxproj.filters | 82 +- app/openvpn/src/openvpn/openvpn.vcxproj | 524 +-- app/openvpn/src/openvpn/openvpn.vcxproj.filters | 914 ++--- app/openvpn/src/openvpnserv/openvpnserv.vcxproj | 222 +- .../src/openvpnserv/openvpnserv.vcxproj.filters | 68 +- app/ovpnlibs/assets/nopievpn.arm64-v8a | Bin 0 -> 5368 bytes app/ovpnlibs/assets/nopievpn.armeabi | Bin 0 -> 5240 bytes app/ovpnlibs/assets/nopievpn.armeabi-v7a | Bin 0 -> 5248 bytes app/ovpnlibs/assets/nopievpn.mips | Bin 0 -> 5276 bytes app/ovpnlibs/assets/pievpn.arm64-v8a | Bin 0 -> 5368 bytes app/ovpnlibs/assets/pievpn.armeabi | Bin 0 -> 5240 bytes app/ovpnlibs/assets/pievpn.armeabi-v7a | Bin 0 -> 5248 bytes app/ovpnlibs/assets/pievpn.mips | Bin 0 -> 5276 bytes 206 files changed, 23907 insertions(+), 6253 deletions(-) create mode 100644 app/openssl/crypto/aes/asm/aesv8-armx-64.S create mode 100644 app/openssl/crypto/aes/asm/aesv8-armx.S create mode 100644 app/openssl/crypto/aes/asm/aesv8-armx.pl create mode 100644 app/openssl/crypto/arm64cpuid.S create mode 100644 app/openssl/crypto/bn/asm/mips3.S create mode 100644 app/openssl/crypto/bn/asm/pa-risc2.S create mode 100644 app/openssl/crypto/bn/asm/pa-risc2W.S create mode 100644 app/openssl/crypto/modes/asm/ghashv8-armx-64.S create mode 100644 app/openssl/crypto/modes/asm/ghashv8-armx.S create mode 100644 app/openssl/crypto/modes/asm/ghashv8-armx.pl create mode 100644 app/openssl/crypto/sha/asm/sha1-armv8.S create mode 100644 app/openssl/crypto/sha/asm/sha1-armv8.pl create mode 100644 app/openssl/crypto/sha/asm/sha256-armv8.S create mode 100644 app/openssl/crypto/sha/asm/sha512-armv8.S create mode 100644 app/openssl/crypto/sha/asm/sha512-armv8.pl create mode 100755 app/ovpnlibs/assets/nopievpn.arm64-v8a create mode 100755 app/ovpnlibs/assets/nopievpn.armeabi create mode 100755 app/ovpnlibs/assets/nopievpn.armeabi-v7a create mode 100755 app/ovpnlibs/assets/nopievpn.mips create mode 100755 app/ovpnlibs/assets/pievpn.arm64-v8a create mode 100755 app/ovpnlibs/assets/pievpn.armeabi create mode 100755 app/ovpnlibs/assets/pievpn.armeabi-v7a create mode 100755 app/ovpnlibs/assets/pievpn.mips diff --git a/app/jni/Android.mk b/app/jni/Android.mk index 0c906f97..8418c9b8 100644 --- a/app/jni/Android.mk +++ b/app/jni/Android.mk @@ -13,15 +13,18 @@ include snappy/Android.mk include openssl/Android.mk +ifeq ($(TARGET_ARCH),mips) + USE_BREAKPAD=0 +endif +ifeq ($(TARGET_ARCH),mips64) + USE_BREAKPAD=0 +endif + ifneq ($(WITH_BREAKPAD),0) - ifneq ($(TARGET_ARCH),mips) WITH_BREAKPAD=1 include google-breakpad/android/google_breakpad/Android.mk - else - WITH_BREAKPAD=0 - endif else -WITH_BREAKPAD=0 + WITH_BREAKPAD=0 endif ifeq ($(WITH_POLAR),1) diff --git a/app/jni/Application.mk b/app/jni/Application.mk index 718e79a8..21718248 100644 --- a/app/jni/Application.mk +++ b/app/jni/Application.mk @@ -1,4 +1,4 @@ -APP_ABI := all +APP_ABI := arm64-v8a armeabi armeabi-v7a mips x86 x86_64 APP_PLATFORM := android-14 APP_STL:=stlport_shared diff --git a/app/lzo/B/00README.TXT b/app/lzo/B/00README.TXT index b5352a86..17d8ef8e 100644 --- a/app/lzo/B/00README.TXT +++ b/app/lzo/B/00README.TXT @@ -1,51 +1,51 @@ -Simple make drivers for DOS, Windows, OS/2 and other systems -============================================================ - -This directory contains a bunch of simple build scripts - I've tried -to make them as foolproof as possible. - -To build LZO for your system type 'b\OS\COMPILER' in the base directory, -e.g. 'b\win32\vc.bat' will build the Win32 Visual C/C++ version. - -After building do a basic test by running 'lzotest.exe -mlzo COPYING'. -util\check.sh is an example of a more thorough test driver. - -Please send me your additional/improved versions. - - -Overview: ---------- - -b\dos32\bc_pp.bat Borland C/C++ (1) -b\dos32\dj2.bat djgpp v2 + gcc (1) -b\dos32\dm.bat Digital Mars C/C++ (1) -b\dos32\emx.bat emx + gcc (1) -b\dos32\sc.bat Symantec C/C++ (1) -b\dos32\wc.bat Watcom C/C++ (1) - -b\os2\emx.bat emx + gcc (1) -b\os2\wc.bat Watcom C/C++ (1) - -b\win32\bc.bat Borland C/C++ (1) -b\win32\cygwin.bat Cygwin + gcc (1) -b\win32\dm.bat Digital Mars C/C++ (1) -b\win32\ic.bat Intel C/C++ (1) -b\win32\mingw.bat MinGW + gcc (1) -b\win32\mwerks.bat Metrowerks CodeWarrior C/C++ (1) -b\win32\pellesc.bat Pelles C (1) -b\win32\pgi.bat Portland Group PGI C/C++ (1) -b\win32\rsxnt.bat rsxnt + gcc (1) -b\win32\sc.bat Symantec C/C++ (1) -b\win32\vc.bat Visual C/C++ (1) -b\win32\vc_dll.bat Visual C/C++ (DLL version) (1) -b\win32\wc.bat Watcom C/C++ (1) - -b\win64\ic.bat Intel C/C++ (Itanium) -b\win64\ic_dll.bat Intel C/C++ (Itanium) (DLL version) -b\win64\vc.bat Visual C/C++ (AMD64 or Itanium) -b\win64\vc_dll.bat Visual C/C++ (AMD64 or Itanium) (DLL version) - - -Notes: - (1) includes support for i386 assembler versions - +Simple make drivers for DOS, Windows, OS/2 and other systems +============================================================ + +This directory contains a bunch of simple build scripts - I've tried +to make them as foolproof as possible. + +To build LZO for your system type 'b\OS\COMPILER' in the base directory, +e.g. 'b\win32\vc.bat' will build the Win32 Visual C/C++ version. + +After building do a basic test by running 'lzotest.exe -mlzo COPYING'. +util\check.sh is an example of a more thorough test driver. + +Please send me your additional/improved versions. + + +Overview: +--------- + +b\dos32\bc_pp.bat Borland C/C++ (1) +b\dos32\dj2.bat djgpp v2 + gcc (1) +b\dos32\dm.bat Digital Mars C/C++ (1) +b\dos32\emx.bat emx + gcc (1) +b\dos32\sc.bat Symantec C/C++ (1) +b\dos32\wc.bat Watcom C/C++ (1) + +b\os2\emx.bat emx + gcc (1) +b\os2\wc.bat Watcom C/C++ (1) + +b\win32\bc.bat Borland C/C++ (1) +b\win32\cygwin.bat Cygwin + gcc (1) +b\win32\dm.bat Digital Mars C/C++ (1) +b\win32\ic.bat Intel C/C++ (1) +b\win32\mingw.bat MinGW + gcc (1) +b\win32\mwerks.bat Metrowerks CodeWarrior C/C++ (1) +b\win32\pellesc.bat Pelles C (1) +b\win32\pgi.bat Portland Group PGI C/C++ (1) +b\win32\rsxnt.bat rsxnt + gcc (1) +b\win32\sc.bat Symantec C/C++ (1) +b\win32\vc.bat Visual C/C++ (1) +b\win32\vc_dll.bat Visual C/C++ (DLL version) (1) +b\win32\wc.bat Watcom C/C++ (1) + +b\win64\ic.bat Intel C/C++ (Itanium) +b\win64\ic_dll.bat Intel C/C++ (Itanium) (DLL version) +b\win64\vc.bat Visual C/C++ (AMD64 or Itanium) +b\win64\vc_dll.bat Visual C/C++ (AMD64 or Itanium) (DLL version) + + +Notes: + (1) includes support for i386 assembler versions + diff --git a/app/lzo/B/clean.bat b/app/lzo/B/clean.bat index 453e9479..1f7de592 100644 --- a/app/lzo/B/clean.bat +++ b/app/lzo/B/clean.bat @@ -1,4 +1,4 @@ -@del /q *.def *.err *.exp *.map *.o *.obj *.res *.tds > nul 2> nul -@del /q liblzo2.a lzo2.a lzo2.dll lzo2.lib > nul 2> nul -@del /q a.exe a.out dict.exe lzopack.exe precomp.exe precomp2.exe simple.exe > nul 2> nul -@del /q lzotest.exe testmini.exe > nul 2> nul +@del /q *.def *.err *.exp *.map *.o *.obj *.res *.tds > nul 2> nul +@del /q liblzo2.a lzo2.a lzo2.dll lzo2.lib > nul 2> nul +@del /q a.exe a.out dict.exe lzopack.exe precomp.exe precomp2.exe simple.exe > nul 2> nul +@del /q lzotest.exe testmini.exe > nul 2> nul diff --git a/app/lzo/B/done.bat b/app/lzo/B/done.bat index 0ae243d4..b9bf6aa0 100644 --- a/app/lzo/B/done.bat +++ b/app/lzo/B/done.bat @@ -1,2 +1,2 @@ -@echo // -@echo // Building LZO was successful. All done. +@echo // +@echo // Building LZO was successful. All done. diff --git a/app/lzo/B/dos16/bc.bat b/app/lzo/B/dos16/bc.bat index bfcf38c6..292b793a 100644 --- a/app/lzo/B/dos16/bc.bat +++ b/app/lzo/B/dos16/bc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Borland C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc -ml -f- -set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -f -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -f -ls -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Borland C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc -ml -f- +set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -f -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -f -ls -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/bc.rsp b/app/lzo/B/dos16/bc.rsp index 39b28f9f..8a1d83e4 100644 --- a/app/lzo/B/dos16/bc.rsp +++ b/app/lzo/B/dos16/bc.rsp @@ -1,67 +1,67 @@ -+lzo1.obj & -+lzo1_99.obj & -+lzo1a.obj & -+lzo1a_99.obj & -+lzo1b_1.obj & -+lzo1b_2.obj & -+lzo1b_3.obj & -+lzo1b_4.obj & -+lzo1b_5.obj & -+lzo1b_6.obj & -+lzo1b_7.obj & -+lzo1b_8.obj & -+lzo1b_9.obj & -+lzo1b_99.obj & -+lzo1b_9x.obj & -+lzo1b_cc.obj & -+lzo1b_d1.obj & -+lzo1b_d2.obj & -+lzo1b_rr.obj & -+lzo1b_xx.obj & -+lzo1c_1.obj & -+lzo1c_2.obj & -+lzo1c_3.obj & -+lzo1c_4.obj & -+lzo1c_5.obj & -+lzo1c_6.obj & -+lzo1c_7.obj & -+lzo1c_8.obj & -+lzo1c_9.obj & -+lzo1c_99.obj & -+lzo1c_9x.obj & -+lzo1c_cc.obj & -+lzo1c_d1.obj & -+lzo1c_d2.obj & -+lzo1c_rr.obj & -+lzo1c_xx.obj & -+lzo1f_1.obj & -+lzo1f_9x.obj & -+lzo1f_d1.obj & -+lzo1f_d2.obj & -+lzo1x_1.obj & -+lzo1x_1k.obj & -+lzo1x_1l.obj & -+lzo1x_1o.obj & -+lzo1x_9x.obj & -+lzo1x_d1.obj & -+lzo1x_d2.obj & -+lzo1x_d3.obj & -+lzo1x_o.obj & -+lzo1y_1.obj & -+lzo1y_9x.obj & -+lzo1y_d1.obj & -+lzo1y_d2.obj & -+lzo1y_d3.obj & -+lzo1y_o.obj & -+lzo1z_9x.obj & -+lzo1z_d1.obj & -+lzo1z_d2.obj & -+lzo1z_d3.obj & -+lzo2a_9x.obj & -+lzo2a_d1.obj & -+lzo2a_d2.obj & -+lzo_crc.obj & -+lzo_init.obj & -+lzo_ptr.obj & -+lzo_str.obj & -+lzo_util.obj ++lzo1.obj & ++lzo1_99.obj & ++lzo1a.obj & ++lzo1a_99.obj & ++lzo1b_1.obj & ++lzo1b_2.obj & ++lzo1b_3.obj & ++lzo1b_4.obj & ++lzo1b_5.obj & ++lzo1b_6.obj & ++lzo1b_7.obj & ++lzo1b_8.obj & ++lzo1b_9.obj & ++lzo1b_99.obj & ++lzo1b_9x.obj & ++lzo1b_cc.obj & ++lzo1b_d1.obj & ++lzo1b_d2.obj & ++lzo1b_rr.obj & ++lzo1b_xx.obj & ++lzo1c_1.obj & ++lzo1c_2.obj & ++lzo1c_3.obj & ++lzo1c_4.obj & ++lzo1c_5.obj & ++lzo1c_6.obj & ++lzo1c_7.obj & ++lzo1c_8.obj & ++lzo1c_9.obj & ++lzo1c_99.obj & ++lzo1c_9x.obj & ++lzo1c_cc.obj & ++lzo1c_d1.obj & ++lzo1c_d2.obj & ++lzo1c_rr.obj & ++lzo1c_xx.obj & ++lzo1f_1.obj & ++lzo1f_9x.obj & ++lzo1f_d1.obj & ++lzo1f_d2.obj & ++lzo1x_1.obj & ++lzo1x_1k.obj & ++lzo1x_1l.obj & ++lzo1x_1o.obj & ++lzo1x_9x.obj & ++lzo1x_d1.obj & ++lzo1x_d2.obj & ++lzo1x_d3.obj & ++lzo1x_o.obj & ++lzo1y_1.obj & ++lzo1y_9x.obj & ++lzo1y_d1.obj & ++lzo1y_d2.obj & ++lzo1y_d3.obj & ++lzo1y_o.obj & ++lzo1z_9x.obj & ++lzo1z_d1.obj & ++lzo1z_d2.obj & ++lzo1z_d3.obj & ++lzo2a_9x.obj & ++lzo2a_d1.obj & ++lzo2a_d2.obj & ++lzo_crc.obj & ++lzo_init.obj & ++lzo_ptr.obj & ++lzo_str.obj & ++lzo_util.obj diff --git a/app/lzo/B/dos16/bc_286.bat b/app/lzo/B/dos16/bc_286.bat index e68fd4e2..025a48ea 100644 --- a/app/lzo/B/dos16/bc_286.bat +++ b/app/lzo/B/dos16/bc_286.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Borland C/C++ + Pharlap 286DOS-Extender -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc286 -ml -2 -set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples -DWITH_TIMER examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iminilzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Borland C/C++ + Pharlap 286DOS-Extender +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc286 -ml -2 +set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples -DWITH_TIMER examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iminilzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/bc_pp.bat b/app/lzo/B/dos16/bc_pp.bat index 2a09ba01..6c0aac37 100644 --- a/app/lzo/B/dos16/bc_pp.bat +++ b/app/lzo/B/dos16/bc_pp.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Borland C/C++ + Borland PowerPack 1.0 (DPMI16) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc -ml -2 -WX -set CF=-O1 -d -w -w-sig %CFI% -Iinclude\lzo -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples -DWITH_TIMER examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iminilzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Borland C/C++ + Borland PowerPack 1.0 (DPMI16) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc -ml -2 -WX +set CF=-O1 -d -w -w-sig %CFI% -Iinclude\lzo +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples -DWITH_TIMER examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iminilzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/dm.bat b/app/lzo/B/dos16/dm.bat index 24335982..1ec6feca 100644 --- a/app/lzo/B/dos16/dm.bat +++ b/app/lzo/B/dos16/dm.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Digital Mars C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=dmc -ml -set CF=-o -w- %CFI% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Digital Mars C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=dmc -ml +set CF=-o -w- %CFI% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/mc.bat b/app/lzo/B/dos16/mc.bat index 077401af..4f34a36f 100644 --- a/app/lzo/B/dos16/mc.bat +++ b/app/lzo/B/dos16/mc.bat @@ -1,52 +1,52 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Microsoft C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -AL -set CF=-O -Gf -W3 %CFI% -set LF=/map - -@REM %CC% %CF% -c src\*.c -for %%f in (src\*.c) do %CC% %CF% -c %%f -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -link %LF% dict.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -link %LF% lzopack.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -link %LF% precomp.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -link %LF% precomp2.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -link %LF% simple.obj,,,%BLIB%; -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -link %LF% lzotest.obj,,,%BLIB%; -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Microsoft C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -AL +set CF=-O -Gf -W3 %CFI% +set LF=/map + +@REM %CC% %CF% -c src\*.c +for %%f in (src\*.c) do %CC% %CF% -c %%f +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +link %LF% dict.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +link %LF% lzopack.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +link %LF% precomp.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +link %LF% precomp2.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +link %LF% simple.obj,,,%BLIB%; +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +link %LF% lzotest.obj,,,%BLIB%; +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/mc_qc.bat b/app/lzo/B/dos16/mc_qc.bat index cb814dd7..1c2ce940 100644 --- a/app/lzo/B/dos16/mc_qc.bat +++ b/app/lzo/B/dos16/mc_qc.bat @@ -1,52 +1,52 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Microsoft C/C++ (QuickC) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -qc -AL -set CF=-O -Gf -W3 %CFI% -set LF=/map - -@REM %CC% %CF% -c src\*.c -for %%f in (src\*.c) do %CC% %CF% -c %%f -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -link %LF% dict.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -link %LF% lzopack.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -link %LF% precomp.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -link %LF% precomp2.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -link %LF% simple.obj,,,%BLIB%; -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -link %LF% lzotest.obj,,,%BLIB%; -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Microsoft C/C++ (QuickC) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -qc -AL +set CF=-O -Gf -W3 %CFI% +set LF=/map + +@REM %CC% %CF% -c src\*.c +for %%f in (src\*.c) do %CC% %CF% -c %%f +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +link %LF% dict.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +link %LF% lzopack.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +link %LF% precomp.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +link %LF% precomp2.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +link %LF% simple.obj,,,%BLIB%; +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +link %LF% lzotest.obj,,,%BLIB%; +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/qc.bat b/app/lzo/B/dos16/qc.bat index e51166ec..82734fd7 100644 --- a/app/lzo/B/dos16/qc.bat +++ b/app/lzo/B/dos16/qc.bat @@ -1,40 +1,40 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Microsoft QuickC -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=qcl -nologo -AL -set CF=-O -Gf -W3 %CFI% -set LF=%BLIB% -Fm - -@REM %CC% %CF% -c src\*.c -for %%f in (src\*.c) do %CC% %CF% -c %%f -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Microsoft QuickC +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=qcl -nologo -AL +set CF=-O -Gf -W3 %CFI% +set LF=%BLIB% -Fm + +@REM %CC% %CF% -c src\*.c +for %%f in (src\*.c) do %CC% %CF% -c %%f +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/sc.bat b/app/lzo/B/dos16/sc.bat index 52960232..056c04a9 100644 --- a/app/lzo/B/dos16/sc.bat +++ b/app/lzo/B/dos16/sc.bat @@ -1,53 +1,53 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Symantec C/C++ -@echo // -@echo // NOTE: LZO breaks the optimizer, so we disable optimizations -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=sc -ml -set CF=-w- %CFI% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Symantec C/C++ +@echo // +@echo // NOTE: LZO breaks the optimizer, so we disable optimizations +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=sc -ml +set CF=-w- %CFI% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/tc.bat b/app/lzo/B/dos16/tc.bat index c397ae1b..a3c32770 100644 --- a/app/lzo/B/dos16/tc.bat +++ b/app/lzo/B/dos16/tc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Turbo C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=tcc -ml -f- -set CF=-O -G -w -w-rch -w-sig %CFI% -Iinclude\lzo -set LF=%BLIB% - -%CC% %CF% -Isrc -c src\*.c -@if errorlevel 1 goto error -tlib %BLIB% @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -f -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -f -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Turbo C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=tcc -ml -f- +set CF=-O -G -w -w-rch -w-sig %CFI% -Iinclude\lzo +set LF=%BLIB% + +%CC% %CF% -Isrc -c src\*.c +@if errorlevel 1 goto error +tlib %BLIB% @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -f -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -f -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/vc.bat b/app/lzo/B/dos16/vc.bat index 17ec4ccd..b0617859 100644 --- a/app/lzo/B/dos16/vc.bat +++ b/app/lzo/B/dos16/vc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Microsoft Visual C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -AL -set CF=-O -Gf -Gs -Gy -W3 %CFI% -set LF=%BLIB% -Fm - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Microsoft Visual C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -AL +set CF=-O -Gf -Gs -Gy -W3 %CFI% +set LF=%BLIB% -Fm + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/vc_qc.bat b/app/lzo/B/dos16/vc_qc.bat index 09fc0d3d..329b092f 100644 --- a/app/lzo/B/dos16/vc_qc.bat +++ b/app/lzo/B/dos16/vc_qc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Microsoft Visual C/C++ (QuickC) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -qc -AL -set CF=-O -Gf -Gy -W3 %CFI% -set LF=%BLIB% -Fm - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Microsoft Visual C/C++ (QuickC) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -qc -AL +set CF=-O -Gf -Gy -W3 %CFI% +set LF=%BLIB% -Fm + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/wc.bat b/app/lzo/B/dos16/wc.bat index 4effecc6..c92135c0 100644 --- a/app/lzo/B/dos16/wc.bat +++ b/app/lzo/B/dos16/wc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Watcom C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl -zq -ml -bt#dos -l#dos -set CF=-ox %CFI% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Watcom C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl -zq -ml -bt#dos -l#dos +set CF=-ox %CFI% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/wc.rsp b/app/lzo/B/dos16/wc.rsp index f36b8cba..c8e1c9ff 100644 --- a/app/lzo/B/dos16/wc.rsp +++ b/app/lzo/B/dos16/wc.rsp @@ -1,67 +1,67 @@ -+'lzo1.obj' -+'lzo1_99.obj' -+'lzo1a.obj' -+'lzo1a_99.obj' -+'lzo1b_1.obj' -+'lzo1b_2.obj' -+'lzo1b_3.obj' -+'lzo1b_4.obj' -+'lzo1b_5.obj' -+'lzo1b_6.obj' -+'lzo1b_7.obj' -+'lzo1b_8.obj' -+'lzo1b_9.obj' -+'lzo1b_99.obj' -+'lzo1b_9x.obj' -+'lzo1b_cc.obj' -+'lzo1b_d1.obj' -+'lzo1b_d2.obj' -+'lzo1b_rr.obj' -+'lzo1b_xx.obj' -+'lzo1c_1.obj' -+'lzo1c_2.obj' -+'lzo1c_3.obj' -+'lzo1c_4.obj' -+'lzo1c_5.obj' -+'lzo1c_6.obj' -+'lzo1c_7.obj' -+'lzo1c_8.obj' -+'lzo1c_9.obj' -+'lzo1c_99.obj' -+'lzo1c_9x.obj' -+'lzo1c_cc.obj' -+'lzo1c_d1.obj' -+'lzo1c_d2.obj' -+'lzo1c_rr.obj' -+'lzo1c_xx.obj' -+'lzo1f_1.obj' -+'lzo1f_9x.obj' -+'lzo1f_d1.obj' -+'lzo1f_d2.obj' -+'lzo1x_1.obj' -+'lzo1x_1k.obj' -+'lzo1x_1l.obj' -+'lzo1x_1o.obj' -+'lzo1x_9x.obj' -+'lzo1x_d1.obj' -+'lzo1x_d2.obj' -+'lzo1x_d3.obj' -+'lzo1x_o.obj' -+'lzo1y_1.obj' -+'lzo1y_9x.obj' -+'lzo1y_d1.obj' -+'lzo1y_d2.obj' -+'lzo1y_d3.obj' -+'lzo1y_o.obj' -+'lzo1z_9x.obj' -+'lzo1z_d1.obj' -+'lzo1z_d2.obj' -+'lzo1z_d3.obj' -+'lzo2a_9x.obj' -+'lzo2a_d1.obj' -+'lzo2a_d2.obj' -+'lzo_crc.obj' -+'lzo_init.obj' -+'lzo_ptr.obj' -+'lzo_str.obj' -+'lzo_util.obj' ++'lzo1.obj' ++'lzo1_99.obj' ++'lzo1a.obj' ++'lzo1a_99.obj' ++'lzo1b_1.obj' ++'lzo1b_2.obj' ++'lzo1b_3.obj' ++'lzo1b_4.obj' ++'lzo1b_5.obj' ++'lzo1b_6.obj' ++'lzo1b_7.obj' ++'lzo1b_8.obj' ++'lzo1b_9.obj' ++'lzo1b_99.obj' ++'lzo1b_9x.obj' ++'lzo1b_cc.obj' ++'lzo1b_d1.obj' ++'lzo1b_d2.obj' ++'lzo1b_rr.obj' ++'lzo1b_xx.obj' ++'lzo1c_1.obj' ++'lzo1c_2.obj' ++'lzo1c_3.obj' ++'lzo1c_4.obj' ++'lzo1c_5.obj' ++'lzo1c_6.obj' ++'lzo1c_7.obj' ++'lzo1c_8.obj' ++'lzo1c_9.obj' ++'lzo1c_99.obj' ++'lzo1c_9x.obj' ++'lzo1c_cc.obj' ++'lzo1c_d1.obj' ++'lzo1c_d2.obj' ++'lzo1c_rr.obj' ++'lzo1c_xx.obj' ++'lzo1f_1.obj' ++'lzo1f_9x.obj' ++'lzo1f_d1.obj' ++'lzo1f_d2.obj' ++'lzo1x_1.obj' ++'lzo1x_1k.obj' ++'lzo1x_1l.obj' ++'lzo1x_1o.obj' ++'lzo1x_9x.obj' ++'lzo1x_d1.obj' ++'lzo1x_d2.obj' ++'lzo1x_d3.obj' ++'lzo1x_o.obj' ++'lzo1y_1.obj' ++'lzo1y_9x.obj' ++'lzo1y_d1.obj' ++'lzo1y_d2.obj' ++'lzo1y_d3.obj' ++'lzo1y_o.obj' ++'lzo1z_9x.obj' ++'lzo1z_d1.obj' ++'lzo1z_d2.obj' ++'lzo1z_d3.obj' ++'lzo2a_9x.obj' ++'lzo2a_d1.obj' ++'lzo2a_d2.obj' ++'lzo_crc.obj' ++'lzo_init.obj' ++'lzo_ptr.obj' ++'lzo_str.obj' ++'lzo_util.obj' diff --git a/app/lzo/B/dos32/bc_pp.bat b/app/lzo/B/dos32/bc_pp.bat index dbaf460c..77063464 100644 --- a/app/lzo/B/dos32/bc_pp.bat +++ b/app/lzo/B/dos32/bc_pp.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Borland C/C++ + Borland PowerPack 1.0 (DPMI32) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc32 -WX -set CF=-O2 -w -w-aus %CFI% -Iinclude\lzo %CFASM% -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -ls -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Borland C/C++ + Borland PowerPack 1.0 (DPMI32) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc32 -WX +set CF=-O2 -w -w-aus %CFI% -Iinclude\lzo %CFASM% +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -ls -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/dj2.bat b/app/lzo/B/dos32/dj2.bat index 02da9232..85928d02 100644 --- a/app/lzo/B/dos32/dj2.bat +++ b/app/lzo/B/dos32/dj2.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // djgpp2 + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=lib%BNAME%.a -set CC=gcc -set CF=@b/dos32/dj2.opt %CFI% %CFASM% -set LF=%BLIB% -s - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% @b/win32/cygwin.rsp -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -s -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // djgpp2 + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=lib%BNAME%.a +set CC=gcc +set CF=@b/dos32/dj2.opt %CFI% %CFASM% +set LF=%BLIB% -s + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% @b/win32/cygwin.rsp +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -s -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/dj2.opt b/app/lzo/B/dos32/dj2.opt index 782eda58..9cd02cd0 100644 --- a/app/lzo/B/dos32/dj2.opt +++ b/app/lzo/B/dos32/dj2.opt @@ -1,6 +1,6 @@ --O2 --fomit-frame-pointer --Wall --Wcast-align --Wcast-qual --Wwrite-strings +-O2 +-fomit-frame-pointer +-Wall +-Wcast-align +-Wcast-qual +-Wwrite-strings diff --git a/app/lzo/B/dos32/dm.bat b/app/lzo/B/dos32/dm.bat index 3328e1f3..9384bed5 100644 --- a/app/lzo/B/dos32/dm.bat +++ b/app/lzo/B/dos32/dm.bat @@ -1,43 +1,43 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Digital Mars C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=dmc -mx -set CF=-o -w- %CFI% %CFASM% -set LF=%BLIB% x32.lib - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -set LF=x32.lib -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Digital Mars C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=dmc -mx +set CF=-o -w- %CFI% %CFASM% +set LF=%BLIB% x32.lib + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +set LF=x32.lib +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/emx.bat b/app/lzo/B/dos32/emx.bat index 631dceb3..04423424 100644 --- a/app/lzo/B/dos32/emx.bat +++ b/app/lzo/B/dos32/emx.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // emx + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=%BNAME%.a -set CC=gcc -set CF=@b/dos32/dj2.opt %CFI% %CFASM% -set LF=%BLIB% -s - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% @b/win32/cygwin.rsp -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // emx + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=%BNAME%.a +set CC=gcc +set CF=@b/dos32/dj2.opt %CFI% %CFASM% +set LF=%BLIB% -s + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% @b/win32/cygwin.rsp +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/highc.bat b/app/lzo/B/dos32/highc.bat index 83d4cdc0..a9881164 100644 --- a/app/lzo/B/dos32/highc.bat +++ b/app/lzo/B/dos32/highc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // MetaWare High C/C++ (using Pharlap DOS extender) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=hc386 -set CF=-O3 -w4 %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -w1 -c src\*.c -@if errorlevel 1 goto error -386lib %BLIB% -nobanner @b\dos32\highc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // MetaWare High C/C++ (using Pharlap DOS extender) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=hc386 +set CF=-O3 -w4 %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -w1 -c src\*.c +@if errorlevel 1 goto error +386lib %BLIB% -nobanner @b\dos32\highc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/highc.rsp b/app/lzo/B/dos32/highc.rsp index d17a52ae..d477b962 100644 --- a/app/lzo/B/dos32/highc.rsp +++ b/app/lzo/B/dos32/highc.rsp @@ -1,73 +1,73 @@ --create lzo1.obj --create lzo1_99.obj --create lzo1a.obj --create lzo1a_99.obj --create lzo1b_1.obj --create lzo1b_2.obj --create lzo1b_3.obj --create lzo1b_4.obj --create lzo1b_5.obj --create lzo1b_6.obj --create lzo1b_7.obj --create lzo1b_8.obj --create lzo1b_9.obj --create lzo1b_99.obj --create lzo1b_9x.obj --create lzo1b_cc.obj --create lzo1b_d1.obj --create lzo1b_d2.obj --create lzo1b_rr.obj --create lzo1b_xx.obj --create lzo1c_1.obj --create lzo1c_2.obj --create lzo1c_3.obj --create lzo1c_4.obj --create lzo1c_5.obj --create lzo1c_6.obj --create lzo1c_7.obj --create lzo1c_8.obj --create lzo1c_9.obj --create lzo1c_99.obj --create lzo1c_9x.obj --create lzo1c_cc.obj --create lzo1c_d1.obj --create lzo1c_d2.obj --create lzo1c_rr.obj --create lzo1c_xx.obj --create lzo1f_1.obj --create lzo1f_9x.obj --create lzo1f_d1.obj --create lzo1f_d2.obj --create lzo1x_1.obj --create lzo1x_1k.obj --create lzo1x_1l.obj --create lzo1x_1o.obj --create lzo1x_9x.obj --create lzo1x_d1.obj --create lzo1x_d2.obj --create lzo1x_d3.obj --create lzo1x_o.obj --create lzo1y_1.obj --create lzo1y_9x.obj --create lzo1y_d1.obj --create lzo1y_d2.obj --create lzo1y_d3.obj --create lzo1y_o.obj --create lzo1z_9x.obj --create lzo1z_d1.obj --create lzo1z_d2.obj --create lzo1z_d3.obj --create lzo2a_9x.obj --create lzo2a_d1.obj --create lzo2a_d2.obj --create lzo_crc.obj --create lzo_init.obj --create lzo_ptr.obj --create lzo_str.obj --create lzo_util.obj --create asm\i386\obj\omf32\lzo1c_s1.obj --create asm\i386\obj\omf32\lzo1f_f1.obj --create asm\i386\obj\omf32\lzo1x_f1.obj --create asm\i386\obj\omf32\lzo1x_s1.obj --create asm\i386\obj\omf32\lzo1y_f1.obj --create asm\i386\obj\omf32\lzo1y_s1.obj +-create lzo1.obj +-create lzo1_99.obj +-create lzo1a.obj +-create lzo1a_99.obj +-create lzo1b_1.obj +-create lzo1b_2.obj +-create lzo1b_3.obj +-create lzo1b_4.obj +-create lzo1b_5.obj +-create lzo1b_6.obj +-create lzo1b_7.obj +-create lzo1b_8.obj +-create lzo1b_9.obj +-create lzo1b_99.obj +-create lzo1b_9x.obj +-create lzo1b_cc.obj +-create lzo1b_d1.obj +-create lzo1b_d2.obj +-create lzo1b_rr.obj +-create lzo1b_xx.obj +-create lzo1c_1.obj +-create lzo1c_2.obj +-create lzo1c_3.obj +-create lzo1c_4.obj +-create lzo1c_5.obj +-create lzo1c_6.obj +-create lzo1c_7.obj +-create lzo1c_8.obj +-create lzo1c_9.obj +-create lzo1c_99.obj +-create lzo1c_9x.obj +-create lzo1c_cc.obj +-create lzo1c_d1.obj +-create lzo1c_d2.obj +-create lzo1c_rr.obj +-create lzo1c_xx.obj +-create lzo1f_1.obj +-create lzo1f_9x.obj +-create lzo1f_d1.obj +-create lzo1f_d2.obj +-create lzo1x_1.obj +-create lzo1x_1k.obj +-create lzo1x_1l.obj +-create lzo1x_1o.obj +-create lzo1x_9x.obj +-create lzo1x_d1.obj +-create lzo1x_d2.obj +-create lzo1x_d3.obj +-create lzo1x_o.obj +-create lzo1y_1.obj +-create lzo1y_9x.obj +-create lzo1y_d1.obj +-create lzo1y_d2.obj +-create lzo1y_d3.obj +-create lzo1y_o.obj +-create lzo1z_9x.obj +-create lzo1z_d1.obj +-create lzo1z_d2.obj +-create lzo1z_d3.obj +-create lzo2a_9x.obj +-create lzo2a_d1.obj +-create lzo2a_d2.obj +-create lzo_crc.obj +-create lzo_init.obj +-create lzo_ptr.obj +-create lzo_str.obj +-create lzo_util.obj +-create asm\i386\obj\omf32\lzo1c_s1.obj +-create asm\i386\obj\omf32\lzo1f_f1.obj +-create asm\i386\obj\omf32\lzo1x_f1.obj +-create asm\i386\obj\omf32\lzo1x_s1.obj +-create asm\i386\obj\omf32\lzo1y_f1.obj +-create asm\i386\obj\omf32\lzo1y_s1.obj diff --git a/app/lzo/B/dos32/ndp.bat b/app/lzo/B/dos32/ndp.bat index c2c1beba..2b203118 100644 --- a/app/lzo/B/dos32/ndp.bat +++ b/app/lzo/B/dos32/ndp.bat @@ -1,40 +1,40 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Microway NDP C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=mx486 -set CF=-ansi -on %CFI% -set LF=%BLIB% -bind -map - -@REM %CC% %CF% -Isrc -c src\*.c -for %%f in (src\*.c) do %CC% %CF% -Isrc -c %%f -@if errorlevel 1 goto error -ndplib %BLIB% @b\dos32\ndp.rsp -@if errorlevel 1 goto error - -%CC% %CF% -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Dconst= -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Microway NDP C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=mx486 +set CF=-ansi -on %CFI% +set LF=%BLIB% -bind -map + +@REM %CC% %CF% -Isrc -c src\*.c +for %%f in (src\*.c) do %CC% %CF% -Isrc -c %%f +@if errorlevel 1 goto error +ndplib %BLIB% @b\dos32\ndp.rsp +@if errorlevel 1 goto error + +%CC% %CF% -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Dconst= -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/ndp.rsp b/app/lzo/B/dos32/ndp.rsp index 24c1d438..c62b19eb 100644 --- a/app/lzo/B/dos32/ndp.rsp +++ b/app/lzo/B/dos32/ndp.rsp @@ -1,67 +1,67 @@ --add lzo1.obj --add lzo1_99.obj --add lzo1a.obj --add lzo1a_99.obj --add lzo1b_1.obj --add lzo1b_2.obj --add lzo1b_3.obj --add lzo1b_4.obj --add lzo1b_5.obj --add lzo1b_6.obj --add lzo1b_7.obj --add lzo1b_8.obj --add lzo1b_9.obj --add lzo1b_99.obj --add lzo1b_9x.obj --add lzo1b_cc.obj --add lzo1b_d1.obj --add lzo1b_d2.obj --add lzo1b_rr.obj --add lzo1b_xx.obj --add lzo1c_1.obj --add lzo1c_2.obj --add lzo1c_3.obj --add lzo1c_4.obj --add lzo1c_5.obj --add lzo1c_6.obj --add lzo1c_7.obj --add lzo1c_8.obj --add lzo1c_9.obj --add lzo1c_99.obj --add lzo1c_9x.obj --add lzo1c_cc.obj --add lzo1c_d1.obj --add lzo1c_d2.obj --add lzo1c_rr.obj --add lzo1c_xx.obj --add lzo1f_1.obj --add lzo1f_9x.obj --add lzo1f_d1.obj --add lzo1f_d2.obj --add lzo1x_1.obj --add lzo1x_1k.obj --add lzo1x_1l.obj --add lzo1x_1o.obj --add lzo1x_9x.obj --add lzo1x_d1.obj --add lzo1x_d2.obj --add lzo1x_d3.obj --add lzo1x_o.obj --add lzo1y_1.obj --add lzo1y_9x.obj --add lzo1y_d1.obj --add lzo1y_d2.obj --add lzo1y_d3.obj --add lzo1y_o.obj --add lzo1z_9x.obj --add lzo1z_d1.obj --add lzo1z_d2.obj --add lzo1z_d3.obj --add lzo2a_9x.obj --add lzo2a_d1.obj --add lzo2a_d2.obj --add lzo_crc.obj --add lzo_init.obj --add lzo_ptr.obj --add lzo_str.obj --add lzo_util.obj +-add lzo1.obj +-add lzo1_99.obj +-add lzo1a.obj +-add lzo1a_99.obj +-add lzo1b_1.obj +-add lzo1b_2.obj +-add lzo1b_3.obj +-add lzo1b_4.obj +-add lzo1b_5.obj +-add lzo1b_6.obj +-add lzo1b_7.obj +-add lzo1b_8.obj +-add lzo1b_9.obj +-add lzo1b_99.obj +-add lzo1b_9x.obj +-add lzo1b_cc.obj +-add lzo1b_d1.obj +-add lzo1b_d2.obj +-add lzo1b_rr.obj +-add lzo1b_xx.obj +-add lzo1c_1.obj +-add lzo1c_2.obj +-add lzo1c_3.obj +-add lzo1c_4.obj +-add lzo1c_5.obj +-add lzo1c_6.obj +-add lzo1c_7.obj +-add lzo1c_8.obj +-add lzo1c_9.obj +-add lzo1c_99.obj +-add lzo1c_9x.obj +-add lzo1c_cc.obj +-add lzo1c_d1.obj +-add lzo1c_d2.obj +-add lzo1c_rr.obj +-add lzo1c_xx.obj +-add lzo1f_1.obj +-add lzo1f_9x.obj +-add lzo1f_d1.obj +-add lzo1f_d2.obj +-add lzo1x_1.obj +-add lzo1x_1k.obj +-add lzo1x_1l.obj +-add lzo1x_1o.obj +-add lzo1x_9x.obj +-add lzo1x_d1.obj +-add lzo1x_d2.obj +-add lzo1x_d3.obj +-add lzo1x_o.obj +-add lzo1y_1.obj +-add lzo1y_9x.obj +-add lzo1y_d1.obj +-add lzo1y_d2.obj +-add lzo1y_d3.obj +-add lzo1y_o.obj +-add lzo1z_9x.obj +-add lzo1z_d1.obj +-add lzo1z_d2.obj +-add lzo1z_d3.obj +-add lzo2a_9x.obj +-add lzo2a_d1.obj +-add lzo2a_d2.obj +-add lzo_crc.obj +-add lzo_init.obj +-add lzo_ptr.obj +-add lzo_str.obj +-add lzo_util.obj diff --git a/app/lzo/B/dos32/sc.bat b/app/lzo/B/dos32/sc.bat index 5751fa3b..44fed188 100644 --- a/app/lzo/B/dos32/sc.bat +++ b/app/lzo/B/dos32/sc.bat @@ -1,51 +1,51 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Symantec C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=sc -mx -set CF=-o -w- %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Symantec C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=sc -mx +set CF=-o -w- %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/wc.bat b/app/lzo/B/dos32/wc.bat index 8817b19c..21de11dc 100644 --- a/app/lzo/B/dos32/wc.bat +++ b/app/lzo/B/dos32/wc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Watcom C/C++ (using DOS/4G extender) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl386 -zq -mf -5r -bt#dos -l#dos4g -set CF=-ox -zc %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\win32\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Watcom C/C++ (using DOS/4G extender) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl386 -zq -mf -5r -bt#dos -l#dos4g +set CF=-ox -zc %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\win32\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/zc.bat b/app/lzo/B/dos32/zc.bat index 159e99ca..193502f9 100644 --- a/app/lzo/B/dos32/zc.bat +++ b/app/lzo/B/dos32/zc.bat @@ -1,51 +1,51 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Zortech C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=ztc -b -v0 -mx -set CF=-o -w- -r %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -zorlib %BLIB% @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Zortech C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=ztc -b -v0 -mx +set CF=-o -w- -r %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +zorlib %BLIB% @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/os2/emx.bat b/app/lzo/B/os2/emx.bat index 35e26fc5..2367d65c 100644 --- a/app/lzo/B/os2/emx.bat +++ b/app/lzo/B/os2/emx.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // OS/2 32-bit -@echo // emx + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=%BNAME%.a -set CC=gcc -set CF=@b/dos32/dj2.opt %CFI% %CFASM% -set LF=%BLIB% -s - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% @b/win32/cygwin.rsp -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // OS/2 32-bit +@echo // emx + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=%BNAME%.a +set CC=gcc +set CF=@b/dos32/dj2.opt %CFI% %CFASM% +set LF=%BLIB% -s + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% @b/win32/cygwin.rsp +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/os2/wc.bat b/app/lzo/B/os2/wc.bat index 44ca3ab1..44b43f50 100644 --- a/app/lzo/B/os2/wc.bat +++ b/app/lzo/B/os2/wc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // OS/2 32-bit -@echo // Watcom C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl386 -zq -mf -5r -bt#os2 -l#os2v2 -set CF=-ox -zc %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\win32\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // OS/2 32-bit +@echo // Watcom C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl386 -zq -mf -5r -bt#os2 -l#os2v2 +set CF=-ox -zc %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\win32\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/os2/zc.bat b/app/lzo/B/os2/zc.bat index db751a67..5e4e7f68 100644 --- a/app/lzo/B/os2/zc.bat +++ b/app/lzo/B/os2/zc.bat @@ -1,51 +1,51 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // OS/2 32-bit -@echo // Zortech C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=ztc -b -v0 -mf -set CF=-o -w- -r %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -zorlib %BLIB% @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // OS/2 32-bit +@echo // Zortech C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=ztc -b -v0 -mf +set CF=-o -w- -r %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +zorlib %BLIB% @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/os2_16/mc.bat b/app/lzo/B/os2_16/mc.bat index 378d43f0..95742d41 100644 --- a/app/lzo/B/os2_16/mc.bat +++ b/app/lzo/B/os2_16/mc.bat @@ -1,43 +1,43 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // OS/2 16-bit -@echo // Microsoft C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -AL -G2 -set CF=-D__OS2__ -O -Gf -Gs -W3 %CFI% -set LF=%BLIB% -Lp -Fm /link /stack:8096 - -@REM %CC% %CF% -c src\*.c -for %%f in (src\*.c) do %CC% %CF% -c %%f -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // OS/2 16-bit +@echo // Microsoft C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -AL -G2 +set CF=-D__OS2__ -O -Gf -Gs -W3 %CFI% +set LF=%BLIB% -Lp -Fm /link /stack:8096 + +@REM %CC% %CF% -c src\*.c +for %%f in (src\*.c) do %CC% %CF% -c %%f +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/os2_16/wc.bat b/app/lzo/B/os2_16/wc.bat index fcf7fc99..bbe17430 100644 --- a/app/lzo/B/os2_16/wc.bat +++ b/app/lzo/B/os2_16/wc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // OS/2 16-bit -@echo // Watcom C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl -zq -ml -2 -bt#os2 -l#os2 -set CF=-ox %CFI% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // OS/2 16-bit +@echo // Watcom C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl -zq -ml -2 -bt#os2 -l#os2 +set CF=-ox %CFI% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/prepare.bat b/app/lzo/B/prepare.bat index 5f70fb77..34bac191 100644 --- a/app/lzo/B/prepare.bat +++ b/app/lzo/B/prepare.bat @@ -1,10 +1,10 @@ -@call b\unset.bat -@call b\clean.bat - -@set CFI=-Iinclude -I. -Isrc -@set CFASM=-DLZO_USE_ASM -@set BNAME=lzo2 -@set BLIB=lzo2.lib -@set BDLL=lzo2.dll - -@echo Compiling, please be patient... +@call b\unset.bat +@call b\clean.bat + +@set CFI=-Iinclude -I. -Isrc +@set CFASM=-DLZO_USE_ASM +@set BNAME=lzo2 +@set BLIB=lzo2.lib +@set BDLL=lzo2.dll + +@echo Compiling, please be patient... diff --git a/app/lzo/B/src.rsp b/app/lzo/B/src.rsp index 26fd0110..2a1dbce6 100644 --- a/app/lzo/B/src.rsp +++ b/app/lzo/B/src.rsp @@ -1,67 +1,67 @@ -src/lzo1.c -src/lzo1_99.c -src/lzo1a.c -src/lzo1a_99.c -src/lzo1b_1.c -src/lzo1b_2.c -src/lzo1b_3.c -src/lzo1b_4.c -src/lzo1b_5.c -src/lzo1b_6.c -src/lzo1b_7.c -src/lzo1b_8.c -src/lzo1b_9.c -src/lzo1b_99.c -src/lzo1b_9x.c -src/lzo1b_cc.c -src/lzo1b_d1.c -src/lzo1b_d2.c -src/lzo1b_rr.c -src/lzo1b_xx.c -src/lzo1c_1.c -src/lzo1c_2.c -src/lzo1c_3.c -src/lzo1c_4.c -src/lzo1c_5.c -src/lzo1c_6.c -src/lzo1c_7.c -src/lzo1c_8.c -src/lzo1c_9.c -src/lzo1c_99.c -src/lzo1c_9x.c -src/lzo1c_cc.c -src/lzo1c_d1.c -src/lzo1c_d2.c -src/lzo1c_rr.c -src/lzo1c_xx.c -src/lzo1f_1.c -src/lzo1f_9x.c -src/lzo1f_d1.c -src/lzo1f_d2.c -src/lzo1x_1.c -src/lzo1x_1k.c -src/lzo1x_1l.c -src/lzo1x_1o.c -src/lzo1x_9x.c -src/lzo1x_d1.c -src/lzo1x_d2.c -src/lzo1x_d3.c -src/lzo1x_o.c -src/lzo1y_1.c -src/lzo1y_9x.c -src/lzo1y_d1.c -src/lzo1y_d2.c -src/lzo1y_d3.c -src/lzo1y_o.c -src/lzo1z_9x.c -src/lzo1z_d1.c -src/lzo1z_d2.c -src/lzo1z_d3.c -src/lzo2a_9x.c -src/lzo2a_d1.c -src/lzo2a_d2.c -src/lzo_crc.c -src/lzo_init.c -src/lzo_ptr.c -src/lzo_str.c -src/lzo_util.c +src/lzo1.c +src/lzo1_99.c +src/lzo1a.c +src/lzo1a_99.c +src/lzo1b_1.c +src/lzo1b_2.c +src/lzo1b_3.c +src/lzo1b_4.c +src/lzo1b_5.c +src/lzo1b_6.c +src/lzo1b_7.c +src/lzo1b_8.c +src/lzo1b_9.c +src/lzo1b_99.c +src/lzo1b_9x.c +src/lzo1b_cc.c +src/lzo1b_d1.c +src/lzo1b_d2.c +src/lzo1b_rr.c +src/lzo1b_xx.c +src/lzo1c_1.c +src/lzo1c_2.c +src/lzo1c_3.c +src/lzo1c_4.c +src/lzo1c_5.c +src/lzo1c_6.c +src/lzo1c_7.c +src/lzo1c_8.c +src/lzo1c_9.c +src/lzo1c_99.c +src/lzo1c_9x.c +src/lzo1c_cc.c +src/lzo1c_d1.c +src/lzo1c_d2.c +src/lzo1c_rr.c +src/lzo1c_xx.c +src/lzo1f_1.c +src/lzo1f_9x.c +src/lzo1f_d1.c +src/lzo1f_d2.c +src/lzo1x_1.c +src/lzo1x_1k.c +src/lzo1x_1l.c +src/lzo1x_1o.c +src/lzo1x_9x.c +src/lzo1x_d1.c +src/lzo1x_d2.c +src/lzo1x_d3.c +src/lzo1x_o.c +src/lzo1y_1.c +src/lzo1y_9x.c +src/lzo1y_d1.c +src/lzo1y_d2.c +src/lzo1y_d3.c +src/lzo1y_o.c +src/lzo1z_9x.c +src/lzo1z_d1.c +src/lzo1z_d2.c +src/lzo1z_d3.c +src/lzo2a_9x.c +src/lzo2a_d1.c +src/lzo2a_d2.c +src/lzo_crc.c +src/lzo_init.c +src/lzo_ptr.c +src/lzo_str.c +src/lzo_util.c diff --git a/app/lzo/B/unset.bat b/app/lzo/B/unset.bat index 98f13786..21672dd6 100644 --- a/app/lzo/B/unset.bat +++ b/app/lzo/B/unset.bat @@ -1,10 +1,10 @@ -@set CC= -@set CF= -@set CFI= -@set CFX= -@set CFASM= -@set LF= -@set BNAME= -@set BLIB= -@set BDLL= -@set BECHO= +@set CC= +@set CF= +@set CFI= +@set CFX= +@set CFASM= +@set LF= +@set BNAME= +@set BLIB= +@set BDLL= +@set BECHO= diff --git a/app/lzo/B/win16/bc.bat b/app/lzo/B/win16/bc.bat index 06f64bda..f252ada8 100644 --- a/app/lzo/B/win16/bc.bat +++ b/app/lzo/B/win16/bc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Borland C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc -ml -2 -tW -h -set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Borland C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc -ml -2 -tW -h +set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win16/dm.bat b/app/lzo/B/win16/dm.bat index d844ef83..8009a996 100644 --- a/app/lzo/B/win16/dm.bat +++ b/app/lzo/B/win16/dm.bat @@ -1,43 +1,43 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Digital Mars C/C++ (using WINIO) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=dmc -ml -2 -W -set CF=-o -w- %CFI% -set LF=%BLIB% libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -set LF=libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Digital Mars C/C++ (using WINIO) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=dmc -ml -2 -W +set CF=-o -w- %CFI% +set LF=%BLIB% libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +set LF=libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win16/mc.bat b/app/lzo/B/win16/mc.bat index 26af7b6e..4404e421 100644 --- a/app/lzo/B/win16/mc.bat +++ b/app/lzo/B/win16/mc.bat @@ -1,56 +1,56 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Microsoft C/C++ (using QuickWin) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -AL -G2 -Mq -set CF=-O -Gf -W3 %CFI% -set LF=/seg:256 /stack:8096 /nod:llibce /map - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -link %LF% dict.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -link %LF% lzopack.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -link %LF% precomp.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -link %LF% precomp2.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -link %LF% simple.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -link %LF% lzotest.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo -c minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error -link %LF% testmini.obj minilzo.obj,,,llibcewq.lib libw.lib; -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Microsoft C/C++ (using QuickWin) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -AL -G2 -Mq +set CF=-O -Gf -W3 %CFI% +set LF=/seg:256 /stack:8096 /nod:llibce /map + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +link %LF% dict.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +link %LF% lzopack.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +link %LF% precomp.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +link %LF% precomp2.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +link %LF% simple.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +link %LF% lzotest.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo -c minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error +link %LF% testmini.obj minilzo.obj,,,llibcewq.lib libw.lib; +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win16/sc.bat b/app/lzo/B/win16/sc.bat index 89dd73c5..0dab9e3c 100644 --- a/app/lzo/B/win16/sc.bat +++ b/app/lzo/B/win16/sc.bat @@ -1,53 +1,53 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Symantec C/C++ (using WINIO) -@echo // -@echo // NOTE: LZO breaks the optimizer, so we disable optimizations -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=sc -ml -2 -W -set CF=-w- %CFI% -set LF=%BLIB% libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Symantec C/C++ (using WINIO) +@echo // +@echo // NOTE: LZO breaks the optimizer, so we disable optimizations +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=sc -ml -2 -W +set CF=-w- %CFI% +set LF=%BLIB% libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win16/vc.bat b/app/lzo/B/win16/vc.bat index 74962326..2c35cfc4 100644 --- a/app/lzo/B/win16/vc.bat +++ b/app/lzo/B/win16/vc.bat @@ -1,43 +1,43 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Microsoft Visual C/C++ (using QuickWin) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -AL -G2 -Mq -set CF=-O -Gf -Gs -Gy -W3 %CFI% -set LF=%BLIB% -Fm /link /seg:256 - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -set LF=-Fm /link /seg:256 -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Microsoft Visual C/C++ (using QuickWin) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -AL -G2 -Mq +set CF=-O -Gf -Gs -Gy -W3 %CFI% +set LF=%BLIB% -Fm /link /seg:256 + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +set LF=-Fm /link /seg:256 +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win16/wc.bat b/app/lzo/B/win16/wc.bat index be06828a..b5dbf22a 100644 --- a/app/lzo/B/win16/wc.bat +++ b/app/lzo/B/win16/wc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Watcom C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl -zq -ml -2 -bw -bt#windows -l#windows -set CF=-ox %CFI% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Watcom C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl -zq -ml -2 -bw -bt#windows -l#windows +set CF=-ox %CFI% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/bc.bat b/app/lzo/B/win32/bc.bat index 21fafa0e..811acdbb 100644 --- a/app/lzo/B/win32/bc.bat +++ b/app/lzo/B/win32/bc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Borland C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc32 -set CF=-O2 -w -w-aus %CFI% -Iinclude\lzo %CFASM% -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -ls -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Borland C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc32 +set CF=-O2 -w -w-aus %CFI% -Iinclude\lzo %CFASM% +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -ls -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/bc.rsp b/app/lzo/B/win32/bc.rsp index d62b5b38..b48a182d 100644 --- a/app/lzo/B/win32/bc.rsp +++ b/app/lzo/B/win32/bc.rsp @@ -1,73 +1,73 @@ -+lzo1.obj & -+lzo1_99.obj & -+lzo1a.obj & -+lzo1a_99.obj & -+lzo1b_1.obj & -+lzo1b_2.obj & -+lzo1b_3.obj & -+lzo1b_4.obj & -+lzo1b_5.obj & -+lzo1b_6.obj & -+lzo1b_7.obj & -+lzo1b_8.obj & -+lzo1b_9.obj & -+lzo1b_99.obj & -+lzo1b_9x.obj & -+lzo1b_cc.obj & -+lzo1b_d1.obj & -+lzo1b_d2.obj & -+lzo1b_rr.obj & -+lzo1b_xx.obj & -+lzo1c_1.obj & -+lzo1c_2.obj & -+lzo1c_3.obj & -+lzo1c_4.obj & -+lzo1c_5.obj & -+lzo1c_6.obj & -+lzo1c_7.obj & -+lzo1c_8.obj & -+lzo1c_9.obj & -+lzo1c_99.obj & -+lzo1c_9x.obj & -+lzo1c_cc.obj & -+lzo1c_d1.obj & -+lzo1c_d2.obj & -+lzo1c_rr.obj & -+lzo1c_xx.obj & -+lzo1f_1.obj & -+lzo1f_9x.obj & -+lzo1f_d1.obj & -+lzo1f_d2.obj & -+lzo1x_1.obj & -+lzo1x_1k.obj & -+lzo1x_1l.obj & -+lzo1x_1o.obj & -+lzo1x_9x.obj & -+lzo1x_d1.obj & -+lzo1x_d2.obj & -+lzo1x_d3.obj & -+lzo1x_o.obj & -+lzo1y_1.obj & -+lzo1y_9x.obj & -+lzo1y_d1.obj & -+lzo1y_d2.obj & -+lzo1y_d3.obj & -+lzo1y_o.obj & -+lzo1z_9x.obj & -+lzo1z_d1.obj & -+lzo1z_d2.obj & -+lzo1z_d3.obj & -+lzo2a_9x.obj & -+lzo2a_d1.obj & -+lzo2a_d2.obj & -+lzo_crc.obj & -+lzo_init.obj & -+lzo_ptr.obj & -+lzo_str.obj & -+lzo_util.obj & -+asm\i386\obj\omf32\lzo1c_s1.obj & -+asm\i386\obj\omf32\lzo1f_f1.obj & -+asm\i386\obj\omf32\lzo1x_f1.obj & -+asm\i386\obj\omf32\lzo1x_s1.obj & -+asm\i386\obj\omf32\lzo1y_f1.obj & -+asm\i386\obj\omf32\lzo1y_s1.obj ++lzo1.obj & ++lzo1_99.obj & ++lzo1a.obj & ++lzo1a_99.obj & ++lzo1b_1.obj & ++lzo1b_2.obj & ++lzo1b_3.obj & ++lzo1b_4.obj & ++lzo1b_5.obj & ++lzo1b_6.obj & ++lzo1b_7.obj & ++lzo1b_8.obj & ++lzo1b_9.obj & ++lzo1b_99.obj & ++lzo1b_9x.obj & ++lzo1b_cc.obj & ++lzo1b_d1.obj & ++lzo1b_d2.obj & ++lzo1b_rr.obj & ++lzo1b_xx.obj & ++lzo1c_1.obj & ++lzo1c_2.obj & ++lzo1c_3.obj & ++lzo1c_4.obj & ++lzo1c_5.obj & ++lzo1c_6.obj & ++lzo1c_7.obj & ++lzo1c_8.obj & ++lzo1c_9.obj & ++lzo1c_99.obj & ++lzo1c_9x.obj & ++lzo1c_cc.obj & ++lzo1c_d1.obj & ++lzo1c_d2.obj & ++lzo1c_rr.obj & ++lzo1c_xx.obj & ++lzo1f_1.obj & ++lzo1f_9x.obj & ++lzo1f_d1.obj & ++lzo1f_d2.obj & ++lzo1x_1.obj & ++lzo1x_1k.obj & ++lzo1x_1l.obj & ++lzo1x_1o.obj & ++lzo1x_9x.obj & ++lzo1x_d1.obj & ++lzo1x_d2.obj & ++lzo1x_d3.obj & ++lzo1x_o.obj & ++lzo1y_1.obj & ++lzo1y_9x.obj & ++lzo1y_d1.obj & ++lzo1y_d2.obj & ++lzo1y_d3.obj & ++lzo1y_o.obj & ++lzo1z_9x.obj & ++lzo1z_d1.obj & ++lzo1z_d2.obj & ++lzo1z_d3.obj & ++lzo2a_9x.obj & ++lzo2a_d1.obj & ++lzo2a_d2.obj & ++lzo_crc.obj & ++lzo_init.obj & ++lzo_ptr.obj & ++lzo_str.obj & ++lzo_util.obj & ++asm\i386\obj\omf32\lzo1c_s1.obj & ++asm\i386\obj\omf32\lzo1f_f1.obj & ++asm\i386\obj\omf32\lzo1x_f1.obj & ++asm\i386\obj\omf32\lzo1x_s1.obj & ++asm\i386\obj\omf32\lzo1y_f1.obj & ++asm\i386\obj\omf32\lzo1y_s1.obj diff --git a/app/lzo/B/win32/cygwin.bat b/app/lzo/B/win32/cygwin.bat index 35baf058..57533e0e 100644 --- a/app/lzo/B/win32/cygwin.bat +++ b/app/lzo/B/win32/cygwin.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // cygwin + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=lib%BNAME%.a -set CC=gcc -set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% -set LF=%BLIB% -lwinmm -s - -%CC% %CF% -c src/*.c -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% @b/win32/cygwin.rsp -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // cygwin + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=lib%BNAME%.a +set CC=gcc +set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% +set LF=%BLIB% -lwinmm -s + +%CC% %CF% -c src/*.c +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% @b/win32/cygwin.rsp +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/cygwin.rsp b/app/lzo/B/win32/cygwin.rsp index d6b3da0b..62eab38c 100644 --- a/app/lzo/B/win32/cygwin.rsp +++ b/app/lzo/B/win32/cygwin.rsp @@ -1,73 +1,73 @@ -lzo1.o -lzo1_99.o -lzo1a.o -lzo1a_99.o -lzo1b_1.o -lzo1b_2.o -lzo1b_3.o -lzo1b_4.o -lzo1b_5.o -lzo1b_6.o -lzo1b_7.o -lzo1b_8.o -lzo1b_9.o -lzo1b_99.o -lzo1b_9x.o -lzo1b_cc.o -lzo1b_d1.o -lzo1b_d2.o -lzo1b_rr.o -lzo1b_xx.o -lzo1c_1.o -lzo1c_2.o -lzo1c_3.o -lzo1c_4.o -lzo1c_5.o -lzo1c_6.o -lzo1c_7.o -lzo1c_8.o -lzo1c_9.o -lzo1c_99.o -lzo1c_9x.o -lzo1c_cc.o -lzo1c_d1.o -lzo1c_d2.o -lzo1c_rr.o -lzo1c_xx.o -lzo1f_1.o -lzo1f_9x.o -lzo1f_d1.o -lzo1f_d2.o -lzo1x_1.o -lzo1x_1k.o -lzo1x_1l.o -lzo1x_1o.o -lzo1x_9x.o -lzo1x_d1.o -lzo1x_d2.o -lzo1x_d3.o -lzo1x_o.o -lzo1y_1.o -lzo1y_9x.o -lzo1y_d1.o -lzo1y_d2.o -lzo1y_d3.o -lzo1y_o.o -lzo1z_9x.o -lzo1z_d1.o -lzo1z_d2.o -lzo1z_d3.o -lzo2a_9x.o -lzo2a_d1.o -lzo2a_d2.o -lzo_crc.o -lzo_init.o -lzo_ptr.o -lzo_str.o -lzo_util.o -lzo1c_s1.o -lzo1f_f1.o -lzo1x_f1.o -lzo1x_s1.o -lzo1y_f1.o -lzo1y_s1.o +lzo1.o +lzo1_99.o +lzo1a.o +lzo1a_99.o +lzo1b_1.o +lzo1b_2.o +lzo1b_3.o +lzo1b_4.o +lzo1b_5.o +lzo1b_6.o +lzo1b_7.o +lzo1b_8.o +lzo1b_9.o +lzo1b_99.o +lzo1b_9x.o +lzo1b_cc.o +lzo1b_d1.o +lzo1b_d2.o +lzo1b_rr.o +lzo1b_xx.o +lzo1c_1.o +lzo1c_2.o +lzo1c_3.o +lzo1c_4.o +lzo1c_5.o +lzo1c_6.o +lzo1c_7.o +lzo1c_8.o +lzo1c_9.o +lzo1c_99.o +lzo1c_9x.o +lzo1c_cc.o +lzo1c_d1.o +lzo1c_d2.o +lzo1c_rr.o +lzo1c_xx.o +lzo1f_1.o +lzo1f_9x.o +lzo1f_d1.o +lzo1f_d2.o +lzo1x_1.o +lzo1x_1k.o +lzo1x_1l.o +lzo1x_1o.o +lzo1x_9x.o +lzo1x_d1.o +lzo1x_d2.o +lzo1x_d3.o +lzo1x_o.o +lzo1y_1.o +lzo1y_9x.o +lzo1y_d1.o +lzo1y_d2.o +lzo1y_d3.o +lzo1y_o.o +lzo1z_9x.o +lzo1z_d1.o +lzo1z_d2.o +lzo1z_d3.o +lzo2a_9x.o +lzo2a_d1.o +lzo2a_d2.o +lzo_crc.o +lzo_init.o +lzo_ptr.o +lzo_str.o +lzo_util.o +lzo1c_s1.o +lzo1f_f1.o +lzo1x_f1.o +lzo1x_s1.o +lzo1y_f1.o +lzo1y_s1.o diff --git a/app/lzo/B/win32/dm.bat b/app/lzo/B/win32/dm.bat index 3e152a6a..5c65dfe4 100644 --- a/app/lzo/B/win32/dm.bat +++ b/app/lzo/B/win32/dm.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Digital Mars C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=dmc -mn -set CF=-o -w- %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Digital Mars C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=dmc -mn +set CF=-o -w- %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/ic.bat b/app/lzo/B/win32/ic.bat index 9fe53821..f9f6f5a5 100644 --- a/app/lzo/B/win32/ic.bat +++ b/app/lzo/B/win32/ic.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Intel C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=icl -nologo -MT -set CF=-O2 -GF -W3 -Qvec-report0 %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -link -lib -nologo -out:%BLIB% @b\win32\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Intel C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=icl -nologo -MT +set CF=-O2 -GF -W3 -Qvec-report0 %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +link -lib -nologo -out:%BLIB% @b\win32\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/lccwin32.bat b/app/lzo/B/win32/lccwin32.bat index 1b5265f0..e01ee98a 100644 --- a/app/lzo/B/win32/lccwin32.bat +++ b/app/lzo/B/win32/lccwin32.bat @@ -1,59 +1,59 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // lcc-win32 -@echo // -@echo // NOTE: some lcc-win32 versions are buggy, so we disable optimizations -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=lcc -set CF=-O -A %CFI% -Iinclude\lzo %CFASM% -set CF=-A %CFI% -Iinclude\lzo %CFASM% -set LF=%BLIB% winmm.lib - -for %%f in (src\*.c) do %CC% %CF% -c %%f -@if errorlevel 1 goto error -lcclib /out:%BLIB% @b\win32\vc.rsp -@if errorlevel 1 goto error - -%CC% -c %CF% examples\dict.c -@if errorlevel 1 goto error -lc dict.obj %LF% -@if errorlevel 1 goto error -%CC% -c %CF% examples\lzopack.c -@if errorlevel 1 goto error -lc lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% -c %CF% examples\precomp.c -@if errorlevel 1 goto error -lc precomp.obj %LF% -@if errorlevel 1 goto error -%CC% -c %CF% examples\precomp2.c -@if errorlevel 1 goto error -lc precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% -c %CF% examples\simple.c -@if errorlevel 1 goto error -lc simple.obj %LF% -@if errorlevel 1 goto error - -%CC% -c %CF% lzotest\lzotest.c -@if errorlevel 1 goto error -lc lzotest.obj %LF% -@if errorlevel 1 goto error - -%CC% -c %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error -lc testmini.obj minilzo.obj -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // lcc-win32 +@echo // +@echo // NOTE: some lcc-win32 versions are buggy, so we disable optimizations +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=lcc +set CF=-O -A %CFI% -Iinclude\lzo %CFASM% +set CF=-A %CFI% -Iinclude\lzo %CFASM% +set LF=%BLIB% winmm.lib + +for %%f in (src\*.c) do %CC% %CF% -c %%f +@if errorlevel 1 goto error +lcclib /out:%BLIB% @b\win32\vc.rsp +@if errorlevel 1 goto error + +%CC% -c %CF% examples\dict.c +@if errorlevel 1 goto error +lc dict.obj %LF% +@if errorlevel 1 goto error +%CC% -c %CF% examples\lzopack.c +@if errorlevel 1 goto error +lc lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% -c %CF% examples\precomp.c +@if errorlevel 1 goto error +lc precomp.obj %LF% +@if errorlevel 1 goto error +%CC% -c %CF% examples\precomp2.c +@if errorlevel 1 goto error +lc precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% -c %CF% examples\simple.c +@if errorlevel 1 goto error +lc simple.obj %LF% +@if errorlevel 1 goto error + +%CC% -c %CF% lzotest\lzotest.c +@if errorlevel 1 goto error +lc lzotest.obj %LF% +@if errorlevel 1 goto error + +%CC% -c %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error +lc testmini.obj minilzo.obj +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/mingw.bat b/app/lzo/B/win32/mingw.bat index 3bba6e07..81830ae6 100644 --- a/app/lzo/B/win32/mingw.bat +++ b/app/lzo/B/win32/mingw.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // MinGW + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=lib%BNAME%.a -set CC=gcc -set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% -set LF=%BLIB% -lwinmm -s - -%CC% %CF% -c src/*.c -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% *.o -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // MinGW + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=lib%BNAME%.a +set CC=gcc +set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% +set LF=%BLIB% -lwinmm -s + +%CC% %CF% -c src/*.c +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% *.o +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/mwerks.bat b/app/lzo/B/win32/mwerks.bat index 3cc27428..8aad1488 100644 --- a/app/lzo/B/win32/mwerks.bat +++ b/app/lzo/B/win32/mwerks.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Metrowerks CodeWarrior C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=mwcc -gccinc -set CF=-opt full %CFI% %CFASM% -set LF=%BLIB% -lwinmm.lib - -%CC% -w on %CF% -w nounusedexpr -c @b\src.rsp -@if errorlevel 1 goto error -mwld -library -o %BLIB% @b\win32\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Metrowerks CodeWarrior C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=mwcc -gccinc +set CF=-opt full %CFI% %CFASM% +set LF=%BLIB% -lwinmm.lib + +%CC% -w on %CF% -w nounusedexpr -c @b\src.rsp +@if errorlevel 1 goto error +mwld -library -o %BLIB% @b\win32\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/pellesc.bat b/app/lzo/B/win32/pellesc.bat index 5e92ab94..7123f1b1 100644 --- a/app/lzo/B/win32/pellesc.bat +++ b/app/lzo/B/win32/pellesc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Pelles C -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cc -Ze -Go -set CF=-O2 -W2 %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -polib -out:%BLIB% @b\win32\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Pelles C +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cc -Ze -Go +set CF=-O2 -W2 %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +polib -out:%BLIB% @b\win32\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/pgi.bat b/app/lzo/B/win32/pgi.bat index 079a7142..82e17727 100644 --- a/app/lzo/B/win32/pgi.bat +++ b/app/lzo/B/win32/pgi.bat @@ -1,43 +1,43 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Portland Group PGI C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=lib%BNAME%.a -set CC=pgcc -set CF=-fast %CFI% %CFASM% -set LF=%BLIB% -lwinmm - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -ar rcs %BLIB% *.o asm/i386/obj/win32/*.obj -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Portland Group PGI C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=lib%BNAME%.a +set CC=pgcc +set CF=-fast %CFI% %CFASM% +set LF=%BLIB% -lwinmm + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +ar rcs %BLIB% *.o asm/i386/obj/win32/*.obj +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/pw32.bat b/app/lzo/B/win32/pw32.bat index a156242f..583ca493 100644 --- a/app/lzo/B/win32/pw32.bat +++ b/app/lzo/B/win32/pw32.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // PW32 + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=lib%BNAME%.a -set CC=gcc -set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% -set LF=%BLIB% -lwinmm -s - -%CC% %CF% -c src/*.c -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% *.o -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // PW32 + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=lib%BNAME%.a +set CC=gcc +set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% +set LF=%BLIB% -lwinmm -s + +%CC% %CF% -c src/*.c +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% *.o +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/rsxnt.bat b/app/lzo/B/win32/rsxnt.bat index 1b9d87e6..3077f6f4 100644 --- a/app/lzo/B/win32/rsxnt.bat +++ b/app/lzo/B/win32/rsxnt.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // rsxnt + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=%BNAME%.a -set CC=gcc -Zwin32 -Zsys -mprobe -set CF=@b/dos32/dj2.opt %CFI% %CFASM% -set LF=%BLIB% -s - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% @b/win32/cygwin.rsp -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // rsxnt + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=%BNAME%.a +set CC=gcc -Zwin32 -Zsys -mprobe +set CF=@b/dos32/dj2.opt %CFI% %CFASM% +set LF=%BLIB% -s + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% @b/win32/cygwin.rsp +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/sc.bat b/app/lzo/B/win32/sc.bat index 38edbc7f..b1f5c463 100644 --- a/app/lzo/B/win32/sc.bat +++ b/app/lzo/B/win32/sc.bat @@ -1,56 +1,56 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Symantec C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=sc -mn -set CF=-o -w- %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo -c minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error -%CC% testmini.obj minilzo.obj -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Symantec C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=sc -mn +set CF=-o -w- %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo -c minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error +%CC% testmini.obj minilzo.obj +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/vc.bat b/app/lzo/B/win32/vc.bat index db278b58..0b7d1f65 100644 --- a/app/lzo/B/win32/vc.bat +++ b/app/lzo/B/win32/vc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Microsoft Visual C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -MT -set CF=-O2 -GF -W3 %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -link -lib -nologo -out:%BLIB% @b\win32\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Microsoft Visual C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -MT +set CF=-O2 -GF -W3 %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +link -lib -nologo -out:%BLIB% @b\win32\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/vc.rsp b/app/lzo/B/win32/vc.rsp index 1cf09fe2..690bec1c 100644 --- a/app/lzo/B/win32/vc.rsp +++ b/app/lzo/B/win32/vc.rsp @@ -1,73 +1,73 @@ -.\lzo1.obj -.\lzo1_99.obj -.\lzo1a.obj -.\lzo1a_99.obj -.\lzo1b_1.obj -.\lzo1b_2.obj -.\lzo1b_3.obj -.\lzo1b_4.obj -.\lzo1b_5.obj -.\lzo1b_6.obj -.\lzo1b_7.obj -.\lzo1b_8.obj -.\lzo1b_9.obj -.\lzo1b_99.obj -.\lzo1b_9x.obj -.\lzo1b_cc.obj -.\lzo1b_d1.obj -.\lzo1b_d2.obj -.\lzo1b_rr.obj -.\lzo1b_xx.obj -.\lzo1c_1.obj -.\lzo1c_2.obj -.\lzo1c_3.obj -.\lzo1c_4.obj -.\lzo1c_5.obj -.\lzo1c_6.obj -.\lzo1c_7.obj -.\lzo1c_8.obj -.\lzo1c_9.obj -.\lzo1c_99.obj -.\lzo1c_9x.obj -.\lzo1c_cc.obj -.\lzo1c_d1.obj -.\lzo1c_d2.obj -.\lzo1c_rr.obj -.\lzo1c_xx.obj -.\lzo1f_1.obj -.\lzo1f_9x.obj -.\lzo1f_d1.obj -.\lzo1f_d2.obj -.\lzo1x_1.obj -.\lzo1x_1k.obj -.\lzo1x_1l.obj -.\lzo1x_1o.obj -.\lzo1x_9x.obj -.\lzo1x_d1.obj -.\lzo1x_d2.obj -.\lzo1x_d3.obj -.\lzo1x_o.obj -.\lzo1y_1.obj -.\lzo1y_9x.obj -.\lzo1y_d1.obj -.\lzo1y_d2.obj -.\lzo1y_d3.obj -.\lzo1y_o.obj -.\lzo1z_9x.obj -.\lzo1z_d1.obj -.\lzo1z_d2.obj -.\lzo1z_d3.obj -.\lzo2a_9x.obj -.\lzo2a_d1.obj -.\lzo2a_d2.obj -.\lzo_crc.obj -.\lzo_init.obj -.\lzo_ptr.obj -.\lzo_str.obj -.\lzo_util.obj -.\asm\i386\obj\win32\lzo1c_s1.obj -.\asm\i386\obj\win32\lzo1f_f1.obj -.\asm\i386\obj\win32\lzo1x_f1.obj -.\asm\i386\obj\win32\lzo1x_s1.obj -.\asm\i386\obj\win32\lzo1y_f1.obj -.\asm\i386\obj\win32\lzo1y_s1.obj +.\lzo1.obj +.\lzo1_99.obj +.\lzo1a.obj +.\lzo1a_99.obj +.\lzo1b_1.obj +.\lzo1b_2.obj +.\lzo1b_3.obj +.\lzo1b_4.obj +.\lzo1b_5.obj +.\lzo1b_6.obj +.\lzo1b_7.obj +.\lzo1b_8.obj +.\lzo1b_9.obj +.\lzo1b_99.obj +.\lzo1b_9x.obj +.\lzo1b_cc.obj +.\lzo1b_d1.obj +.\lzo1b_d2.obj +.\lzo1b_rr.obj +.\lzo1b_xx.obj +.\lzo1c_1.obj +.\lzo1c_2.obj +.\lzo1c_3.obj +.\lzo1c_4.obj +.\lzo1c_5.obj +.\lzo1c_6.obj +.\lzo1c_7.obj +.\lzo1c_8.obj +.\lzo1c_9.obj +.\lzo1c_99.obj +.\lzo1c_9x.obj +.\lzo1c_cc.obj +.\lzo1c_d1.obj +.\lzo1c_d2.obj +.\lzo1c_rr.obj +.\lzo1c_xx.obj +.\lzo1f_1.obj +.\lzo1f_9x.obj +.\lzo1f_d1.obj +.\lzo1f_d2.obj +.\lzo1x_1.obj +.\lzo1x_1k.obj +.\lzo1x_1l.obj +.\lzo1x_1o.obj +.\lzo1x_9x.obj +.\lzo1x_d1.obj +.\lzo1x_d2.obj +.\lzo1x_d3.obj +.\lzo1x_o.obj +.\lzo1y_1.obj +.\lzo1y_9x.obj +.\lzo1y_d1.obj +.\lzo1y_d2.obj +.\lzo1y_d3.obj +.\lzo1y_o.obj +.\lzo1z_9x.obj +.\lzo1z_d1.obj +.\lzo1z_d2.obj +.\lzo1z_d3.obj +.\lzo2a_9x.obj +.\lzo2a_d1.obj +.\lzo2a_d2.obj +.\lzo_crc.obj +.\lzo_init.obj +.\lzo_ptr.obj +.\lzo_str.obj +.\lzo_util.obj +.\asm\i386\obj\win32\lzo1c_s1.obj +.\asm\i386\obj\win32\lzo1f_f1.obj +.\asm\i386\obj\win32\lzo1x_f1.obj +.\asm\i386\obj\win32\lzo1x_s1.obj +.\asm\i386\obj\win32\lzo1y_f1.obj +.\asm\i386\obj\win32\lzo1y_s1.obj diff --git a/app/lzo/B/win32/vc_dll.bat b/app/lzo/B/win32/vc_dll.bat index c6ba0863..72788c0a 100644 --- a/app/lzo/B/win32/vc_dll.bat +++ b/app/lzo/B/win32/vc_dll.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Microsoft Visual C/C++ (DLL) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -MT -set CF=-O2 -GF -W3 %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -LD -Fe%BDLL% @b\win32\vc.rsp /link /map /def:b\win32\vc_dll.def -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Microsoft Visual C/C++ (DLL) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -MT +set CF=-O2 -GF -W3 %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -LD -Fe%BDLL% @b\win32\vc.rsp /link /map /def:b\win32\vc_dll.def +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/vc_dll.def b/app/lzo/B/win32/vc_dll.def index a5abd35a..215fb86d 100644 --- a/app/lzo/B/win32/vc_dll.def +++ b/app/lzo/B/win32/vc_dll.def @@ -1,7 +1,7 @@ -EXPORTS - _lzo1c_decompress_asm=lzo1c_decompress_asm - _lzo1f_decompress_asm_fast=lzo1f_decompress_asm_fast - _lzo1x_decompress_asm=lzo1x_decompress_asm - _lzo1x_decompress_asm_fast=lzo1x_decompress_asm_fast - _lzo1y_decompress_asm=lzo1y_decompress_asm - _lzo1y_decompress_asm_fast=lzo1y_decompress_asm_fast +EXPORTS + _lzo1c_decompress_asm=lzo1c_decompress_asm + _lzo1f_decompress_asm_fast=lzo1f_decompress_asm_fast + _lzo1x_decompress_asm=lzo1x_decompress_asm + _lzo1x_decompress_asm_fast=lzo1x_decompress_asm_fast + _lzo1y_decompress_asm=lzo1y_decompress_asm + _lzo1y_decompress_asm_fast=lzo1y_decompress_asm_fast diff --git a/app/lzo/B/win32/wc.bat b/app/lzo/B/win32/wc.bat index ab9f5f44..64e34a23 100644 --- a/app/lzo/B/win32/wc.bat +++ b/app/lzo/B/win32/wc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Watcom C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl386 -zq -mf -5r -bt#nt -l#nt -set CF=-ox -zc %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\win32\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Watcom C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl386 -zq -mf -5r -bt#nt -l#nt +set CF=-ox -zc %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\win32\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/wc.rsp b/app/lzo/B/win32/wc.rsp index 12760d98..503667ba 100644 --- a/app/lzo/B/win32/wc.rsp +++ b/app/lzo/B/win32/wc.rsp @@ -1,73 +1,73 @@ -+'lzo1.obj' -+'lzo1_99.obj' -+'lzo1a.obj' -+'lzo1a_99.obj' -+'lzo1b_1.obj' -+'lzo1b_2.obj' -+'lzo1b_3.obj' -+'lzo1b_4.obj' -+'lzo1b_5.obj' -+'lzo1b_6.obj' -+'lzo1b_7.obj' -+'lzo1b_8.obj' -+'lzo1b_9.obj' -+'lzo1b_99.obj' -+'lzo1b_9x.obj' -+'lzo1b_cc.obj' -+'lzo1b_d1.obj' -+'lzo1b_d2.obj' -+'lzo1b_rr.obj' -+'lzo1b_xx.obj' -+'lzo1c_1.obj' -+'lzo1c_2.obj' -+'lzo1c_3.obj' -+'lzo1c_4.obj' -+'lzo1c_5.obj' -+'lzo1c_6.obj' -+'lzo1c_7.obj' -+'lzo1c_8.obj' -+'lzo1c_9.obj' -+'lzo1c_99.obj' -+'lzo1c_9x.obj' -+'lzo1c_cc.obj' -+'lzo1c_d1.obj' -+'lzo1c_d2.obj' -+'lzo1c_rr.obj' -+'lzo1c_xx.obj' -+'lzo1f_1.obj' -+'lzo1f_9x.obj' -+'lzo1f_d1.obj' -+'lzo1f_d2.obj' -+'lzo1x_1.obj' -+'lzo1x_1k.obj' -+'lzo1x_1l.obj' -+'lzo1x_1o.obj' -+'lzo1x_9x.obj' -+'lzo1x_d1.obj' -+'lzo1x_d2.obj' -+'lzo1x_d3.obj' -+'lzo1x_o.obj' -+'lzo1y_1.obj' -+'lzo1y_9x.obj' -+'lzo1y_d1.obj' -+'lzo1y_d2.obj' -+'lzo1y_d3.obj' -+'lzo1y_o.obj' -+'lzo1z_9x.obj' -+'lzo1z_d1.obj' -+'lzo1z_d2.obj' -+'lzo1z_d3.obj' -+'lzo2a_9x.obj' -+'lzo2a_d1.obj' -+'lzo2a_d2.obj' -+'lzo_crc.obj' -+'lzo_init.obj' -+'lzo_ptr.obj' -+'lzo_str.obj' -+'lzo_util.obj' -+'asm\i386\obj\omf32\lzo1c_s1.obj' -+'asm\i386\obj\omf32\lzo1f_f1.obj' -+'asm\i386\obj\omf32\lzo1x_f1.obj' -+'asm\i386\obj\omf32\lzo1x_s1.obj' -+'asm\i386\obj\omf32\lzo1y_f1.obj' -+'asm\i386\obj\omf32\lzo1y_s1.obj' ++'lzo1.obj' ++'lzo1_99.obj' ++'lzo1a.obj' ++'lzo1a_99.obj' ++'lzo1b_1.obj' ++'lzo1b_2.obj' ++'lzo1b_3.obj' ++'lzo1b_4.obj' ++'lzo1b_5.obj' ++'lzo1b_6.obj' ++'lzo1b_7.obj' ++'lzo1b_8.obj' ++'lzo1b_9.obj' ++'lzo1b_99.obj' ++'lzo1b_9x.obj' ++'lzo1b_cc.obj' ++'lzo1b_d1.obj' ++'lzo1b_d2.obj' ++'lzo1b_rr.obj' ++'lzo1b_xx.obj' ++'lzo1c_1.obj' ++'lzo1c_2.obj' ++'lzo1c_3.obj' ++'lzo1c_4.obj' ++'lzo1c_5.obj' ++'lzo1c_6.obj' ++'lzo1c_7.obj' ++'lzo1c_8.obj' ++'lzo1c_9.obj' ++'lzo1c_99.obj' ++'lzo1c_9x.obj' ++'lzo1c_cc.obj' ++'lzo1c_d1.obj' ++'lzo1c_d2.obj' ++'lzo1c_rr.obj' ++'lzo1c_xx.obj' ++'lzo1f_1.obj' ++'lzo1f_9x.obj' ++'lzo1f_d1.obj' ++'lzo1f_d2.obj' ++'lzo1x_1.obj' ++'lzo1x_1k.obj' ++'lzo1x_1l.obj' ++'lzo1x_1o.obj' ++'lzo1x_9x.obj' ++'lzo1x_d1.obj' ++'lzo1x_d2.obj' ++'lzo1x_d3.obj' ++'lzo1x_o.obj' ++'lzo1y_1.obj' ++'lzo1y_9x.obj' ++'lzo1y_d1.obj' ++'lzo1y_d2.obj' ++'lzo1y_d3.obj' ++'lzo1y_o.obj' ++'lzo1z_9x.obj' ++'lzo1z_d1.obj' ++'lzo1z_d2.obj' ++'lzo1z_d3.obj' ++'lzo2a_9x.obj' ++'lzo2a_d1.obj' ++'lzo2a_d2.obj' ++'lzo_crc.obj' ++'lzo_init.obj' ++'lzo_ptr.obj' ++'lzo_str.obj' ++'lzo_util.obj' ++'asm\i386\obj\omf32\lzo1c_s1.obj' ++'asm\i386\obj\omf32\lzo1f_f1.obj' ++'asm\i386\obj\omf32\lzo1x_f1.obj' ++'asm\i386\obj\omf32\lzo1x_s1.obj' ++'asm\i386\obj\omf32\lzo1y_f1.obj' ++'asm\i386\obj\omf32\lzo1y_s1.obj' diff --git a/app/lzo/B/win64/ic.bat b/app/lzo/B/win64/ic.bat index cdc9762f..154d5e4d 100644 --- a/app/lzo/B/win64/ic.bat +++ b/app/lzo/B/win64/ic.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 64-bit (Itanium) -@echo // Intel C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=icl -nologo -MT -set CF=-O2 -GF -W3 -Qvec-report0 %CFI% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -link -lib -nologo -out:%BLIB% @b\win64\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 64-bit (Itanium) +@echo // Intel C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=icl -nologo -MT +set CF=-O2 -GF -W3 -Qvec-report0 %CFI% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +link -lib -nologo -out:%BLIB% @b\win64\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win64/ic_dll.bat b/app/lzo/B/win64/ic_dll.bat index ec806084..620fadcc 100644 --- a/app/lzo/B/win64/ic_dll.bat +++ b/app/lzo/B/win64/ic_dll.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 64-bit (Itanium) -@echo // Intel C/C++ (DLL) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=icl -nologo -MT -set CF=-O2 -GF -W3 %CFI% -set LF=%BLIB% - -%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -LD -Fe%BDLL% @b\win64\vc.rsp /link /map /def:b\win64\vc_dll.def -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 64-bit (Itanium) +@echo // Intel C/C++ (DLL) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=icl -nologo -MT +set CF=-O2 -GF -W3 %CFI% +set LF=%BLIB% + +%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -LD -Fe%BDLL% @b\win64\vc.rsp /link /map /def:b\win64\vc_dll.def +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win64/vc.bat b/app/lzo/B/win64/vc.bat index 03694ca9..1aada926 100644 --- a/app/lzo/B/win64/vc.bat +++ b/app/lzo/B/win64/vc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 64-bit (AMD64 or Itanium) -@echo // Microsoft Visual C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -MT -set CF=-O2 -GF -W3 %CFI% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -link -lib -nologo -out:%BLIB% @b\win64\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 64-bit (AMD64 or Itanium) +@echo // Microsoft Visual C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -MT +set CF=-O2 -GF -W3 %CFI% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +link -lib -nologo -out:%BLIB% @b\win64\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win64/vc.rsp b/app/lzo/B/win64/vc.rsp index da684c94..182de60c 100644 --- a/app/lzo/B/win64/vc.rsp +++ b/app/lzo/B/win64/vc.rsp @@ -1,67 +1,67 @@ -.\lzo1.obj -.\lzo1_99.obj -.\lzo1a.obj -.\lzo1a_99.obj -.\lzo1b_1.obj -.\lzo1b_2.obj -.\lzo1b_3.obj -.\lzo1b_4.obj -.\lzo1b_5.obj -.\lzo1b_6.obj -.\lzo1b_7.obj -.\lzo1b_8.obj -.\lzo1b_9.obj -.\lzo1b_99.obj -.\lzo1b_9x.obj -.\lzo1b_cc.obj -.\lzo1b_d1.obj -.\lzo1b_d2.obj -.\lzo1b_rr.obj -.\lzo1b_xx.obj -.\lzo1c_1.obj -.\lzo1c_2.obj -.\lzo1c_3.obj -.\lzo1c_4.obj -.\lzo1c_5.obj -.\lzo1c_6.obj -.\lzo1c_7.obj -.\lzo1c_8.obj -.\lzo1c_9.obj -.\lzo1c_99.obj -.\lzo1c_9x.obj -.\lzo1c_cc.obj -.\lzo1c_d1.obj -.\lzo1c_d2.obj -.\lzo1c_rr.obj -.\lzo1c_xx.obj -.\lzo1f_1.obj -.\lzo1f_9x.obj -.\lzo1f_d1.obj -.\lzo1f_d2.obj -.\lzo1x_1.obj -.\lzo1x_1k.obj -.\lzo1x_1l.obj -.\lzo1x_1o.obj -.\lzo1x_9x.obj -.\lzo1x_d1.obj -.\lzo1x_d2.obj -.\lzo1x_d3.obj -.\lzo1x_o.obj -.\lzo1y_1.obj -.\lzo1y_9x.obj -.\lzo1y_d1.obj -.\lzo1y_d2.obj -.\lzo1y_d3.obj -.\lzo1y_o.obj -.\lzo1z_9x.obj -.\lzo1z_d1.obj -.\lzo1z_d2.obj -.\lzo1z_d3.obj -.\lzo2a_9x.obj -.\lzo2a_d1.obj -.\lzo2a_d2.obj -.\lzo_crc.obj -.\lzo_init.obj -.\lzo_ptr.obj -.\lzo_str.obj -.\lzo_util.obj +.\lzo1.obj +.\lzo1_99.obj +.\lzo1a.obj +.\lzo1a_99.obj +.\lzo1b_1.obj +.\lzo1b_2.obj +.\lzo1b_3.obj +.\lzo1b_4.obj +.\lzo1b_5.obj +.\lzo1b_6.obj +.\lzo1b_7.obj +.\lzo1b_8.obj +.\lzo1b_9.obj +.\lzo1b_99.obj +.\lzo1b_9x.obj +.\lzo1b_cc.obj +.\lzo1b_d1.obj +.\lzo1b_d2.obj +.\lzo1b_rr.obj +.\lzo1b_xx.obj +.\lzo1c_1.obj +.\lzo1c_2.obj +.\lzo1c_3.obj +.\lzo1c_4.obj +.\lzo1c_5.obj +.\lzo1c_6.obj +.\lzo1c_7.obj +.\lzo1c_8.obj +.\lzo1c_9.obj +.\lzo1c_99.obj +.\lzo1c_9x.obj +.\lzo1c_cc.obj +.\lzo1c_d1.obj +.\lzo1c_d2.obj +.\lzo1c_rr.obj +.\lzo1c_xx.obj +.\lzo1f_1.obj +.\lzo1f_9x.obj +.\lzo1f_d1.obj +.\lzo1f_d2.obj +.\lzo1x_1.obj +.\lzo1x_1k.obj +.\lzo1x_1l.obj +.\lzo1x_1o.obj +.\lzo1x_9x.obj +.\lzo1x_d1.obj +.\lzo1x_d2.obj +.\lzo1x_d3.obj +.\lzo1x_o.obj +.\lzo1y_1.obj +.\lzo1y_9x.obj +.\lzo1y_d1.obj +.\lzo1y_d2.obj +.\lzo1y_d3.obj +.\lzo1y_o.obj +.\lzo1z_9x.obj +.\lzo1z_d1.obj +.\lzo1z_d2.obj +.\lzo1z_d3.obj +.\lzo2a_9x.obj +.\lzo2a_d1.obj +.\lzo2a_d2.obj +.\lzo_crc.obj +.\lzo_init.obj +.\lzo_ptr.obj +.\lzo_str.obj +.\lzo_util.obj diff --git a/app/lzo/B/win64/vc_dll.bat b/app/lzo/B/win64/vc_dll.bat index f21361a7..63ea5fc8 100644 --- a/app/lzo/B/win64/vc_dll.bat +++ b/app/lzo/B/win64/vc_dll.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 64-bit (AMD64 or Itanium) -@echo // Microsoft Visual C/C++ (DLL) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -MT -set CF=-O2 -GF -W3 %CFI% -set LF=%BLIB% - -%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -LD -Fe%BDLL% @b\win64\vc.rsp /link /map /def:b\win64\vc_dll.def -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 64-bit (AMD64 or Itanium) +@echo // Microsoft Visual C/C++ (DLL) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -MT +set CF=-O2 -GF -W3 %CFI% +set LF=%BLIB% + +%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -LD -Fe%BDLL% @b\win64\vc.rsp /link /map /def:b\win64\vc_dll.def +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win64/vc_dll.def b/app/lzo/B/win64/vc_dll.def index aabd2247..e80a637a 100644 --- a/app/lzo/B/win64/vc_dll.def +++ b/app/lzo/B/win64/vc_dll.def @@ -1 +1 @@ -EXPORTS +EXPORTS diff --git a/app/lzo/autoconf/shtool b/app/lzo/autoconf/shtool index 78c0b125..a1f4e7ee 100755 --- a/app/lzo/autoconf/shtool +++ b/app/lzo/autoconf/shtool @@ -3102,9 +3102,9 @@ platform ) sed -e 's/^://' \ -e 's;\[\([^]]*\)\];\1;g' \ -e 's;<\([^>]*\)>;\1;g' \ - -e "s; ;��;g" \ + -e "s; ;§§;g" \ -e "s;/;%%;g" \ - -e "s;��;${opt_S};g" \ + -e "s;§§;${opt_S};g" \ -e "s;%%;${opt_C};g"` eval "${var_uc}_V=\"\${val_V}\"" ;; @@ -3115,9 +3115,9 @@ platform ) sed -e 's/^://' \ -e 's;\[\([^]]*\)\];;g' \ -e 's;<\([^>]*\)>;\1;g' \ - -e "s; ;��;g" \ + -e "s; ;§§;g" \ -e "s;/;%%;g" \ - -e "s;��;${opt_S};g" \ + -e "s;§§;${opt_S};g" \ -e "s;%%;${opt_C};g"` eval "${var_uc}_N=\"\${val_N}\"" ;; @@ -3128,9 +3128,9 @@ platform ) sed -e 's/^://' \ -e 's;\[\([^]]*\)\];;g' \ -e 's;[^<]*<\([^>]*\)>[^<]*;\1;g' \ - -e "s; ;��;g" \ + -e "s; ;§§;g" \ -e "s;/;%%;g" \ - -e "s;��;${opt_S};g" \ + -e "s;§§;${opt_S};g" \ -e "s;%%;${opt_C};g"` eval "${var_uc}_C=\"\${val_C}\"" ;; diff --git a/app/openssl/Apps-config-host.mk b/app/openssl/Apps-config-host.mk index 37dcb78b..5c1604e0 100644 --- a/app/openssl/Apps-config-host.mk +++ b/app/openssl/Apps-config-host.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz # # This script will append to the following variables: # diff --git a/app/openssl/Apps-config-target.mk b/app/openssl/Apps-config-target.mk index bccd250d..0c567d4d 100644 --- a/app/openssl/Apps-config-target.mk +++ b/app/openssl/Apps-config-target.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz # # This script will append to the following variables: # diff --git a/app/openssl/Apps.mk b/app/openssl/Apps.mk index 3fb94dbe..b2d871c1 100644 --- a/app/openssl/Apps.mk +++ b/app/openssl/Apps.mk @@ -1,9 +1,12 @@ # Copyright 2006 The Android Open Source Project -LOCAL_PATH:= $(call my-dir) +LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) -LOCAL_MODULE:= openssl +LOCAL_MODULE := openssl +LOCAL_MULTILIB := both +LOCAL_MODULE_STEM_32 := openssl +LOCAL_MODULE_STEM_64 := openssl64 LOCAL_CLANG := true LOCAL_MODULE_TAGS := optional LOCAL_SHARED_LIBRARIES := libssl libcrypto @@ -13,7 +16,7 @@ LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/A include $(BUILD_EXECUTABLE) include $(CLEAR_VARS) -LOCAL_MODULE:= openssl +LOCAL_MODULE := openssl LOCAL_MODULE_TAGS := optional LOCAL_SHARED_LIBRARIES := libssl-host libcrypto-host include $(LOCAL_PATH)/Apps-config-host.mk diff --git a/app/openssl/Crypto-config-host.mk b/app/openssl/Crypto-config-host.mk index a377fec4..5b643792 100644 --- a/app/openssl/Crypto-config-host.mk +++ b/app/openssl/Crypto-config-host.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz # # This script will append to the following variables: # @@ -332,7 +332,6 @@ common_src_files := \ crypto/evp/m_md5.c \ crypto/evp/m_mdc2.c \ crypto/evp/m_null.c \ - crypto/evp/m_ripemd.c \ crypto/evp/m_sha1.c \ crypto/evp/m_sigver.c \ crypto/evp/m_wp.c \ @@ -438,8 +437,6 @@ common_src_files := \ crypto/rc4/rc4_enc.c \ crypto/rc4/rc4_skey.c \ crypto/rc4/rc4_utl.c \ - crypto/ripemd/rmd_dgst.c \ - crypto/ripemd/rmd_one.c \ crypto/rsa/rsa_ameth.c \ crypto/rsa/rsa_asn1.c \ crypto/rsa/rsa_chk.c \ @@ -546,6 +543,7 @@ common_c_includes := \ arm_cflags := \ -DAES_ASM \ -DBSAES_ASM \ + -DDES_UNROLL \ -DGHASH_ASM \ -DOPENSSL_BN_ASM_GF2m \ -DOPENSSL_BN_ASM_MONT \ @@ -556,12 +554,14 @@ arm_cflags := \ arm_src_files := \ crypto/aes/asm/aes-armv4.S \ + crypto/aes/asm/aesv8-armx.S \ crypto/aes/asm/bsaes-armv7.S \ crypto/armcap.c \ crypto/armv4cpuid.S \ crypto/bn/asm/armv4-gf2m.S \ crypto/bn/asm/armv4-mont.S \ crypto/modes/asm/ghash-armv4.S \ + crypto/modes/asm/ghashv8-armx.S \ crypto/sha/asm/sha1-armv4-large.S \ crypto/sha/asm/sha256-armv4.S \ crypto/sha/asm/sha512-armv4.S \ @@ -571,9 +571,20 @@ arm_exclude_files := \ crypto/mem_clr.c \ arm64_cflags := \ - -DOPENSSL_NO_ASM \ + -DDES_UNROLL \ + -DOPENSSL_CPUID_OBJ \ + -DSHA1_ASM \ + -DSHA256_ASM \ + -DSHA512_ASM \ -arm64_src_files := +arm64_src_files := \ + crypto/aes/asm/aesv8-armx-64.S \ + crypto/arm64cpuid.S \ + crypto/armcap.c \ + crypto/modes/asm/ghashv8-armx-64.S \ + crypto/sha/asm/sha1-armv8.S \ + crypto/sha/asm/sha256-armv8.S \ + crypto/sha/asm/sha512-armv8.S \ arm64_exclude_files := @@ -589,6 +600,8 @@ x86_cflags := \ -DOPENSSL_BN_ASM_PART_WORDS \ -DOPENSSL_CPUID_OBJ \ -DOPENSSL_IA32_SSE2 \ + -DRC4_INDEX \ + -DRMD160_ASM \ -DSHA1_ASM \ -DSHA256_ASM \ -DSHA512_ASM \ @@ -624,8 +637,6 @@ x86_exclude_files := \ x86_64_cflags := \ -DAES_ASM \ -DBSAES_ASM \ - -DDES_PTR \ - -DDES_RISC1 \ -DDES_UNROLL \ -DGHASH_ASM \ -DMD5_ASM \ @@ -633,6 +644,7 @@ x86_64_cflags := \ -DOPENSSL_BN_ASM_MONT \ -DOPENSSL_BN_ASM_MONT5 \ -DOPENSSL_CPUID_OBJ \ + -DOPENSSL_IA32_SSE2 \ -DSHA1_ASM \ -DSHA256_ASM \ -DSHA512_ASM \ diff --git a/app/openssl/Crypto-config-target.mk b/app/openssl/Crypto-config-target.mk index 2c5b01e5..bd29dfe5 100644 --- a/app/openssl/Crypto-config-target.mk +++ b/app/openssl/Crypto-config-target.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz # # This script will append to the following variables: # @@ -332,7 +332,6 @@ common_src_files := \ crypto/evp/m_md5.c \ crypto/evp/m_mdc2.c \ crypto/evp/m_null.c \ - crypto/evp/m_ripemd.c \ crypto/evp/m_sha1.c \ crypto/evp/m_sigver.c \ crypto/evp/m_wp.c \ @@ -438,8 +437,6 @@ common_src_files := \ crypto/rc4/rc4_enc.c \ crypto/rc4/rc4_skey.c \ crypto/rc4/rc4_utl.c \ - crypto/ripemd/rmd_dgst.c \ - crypto/ripemd/rmd_one.c \ crypto/rsa/rsa_ameth.c \ crypto/rsa/rsa_asn1.c \ crypto/rsa/rsa_chk.c \ @@ -546,6 +543,7 @@ common_c_includes := \ arm_cflags := \ -DAES_ASM \ -DBSAES_ASM \ + -DDES_UNROLL \ -DGHASH_ASM \ -DOPENSSL_BN_ASM_GF2m \ -DOPENSSL_BN_ASM_MONT \ @@ -556,12 +554,14 @@ arm_cflags := \ arm_src_files := \ crypto/aes/asm/aes-armv4.S \ + crypto/aes/asm/aesv8-armx.S \ crypto/aes/asm/bsaes-armv7.S \ crypto/armcap.c \ crypto/armv4cpuid.S \ crypto/bn/asm/armv4-gf2m.S \ crypto/bn/asm/armv4-mont.S \ crypto/modes/asm/ghash-armv4.S \ + crypto/modes/asm/ghashv8-armx.S \ crypto/sha/asm/sha1-armv4-large.S \ crypto/sha/asm/sha256-armv4.S \ crypto/sha/asm/sha512-armv4.S \ @@ -571,9 +571,20 @@ arm_exclude_files := \ crypto/mem_clr.c \ arm64_cflags := \ - -DOPENSSL_NO_ASM \ + -DDES_UNROLL \ + -DOPENSSL_CPUID_OBJ \ + -DSHA1_ASM \ + -DSHA256_ASM \ + -DSHA512_ASM \ -arm64_src_files := +arm64_src_files := \ + crypto/aes/asm/aesv8-armx-64.S \ + crypto/arm64cpuid.S \ + crypto/armcap.c \ + crypto/modes/asm/ghashv8-armx-64.S \ + crypto/sha/asm/sha1-armv8.S \ + crypto/sha/asm/sha256-armv8.S \ + crypto/sha/asm/sha512-armv8.S \ arm64_exclude_files := @@ -589,6 +600,8 @@ x86_cflags := \ -DOPENSSL_BN_ASM_PART_WORDS \ -DOPENSSL_CPUID_OBJ \ -DOPENSSL_IA32_SSE2 \ + -DRC4_INDEX \ + -DRMD160_ASM \ -DSHA1_ASM \ -DSHA256_ASM \ -DSHA512_ASM \ @@ -624,8 +637,6 @@ x86_exclude_files := \ x86_64_cflags := \ -DAES_ASM \ -DBSAES_ASM \ - -DDES_PTR \ - -DDES_RISC1 \ -DDES_UNROLL \ -DGHASH_ASM \ -DMD5_ASM \ @@ -633,6 +644,7 @@ x86_64_cflags := \ -DOPENSSL_BN_ASM_MONT \ -DOPENSSL_BN_ASM_MONT5 \ -DOPENSSL_CPUID_OBJ \ + -DOPENSSL_IA32_SSE2 \ -DSHA1_ASM \ -DSHA256_ASM \ -DSHA512_ASM \ diff --git a/app/openssl/Crypto-config-trusty.mk b/app/openssl/Crypto-config-trusty.mk index dc5b12c2..59915986 100644 --- a/app/openssl/Crypto-config-trusty.mk +++ b/app/openssl/Crypto-config-trusty.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz # # This script will append to the following variables: # diff --git a/app/openssl/Crypto.mk b/app/openssl/Crypto.mk index 4214b91e..6565f97c 100644 --- a/app/openssl/Crypto.mk +++ b/app/openssl/Crypto.mk @@ -9,7 +9,7 @@ LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) LOCAL_SDK_VERSION := 9 LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libcrypto_static +LOCAL_MODULE := libcrypto_static LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk include $(LOCAL_PATH)/Crypto-config-target.mk include $(LOCAL_PATH)/android-config.mk @@ -31,7 +31,7 @@ LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) # in the NDK. ifeq (,$(TARGET_BUILD_APPS)) LOCAL_CLANG := true -ifeq ($(HOST_OS), darwin_XXX) +ifeq ($(HOST_OS), darwin_does_not_wrok) LOCAL_ASFLAGS += -no-integrated-as LOCAL_CFLAGS += -no-integrated-as endif @@ -41,7 +41,7 @@ endif LOCAL_LDFLAGS += -ldl LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libcrypto +LOCAL_MODULE := libcrypto LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk include $(LOCAL_PATH)/Crypto-config-target.mk include $(LOCAL_PATH)/android-config.mk @@ -50,16 +50,16 @@ include $(BUILD_SHARED_LIBRARY) ####################################### # host shared library -# include $(CLEAR_VARS) -# LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) -# LOCAL_CFLAGS += -DPURIFY -# LOCAL_LDLIBS += -ldl -# LOCAL_MODULE_TAGS := optional -# LOCAL_MODULE:= libcrypto-host -# LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk -# include $(LOCAL_PATH)/Crypto-config-host.mk -# include $(LOCAL_PATH)/android-config.mk -# include $(BUILD_HOST_SHARED_LIBRARY) +#include $(CLEAR_VARS) +#LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) +#LOCAL_CFLAGS += -DPURIFY +#LOCAL_LDLIBS += -ldl +#LOCAL_MODULE_TAGS := optional +#LOCAL_MODULE := libcrypto-host +#LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk +#include $(LOCAL_PATH)/Crypto-config-host.mk +#include $(LOCAL_PATH)/android-config.mk +#include $(BUILD_HOST_SHARED_LIBRARY) ######################################## # host static library, which is used by some SDK tools. @@ -69,8 +69,9 @@ include $(BUILD_SHARED_LIBRARY) # LOCAL_CFLAGS += -DPURIFY # LOCAL_LDLIBS += -ldl # LOCAL_MODULE_TAGS := optional -# LOCAL_MODULE:= libcrypto_static +# LOCAL_MODULE := libcrypto_static # LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk # include $(LOCAL_PATH)/Crypto-config-host.mk # include $(LOCAL_PATH)/android-config.mk # include $(BUILD_HOST_STATIC_LIBRARY) + diff --git a/app/openssl/Ssl-config-host.mk b/app/openssl/Ssl-config-host.mk index 95035487..57ea3775 100644 --- a/app/openssl/Ssl-config-host.mk +++ b/app/openssl/Ssl-config-host.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz # # This script will append to the following variables: # diff --git a/app/openssl/Ssl-config-target.mk b/app/openssl/Ssl-config-target.mk index 32439d3f..c08a971d 100644 --- a/app/openssl/Ssl-config-target.mk +++ b/app/openssl/Ssl-config-target.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz # # This script will append to the following variables: # diff --git a/app/openssl/Ssl.mk b/app/openssl/Ssl.mk index 8ce82d9b..6c04950a 100644 --- a/app/openssl/Ssl.mk +++ b/app/openssl/Ssl.mk @@ -12,7 +12,7 @@ LOCAL_CFLAGS += $(target_c_flags) LOCAL_C_INCLUDES += $(target_c_includes) LOCAL_SHARED_LIBRARIES = $(log_shared_libraries) LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libssl_static +LOCAL_MODULE := libssl_static LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk include $(LOCAL_PATH)/Ssl-config-target.mk include $(LOCAL_PATH)/android-config.mk @@ -35,19 +35,20 @@ endif LOCAL_SHARED_LIBRARIES += libcrypto $(log_shared_libraries) LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libssl +LOCAL_MODULE := libssl LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk include $(LOCAL_PATH)/Ssl-config-target.mk include $(LOCAL_PATH)/android-config.mk include $(LOCAL_PATH)/ndk-build.mk include $(BUILD_SHARED_LIBRARY) + # ####################################### # # host shared library # include $(CLEAR_VARS) # LOCAL_SHARED_LIBRARIES += libcrypto-host $(log_shared_libraries) # LOCAL_MODULE_TAGS := optional -# LOCAL_MODULE:= libssl-host +# LOCAL_MODULE := libssl-host # LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk # include $(LOCAL_PATH)/Ssl-config-host.mk # include $(LOCAL_PATH)/android-config.mk @@ -56,9 +57,12 @@ include $(BUILD_SHARED_LIBRARY) # ####################################### # # ssltest # include $(CLEAR_VARS) -# LOCAL_SRC_FILES:= ssl/ssltest.c +# LOCAL_SRC_FILES := ssl/ssltest.c # LOCAL_SHARED_LIBRARIES := libssl libcrypto $(log_shared_libraries) -# LOCAL_MODULE:= ssltest +# LOCAL_MODULE := ssltest +# LOCAL_MULTILIB := both +# LOCAL_MODULE_STEM_32 := ssltest +# LOCAL_MODULE_STEM_64 := ssltest64 # LOCAL_MODULE_TAGS := optional # LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk # include $(LOCAL_PATH)/Ssl-config-host.mk diff --git a/app/openssl/apps/enc.c b/app/openssl/apps/enc.c index 719acc32..19ea3df9 100644 --- a/app/openssl/apps/enc.c +++ b/app/openssl/apps/enc.c @@ -331,6 +331,12 @@ bad: setup_engine(bio_err, engine, 0); #endif + if (cipher && EVP_CIPHER_flags(cipher) & EVP_CIPH_FLAG_AEAD_CIPHER) + { + BIO_printf(bio_err, "AEAD ciphers not supported by the enc utility\n"); + goto end; + } + if (md && (dgst=EVP_get_digestbyname(md)) == NULL) { BIO_printf(bio_err,"%s is an unsupported message digest type\n",md); diff --git a/app/openssl/apps/ocsp.c b/app/openssl/apps/ocsp.c index 83c5a767..767f12c6 100644 --- a/app/openssl/apps/ocsp.c +++ b/app/openssl/apps/ocsp.c @@ -127,6 +127,7 @@ int MAIN(int argc, char **argv) ENGINE *e = NULL; char **args; char *host = NULL, *port = NULL, *path = "/"; + char *thost = NULL, *tport = NULL, *tpath = NULL; char *reqin = NULL, *respin = NULL; char *reqout = NULL, *respout = NULL; char *signfile = NULL, *keyfile = NULL; @@ -204,6 +205,12 @@ int MAIN(int argc, char **argv) } else if (!strcmp(*args, "-url")) { + if (thost) + OPENSSL_free(thost); + if (tport) + OPENSSL_free(tport); + if (tpath) + OPENSSL_free(tpath); if (args[1]) { args++; @@ -212,6 +219,9 @@ int MAIN(int argc, char **argv) BIO_printf(bio_err, "Error parsing URL\n"); badarg = 1; } + thost = host; + tport = port; + tpath = path; } else badarg = 1; } @@ -920,12 +930,12 @@ end: sk_X509_pop_free(verify_other, X509_free); sk_CONF_VALUE_pop_free(headers, X509V3_conf_free); - if (use_ssl != -1) - { - OPENSSL_free(host); - OPENSSL_free(port); - OPENSSL_free(path); - } + if (thost) + OPENSSL_free(thost); + if (tport) + OPENSSL_free(tport); + if (tpath) + OPENSSL_free(tpath); OPENSSL_EXIT(ret); } diff --git a/app/openssl/apps/req.c b/app/openssl/apps/req.c index 5e034a85..d41385d7 100644 --- a/app/openssl/apps/req.c +++ b/app/openssl/apps/req.c @@ -1489,7 +1489,13 @@ start: #ifdef CHARSET_EBCDIC ebcdic2ascii(buf, buf, i); #endif - if(!req_check_len(i, n_min, n_max)) goto start; + if(!req_check_len(i, n_min, n_max)) + { + if (batch || value) + return 0; + goto start; + } + if (!X509_NAME_add_entry_by_NID(n,nid, chtype, (unsigned char *) buf, -1,-1,mval)) goto err; ret=1; @@ -1548,7 +1554,12 @@ start: #ifdef CHARSET_EBCDIC ebcdic2ascii(buf, buf, i); #endif - if(!req_check_len(i, n_min, n_max)) goto start; + if(!req_check_len(i, n_min, n_max)) + { + if (batch || value) + return 0; + goto start; + } if(!X509_REQ_add1_attr_by_NID(req, nid, chtype, (unsigned char *)buf, -1)) { diff --git a/app/openssl/apps/s_cb.c b/app/openssl/apps/s_cb.c index 84c3b447..146a9607 100644 --- a/app/openssl/apps/s_cb.c +++ b/app/openssl/apps/s_cb.c @@ -747,6 +747,10 @@ void MS_CALLBACK tlsext_cb(SSL *s, int client_server, int type, break; #endif + case TLSEXT_TYPE_padding: + extname = "TLS padding"; + break; + default: extname = "unknown"; break; diff --git a/app/openssl/apps/s_socket.c b/app/openssl/apps/s_socket.c index 380efdb1..94eb40f3 100644 --- a/app/openssl/apps/s_socket.c +++ b/app/openssl/apps/s_socket.c @@ -274,7 +274,7 @@ static int init_client_ip(int *sock, unsigned char ip[4], int port, int type) { i=0; i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i)); - if (i < 0) { perror("keepalive"); return(0); } + if (i < 0) { closesocket(s); perror("keepalive"); return(0); } } #endif @@ -450,6 +450,7 @@ redoit: if ((*host=(char *)OPENSSL_malloc(strlen(h1->h_name)+1)) == NULL) { perror("OPENSSL_malloc"); + closesocket(ret); return(0); } BUF_strlcpy(*host,h1->h_name,strlen(h1->h_name)+1); @@ -458,11 +459,13 @@ redoit: if (h2 == NULL) { BIO_printf(bio_err,"gethostbyname failure\n"); + closesocket(ret); return(0); } if (h2->h_addrtype != AF_INET) { BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n"); + closesocket(ret); return(0); } } diff --git a/app/openssl/apps/smime.c b/app/openssl/apps/smime.c index c583f8a0..d1fe32d3 100644 --- a/app/openssl/apps/smime.c +++ b/app/openssl/apps/smime.c @@ -541,8 +541,8 @@ int MAIN(int argc, char **argv) { if (!cipher) { -#ifndef OPENSSL_NO_RC2 - cipher = EVP_rc2_40_cbc(); +#ifndef OPENSSL_NO_DES + cipher = EVP_des_ede3_cbc(); #else BIO_printf(bio_err, "No cipher selected\n"); goto end; diff --git a/app/openssl/build-config-32.mk b/app/openssl/build-config-32.mk index 4f7484b9..d035f1e4 100644 --- a/app/openssl/build-config-32.mk +++ b/app/openssl/build-config-32.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz # openssl_cflags_32 := \ -DOPENSSL_THREADS \ @@ -24,6 +24,7 @@ openssl_cflags_32 := \ -DOPENSSL_NO_RC5 \ -DOPENSSL_NO_RDRAND \ -DOPENSSL_NO_RFC3779 \ + -DOPENSSL_NO_RIPEMD \ -DOPENSSL_NO_RSAX \ -DOPENSSL_NO_SCTP \ -DOPENSSL_NO_SEED \ @@ -52,6 +53,7 @@ openssl_cflags_static_32 := \ -DOPENSSL_NO_RC5 \ -DOPENSSL_NO_RDRAND \ -DOPENSSL_NO_RFC3779 \ + -DOPENSSL_NO_RIPEMD \ -DOPENSSL_NO_RSAX \ -DOPENSSL_NO_SCTP \ -DOPENSSL_NO_SEED \ diff --git a/app/openssl/build-config-64.mk b/app/openssl/build-config-64.mk index c0e6f6de..45a8141d 100644 --- a/app/openssl/build-config-64.mk +++ b/app/openssl/build-config-64.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz # openssl_cflags_64 := \ -DOPENSSL_THREADS \ @@ -24,6 +24,7 @@ openssl_cflags_64 := \ -DOPENSSL_NO_RC5 \ -DOPENSSL_NO_RDRAND \ -DOPENSSL_NO_RFC3779 \ + -DOPENSSL_NO_RIPEMD \ -DOPENSSL_NO_RSAX \ -DOPENSSL_NO_SCTP \ -DOPENSSL_NO_SEED \ @@ -52,6 +53,7 @@ openssl_cflags_static_64 := \ -DOPENSSL_NO_RC5 \ -DOPENSSL_NO_RDRAND \ -DOPENSSL_NO_RFC3779 \ + -DOPENSSL_NO_RIPEMD \ -DOPENSSL_NO_RSAX \ -DOPENSSL_NO_SCTP \ -DOPENSSL_NO_SEED \ diff --git a/app/openssl/build-config-trusty.mk b/app/openssl/build-config-trusty.mk index e5809a3b..4d6fb58c 100644 --- a/app/openssl/build-config-trusty.mk +++ b/app/openssl/build-config-trusty.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz # openssl_cflags_trusty := \ -DL_ENDIAN \ diff --git a/app/openssl/check-all-builds.sh b/app/openssl/check-all-builds.sh index cff2ba5d..9743872a 100755 --- a/app/openssl/check-all-builds.sh +++ b/app/openssl/check-all-builds.sh @@ -143,7 +143,7 @@ esac # NOTE: x86_64 is not ready yet, while the toolchain is in # prebuilts/ it doesn't have a sysroot which means it requires # a platform build to get Bionic and stuff. -ANDROID_ARCHS="arm x86 mips" +ANDROID_ARCHS="arm arm64 x86 x86_64 mips" BUILD_TYPES= for ARCH in $ANDROID_ARCHS; do @@ -311,11 +311,14 @@ get_build_arch () { # Out: GNU configuration target (e.g. arm-linux-androideabi) get_build_arch_target () { case $1 in + arm64) + echo "aarch64-linux-android" + ;; arm) echo "arm-linux-androideabi" ;; x86) - echo "i686-linux-android" + echo "x86_64-linux-android" ;; x86_64) echo "x86_64-linux-android" @@ -329,8 +332,8 @@ get_build_arch_target () { esac } -GCC_VERSION=4.7 -CLANG_VERSION=3.1 +GCC_VERSION=4.8 +CLANG_VERSION=3.2 get_prebuilt_gcc_dir_for_arch () { local arch=$1 @@ -341,6 +344,9 @@ get_prebuilt_gcc_dir_for_arch () { x86_64) arch=x86 ;; + arm64) + arch=aarch64 + ;; esac echo "$ANDROID_BUILD_TOP/prebuilts/gcc/$ANDROID_HOST_TAG/$arch/$target-$GCC_VERSION" } @@ -397,7 +403,7 @@ get_build_compiler () { # Force -m32 flag when needed for 32-bit builds. case $1 in - *-linux-x86|*-darwin-x86|*-generic32) + *-x86|*-generic32) result="$result -m32" ;; esac diff --git a/app/openssl/crypto/aes/asm/aes-armv4.pl b/app/openssl/crypto/aes/asm/aes-armv4.pl index 86b86c4a..4f891708 100644 --- a/app/openssl/crypto/aes/asm/aes-armv4.pl +++ b/app/openssl/crypto/aes/asm/aes-armv4.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -51,9 +51,23 @@ $key="r11"; $rounds="r12"; $code=<<___; -#include "arm_arch.h" +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + .text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +.thumb +# else .code 32 +# endif +#endif .type AES_Te,%object .align 5 @@ -167,7 +181,11 @@ AES_Te: .type AES_encrypt,%function .align 5 AES_encrypt: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_encrypt +#else + adr r3,AES_encrypt +#endif stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 @@ -409,11 +427,21 @@ _armv4_AES_encrypt: .align 5 private_AES_set_encrypt_key: _armv4_AES_set_encrypt_key: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_set_encrypt_key +#else + adr r3,private_AES_set_encrypt_key +#endif teq r0,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif moveq r0,#-1 beq .Labrt teq r2,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif moveq r0,#-1 beq .Labrt @@ -422,6 +450,9 @@ _armv4_AES_set_encrypt_key: teq r1,#192 beq .Lok teq r1,#256 +#if __ARM_ARCH__>=7 + itt ne @ Thumb2 thing, sanity check in ARM +#endif movne r0,#-1 bne .Labrt @@ -576,6 +607,9 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-16] subs $rounds,$rounds,#1 str $s3,[$key,#-12] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif subeq r2,$key,#216 beq .Ldone @@ -645,6 +679,9 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-24] subs $rounds,$rounds,#1 str $s3,[$key,#-20] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif subeq r2,$key,#256 beq .Ldone @@ -674,11 +711,17 @@ _armv4_AES_set_encrypt_key: str $i3,[$key,#-4] b .L256_loop +.align 2 .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} -.Labrt: tst lr,#1 +.Labrt: +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) +#endif .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key .global private_AES_set_decrypt_key @@ -688,34 +731,57 @@ private_AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr bl _armv4_AES_set_encrypt_key teq r0,#0 - ldrne lr,[sp],#4 @ pop lr + ldr lr,[sp],#4 @ pop lr bne .Labrt - stmdb sp!,{r4-r12} + mov r0,r2 @ AES_set_encrypt_key preserves r2, + mov r1,r2 @ which is AES_KEY *key + b _armv4_AES_set_enc2dec_key +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key - ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2, - mov $key,r2 @ which is AES_KEY *key - mov $i1,r2 - add $i2,r2,$rounds,lsl#4 +@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) +.global AES_set_enc2dec_key +.type AES_set_enc2dec_key,%function +.align 5 +AES_set_enc2dec_key: +_armv4_AES_set_enc2dec_key: + stmdb sp!,{r4-r12,lr} + + ldr $rounds,[r0,#240] + mov $i1,r0 @ input + add $i2,r0,$rounds,lsl#4 + mov $key,r1 @ ouput + add $tbl,r1,$rounds,lsl#4 + str $rounds,[r1,#240] + +.Linv: ldr $s0,[$i1],#16 + ldr $s1,[$i1,#-12] + ldr $s2,[$i1,#-8] + ldr $s3,[$i1,#-4] + ldr $t1,[$i2],#-16 + ldr $t2,[$i2,#16+4] + ldr $t3,[$i2,#16+8] + ldr $i3,[$i2,#16+12] + str $s0,[$tbl],#-16 + str $s1,[$tbl,#16+4] + str $s2,[$tbl,#16+8] + str $s3,[$tbl,#16+12] + str $t1,[$key],#16 + str $t2,[$key,#-12] + str $t3,[$key,#-8] + str $i3,[$key,#-4] + teq $i1,$i2 + bne .Linv -.Linv: ldr $s0,[$i1] + ldr $s0,[$i1] ldr $s1,[$i1,#4] ldr $s2,[$i1,#8] ldr $s3,[$i1,#12] - ldr $t1,[$i2] - ldr $t2,[$i2,#4] - ldr $t3,[$i2,#8] - ldr $i3,[$i2,#12] - str $s0,[$i2],#-16 - str $s1,[$i2,#16+4] - str $s2,[$i2,#16+8] - str $s3,[$i2,#16+12] - str $t1,[$i1],#16 - str $t2,[$i1,#-12] - str $t3,[$i1,#-8] - str $i3,[$i1,#-4] - teq $i1,$i2 - bne .Linv + str $s0,[$key] + str $s1,[$key,#4] + str $s2,[$key,#8] + str $s3,[$key,#12] + sub $key,$key,$rounds,lsl#3 ___ $mask80=$i1; $mask1b=$i2; @@ -773,7 +839,7 @@ $code.=<<___; moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +.size AES_set_enc2dec_key,.-AES_set_enc2dec_key .type AES_Td,%object .align 5 @@ -883,7 +949,11 @@ AES_Td: .type AES_decrypt,%function .align 5 AES_decrypt: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_decrypt +#else + adr r3,AES_decrypt +#endif stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 @@ -1080,8 +1150,9 @@ _armv4_AES_decrypt: ldrb $t3,[$tbl,$i3] @ Td4[s0>>0] and $i3,lr,$s1,lsr#8 + add $s1,$tbl,$s1,lsr#24 ldrb $i1,[$tbl,$i1] @ Td4[s1>>0] - ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24] + ldrb $s1,[$s1] @ Td4[s1>>24] ldrb $i2,[$tbl,$i2] @ Td4[s1>>16] eor $s0,$i1,$s0,lsl#24 ldrb $i3,[$tbl,$i3] @ Td4[s1>>8] @@ -1094,7 +1165,8 @@ _armv4_AES_decrypt: ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] and $i3,lr,$s2,lsr#16 - ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] + add $s2,$tbl,$s2,lsr#24 + ldrb $s2,[$s2] @ Td4[s2>>24] eor $s0,$s0,$i1,lsl#8 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] eor $s1,$i2,$s1,lsl#16 @@ -1106,8 +1178,9 @@ _armv4_AES_decrypt: ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] and $i3,lr,$s3 @ i2 + add $s3,$tbl,$s3,lsr#24 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] - ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] + ldrb $s3,[$s3] @ Td4[s3>>24] eor $s0,$s0,$i1,lsl#16 ldr $i1,[$key,#0] eor $s1,$s1,$i2,lsl#8 @@ -1130,5 +1203,15 @@ _armv4_AES_decrypt: ___ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +$code =~ s/\bret\b/bx\tlr/gm; + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + print $code; close STDOUT; # enforce flush diff --git a/app/openssl/crypto/aes/asm/aes-armv4.s b/app/openssl/crypto/aes/asm/aes-armv4.s index 2697d4ce..333a5227 100644 --- a/app/openssl/crypto/aes/asm/aes-armv4.s +++ b/app/openssl/crypto/aes/asm/aes-armv4.s @@ -1,6 +1,53 @@ -#include "arm_arch.h" + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ ==================================================================== + +@ AES for ARMv4 + +@ January 2007. +@ +@ Code uses single 1K S-box and is >2 times faster than code generated +@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which +@ allows to merge logical or arithmetic operation with shift or rotate +@ in one instruction and emit combined result every cycle. The module +@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit +@ key [on single-issue Xscale PXA250 core]. + +@ May 2007. +@ +@ AES_set_[en|de]crypt_key is added. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 12% improvement on +@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~21.5 cycles per byte. + +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + .text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +.thumb +# else .code 32 +# endif +#endif .type AES_Te,%object .align 5 @@ -114,7 +161,11 @@ AES_Te: .type AES_encrypt,%function .align 5 AES_encrypt: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_encrypt +#else + adr r3,AES_encrypt +#endif stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 @@ -356,11 +407,21 @@ _armv4_AES_encrypt: .align 5 private_AES_set_encrypt_key: _armv4_AES_set_encrypt_key: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_set_encrypt_key +#else + adr r3,private_AES_set_encrypt_key +#endif teq r0,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif moveq r0,#-1 beq .Labrt teq r2,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif moveq r0,#-1 beq .Labrt @@ -369,6 +430,9 @@ _armv4_AES_set_encrypt_key: teq r1,#192 beq .Lok teq r1,#256 +#if __ARM_ARCH__>=7 + itt ne @ Thumb2 thing, sanity check in ARM +#endif movne r0,#-1 bne .Labrt @@ -523,6 +587,9 @@ _armv4_AES_set_encrypt_key: str r2,[r11,#-16] subs r12,r12,#1 str r3,[r11,#-12] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif subeq r2,r11,#216 beq .Ldone @@ -592,6 +659,9 @@ _armv4_AES_set_encrypt_key: str r2,[r11,#-24] subs r12,r12,#1 str r3,[r11,#-20] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif subeq r2,r11,#256 beq .Ldone @@ -621,11 +691,17 @@ _armv4_AES_set_encrypt_key: str r9,[r11,#-4] b .L256_loop +.align 2 .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} -.Labrt: tst lr,#1 +.Labrt: +#if __ARM_ARCH__>=5 + bx lr @ .word 0xe12fff1e +#else + tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key .global private_AES_set_decrypt_key @@ -635,34 +711,57 @@ private_AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr bl _armv4_AES_set_encrypt_key teq r0,#0 - ldrne lr,[sp],#4 @ pop lr + ldr lr,[sp],#4 @ pop lr bne .Labrt - stmdb sp!,{r4-r12} + mov r0,r2 @ AES_set_encrypt_key preserves r2, + mov r1,r2 @ which is AES_KEY *key + b _armv4_AES_set_enc2dec_key +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key + +@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) +.global AES_set_enc2dec_key +.type AES_set_enc2dec_key,%function +.align 5 +AES_set_enc2dec_key: +_armv4_AES_set_enc2dec_key: + stmdb sp!,{r4-r12,lr} + + ldr r12,[r0,#240] + mov r7,r0 @ input + add r8,r0,r12,lsl#4 + mov r11,r1 @ ouput + add r10,r1,r12,lsl#4 + str r12,[r1,#240] - ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, - mov r11,r2 @ which is AES_KEY *key - mov r7,r2 - add r8,r2,r12,lsl#4 +.Linv: ldr r0,[r7],#16 + ldr r1,[r7,#-12] + ldr r2,[r7,#-8] + ldr r3,[r7,#-4] + ldr r4,[r8],#-16 + ldr r5,[r8,#16+4] + ldr r6,[r8,#16+8] + ldr r9,[r8,#16+12] + str r0,[r10],#-16 + str r1,[r10,#16+4] + str r2,[r10,#16+8] + str r3,[r10,#16+12] + str r4,[r11],#16 + str r5,[r11,#-12] + str r6,[r11,#-8] + str r9,[r11,#-4] + teq r7,r8 + bne .Linv -.Linv: ldr r0,[r7] + ldr r0,[r7] ldr r1,[r7,#4] ldr r2,[r7,#8] ldr r3,[r7,#12] - ldr r4,[r8] - ldr r5,[r8,#4] - ldr r6,[r8,#8] - ldr r9,[r8,#12] - str r0,[r8],#-16 - str r1,[r8,#16+4] - str r2,[r8,#16+8] - str r3,[r8,#16+12] - str r4,[r7],#16 - str r5,[r7,#-12] - str r6,[r7,#-8] - str r9,[r7,#-4] - teq r7,r8 - bne .Linv + str r0,[r11] + str r1,[r11,#4] + str r2,[r11,#8] + str r3,[r11,#12] + sub r11,r11,r12,lsl#3 ldr r0,[r11,#16]! @ prefetch tp1 mov r7,#0x80 mov r8,#0x1b @@ -715,7 +814,7 @@ private_AES_set_decrypt_key: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +.size AES_set_enc2dec_key,.-AES_set_enc2dec_key .type AES_Td,%object .align 5 @@ -825,7 +924,11 @@ AES_Td: .type AES_decrypt,%function .align 5 AES_decrypt: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_decrypt +#else + adr r3,AES_decrypt +#endif stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 @@ -1022,8 +1125,9 @@ _armv4_AES_decrypt: ldrb r6,[r10,r9] @ Td4[s0>>0] and r9,lr,r1,lsr#8 + add r1,r10,r1,lsr#24 ldrb r7,[r10,r7] @ Td4[s1>>0] - ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24] + ldrb r1,[r1] @ Td4[s1>>24] ldrb r8,[r10,r8] @ Td4[s1>>16] eor r0,r7,r0,lsl#24 ldrb r9,[r10,r9] @ Td4[s1>>8] @@ -1036,7 +1140,8 @@ _armv4_AES_decrypt: ldrb r8,[r10,r8] @ Td4[s2>>0] and r9,lr,r2,lsr#16 - ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24] + add r2,r10,r2,lsr#24 + ldrb r2,[r2] @ Td4[s2>>24] eor r0,r0,r7,lsl#8 ldrb r9,[r10,r9] @ Td4[s2>>16] eor r1,r8,r1,lsl#16 @@ -1048,8 +1153,9 @@ _armv4_AES_decrypt: ldrb r8,[r10,r8] @ Td4[s3>>8] and r9,lr,r3 @ i2 + add r3,r10,r3,lsr#24 ldrb r9,[r10,r9] @ Td4[s3>>0] - ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24] + ldrb r3,[r3] @ Td4[s3>>24] eor r0,r0,r7,lsl#16 ldr r7,[r11,#0] eor r1,r1,r8,lsl#8 diff --git a/app/openssl/crypto/aes/asm/aesv8-armx-64.S b/app/openssl/crypto/aes/asm/aesv8-armx-64.S new file mode 100644 index 00000000..be0a13df --- /dev/null +++ b/app/openssl/crypto/aes/asm/aesv8-armx-64.S @@ -0,0 +1,761 @@ +#include "arm_arch.h" + +#if __ARM_ARCH__>=7 +.text +.arch armv8-a+crypto +.align 5 +rcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl aes_v8_set_encrypt_key +.type aes_v8_set_encrypt_key,%function +.align 5 +aes_v8_set_encrypt_key: +.Lenc_key: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + adr x3,rcon + cmp w1,#192 + + eor v0.16b,v0.16b,v0.16b + ld1 {v3.16b},[x0],#16 + mov w1,#8 // reuse w1 + ld1 {v1.4s,v2.4s},[x3],#32 + + b.lt .Loop128 + b.eq .L192 + b .L256 + +.align 4 +.Loop128: + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + b.ne .Loop128 + + ld1 {v1.4s},[x3] + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2] + add x2,x2,#0x50 + + mov w12,#10 + b .Ldone + +.align 4 +.L192: + ld1 {v4.8b},[x0],#8 + movi v6.16b,#8 // borrow v6.16b + st1 {v3.4s},[x2],#16 + sub v2.16b,v2.16b,v6.16b // adjust the mask + +.Loop192: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.8b},[x2],#8 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + + dup v5.4s,v3.s[3] + eor v5.16b,v5.16b,v4.16b + eor v6.16b,v6.16b,v1.16b + ext v4.16b,v0.16b,v4.16b,#12 + shl v1.16b,v1.16b,#1 + eor v4.16b,v4.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + eor v4.16b,v4.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.ne .Loop192 + + mov w12,#12 + add x2,x2,#0x20 + b .Ldone + +.align 4 +.L256: + ld1 {v4.16b},[x0] + mov w1,#7 + mov w12,#14 + st1 {v3.4s},[x2],#16 + +.Loop256: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.eq .Ldone + + dup v6.4s,v3.s[3] // just splat + ext v5.16b,v0.16b,v4.16b,#12 + aese v6.16b,v0.16b + + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + + eor v4.16b,v4.16b,v6.16b + b .Loop256 + +.Ldone: + str w12,[x2] + + eor x0,x0,x0 // return value + ldr x29,[sp],#16 + ret +.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key + +.globl aes_v8_set_decrypt_key +.type aes_v8_set_decrypt_key,%function +.align 5 +aes_v8_set_decrypt_key: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + bl .Lenc_key + + sub x2,x2,#240 // restore original x2 + mov x4,#-16 + add x0,x2,x12,lsl#4 // end of key schedule + + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + +.Loop_imc: + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + cmp x0,x2 + b.hi .Loop_imc + + ld1 {v0.4s},[x2] + aesimc v0.16b,v0.16b + st1 {v0.4s},[x0] + + eor x0,x0,x0 // return value + ldp x29,x30,[sp],#16 + ret +.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key +.globl aes_v8_encrypt +.type aes_v8_encrypt,%function +.align 5 +aes_v8_encrypt: + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_enc: + aese v2.16b,v0.16b + ld1 {v0.4s},[x2],#16 + aesmc v2.16b,v2.16b + subs w3,w3,#2 + aese v2.16b,v1.16b + ld1 {v1.4s},[x2],#16 + aesmc v2.16b,v2.16b + b.gt .Loop_enc + + aese v2.16b,v0.16b + ld1 {v0.4s},[x2] + aesmc v2.16b,v2.16b + aese v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_encrypt,.-aes_v8_encrypt +.globl aes_v8_decrypt +.type aes_v8_decrypt,%function +.align 5 +aes_v8_decrypt: + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_dec: + aesd v2.16b,v0.16b + ld1 {v0.4s},[x2],#16 + aesimc v2.16b,v2.16b + subs w3,w3,#2 + aesd v2.16b,v1.16b + ld1 {v1.4s},[x2],#16 + aesimc v2.16b,v2.16b + b.gt .Loop_dec + + aesd v2.16b,v0.16b + ld1 {v0.4s},[x2] + aesimc v2.16b,v2.16b + aesd v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_decrypt,.-aes_v8_decrypt +.globl aes_v8_cbc_encrypt +.type aes_v8_cbc_encrypt,%function +.align 5 +aes_v8_cbc_encrypt: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lcbc_abort + csel x8,xzr,x8,eq + + cmp w5,#0 // en- or decrypting? + ldr w5,[x3,#240] + and x2,x2,#-16 + ld1 {v6.16b},[x4] + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s-v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s-v19.4s},[x7],#32 + ld1 {v20.4s-v21.4s},[x7],#32 + ld1 {v22.4s-v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + b.eq .Lcbc_dec + + cmp w5,#2 + eor v0.16b,v0.16b,v6.16b + eor v5.16b,v16.16b,v7.16b + b.eq .Lcbc_enc128 + +.Loop_cbc_enc: + aese v0.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesmc v0.16b,v0.16b + subs w6,w6,#2 + aese v0.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesmc v0.16b,v0.16b + b.gt .Loop_cbc_enc + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + add x7,x3,#16 + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + + mov w6,w5 + eor v6.16b,v0.16b,v7.16b + st1 {v6.16b},[x1],#16 + b.hs .Loop_cbc_enc + + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + ld1 {v2.4s-v3.4s},[x7] + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc128: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc128 + + st1 {v6.16b},[x1],#16 + b .Lcbc_done + +.align 5 +.Lcbc_dec128: + ld1 {v4.4s-v5.4s},[x7] + eor v6.16b,v6.16b,v7.16b + eor v2.16b,v0.16b,v7.16b + mov x12,x8 + +.Loop2x_cbc_dec128: + aesd v0.16b,v16.16b + aesd v1.16b,v16.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + subs x2,x2,#32 + aesd v0.16b,v17.16b + aesd v1.16b,v17.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + csel x8,xzr,x8,lo + aesd v0.16b,v4.16b + aesd v1.16b,v4.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + csel x12,xzr,x12,ls + aesd v0.16b,v5.16b + aesd v1.16b,v5.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesd v0.16b,v18.16b + aesd v1.16b,v18.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesd v0.16b,v19.16b + aesd v1.16b,v19.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesd v0.16b,v20.16b + aesd v1.16b,v20.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesd v0.16b,v21.16b + aesd v1.16b,v21.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesd v0.16b,v22.16b + aesd v1.16b,v22.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + + eor v6.16b,v6.16b,v0.16b + ld1 {v0.16b},[x0],x8 + eor v2.16b,v2.16b,v1.16b + ld1 {v1.16b},[x0],x12 + st1 {v6.16b},[x1],#16 + eor v6.16b,v3.16b,v7.16b + st1 {v2.16b},[x1],#16 + eor v2.16b,v0.16b,v7.16b + orr v3.16b,v1.16b,v1.16b + b.hs .Loop2x_cbc_dec128 + + adds x2,x2,#32 + eor v6.16b,v6.16b,v7.16b + b.eq .Lcbc_done + eor v2.16b,v2.16b,v7.16b + b .Lcbc_dec_tail + +.align 5 +.Lcbc_dec: + subs x2,x2,#16 + orr v2.16b,v0.16b,v0.16b + b.lo .Lcbc_dec_tail + + csel x8,xzr,x8,eq + cmp w5,#2 + ld1 {v1.16b},[x0],x8 + orr v3.16b,v1.16b,v1.16b + b.eq .Lcbc_dec128 + +.Loop2x_cbc_dec: + aesd v0.16b,v16.16b + aesd v1.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesd v1.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + b.gt .Loop2x_cbc_dec + + aesd v0.16b,v16.16b + aesd v1.16b,v16.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + eor v4.16b,v6.16b,v7.16b + eor v5.16b,v2.16b,v7.16b + aesd v0.16b,v17.16b + aesd v1.16b,v17.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + orr v6.16b,v3.16b,v3.16b + subs x2,x2,#32 + aesd v0.16b,v18.16b + aesd v1.16b,v18.16b + aesimc v0.16b,v0.16b + csel x8,xzr,x8,lo + aesimc v1.16b,v1.16b + mov x7,x3 + aesd v0.16b,v19.16b + aesd v1.16b,v19.16b + aesimc v0.16b,v0.16b + ld1 {v2.16b},[x0],x8 + aesimc v1.16b,v1.16b + csel x8,xzr,x8,ls + aesd v0.16b,v20.16b + aesd v1.16b,v20.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + ld1 {v3.16b},[x0],x8 + aesd v0.16b,v21.16b + aesd v1.16b,v21.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + aesd v0.16b,v22.16b + aesd v1.16b,v22.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + + mov w6,w5 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + orr v0.16b,v2.16b,v2.16b + st1 {v4.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v5.16b},[x1],#16 + b.hs .Loop2x_cbc_dec + + adds x2,x2,#32 + b.eq .Lcbc_done + +.Lcbc_dec_tail: + aesd v0.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesimc v0.16b,v0.16b + subs w6,w6,#2 + aesd v0.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesimc v0.16b,v0.16b + b.gt .Lcbc_dec_tail + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + eor v4.16b,v6.16b,v7.16b + aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + orr v6.16b,v2.16b,v2.16b + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v23.16b + + eor v4.16b,v4.16b,v0.16b + st1 {v4.16b},[x1],#16 + +.Lcbc_done: + st1 {v6.16b},[x4] +.Lcbc_abort: + ldr x29,[sp],#16 + ret +.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt +.globl aes_v8_ctr32_encrypt_blocks +.type aes_v8_ctr32_encrypt_blocks,%function +.align 5 +aes_v8_ctr32_encrypt_blocks: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + ldr w5,[x3,#240] + + ldr w8, [x4, #12] + ld1 {v0.4s},[x4] + + ld1 {v16.4s-v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s-v19.4s},[x7],#32 + ld1 {v20.4s-v21.4s},[x7],#32 + ld1 {v22.4s-v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + + subs x2,x2,#2 + b.lo .Lctr32_tail + +#ifndef __ARMEB__ + rev w8, w8 +#endif + orr v1.16b,v0.16b,v0.16b + add w8, w8, #1 + orr v6.16b,v0.16b,v0.16b + rev w10, w8 + cmp w5,#2 + mov v1.s[3],w10 + b.eq .Lctr32_128 + +.Loop2x_ctr32: + aese v0.16b,v16.16b + aese v1.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + subs w6,w6,#2 + aese v0.16b,v17.16b + aese v1.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + b.gt .Loop2x_ctr32 + + aese v0.16b,v16.16b + aese v1.16b,v16.16b + aesmc v4.16b,v0.16b + orr v0.16b,v6.16b,v6.16b + aesmc v5.16b,v1.16b + orr v1.16b,v6.16b,v6.16b + aese v4.16b,v17.16b + aese v5.16b,v17.16b + ld1 {v2.16b},[x0],#16 + aesmc v4.16b,v4.16b + ld1 {v3.16b},[x0],#16 + aesmc v5.16b,v5.16b + add w8,w8,#1 + aese v4.16b,v18.16b + aese v5.16b,v18.16b + rev w9,w8 + aesmc v4.16b,v4.16b + aesmc v5.16b,v5.16b + add w8,w8,#1 + aese v4.16b,v19.16b + aese v5.16b,v19.16b + eor v2.16b,v2.16b,v7.16b + rev w10,w8 + aesmc v4.16b,v4.16b + aesmc v5.16b,v5.16b + eor v3.16b,v3.16b,v7.16b + mov x7,x3 + aese v4.16b,v20.16b + aese v5.16b,v20.16b + subs x2,x2,#2 + aesmc v4.16b,v4.16b + aesmc v5.16b,v5.16b + ld1 {v16.4s-v17.4s},[x7],#32 // re-pre-load rndkey[0-1] + aese v4.16b,v21.16b + aese v5.16b,v21.16b + aesmc v4.16b,v4.16b + aesmc v5.16b,v5.16b + aese v4.16b,v22.16b + aese v5.16b,v22.16b + mov v0.s[3], w9 + aesmc v4.16b,v4.16b + mov v1.s[3], w10 + aesmc v5.16b,v5.16b + aese v4.16b,v23.16b + aese v5.16b,v23.16b + + mov w6,w5 + eor v2.16b,v2.16b,v4.16b + eor v3.16b,v3.16b,v5.16b + st1 {v2.16b},[x1],#16 + st1 {v3.16b},[x1],#16 + b.hs .Loop2x_ctr32 + + adds x2,x2,#2 + b.eq .Lctr32_done + b .Lctr32_tail + +.Lctr32_128: + ld1 {v4.4s-v5.4s},[x7] + +.Loop2x_ctr32_128: + aese v0.16b,v16.16b + aese v1.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v2.16b},[x0],#16 + aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0],#16 + aese v0.16b,v17.16b + aese v1.16b,v17.16b + add w8,w8,#1 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + rev w9,w8 + aese v0.16b,v4.16b + aese v1.16b,v4.16b + add w8,w8,#1 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + rev w10,w8 + aese v0.16b,v5.16b + aese v1.16b,v5.16b + subs x2,x2,#2 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v18.16b + aese v1.16b,v18.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v19.16b + aese v1.16b,v19.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v20.16b + aese v1.16b,v20.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v21.16b + aese v1.16b,v21.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v22.16b + aese v1.16b,v22.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + eor v2.16b,v2.16b,v7.16b + aese v0.16b,v23.16b + eor v3.16b,v3.16b,v7.16b + aese v1.16b,v23.16b + + eor v2.16b,v2.16b,v0.16b + orr v0.16b,v6.16b,v6.16b + eor v3.16b,v3.16b,v1.16b + orr v1.16b,v6.16b,v6.16b + st1 {v2.16b},[x1],#16 + mov v0.s[3], w9 + st1 {v3.16b},[x1],#16 + mov v1.s[3], w10 + b.hs .Loop2x_ctr32_128 + + adds x2,x2,#2 + b.eq .Lctr32_done + +.Lctr32_tail: + aese v0.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesmc v0.16b,v0.16b + subs w6,w6,#2 + aese v0.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesmc v0.16b,v0.16b + b.gt .Lctr32_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v2.16b},[x0] + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + eor v2.16b,v2.16b,v7.16b + aese v0.16b,v23.16b + + eor v2.16b,v2.16b,v0.16b + st1 {v2.16b},[x1] + +.Lctr32_done: + ldr x29,[sp],#16 + ret +.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks +#endif diff --git a/app/openssl/crypto/aes/asm/aesv8-armx.S b/app/openssl/crypto/aes/asm/aesv8-armx.S new file mode 100644 index 00000000..1637e4d4 --- /dev/null +++ b/app/openssl/crypto/aes/asm/aesv8-armx.S @@ -0,0 +1,767 @@ +#include "arm_arch.h" + +#if __ARM_ARCH__>=7 +.text +.fpu neon +.code 32 +.align 5 +rcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl aes_v8_set_encrypt_key +.type aes_v8_set_encrypt_key,%function +.align 5 +aes_v8_set_encrypt_key: +.Lenc_key: + adr r3,rcon + cmp r1,#192 + + veor q0,q0,q0 + vld1.8 {q3},[r0]! + mov r1,#8 @ reuse r1 + vld1.32 {q1,q2},[r3]! + + blt .Loop128 + beq .L192 + b .L256 + +.align 4 +.Loop128: + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + bne .Loop128 + + vld1.32 {q1},[r3] + + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + veor q3,q3,q10 + vst1.32 {q3},[r2] + add r2,r2,#0x50 + + mov r12,#10 + b .Ldone + +.align 4 +.L192: + vld1.8 {d16},[r0]! + vmov.i8 q10,#8 @ borrow q10 + vst1.32 {q3},[r2]! + vsub.i8 q2,q2,q10 @ adjust the mask + +.Loop192: + vtbl.8 d20,{q8},d4 + vtbl.8 d21,{q8},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {d16},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + + vdup.32 q9,d7[1] + veor q9,q9,q8 + veor q10,q10,q1 + vext.8 q8,q0,q8,#12 + vshl.u8 q1,q1,#1 + veor q8,q8,q9 + veor q3,q3,q10 + veor q8,q8,q10 + vst1.32 {q3},[r2]! + bne .Loop192 + + mov r12,#12 + add r2,r2,#0x20 + b .Ldone + +.align 4 +.L256: + vld1.8 {q8},[r0] + mov r1,#7 + mov r12,#14 + vst1.32 {q3},[r2]! + +.Loop256: + vtbl.8 d20,{q8},d4 + vtbl.8 d21,{q8},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q8},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + vst1.32 {q3},[r2]! + beq .Ldone + + vdup.32 q10,d7[1] + vext.8 q9,q0,q8,#12 + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q8,q8,q9 + vext.8 q9,q0,q9,#12 + veor q8,q8,q9 + vext.8 q9,q0,q9,#12 + veor q8,q8,q9 + + veor q8,q8,q10 + b .Loop256 + +.Ldone: + str r12,[r2] + + eor r0,r0,r0 @ return value + + bx lr +.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key + +.globl aes_v8_set_decrypt_key +.type aes_v8_set_decrypt_key,%function +.align 5 +aes_v8_set_decrypt_key: + stmdb sp!,{r4,lr} + bl .Lenc_key + + sub r2,r2,#240 @ restore original r2 + mov r4,#-16 + add r0,r2,r12,lsl#4 @ end of key schedule + + vld1.32 {q0},[r2] + vld1.32 {q1},[r0] + vst1.32 {q0},[r0],r4 + vst1.32 {q1},[r2]! + +.Loop_imc: + vld1.32 {q0},[r2] + vld1.32 {q1},[r0] + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + vst1.32 {q0},[r0],r4 + vst1.32 {q1},[r2]! + cmp r0,r2 + bhi .Loop_imc + + vld1.32 {q0},[r2] + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + vst1.32 {q0},[r0] + + eor r0,r0,r0 @ return value + ldmia sp!,{r4,pc} +.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key +.globl aes_v8_encrypt +.type aes_v8_encrypt,%function +.align 5 +aes_v8_encrypt: + ldr r3,[r2,#240] + vld1.32 {q0},[r2]! + vld1.8 {q2},[r0] + sub r3,r3,#2 + vld1.32 {q1},[r2]! + +.Loop_enc: + .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 + vld1.32 {q0},[r2]! + .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + subs r3,r3,#2 + .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 + vld1.32 {q1},[r2]! + .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + bgt .Loop_enc + + .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 + vld1.32 {q0},[r2] + .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 + veor q2,q2,q0 + + vst1.8 {q2},[r1] + bx lr +.size aes_v8_encrypt,.-aes_v8_encrypt +.globl aes_v8_decrypt +.type aes_v8_decrypt,%function +.align 5 +aes_v8_decrypt: + ldr r3,[r2,#240] + vld1.32 {q0},[r2]! + vld1.8 {q2},[r0] + sub r3,r3,#2 + vld1.32 {q1},[r2]! + +.Loop_dec: + .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 + vld1.32 {q0},[r2]! + .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + subs r3,r3,#2 + .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 + vld1.32 {q1},[r2]! + .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + bgt .Loop_dec + + .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 + vld1.32 {q0},[r2] + .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 + veor q2,q2,q0 + + vst1.8 {q2},[r1] + bx lr +.size aes_v8_decrypt,.-aes_v8_decrypt +.globl aes_v8_cbc_encrypt +.type aes_v8_cbc_encrypt,%function +.align 5 +aes_v8_cbc_encrypt: + mov ip,sp + stmdb sp!,{r4-r8,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load remaining args + subs r2,r2,#16 + mov r8,#16 + blo .Lcbc_abort + moveq r8,#0 + + cmp r5,#0 @ en- or decrypting? + ldr r5,[r3,#240] + and r2,r2,#-16 + vld1.8 {q6},[r4] + vld1.8 {q0},[r0],r8 + + vld1.32 {q8-q9},[r3] @ load key schedule... + sub r5,r5,#6 + add r7,r3,r5,lsl#4 @ pointer to last 7 round keys + sub r5,r5,#2 + vld1.32 {q10-q11},[r7]! + vld1.32 {q12-q13},[r7]! + vld1.32 {q14-q15},[r7]! + vld1.32 {q7},[r7] + + add r7,r3,#32 + mov r6,r5 + beq .Lcbc_dec + + cmp r5,#2 + veor q0,q0,q6 + veor q5,q8,q7 + beq .Lcbc_enc128 + +.Loop_cbc_enc: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + vld1.32 {q8},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r6,r6,#2 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + vld1.32 {q9},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + bgt .Loop_cbc_enc + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r2,r2,#16 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + moveq r8,#0 + .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + add r7,r3,#16 + .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q8},[r0],r8 + .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q8,q8,q5 + .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + + mov r6,r5 + veor q6,q0,q7 + vst1.8 {q6},[r1]! + bhs .Loop_cbc_enc + + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + vld1.32 {q2-q3},[r7] + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vst1.8 {q6},[r1]! +.Lenter_cbc_enc128: + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r2,r2,#16 + .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + moveq r8,#0 + .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q8},[r0],r8 + .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q8,q8,q5 + .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + veor q6,q0,q7 + bhs .Loop_cbc_enc128 + + vst1.8 {q6},[r1]! + b .Lcbc_done + +.align 5 +.Lcbc_dec128: + vld1.32 {q4-q5},[r7] + veor q6,q6,q7 + veor q2,q0,q7 + mov r12,r8 + +.Loop2x_cbc_dec128: + .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + subs r2,r2,#32 + .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + movlo r8,#0 + .byte 0x48,0x03,0xb0,0xf3 @ aesd q0,q4 + .byte 0x48,0x23,0xb0,0xf3 @ aesd q1,q4 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + movls r12,#0 + .byte 0x4a,0x03,0xb0,0xf3 @ aesd q0,q5 + .byte 0x4a,0x23,0xb0,0xf3 @ aesd q1,q5 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x64,0x03,0xb0,0xf3 @ aesd q0,q10 + .byte 0x64,0x23,0xb0,0xf3 @ aesd q1,q10 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x66,0x03,0xb0,0xf3 @ aesd q0,q11 + .byte 0x66,0x23,0xb0,0xf3 @ aesd q1,q11 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 + .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 + .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 + .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 + .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 + + veor q6,q6,q0 + vld1.8 {q0},[r0],r8 + veor q2,q2,q1 + vld1.8 {q1},[r0],r12 + vst1.8 {q6},[r1]! + veor q6,q3,q7 + vst1.8 {q2},[r1]! + veor q2,q0,q7 + vorr q3,q1,q1 + bhs .Loop2x_cbc_dec128 + + adds r2,r2,#32 + veor q6,q6,q7 + beq .Lcbc_done + veor q2,q2,q7 + b .Lcbc_dec_tail + +.align 5 +.Lcbc_dec: + subs r2,r2,#16 + vorr q2,q0,q0 + blo .Lcbc_dec_tail + + moveq r8,#0 + cmp r5,#2 + vld1.8 {q1},[r0],r8 + vorr q3,q1,q1 + beq .Lcbc_dec128 + +.Loop2x_cbc_dec: + .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + vld1.32 {q8},[r7]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + subs r6,r6,#2 + .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + vld1.32 {q9},[r7]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + bgt .Loop2x_cbc_dec + + .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + veor q4,q6,q7 + veor q5,q2,q7 + .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + vorr q6,q3,q3 + subs r2,r2,#32 + .byte 0x64,0x03,0xb0,0xf3 @ aesd q0,q10 + .byte 0x64,0x23,0xb0,0xf3 @ aesd q1,q10 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + movlo r8,#0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + mov r7,r3 + .byte 0x66,0x03,0xb0,0xf3 @ aesd q0,q11 + .byte 0x66,0x23,0xb0,0xf3 @ aesd q1,q11 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + vld1.8 {q2},[r0],r8 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + movls r8,#0 + .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 + .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + vld1.8 {q3},[r0],r8 + .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 + .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 + .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 + .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 + + mov r6,r5 + veor q4,q4,q0 + veor q5,q5,q1 + vorr q0,q2,q2 + vst1.8 {q4},[r1]! + vorr q1,q3,q3 + vst1.8 {q5},[r1]! + bhs .Loop2x_cbc_dec + + adds r2,r2,#32 + beq .Lcbc_done + +.Lcbc_dec_tail: + .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 + vld1.32 {q8},[r7]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + subs r6,r6,#2 + .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 + vld1.32 {q9},[r7]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + bgt .Lcbc_dec_tail + + .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + veor q4,q6,q7 + .byte 0x64,0x03,0xb0,0xf3 @ aesd q0,q10 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + vorr q6,q2,q2 + .byte 0x66,0x03,0xb0,0xf3 @ aesd q0,q11 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 + + veor q4,q4,q0 + vst1.8 {q4},[r1]! + +.Lcbc_done: + vst1.8 {q6},[r4] +.Lcbc_abort: + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r8,pc} +.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt +.globl aes_v8_ctr32_encrypt_blocks +.type aes_v8_ctr32_encrypt_blocks,%function +.align 5 +aes_v8_ctr32_encrypt_blocks: + mov ip,sp + stmdb sp!,{r4-r10,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldr r4, [ip] @ load remaining arg + ldr r5,[r3,#240] + + ldr r8, [r4, #12] + vld1.32 {q0},[r4] + + vld1.32 {q8-q9},[r3] @ load key schedule... + sub r5,r5,#6 + add r7,r3,r5,lsl#4 @ pointer to last 7 round keys + sub r5,r5,#2 + vld1.32 {q10-q11},[r7]! + vld1.32 {q12-q13},[r7]! + vld1.32 {q14-q15},[r7]! + vld1.32 {q7},[r7] + + add r7,r3,#32 + mov r6,r5 + + subs r2,r2,#2 + blo .Lctr32_tail + +#ifndef __ARMEB__ + rev r8, r8 +#endif + vorr q1,q0,q0 + add r8, r8, #1 + vorr q6,q0,q0 + rev r10, r8 + cmp r5,#2 + vmov.32 d3[1],r10 + beq .Lctr32_128 + +.Loop2x_ctr32: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + vld1.32 {q8},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + subs r6,r6,#2 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 + vld1.32 {q9},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + bgt .Loop2x_ctr32 + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + .byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 + vorr q0,q6,q6 + .byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 + vorr q1,q6,q6 + .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 + .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 + vld1.8 {q2},[r0]! + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + vld1.8 {q3},[r0]! + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + add r8,r8,#1 + .byte 0x24,0x83,0xb0,0xf3 @ aese q4,q10 + .byte 0x24,0xa3,0xb0,0xf3 @ aese q5,q10 + rev r9,r8 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + add r8,r8,#1 + .byte 0x26,0x83,0xb0,0xf3 @ aese q4,q11 + .byte 0x26,0xa3,0xb0,0xf3 @ aese q5,q11 + veor q2,q2,q7 + rev r10,r8 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + veor q3,q3,q7 + mov r7,r3 + .byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 + .byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 + subs r2,r2,#2 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + vld1.32 {q8-q9},[r7]! @ re-pre-load rndkey[0-1] + .byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 + .byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 + .byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 + vmov.32 d1[1], r9 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + vmov.32 d3[1], r10 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15 + .byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 + + mov r6,r5 + veor q2,q2,q4 + veor q3,q3,q5 + vst1.8 {q2},[r1]! + vst1.8 {q3},[r1]! + bhs .Loop2x_ctr32 + + adds r2,r2,#2 + beq .Lctr32_done + b .Lctr32_tail + +.Lctr32_128: + vld1.32 {q4-q5},[r7] + +.Loop2x_ctr32_128: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q2},[r0]! + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.8 {q3},[r0]! + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 + add r8,r8,#1 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + rev r9,r8 + .byte 0x08,0x03,0xb0,0xf3 @ aese q0,q4 + .byte 0x08,0x23,0xb0,0xf3 @ aese q1,q4 + add r8,r8,#1 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + rev r10,r8 + .byte 0x0a,0x03,0xb0,0xf3 @ aese q0,q5 + .byte 0x0a,0x23,0xb0,0xf3 @ aese q1,q5 + subs r2,r2,#2 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 + .byte 0x24,0x23,0xb0,0xf3 @ aese q1,q10 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 + .byte 0x26,0x23,0xb0,0xf3 @ aese q1,q11 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 + .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 + .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 + .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + veor q2,q2,q7 + .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + veor q3,q3,q7 + .byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15 + + veor q2,q2,q0 + vorr q0,q6,q6 + veor q3,q3,q1 + vorr q1,q6,q6 + vst1.8 {q2},[r1]! + vmov.32 d1[1], r9 + vst1.8 {q3},[r1]! + vmov.32 d3[1], r10 + bhs .Loop2x_ctr32_128 + + adds r2,r2,#2 + beq .Lctr32_done + +.Lctr32_tail: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + vld1.32 {q8},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r6,r6,#2 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + vld1.32 {q9},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + bgt .Lctr32_tail + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q2},[r0] + .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q2,q2,q7 + .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + + veor q2,q2,q0 + vst1.8 {q2},[r1] + +.Lctr32_done: + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r10,pc} +.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks +#endif diff --git a/app/openssl/crypto/aes/asm/aesv8-armx.pl b/app/openssl/crypto/aes/asm/aesv8-armx.pl new file mode 100644 index 00000000..415dc04a --- /dev/null +++ b/app/openssl/crypto/aes/asm/aesv8-armx.pl @@ -0,0 +1,980 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements support for ARMv8 AES instructions. The +# module is endian-agnostic in sense that it supports both big- and +# little-endian cases. As does it support both 32- and 64-bit modes +# of operation. Latter is achieved by limiting amount of utilized +# registers to 16, which implies additional instructions. This has +# no effect on mighty Apple A7, as results are literally equal to +# the theoretical estimates based on instruction latencies and issue +# rate. It remains to be seen how does it affect other platforms... +# +# Performance in cycles per byte processed with 128-bit key: +# +# CBC enc CBC dec CTR +# Apple A7 2.39 1.20 1.20 +# Cortex-A5x n/a n/a n/a + +$flavour = shift; +open STDOUT,">".shift; + +$prefix="aes_v8"; + +$code=<<___; +#include "arm_arch.h" + +#if __ARM_ARCH__>=7 +.text +___ +$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); +$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); + +# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax, +# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to +# maintain both 32- and 64-bit codes within single module and +# transliterate common code to either flavour with regex vodoo. +# +{{{ +my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12"); +my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)= + $flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10)); + + +$code.=<<___; +.align 5 +rcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl ${prefix}_set_encrypt_key +.type ${prefix}_set_encrypt_key,%function +.align 5 +${prefix}_set_encrypt_key: +.Lenc_key: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! + add x29,sp,#0 +___ +$code.=<<___; + adr $ptr,rcon + cmp $bits,#192 + + veor $zero,$zero,$zero + vld1.8 {$in0},[$inp],#16 + mov $bits,#8 // reuse $bits + vld1.32 {$rcon,$mask},[$ptr],#32 + + b.lt .Loop128 + b.eq .L192 + b .L256 + +.align 4 +.Loop128: + vtbl.8 $key,{$in0},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in0},[$out],#16 + aese $key,$zero + subs $bits,$bits,#1 + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + vshl.u8 $rcon,$rcon,#1 + veor $in0,$in0,$key + b.ne .Loop128 + + vld1.32 {$rcon},[$ptr] + + vtbl.8 $key,{$in0},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in0},[$out],#16 + aese $key,$zero + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + vshl.u8 $rcon,$rcon,#1 + veor $in0,$in0,$key + + vtbl.8 $key,{$in0},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in0},[$out],#16 + aese $key,$zero + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + veor $in0,$in0,$key + vst1.32 {$in0},[$out] + add $out,$out,#0x50 + + mov $rounds,#10 + b .Ldone + +.align 4 +.L192: + vld1.8 {$in1},[$inp],#8 + vmov.i8 $key,#8 // borrow $key + vst1.32 {$in0},[$out],#16 + vsub.i8 $mask,$mask,$key // adjust the mask + +.Loop192: + vtbl.8 $key,{$in1},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in1},[$out],#8 + aese $key,$zero + subs $bits,$bits,#1 + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + + vdup.32 $tmp,${in0}[3] + veor $tmp,$tmp,$in1 + veor $key,$key,$rcon + vext.8 $in1,$zero,$in1,#12 + vshl.u8 $rcon,$rcon,#1 + veor $in1,$in1,$tmp + veor $in0,$in0,$key + veor $in1,$in1,$key + vst1.32 {$in0},[$out],#16 + b.ne .Loop192 + + mov $rounds,#12 + add $out,$out,#0x20 + b .Ldone + +.align 4 +.L256: + vld1.8 {$in1},[$inp] + mov $bits,#7 + mov $rounds,#14 + vst1.32 {$in0},[$out],#16 + +.Loop256: + vtbl.8 $key,{$in1},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in1},[$out],#16 + aese $key,$zero + subs $bits,$bits,#1 + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + vshl.u8 $rcon,$rcon,#1 + veor $in0,$in0,$key + vst1.32 {$in0},[$out],#16 + b.eq .Ldone + + vdup.32 $key,${in0}[3] // just splat + vext.8 $tmp,$zero,$in1,#12 + aese $key,$zero + + veor $in1,$in1,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in1,$in1,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in1,$in1,$tmp + + veor $in1,$in1,$key + b .Loop256 + +.Ldone: + str $rounds,[$out] + + eor x0,x0,x0 // return value + `"ldr x29,[sp],#16" if ($flavour =~ /64/)` + ret +.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key + +.globl ${prefix}_set_decrypt_key +.type ${prefix}_set_decrypt_key,%function +.align 5 +${prefix}_set_decrypt_key: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! + add x29,sp,#0 +___ +$code.=<<___ if ($flavour !~ /64/); + stmdb sp!,{r4,lr} +___ +$code.=<<___; + bl .Lenc_key + + sub $out,$out,#240 // restore original $out + mov x4,#-16 + add $inp,$out,x12,lsl#4 // end of key schedule + + vld1.32 {v0.16b},[$out] + vld1.32 {v1.16b},[$inp] + vst1.32 {v0.16b},[$inp],x4 + vst1.32 {v1.16b},[$out],#16 + +.Loop_imc: + vld1.32 {v0.16b},[$out] + vld1.32 {v1.16b},[$inp] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + vst1.32 {v0.16b},[$inp],x4 + vst1.32 {v1.16b},[$out],#16 + cmp $inp,$out + b.hi .Loop_imc + + vld1.32 {v0.16b},[$out] + aesimc v0.16b,v0.16b + vst1.32 {v0.16b},[$inp] + + eor x0,x0,x0 // return value +___ +$code.=<<___ if ($flavour !~ /64/); + ldmia sp!,{r4,pc} +___ +$code.=<<___ if ($flavour =~ /64/); + ldp x29,x30,[sp],#16 + ret +___ +$code.=<<___; +.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key +___ +}}} +{{{ +sub gen_block () { +my $dir = shift; +my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc"); +my ($inp,$out,$key)=map("x$_",(0..2)); +my $rounds="w3"; +my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3)); + +$code.=<<___; +.globl ${prefix}_${dir}crypt +.type ${prefix}_${dir}crypt,%function +.align 5 +${prefix}_${dir}crypt: + ldr $rounds,[$key,#240] + vld1.32 {$rndkey0},[$key],#16 + vld1.8 {$inout},[$inp] + sub $rounds,$rounds,#2 + vld1.32 {$rndkey1},[$key],#16 + +.Loop_${dir}c: + aes$e $inout,$rndkey0 + vld1.32 {$rndkey0},[$key],#16 + aes$mc $inout,$inout + subs $rounds,$rounds,#2 + aes$e $inout,$rndkey1 + vld1.32 {$rndkey1},[$key],#16 + aes$mc $inout,$inout + b.gt .Loop_${dir}c + + aes$e $inout,$rndkey0 + vld1.32 {$rndkey0},[$key] + aes$mc $inout,$inout + aes$e $inout,$rndkey1 + veor $inout,$inout,$rndkey0 + + vst1.8 {$inout},[$out] + ret +.size ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt +___ +} +&gen_block("en"); +&gen_block("de"); +}}} +{{{ +my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5"; +my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12"); +my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); + +my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); + +### q8-q15 preloaded key schedule + +$code.=<<___; +.globl ${prefix}_cbc_encrypt +.type ${prefix}_cbc_encrypt,%function +.align 5 +${prefix}_cbc_encrypt: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! + add x29,sp,#0 +___ +$code.=<<___ if ($flavour !~ /64/); + mov ip,sp + stmdb sp!,{r4-r8,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load remaining args +___ +$code.=<<___; + subs $len,$len,#16 + mov $step,#16 + b.lo .Lcbc_abort + cclr $step,eq + + cmp $enc,#0 // en- or decrypting? + ldr $rounds,[$key,#240] + and $len,$len,#-16 + vld1.8 {$ivec},[$ivp] + vld1.8 {$dat},[$inp],$step + + vld1.32 {q8-q9},[$key] // load key schedule... + sub $rounds,$rounds,#6 + add $key_,$key,x5,lsl#4 // pointer to last 7 round keys + sub $rounds,$rounds,#2 + vld1.32 {q10-q11},[$key_],#32 + vld1.32 {q12-q13},[$key_],#32 + vld1.32 {q14-q15},[$key_],#32 + vld1.32 {$rndlast},[$key_] + + add $key_,$key,#32 + mov $cnt,$rounds + b.eq .Lcbc_dec + + cmp $rounds,#2 + veor $dat,$dat,$ivec + veor $rndzero_n_last,q8,$rndlast + b.eq .Lcbc_enc128 + +.Loop_cbc_enc: + aese $dat,q8 + vld1.32 {q8},[$key_],#16 + aesmc $dat,$dat + subs $cnt,$cnt,#2 + aese $dat,q9 + vld1.32 {q9},[$key_],#16 + aesmc $dat,$dat + b.gt .Loop_cbc_enc + + aese $dat,q8 + aesmc $dat,$dat + subs $len,$len,#16 + aese $dat,q9 + aesmc $dat,$dat + cclr $step,eq + aese $dat,q10 + aesmc $dat,$dat + add $key_,$key,#16 + aese $dat,q11 + aesmc $dat,$dat + vld1.8 {q8},[$inp],$step + aese $dat,q12 + aesmc $dat,$dat + veor q8,q8,$rndzero_n_last + aese $dat,q13 + aesmc $dat,$dat + vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] + aese $dat,q14 + aesmc $dat,$dat + aese $dat,q15 + + mov $cnt,$rounds + veor $ivec,$dat,$rndlast + vst1.8 {$ivec},[$out],#16 + b.hs .Loop_cbc_enc + + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + vld1.32 {$in0-$in1},[$key_] + aese $dat,q8 + aesmc $dat,$dat + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + aese $dat,q8 + aesmc $dat,$dat + vst1.8 {$ivec},[$out],#16 +.Lenter_cbc_enc128: + aese $dat,q9 + aesmc $dat,$dat + subs $len,$len,#16 + aese $dat,$in0 + aesmc $dat,$dat + cclr $step,eq + aese $dat,$in1 + aesmc $dat,$dat + aese $dat,q10 + aesmc $dat,$dat + aese $dat,q11 + aesmc $dat,$dat + vld1.8 {q8},[$inp],$step + aese $dat,q12 + aesmc $dat,$dat + aese $dat,q13 + aesmc $dat,$dat + aese $dat,q14 + aesmc $dat,$dat + veor q8,q8,$rndzero_n_last + aese $dat,q15 + veor $ivec,$dat,$rndlast + b.hs .Loop_cbc_enc128 + + vst1.8 {$ivec},[$out],#16 + b .Lcbc_done + +.align 5 +.Lcbc_dec128: + vld1.32 {$tmp0-$tmp1},[$key_] + veor $ivec,$ivec,$rndlast + veor $in0,$dat0,$rndlast + mov $step1,$step + +.Loop2x_cbc_dec128: + aesd $dat0,q8 + aesd $dat1,q8 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + subs $len,$len,#32 + aesd $dat0,q9 + aesd $dat1,q9 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + cclr $step,lo + aesd $dat0,$tmp0 + aesd $dat1,$tmp0 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + cclr $step1,ls + aesd $dat0,$tmp1 + aesd $dat1,$tmp1 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesd $dat0,q10 + aesd $dat1,q10 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesd $dat0,q11 + aesd $dat1,q11 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesd $dat0,q12 + aesd $dat1,q12 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesd $dat0,q13 + aesd $dat1,q13 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesd $dat0,q14 + aesd $dat1,q14 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesd $dat0,q15 + aesd $dat1,q15 + + veor $ivec,$ivec,$dat0 + vld1.8 {$dat0},[$inp],$step + veor $in0,$in0,$dat1 + vld1.8 {$dat1},[$inp],$step1 + vst1.8 {$ivec},[$out],#16 + veor $ivec,$in1,$rndlast + vst1.8 {$in0},[$out],#16 + veor $in0,$dat0,$rndlast + vorr $in1,$dat1,$dat1 + b.hs .Loop2x_cbc_dec128 + + adds $len,$len,#32 + veor $ivec,$ivec,$rndlast + b.eq .Lcbc_done + veor $in0,$in0,$rndlast + b .Lcbc_dec_tail + +.align 5 +.Lcbc_dec: + subs $len,$len,#16 + vorr $in0,$dat,$dat + b.lo .Lcbc_dec_tail + + cclr $step,eq + cmp $rounds,#2 + vld1.8 {$dat1},[$inp],$step + vorr $in1,$dat1,$dat1 + b.eq .Lcbc_dec128 + +.Loop2x_cbc_dec: + aesd $dat0,q8 + aesd $dat1,q8 + vld1.32 {q8},[$key_],#16 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + subs $cnt,$cnt,#2 + aesd $dat0,q9 + aesd $dat1,q9 + vld1.32 {q9},[$key_],#16 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + b.gt .Loop2x_cbc_dec + + aesd $dat0,q8 + aesd $dat1,q8 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + veor $tmp0,$ivec,$rndlast + veor $tmp1,$in0,$rndlast + aesd $dat0,q9 + aesd $dat1,q9 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + vorr $ivec,$in1,$in1 + subs $len,$len,#32 + aesd $dat0,q10 + aesd $dat1,q10 + aesimc $dat0,$dat0 + cclr $step,lo + aesimc $dat1,$dat1 + mov $key_,$key + aesd $dat0,q11 + aesd $dat1,q11 + aesimc $dat0,$dat0 + vld1.8 {$in0},[$inp],$step + aesimc $dat1,$dat1 + cclr $step,ls + aesd $dat0,q12 + aesd $dat1,q12 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + vld1.8 {$in1},[$inp],$step + aesd $dat0,q13 + aesd $dat1,q13 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] + aesd $dat0,q14 + aesd $dat1,q14 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] + aesd $dat0,q15 + aesd $dat1,q15 + + mov $cnt,$rounds + veor $tmp0,$tmp0,$dat0 + veor $tmp1,$tmp1,$dat1 + vorr $dat0,$in0,$in0 + vst1.8 {$tmp0},[$out],#16 + vorr $dat1,$in1,$in1 + vst1.8 {$tmp1},[$out],#16 + b.hs .Loop2x_cbc_dec + + adds $len,$len,#32 + b.eq .Lcbc_done + +.Lcbc_dec_tail: + aesd $dat,q8 + vld1.32 {q8},[$key_],#16 + aesimc $dat,$dat + subs $cnt,$cnt,#2 + aesd $dat,q9 + vld1.32 {q9},[$key_],#16 + aesimc $dat,$dat + b.gt .Lcbc_dec_tail + + aesd $dat,q8 + aesimc $dat,$dat + aesd $dat,q9 + aesimc $dat,$dat + veor $tmp,$ivec,$rndlast + aesd $dat,q10 + aesimc $dat,$dat + vorr $ivec,$in0,$in0 + aesd $dat,q11 + aesimc $dat,$dat + aesd $dat,q12 + aesimc $dat,$dat + aesd $dat,q13 + aesimc $dat,$dat + aesd $dat,q14 + aesimc $dat,$dat + aesd $dat,q15 + + veor $tmp,$tmp,$dat + vst1.8 {$tmp},[$out],#16 + +.Lcbc_done: + vst1.8 {$ivec},[$ivp] +.Lcbc_abort: +___ +$code.=<<___ if ($flavour !~ /64/); + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r8,pc} +___ +$code.=<<___ if ($flavour =~ /64/); + ldr x29,[sp],#16 + ret +___ +$code.=<<___; +.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt +___ +}}} +{{{ +my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); +my ($rounds,$cnt,$key_,$ctr,$tctr,$tctr1)=("w5","w6","x7","w8","w9","w10"); +my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); + +my ($dat,$tmp)=($dat0,$tmp0); + +### q8-q15 preloaded key schedule + +$code.=<<___; +.globl ${prefix}_ctr32_encrypt_blocks +.type ${prefix}_ctr32_encrypt_blocks,%function +.align 5 +${prefix}_ctr32_encrypt_blocks: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! + add x29,sp,#0 +___ +$code.=<<___ if ($flavour !~ /64/); + mov ip,sp + stmdb sp!,{r4-r10,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldr r4, [ip] @ load remaining arg +___ +$code.=<<___; + ldr $rounds,[$key,#240] + + ldr $ctr, [$ivp, #12] + vld1.32 {$dat0},[$ivp] + + vld1.32 {q8-q9},[$key] // load key schedule... + sub $rounds,$rounds,#6 + add $key_,$key,x5,lsl#4 // pointer to last 7 round keys + sub $rounds,$rounds,#2 + vld1.32 {q10-q11},[$key_],#32 + vld1.32 {q12-q13},[$key_],#32 + vld1.32 {q14-q15},[$key_],#32 + vld1.32 {$rndlast},[$key_] + + add $key_,$key,#32 + mov $cnt,$rounds + + subs $len,$len,#2 + b.lo .Lctr32_tail + +#ifndef __ARMEB__ + rev $ctr, $ctr +#endif + vorr $dat1,$dat0,$dat0 + add $ctr, $ctr, #1 + vorr $ivec,$dat0,$dat0 + rev $tctr1, $ctr + cmp $rounds,#2 + vmov.32 ${dat1}[3],$tctr1 + b.eq .Lctr32_128 + +.Loop2x_ctr32: + aese $dat0,q8 + aese $dat1,q8 + vld1.32 {q8},[$key_],#16 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + subs $cnt,$cnt,#2 + aese $dat0,q9 + aese $dat1,q9 + vld1.32 {q9},[$key_],#16 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + b.gt .Loop2x_ctr32 + + aese $dat0,q8 + aese $dat1,q8 + aesmc $tmp0,$dat0 + vorr $dat0,$ivec,$ivec + aesmc $tmp1,$dat1 + vorr $dat1,$ivec,$ivec + aese $tmp0,q9 + aese $tmp1,q9 + vld1.8 {$in0},[$inp],#16 + aesmc $tmp0,$tmp0 + vld1.8 {$in1},[$inp],#16 + aesmc $tmp1,$tmp1 + add $ctr,$ctr,#1 + aese $tmp0,q10 + aese $tmp1,q10 + rev $tctr,$ctr + aesmc $tmp0,$tmp0 + aesmc $tmp1,$tmp1 + add $ctr,$ctr,#1 + aese $tmp0,q11 + aese $tmp1,q11 + veor $in0,$in0,$rndlast + rev $tctr1,$ctr + aesmc $tmp0,$tmp0 + aesmc $tmp1,$tmp1 + veor $in1,$in1,$rndlast + mov $key_,$key + aese $tmp0,q12 + aese $tmp1,q12 + subs $len,$len,#2 + aesmc $tmp0,$tmp0 + aesmc $tmp1,$tmp1 + vld1.32 {q8-q9},[$key_],#32 // re-pre-load rndkey[0-1] + aese $tmp0,q13 + aese $tmp1,q13 + aesmc $tmp0,$tmp0 + aesmc $tmp1,$tmp1 + aese $tmp0,q14 + aese $tmp1,q14 + vmov.32 ${dat0}[3], $tctr + aesmc $tmp0,$tmp0 + vmov.32 ${dat1}[3], $tctr1 + aesmc $tmp1,$tmp1 + aese $tmp0,q15 + aese $tmp1,q15 + + mov $cnt,$rounds + veor $in0,$in0,$tmp0 + veor $in1,$in1,$tmp1 + vst1.8 {$in0},[$out],#16 + vst1.8 {$in1},[$out],#16 + b.hs .Loop2x_ctr32 + + adds $len,$len,#2 + b.eq .Lctr32_done + b .Lctr32_tail + +.Lctr32_128: + vld1.32 {$tmp0-$tmp1},[$key_] + +.Loop2x_ctr32_128: + aese $dat0,q8 + aese $dat1,q8 + aesmc $dat0,$dat0 + vld1.8 {$in0},[$inp],#16 + aesmc $dat1,$dat1 + vld1.8 {$in1},[$inp],#16 + aese $dat0,q9 + aese $dat1,q9 + add $ctr,$ctr,#1 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + rev $tctr,$ctr + aese $dat0,$tmp0 + aese $dat1,$tmp0 + add $ctr,$ctr,#1 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + rev $tctr1,$ctr + aese $dat0,$tmp1 + aese $dat1,$tmp1 + subs $len,$len,#2 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q10 + aese $dat1,q10 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q11 + aese $dat1,q11 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q12 + aese $dat1,q12 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q13 + aese $dat1,q13 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q14 + aese $dat1,q14 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + veor $in0,$in0,$rndlast + aese $dat0,q15 + veor $in1,$in1,$rndlast + aese $dat1,q15 + + veor $in0,$in0,$dat0 + vorr $dat0,$ivec,$ivec + veor $in1,$in1,$dat1 + vorr $dat1,$ivec,$ivec + vst1.8 {$in0},[$out],#16 + vmov.32 ${dat0}[3], $tctr + vst1.8 {$in1},[$out],#16 + vmov.32 ${dat1}[3], $tctr1 + b.hs .Loop2x_ctr32_128 + + adds $len,$len,#2 + b.eq .Lctr32_done + +.Lctr32_tail: + aese $dat,q8 + vld1.32 {q8},[$key_],#16 + aesmc $dat,$dat + subs $cnt,$cnt,#2 + aese $dat,q9 + vld1.32 {q9},[$key_],#16 + aesmc $dat,$dat + b.gt .Lctr32_tail + + aese $dat,q8 + aesmc $dat,$dat + aese $dat,q9 + aesmc $dat,$dat + vld1.8 {$in0},[$inp] + aese $dat,q10 + aesmc $dat,$dat + aese $dat,q11 + aesmc $dat,$dat + aese $dat,q12 + aesmc $dat,$dat + aese $dat,q13 + aesmc $dat,$dat + aese $dat,q14 + aesmc $dat,$dat + veor $in0,$in0,$rndlast + aese $dat,q15 + + veor $in0,$in0,$dat + vst1.8 {$in0},[$out] + +.Lctr32_done: +___ +$code.=<<___ if ($flavour !~ /64/); + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r10,pc} +___ +$code.=<<___ if ($flavour =~ /64/); + ldr x29,[sp],#16 + ret +___ +$code.=<<___; +.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks +___ +}}} +$code.=<<___; +#endif +___ +######################################## +if ($flavour =~ /64/) { ######## 64-bit code + my %opcode = ( + "aesd" => 0x4e285800, "aese" => 0x4e284800, + "aesimc"=> 0x4e287800, "aesmc" => 0x4e286800 ); + + local *unaes = sub { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5), + $mnemonic,$arg; + }; + + foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers + s/@\s/\/\//o; # old->new style commentary + + #s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or + s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or + s/vmov\.i8/movi/o or # fix up legacy mnemonics + s/vext\.8/ext/o or + s/vrev32\.8/rev32/o or + s/vtst\.8/cmtst/o or + s/vshr/ushr/o or + s/^(\s+)v/$1/o or # strip off v prefix + s/\bbx\s+lr\b/ret/o; + + # fix up remainig legacy suffixes + s/\.[ui]?8//o; + m/\],#8/o and s/\.16b/\.8b/go; + s/\.[ui]?32//o and s/\.16b/\.4s/go; + s/\.[ui]?64//o and s/\.16b/\.2d/go; + s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; + + print $_,"\n"; + } +} else { ######## 32-bit code + my %opcode = ( + "aesd" => 0xf3b00340, "aese" => 0xf3b00300, + "aesimc"=> 0xf3b003c0, "aesmc" => 0xf3b00380 ); + + local *unaes = sub { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<1) |(($2&8)<<2); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + }; + + sub unvtbl { + my $arg=shift; + + $arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o && + sprintf "vtbl.8 d%d,{q%d},d%d\n\t". + "vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1; + } + + sub unvdup32 { + my $arg=shift; + + $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && + sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; + } + + sub unvmov32 { + my $arg=shift; + + $arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o && + sprintf "vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3; + } + + foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers + s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers + s/\/\/\s?/@ /o; # new->old style commentary + + # fix up remainig new-style suffixes + s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo or + s/\],#[0-9]+/]!/o; + + s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or + s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or + s/vtbl\.8\s+(.*)/unvtbl($1)/geo or + s/vdup\.32\s+(.*)/unvdup32($1)/geo or + s/vmov\.32\s+(.*)/unvmov32($1)/geo or + s/^(\s+)b\./$1b/o or + s/^(\s+)ret/$1bx\tlr/o; + + print $_,"\n"; + } +} + +close STDOUT; diff --git a/app/openssl/crypto/arm64cpuid.S b/app/openssl/crypto/arm64cpuid.S new file mode 100644 index 00000000..4778ac1d --- /dev/null +++ b/app/openssl/crypto/arm64cpuid.S @@ -0,0 +1,46 @@ +#include "arm_arch.h" + +.text +.arch armv8-a+crypto + +.align 5 +.global _armv7_neon_probe +.type _armv7_neon_probe,%function +_armv7_neon_probe: + orr v15.16b, v15.16b, v15.16b + ret +.size _armv7_neon_probe,.-_armv7_neon_probe + +.global _armv7_tick +.type _armv7_tick,%function +_armv7_tick: + mrs x0, CNTVCT_EL0 + ret +.size _armv7_tick,.-_armv7_tick + +.global _armv8_aes_probe +.type _armv8_aes_probe,%function +_armv8_aes_probe: + aese v0.16b, v0.16b + ret +.size _armv8_aes_probe,.-_armv8_aes_probe + +.global _armv8_sha1_probe +.type _armv8_sha1_probe,%function +_armv8_sha1_probe: + sha1h s0, s0 + ret +.size _armv8_sha1_probe,.-_armv8_sha1_probe + +.global _armv8_sha256_probe +.type _armv8_sha256_probe,%function +_armv8_sha256_probe: + sha256su0 v0.4s, v0.4s + ret +.size _armv8_sha256_probe,.-_armv8_sha256_probe +.global _armv8_pmull_probe +.type _armv8_pmull_probe,%function +_armv8_pmull_probe: + pmull v0.1q, v0.1d, v0.1d + ret +.size _armv8_pmull_probe,.-_armv8_pmull_probe diff --git a/app/openssl/crypto/arm_arch.h b/app/openssl/crypto/arm_arch.h index 5a831076..6fa87244 100644 --- a/app/openssl/crypto/arm_arch.h +++ b/app/openssl/crypto/arm_arch.h @@ -10,13 +10,24 @@ # define __ARMEL__ # endif # elif defined(__GNUC__) +# if defined(__aarch64__) +# define __ARM_ARCH__ 8 +# if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ +# define __ARMEB__ +# else +# define __ARMEL__ +# endif /* * Why doesn't gcc define __ARM_ARCH__? Instead it defines * bunch of below macros. See all_architectires[] table in * gcc/config/arm/arm.c. On a side note it defines * __ARMEL__/__ARMEB__ for little-/big-endian. */ -# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ +# elif defined(__ARM_ARCH) +# define __ARM_ARCH__ __ARM_ARCH +# elif defined(__ARM_ARCH_8A__) +# define __ARM_ARCH__ 8 +# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \ defined(__ARM_ARCH_7EM__) # define __ARM_ARCH__ 7 @@ -43,9 +54,13 @@ #if !__ASSEMBLER__ extern unsigned int OPENSSL_armcap_P; +#endif #define ARMV7_NEON (1<<0) #define ARMV7_TICK (1<<1) -#endif +#define ARMV8_AES (1<<2) +#define ARMV8_SHA1 (1<<3) +#define ARMV8_SHA256 (1<<4) +#define ARMV8_PMULL (1<<5) #endif diff --git a/app/openssl/crypto/armcap.c b/app/openssl/crypto/armcap.c index 9abaf396..7e46d07a 100644 --- a/app/openssl/crypto/armcap.c +++ b/app/openssl/crypto/armcap.c @@ -19,9 +19,13 @@ static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } * ARM compilers support inline assembler... */ void _armv7_neon_probe(void); -unsigned int _armv7_tick(void); +void _armv8_aes_probe(void); +void _armv8_sha1_probe(void); +void _armv8_sha256_probe(void); +void _armv8_pmull_probe(void); +unsigned long _armv7_tick(void); -unsigned int OPENSSL_rdtsc(void) +unsigned long OPENSSL_rdtsc(void) { if (OPENSSL_armcap_P & ARMV7_TICK) return _armv7_tick(); @@ -29,9 +33,41 @@ unsigned int OPENSSL_rdtsc(void) return 0; } +/* + * Use a weak reference to getauxval() so we can use it if it is available but + * don't break the build if it is not. + */ #if defined(__GNUC__) && __GNUC__>=2 void OPENSSL_cpuid_setup(void) __attribute__((constructor)); +extern unsigned long getauxval(unsigned long type) __attribute__((weak)); +#else +static unsigned long (*getauxval)(unsigned long) = NULL; #endif + +/* + * ARM puts the the feature bits for Crypto Extensions in AT_HWCAP2, whereas + * AArch64 used AT_HWCAP. + */ +#if defined(__arm__) || defined (__arm) +# define HWCAP 16 /* AT_HWCAP */ +# define HWCAP_NEON (1 << 12) + +# define HWCAP_CE 26 /* AT_HWCAP2 */ +# define HWCAP_CE_AES (1 << 0) +# define HWCAP_CE_PMULL (1 << 1) +# define HWCAP_CE_SHA1 (1 << 2) +# define HWCAP_CE_SHA256 (1 << 3) +#elif defined(__aarch64__) +# define HWCAP 16 /* AT_HWCAP */ +# define HWCAP_NEON (1 << 1) + +# define HWCAP_CE HWCAP +# define HWCAP_CE_AES (1 << 3) +# define HWCAP_CE_PMULL (1 << 4) +# define HWCAP_CE_SHA1 (1 << 5) +# define HWCAP_CE_SHA256 (1 << 6) +#endif + void OPENSSL_cpuid_setup(void) { char *e; @@ -44,7 +80,7 @@ void OPENSSL_cpuid_setup(void) if ((e=getenv("OPENSSL_armcap"))) { - OPENSSL_armcap_P=strtoul(e,NULL,0); + OPENSSL_armcap_P=(unsigned int)strtoul(e,NULL,0); return; } @@ -64,10 +100,51 @@ void OPENSSL_cpuid_setup(void) sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset); sigaction(SIGILL,&ill_act,&ill_oact); - if (sigsetjmp(ill_jmp,1) == 0) + if (getauxval != NULL) + { + if (getauxval(HWCAP) & HWCAP_NEON) + { + unsigned long hwcap = getauxval(HWCAP_CE); + + OPENSSL_armcap_P |= ARMV7_NEON; + + if (hwcap & HWCAP_CE_AES) + OPENSSL_armcap_P |= ARMV8_AES; + + if (hwcap & HWCAP_CE_PMULL) + OPENSSL_armcap_P |= ARMV8_PMULL; + + if (hwcap & HWCAP_CE_SHA1) + OPENSSL_armcap_P |= ARMV8_SHA1; + + if (hwcap & HWCAP_CE_SHA256) + OPENSSL_armcap_P |= ARMV8_SHA256; + } + } + else if (sigsetjmp(ill_jmp,1) == 0) { _armv7_neon_probe(); OPENSSL_armcap_P |= ARMV7_NEON; + if (sigsetjmp(ill_jmp,1) == 0) + { + _armv8_pmull_probe(); + OPENSSL_armcap_P |= ARMV8_PMULL|ARMV8_AES; + } + else if (sigsetjmp(ill_jmp,1) == 0) + { + _armv8_aes_probe(); + OPENSSL_armcap_P |= ARMV8_AES; + } + if (sigsetjmp(ill_jmp,1) == 0) + { + _armv8_sha1_probe(); + OPENSSL_armcap_P |= ARMV8_SHA1; + } + if (sigsetjmp(ill_jmp,1) == 0) + { + _armv8_sha256_probe(); + OPENSSL_armcap_P |= ARMV8_SHA256; + } } if (sigsetjmp(ill_jmp,1) == 0) { diff --git a/app/openssl/crypto/armv4cpuid.S b/app/openssl/crypto/armv4cpuid.S index 2d618dea..add11d40 100644 --- a/app/openssl/crypto/armv4cpuid.S +++ b/app/openssl/crypto/armv4cpuid.S @@ -7,17 +7,49 @@ .global _armv7_neon_probe .type _armv7_neon_probe,%function _armv7_neon_probe: - .word 0xf26ee1fe @ vorr q15,q15,q15 - .word 0xe12fff1e @ bx lr + .byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8 + .byte 0x1e,0xff,0x2f,0xe1 @ bx lr .size _armv7_neon_probe,.-_armv7_neon_probe .global _armv7_tick .type _armv7_tick,%function _armv7_tick: - mrc p15,0,r0,c9,c13,0 - .word 0xe12fff1e @ bx lr + mrrc p15,1,r0,r1,c14 @ CNTVCT +#if __ARM_ARCH__>=5 + bx lr +#else + .word 0xe12fff1e @ bx lr +#endif .size _armv7_tick,.-_armv7_tick +.global _armv8_aes_probe +.type _armv8_aes_probe,%function +_armv8_aes_probe: + .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0 + .byte 0x1e,0xff,0x2f,0xe1 @ bx lr +.size _armv8_aes_probe,.-_armv8_aes_probe + +.global _armv8_sha1_probe +.type _armv8_sha1_probe,%function +_armv8_sha1_probe: + .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0 + .byte 0x1e,0xff,0x2f,0xe1 @ bx lr +.size _armv8_sha1_probe,.-_armv8_sha1_probe + +.global _armv8_sha256_probe +.type _armv8_sha256_probe,%function +_armv8_sha256_probe: + .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0 + .byte 0x1e,0xff,0x2f,0xe1 @ bx lr +.size _armv8_sha256_probe,.-_armv8_sha256_probe +.global _armv8_pmull_probe +.type _armv8_pmull_probe,%function +_armv8_pmull_probe: + .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0 + .byte 0x1e,0xff,0x2f,0xe1 @ bx lr +.size _armv8_pmull_probe,.-_armv8_pmull_probe + +.align 5 .global OPENSSL_atomic_add .type OPENSSL_atomic_add,%function OPENSSL_atomic_add: @@ -28,7 +60,7 @@ OPENSSL_atomic_add: cmp r2,#0 bne .Ladd mov r0,r3 - .word 0xe12fff1e @ bx lr + bx lr #else stmdb sp!,{r4-r6,lr} ldr r2,.Lspinlock @@ -81,9 +113,13 @@ OPENSSL_cleanse: adds r1,r1,#4 bne .Little .Lcleanse_done: +#if __ARM_ARCH__>=5 + bx lr +#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr +#endif .size OPENSSL_cleanse,.-OPENSSL_cleanse .global OPENSSL_wipe_cpu @@ -97,41 +133,53 @@ OPENSSL_wipe_cpu: eor ip,ip,ip tst r0,#1 beq .Lwipe_done - .word 0xf3000150 @ veor q0, q0, q0 - .word 0xf3022152 @ veor q1, q1, q1 - .word 0xf3044154 @ veor q2, q2, q2 - .word 0xf3066156 @ veor q3, q3, q3 - .word 0xf34001f0 @ veor q8, q8, q8 - .word 0xf34221f2 @ veor q9, q9, q9 - .word 0xf34441f4 @ veor q10, q10, q10 - .word 0xf34661f6 @ veor q11, q11, q11 - .word 0xf34881f8 @ veor q12, q12, q12 - .word 0xf34aa1fa @ veor q13, q13, q13 - .word 0xf34cc1fc @ veor q14, q14, q14 - .word 0xf34ee1fe @ veor q15, q15, q15 + .byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0 + .byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1 + .byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2 + .byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3 + .byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8 + .byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9 + .byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10 + .byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11 + .byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12 + .byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13 + .byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14 + .byte 0xfe,0xe1,0x4e,0xf3 @ veor q14, q14, q14 .Lwipe_done: mov r0,sp +#if __ARM_ARCH__>=5 + bx lr +#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr +#endif .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu .global OPENSSL_instrument_bus .type OPENSSL_instrument_bus,%function OPENSSL_instrument_bus: eor r0,r0,r0 +#if __ARM_ARCH__>=5 + bx lr +#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr +#endif .size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus .global OPENSSL_instrument_bus2 .type OPENSSL_instrument_bus2,%function OPENSSL_instrument_bus2: eor r0,r0,r0 +#if __ARM_ARCH__>=5 + bx lr +#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr +#endif .size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 .align 5 diff --git a/app/openssl/crypto/asn1/a_strnid.c b/app/openssl/crypto/asn1/a_strnid.c index 2fc48c15..2afd5a41 100644 --- a/app/openssl/crypto/asn1/a_strnid.c +++ b/app/openssl/crypto/asn1/a_strnid.c @@ -74,7 +74,7 @@ static int sk_table_cmp(const ASN1_STRING_TABLE * const *a, * certain software (e.g. Netscape) has problems with them. */ -static unsigned long global_mask = 0xFFFFFFFFL; +static unsigned long global_mask = B_ASN1_UTF8STRING; void ASN1_STRING_set_default_mask(unsigned long mask) { diff --git a/app/openssl/crypto/bio/bio.h b/app/openssl/crypto/bio/bio.h index 05699ab2..d05fa22a 100644 --- a/app/openssl/crypto/bio/bio.h +++ b/app/openssl/crypto/bio/bio.h @@ -266,6 +266,9 @@ void BIO_clear_flags(BIO *b, int flags); #define BIO_RR_CONNECT 0x02 /* Returned from the accept BIO when an accept would have blocked */ #define BIO_RR_ACCEPT 0x03 +/* Returned from the SSL bio when the channel id retrieval code cannot find the + * private key. */ +#define BIO_RR_SSL_CHANNEL_ID_LOOKUP 0x04 /* These are passed by the BIO callback */ #define BIO_CB_FREE 0x01 diff --git a/app/openssl/crypto/bio/bss_dgram.c b/app/openssl/crypto/bio/bss_dgram.c index 54c012c4..d9967e72 100644 --- a/app/openssl/crypto/bio/bss_dgram.c +++ b/app/openssl/crypto/bio/bss_dgram.c @@ -1333,7 +1333,7 @@ static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr) bio_dgram_sctp_data *data = NULL; socklen_t sockopt_len = 0; struct sctp_authkeyid authkeyid; - struct sctp_authkey *authkey; + struct sctp_authkey *authkey = NULL; data = (bio_dgram_sctp_data *)b->ptr; @@ -1388,6 +1388,11 @@ static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr) /* Add new key */ sockopt_len = sizeof(struct sctp_authkey) + 64 * sizeof(uint8_t); authkey = OPENSSL_malloc(sockopt_len); + if (authkey == NULL) + { + ret = -1; + break; + } memset(authkey, 0x00, sockopt_len); authkey->sca_keynumber = authkeyid.scact_keynumber + 1; #ifndef __FreeBSD__ @@ -1399,6 +1404,8 @@ static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr) memcpy(&authkey->sca_key[0], ptr, 64 * sizeof(uint8_t)); ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_KEY, authkey, sockopt_len); + OPENSSL_free(authkey); + authkey = NULL; if (ret < 0) break; /* Reset active key */ diff --git a/app/openssl/crypto/bn/asm/armv4-gf2m.S b/app/openssl/crypto/bn/asm/armv4-gf2m.S index 038f0864..0fa25b26 100644 --- a/app/openssl/crypto/bn/asm/armv4-gf2m.S +++ b/app/openssl/crypto/bn/asm/armv4-gf2m.S @@ -5,31 +5,6 @@ #if __ARM_ARCH__>=7 .fpu neon - -.type mul_1x1_neon,%function -.align 5 -mul_1x1_neon: - vshl.u64 d2,d16,#8 @ q1-q3 are slided - vmull.p8 q0,d16,d17 @ a·bb - vshl.u64 d4,d16,#16 - vmull.p8 q1,d2,d17 @ a<<8·bb - vshl.u64 d6,d16,#24 - vmull.p8 q2,d4,d17 @ a<<16·bb - vshr.u64 d2,#8 - vmull.p8 q3,d6,d17 @ a<<24·bb - vshl.u64 d3,#24 - veor d0,d2 - vshr.u64 d4,#16 - veor d0,d3 - vshl.u64 d5,#16 - veor d0,d4 - vshr.u64 d6,#24 - veor d0,d5 - vshl.u64 d7,#8 - veor d0,d6 - veor d0,d7 - .word 0xe12fff1e -.size mul_1x1_neon,.-mul_1x1_neon #endif .type mul_1x1_ialu,%function .align 5 @@ -120,40 +95,53 @@ bn_GF2m_mul_2x2: tst r12,#1 beq .Lialu - veor d18,d18 - vmov.32 d19,r3,r3 @ two copies of b1 - vmov.32 d18[0],r1 @ a1 - - veor d20,d20 - vld1.32 d21[],[sp,:32] @ two copies of b0 - vmov.32 d20[0],r2 @ a0 - mov r12,lr - - vmov d16,d18 - vmov d17,d19 - bl mul_1x1_neon @ a1·b1 - vmov d22,d0 - - vmov d16,d20 - vmov d17,d21 - bl mul_1x1_neon @ a0·b0 - vmov d23,d0 - - veor d16,d20,d18 - veor d17,d21,d19 - veor d20,d23,d22 - bl mul_1x1_neon @ (a0+a1)·(b0+b1) - - veor d0,d20 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1 - vshl.u64 d1,d0,#32 - vshr.u64 d0,d0,#32 - veor d23,d1 - veor d22,d0 - vst1.32 {d23[0]},[r0,:32]! - vst1.32 {d23[1]},[r0,:32]! - vst1.32 {d22[0]},[r0,:32]! - vst1.32 {d22[1]},[r0,:32] - bx r12 + ldr r12, [sp] @ 5th argument + vmov.32 d26, r2, r1 + vmov.32 d27, r12, r3 + vmov.i64 d28, #0x0000ffffffffffff + vmov.i64 d29, #0x00000000ffffffff + vmov.i64 d30, #0x000000000000ffff + + vext.8 d2, d26, d26, #1 @ A1 + vmull.p8 q1, d2, d27 @ F = A1*B + vext.8 d0, d27, d27, #1 @ B1 + vmull.p8 q0, d26, d0 @ E = A*B1 + vext.8 d4, d26, d26, #2 @ A2 + vmull.p8 q2, d4, d27 @ H = A2*B + vext.8 d16, d27, d27, #2 @ B2 + vmull.p8 q8, d26, d16 @ G = A*B2 + vext.8 d6, d26, d26, #3 @ A3 + veor q1, q1, q0 @ L = E + F + vmull.p8 q3, d6, d27 @ J = A3*B + vext.8 d0, d27, d27, #3 @ B3 + veor q2, q2, q8 @ M = G + H + vmull.p8 q0, d26, d0 @ I = A*B3 + veor d2, d2, d3 @ t0 = (L) (P0 + P1) << 8 + vand d3, d3, d28 + vext.8 d16, d27, d27, #4 @ B4 + veor d4, d4, d5 @ t1 = (M) (P2 + P3) << 16 + vand d5, d5, d29 + vmull.p8 q8, d26, d16 @ K = A*B4 + veor q3, q3, q0 @ N = I + J + veor d2, d2, d3 + veor d4, d4, d5 + veor d6, d6, d7 @ t2 = (N) (P4 + P5) << 24 + vand d7, d7, d30 + vext.8 q1, q1, q1, #15 + veor d16, d16, d17 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d17, #0 + vext.8 q2, q2, q2, #14 + veor d6, d6, d7 + vmull.p8 q0, d26, d27 @ D = A*B + vext.8 q8, q8, q8, #12 + vext.8 q3, q3, q3, #13 + veor q1, q1, q2 + veor q3, q3, q8 + veor q0, q0, q1 + veor q0, q0, q3 + + vst1.32 {q0}, [r0] + bx lr @ bx lr .align 4 .Lialu: #endif diff --git a/app/openssl/crypto/bn/asm/armv4-gf2m.pl b/app/openssl/crypto/bn/asm/armv4-gf2m.pl index 22ad1f85..3f1f4f67 100644 --- a/app/openssl/crypto/bn/asm/armv4-gf2m.pl +++ b/app/openssl/crypto/bn/asm/armv4-gf2m.pl @@ -20,14 +20,21 @@ # length, more for longer keys. Even though NEON 1x1 multiplication # runs in even less cycles, ~30, improvement is measurable only on # longer keys. One has to optimize code elsewhere to get NEON glow... +# +# April 2014 +# +# Double bn_GF2m_mul_2x2 performance by using algorithm from paper +# referred below, which improves ECDH and ECDSA verify benchmarks +# by 18-40%. +# +# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software +# Polynomial Multiplication on ARM Processors using the NEON Engine. +# +# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } -sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } - $code=<<___; #include "arm_arch.h" @@ -36,31 +43,6 @@ $code=<<___; #if __ARM_ARCH__>=7 .fpu neon - -.type mul_1x1_neon,%function -.align 5 -mul_1x1_neon: - vshl.u64 `&Dlo("q1")`,d16,#8 @ q1-q3 are slided $a - vmull.p8 `&Q("d0")`,d16,d17 @ a·bb - vshl.u64 `&Dlo("q2")`,d16,#16 - vmull.p8 q1,`&Dlo("q1")`,d17 @ a<<8·bb - vshl.u64 `&Dlo("q3")`,d16,#24 - vmull.p8 q2,`&Dlo("q2")`,d17 @ a<<16·bb - vshr.u64 `&Dlo("q1")`,#8 - vmull.p8 q3,`&Dlo("q3")`,d17 @ a<<24·bb - vshl.u64 `&Dhi("q1")`,#24 - veor d0,`&Dlo("q1")` - vshr.u64 `&Dlo("q2")`,#16 - veor d0,`&Dhi("q1")` - vshl.u64 `&Dhi("q2")`,#16 - veor d0,`&Dlo("q2")` - vshr.u64 `&Dlo("q3")`,#24 - veor d0,`&Dhi("q2")` - vshl.u64 `&Dhi("q3")`,#8 - veor d0,`&Dlo("q3")` - veor d0,`&Dhi("q3")` - bx lr -.size mul_1x1_neon,.-mul_1x1_neon #endif ___ ################ @@ -159,8 +141,9 @@ ___ # void bn_GF2m_mul_2x2(BN_ULONG *r, # BN_ULONG a1,BN_ULONG a0, # BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0 - -($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23)); +{ +my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12)); +my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31)); $code.=<<___; .global bn_GF2m_mul_2x2 @@ -173,44 +156,58 @@ bn_GF2m_mul_2x2: tst r12,#1 beq .Lialu - veor $A1,$A1 - vmov.32 $B1,r3,r3 @ two copies of b1 - vmov.32 ${A1}[0],r1 @ a1 - - veor $A0,$A0 - vld1.32 ${B0}[],[sp,:32] @ two copies of b0 - vmov.32 ${A0}[0],r2 @ a0 - mov r12,lr - - vmov d16,$A1 - vmov d17,$B1 - bl mul_1x1_neon @ a1·b1 - vmov $A1B1,d0 - - vmov d16,$A0 - vmov d17,$B0 - bl mul_1x1_neon @ a0·b0 - vmov $A0B0,d0 - - veor d16,$A0,$A1 - veor d17,$B0,$B1 - veor $A0,$A0B0,$A1B1 - bl mul_1x1_neon @ (a0+a1)·(b0+b1) - - veor d0,$A0 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1 - vshl.u64 d1,d0,#32 - vshr.u64 d0,d0,#32 - veor $A0B0,d1 - veor $A1B1,d0 - vst1.32 {${A0B0}[0]},[r0,:32]! - vst1.32 {${A0B0}[1]},[r0,:32]! - vst1.32 {${A1B1}[0]},[r0,:32]! - vst1.32 {${A1B1}[1]},[r0,:32] - bx r12 + ldr r12, [sp] @ 5th argument + vmov.32 $a, r2, r1 + vmov.32 $b, r12, r3 + vmov.i64 $k48, #0x0000ffffffffffff + vmov.i64 $k32, #0x00000000ffffffff + vmov.i64 $k16, #0x000000000000ffff + + vext.8 $t0#lo, $a, $a, #1 @ A1 + vmull.p8 $t0, $t0#lo, $b @ F = A1*B + vext.8 $r#lo, $b, $b, #1 @ B1 + vmull.p8 $r, $a, $r#lo @ E = A*B1 + vext.8 $t1#lo, $a, $a, #2 @ A2 + vmull.p8 $t1, $t1#lo, $b @ H = A2*B + vext.8 $t3#lo, $b, $b, #2 @ B2 + vmull.p8 $t3, $a, $t3#lo @ G = A*B2 + vext.8 $t2#lo, $a, $a, #3 @ A3 + veor $t0, $t0, $r @ L = E + F + vmull.p8 $t2, $t2#lo, $b @ J = A3*B + vext.8 $r#lo, $b, $b, #3 @ B3 + veor $t1, $t1, $t3 @ M = G + H + vmull.p8 $r, $a, $r#lo @ I = A*B3 + veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8 + vand $t0#hi, $t0#hi, $k48 + vext.8 $t3#lo, $b, $b, #4 @ B4 + veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16 + vand $t1#hi, $t1#hi, $k32 + vmull.p8 $t3, $a, $t3#lo @ K = A*B4 + veor $t2, $t2, $r @ N = I + J + veor $t0#lo, $t0#lo, $t0#hi + veor $t1#lo, $t1#lo, $t1#hi + veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24 + vand $t2#hi, $t2#hi, $k16 + vext.8 $t0, $t0, $t0, #15 + veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32 + vmov.i64 $t3#hi, #0 + vext.8 $t1, $t1, $t1, #14 + veor $t2#lo, $t2#lo, $t2#hi + vmull.p8 $r, $a, $b @ D = A*B + vext.8 $t3, $t3, $t3, #12 + vext.8 $t2, $t2, $t2, #13 + veor $t0, $t0, $t1 + veor $t2, $t2, $t3 + veor $r, $r, $t0 + veor $r, $r, $t2 + + vst1.32 {$r}, [r0] + ret @ bx lr .align 4 .Lialu: #endif ___ +} $ret="r10"; # reassigned 1st argument $code.=<<___; stmdb sp!,{r4-r10,lr} @@ -272,7 +269,13 @@ $code.=<<___; .comm OPENSSL_armcap_P,4,4 ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} close STDOUT; # enforce flush diff --git a/app/openssl/crypto/bn/asm/armv4-mont.pl b/app/openssl/crypto/bn/asm/armv4-mont.pl index f78a8b5f..72bad8e3 100644 --- a/app/openssl/crypto/bn/asm/armv4-mont.pl +++ b/app/openssl/crypto/bn/asm/armv4-mont.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -23,6 +23,21 @@ # than 1/2KB. Windows CE port would be trivial, as it's exclusively # about decorations, ABI and instruction syntax are identical. +# November 2013 +# +# Add NEON code path, which handles lengths divisible by 8. RSA/DSA +# performance improvement on Cortex-A8 is ~45-100% depending on key +# length, more for longer keys. On Cortex-A15 the span is ~10-105%. +# On Snapdragon S4 improvement was measured to vary from ~70% to +# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is +# rather because original integer-only code seems to perform +# suboptimally on S4. Situation on Cortex-A9 is unfortunately +# different. It's being looked into, but the trouble is that +# performance for vectors longer than 256 bits is actually couple +# of percent worse than for integer-only code. The code is chosen +# for execution on all NEON-capable processors, because gain on +# others outweighs the marginal loss on Cortex-A9. + while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; @@ -52,16 +67,40 @@ $_n0="$num,#14*4"; $_num="$num,#15*4"; $_bpend=$_num; $code=<<___; +#include "arm_arch.h" + .text +.code 32 + +#if __ARM_ARCH__>=7 +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-bn_mul_mont +#endif .global bn_mul_mont .type bn_mul_mont,%function -.align 2 +.align 5 bn_mul_mont: + ldr ip,[sp,#4] @ load num stmdb sp!,{r0,r2} @ sp points at argument block - ldr $num,[sp,#3*4] @ load num - cmp $num,#2 +#if __ARM_ARCH__>=7 + tst ip,#7 + bne .Lialu + adr r0,bn_mul_mont + ldr r2,.LOPENSSL_armcap + ldr r0,[r0,r2] + tst r0,#1 @ NEON available? + ldmia sp, {r0,r2} + beq .Lialu + add sp,sp,#8 + b bn_mul8x_mont_neon +.align 4 +.Lialu: +#endif + cmp ip,#2 + mov $num,ip @ load num movlt r0,#0 addlt sp,sp,#2*4 blt .Labrt @@ -191,14 +230,446 @@ bn_mul_mont: ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 -.Labrt: tst lr,#1 +.Labrt: +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) +#endif .size bn_mul_mont,.-bn_mul_mont -.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by " +___ +{ +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +my ($A0,$A1,$A2,$A3)=map("d$_",(0..3)); +my ($N0,$N1,$N2,$N3)=map("d$_",(4..7)); +my ($Z,$Temp)=("q4","q5"); +my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13)); +my ($Bi,$Ni,$M0)=map("d$_",(28..31)); +my $zero=&Dlo($Z); +my $temp=&Dlo($Temp); + +my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5)); +my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9)); + +$code.=<<___; +#if __ARM_ARCH__>=7 +.fpu neon + +.type bn_mul8x_mont_neon,%function +.align 5 +bn_mul8x_mont_neon: + mov ip,sp + stmdb sp!,{r4-r11} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load rest of parameter block + + sub $toutptr,sp,#16 + vld1.32 {${Bi}[0]}, [$bptr,:32]! + sub $toutptr,$toutptr,$num,lsl#4 + vld1.32 {$A0-$A3}, [$aptr]! @ can't specify :32 :-( + and $toutptr,$toutptr,#-64 + vld1.32 {${M0}[0]}, [$n0,:32] + mov sp,$toutptr @ alloca + veor $zero,$zero,$zero + subs $inner,$num,#8 + vzip.16 $Bi,$zero + + vmull.u32 $A0xB,$Bi,${A0}[0] + vmull.u32 $A1xB,$Bi,${A0}[1] + vmull.u32 $A2xB,$Bi,${A1}[0] + vshl.i64 $temp,`&Dhi("$A0xB")`,#16 + vmull.u32 $A3xB,$Bi,${A1}[1] + + vadd.u64 $temp,$temp,`&Dlo("$A0xB")` + veor $zero,$zero,$zero + vmul.u32 $Ni,$temp,$M0 + + vmull.u32 $A4xB,$Bi,${A2}[0] + vld1.32 {$N0-$N3}, [$nptr]! + vmull.u32 $A5xB,$Bi,${A2}[1] + vmull.u32 $A6xB,$Bi,${A3}[0] + vzip.16 $Ni,$zero + vmull.u32 $A7xB,$Bi,${A3}[1] + + bne .LNEON_1st + + @ special case for num=8, everything is in register bank... + + vmlal.u32 $A0xB,$Ni,${N0}[0] + sub $outer,$num,#1 + vmlal.u32 $A1xB,$Ni,${N0}[1] + vmlal.u32 $A2xB,$Ni,${N1}[0] + vmlal.u32 $A3xB,$Ni,${N1}[1] + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vmov $Temp,$A0xB + vmlal.u32 $A5xB,$Ni,${N2}[1] + vmov $A0xB,$A1xB + vmlal.u32 $A6xB,$Ni,${N3}[0] + vmov $A1xB,$A2xB + vmlal.u32 $A7xB,$Ni,${N3}[1] + vmov $A2xB,$A3xB + vmov $A3xB,$A4xB + vshr.u64 $temp,$temp,#16 + vmov $A4xB,$A5xB + vmov $A5xB,$A6xB + vadd.u64 $temp,$temp,`&Dhi("$Temp")` + vmov $A6xB,$A7xB + veor $A7xB,$A7xB + vshr.u64 $temp,$temp,#16 + + b .LNEON_outer8 + +.align 4 +.LNEON_outer8: + vld1.32 {${Bi}[0]}, [$bptr,:32]! + veor $zero,$zero,$zero + vzip.16 $Bi,$zero + vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp + + vmlal.u32 $A0xB,$Bi,${A0}[0] + vmlal.u32 $A1xB,$Bi,${A0}[1] + vmlal.u32 $A2xB,$Bi,${A1}[0] + vshl.i64 $temp,`&Dhi("$A0xB")`,#16 + vmlal.u32 $A3xB,$Bi,${A1}[1] + + vadd.u64 $temp,$temp,`&Dlo("$A0xB")` + veor $zero,$zero,$zero + subs $outer,$outer,#1 + vmul.u32 $Ni,$temp,$M0 + + vmlal.u32 $A4xB,$Bi,${A2}[0] + vmlal.u32 $A5xB,$Bi,${A2}[1] + vmlal.u32 $A6xB,$Bi,${A3}[0] + vzip.16 $Ni,$zero + vmlal.u32 $A7xB,$Bi,${A3}[1] + + vmlal.u32 $A0xB,$Ni,${N0}[0] + vmlal.u32 $A1xB,$Ni,${N0}[1] + vmlal.u32 $A2xB,$Ni,${N1}[0] + vmlal.u32 $A3xB,$Ni,${N1}[1] + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vmov $Temp,$A0xB + vmlal.u32 $A5xB,$Ni,${N2}[1] + vmov $A0xB,$A1xB + vmlal.u32 $A6xB,$Ni,${N3}[0] + vmov $A1xB,$A2xB + vmlal.u32 $A7xB,$Ni,${N3}[1] + vmov $A2xB,$A3xB + vmov $A3xB,$A4xB + vshr.u64 $temp,$temp,#16 + vmov $A4xB,$A5xB + vmov $A5xB,$A6xB + vadd.u64 $temp,$temp,`&Dhi("$Temp")` + vmov $A6xB,$A7xB + veor $A7xB,$A7xB + vshr.u64 $temp,$temp,#16 + + bne .LNEON_outer8 + + vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp + mov $toutptr,sp + vshr.u64 $temp,`&Dlo("$A0xB")`,#16 + mov $inner,$num + vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp + add $tinptr,sp,#16 + vshr.u64 $temp,`&Dhi("$A0xB")`,#16 + vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")` + + b .LNEON_tail2 + +.align 4 +.LNEON_1st: + vmlal.u32 $A0xB,$Ni,${N0}[0] + vld1.32 {$A0-$A3}, [$aptr]! + vmlal.u32 $A1xB,$Ni,${N0}[1] + subs $inner,$inner,#8 + vmlal.u32 $A2xB,$Ni,${N1}[0] + vmlal.u32 $A3xB,$Ni,${N1}[1] + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vld1.32 {$N0-$N1}, [$nptr]! + vmlal.u32 $A5xB,$Ni,${N2}[1] + vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! + vmlal.u32 $A6xB,$Ni,${N3}[0] + vmlal.u32 $A7xB,$Ni,${N3}[1] + vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! + + vmull.u32 $A0xB,$Bi,${A0}[0] + vld1.32 {$N2-$N3}, [$nptr]! + vmull.u32 $A1xB,$Bi,${A0}[1] + vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! + vmull.u32 $A2xB,$Bi,${A1}[0] + vmull.u32 $A3xB,$Bi,${A1}[1] + vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! + + vmull.u32 $A4xB,$Bi,${A2}[0] + vmull.u32 $A5xB,$Bi,${A2}[1] + vmull.u32 $A6xB,$Bi,${A3}[0] + vmull.u32 $A7xB,$Bi,${A3}[1] + + bne .LNEON_1st + + vmlal.u32 $A0xB,$Ni,${N0}[0] + add $tinptr,sp,#16 + vmlal.u32 $A1xB,$Ni,${N0}[1] + sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr + vmlal.u32 $A2xB,$Ni,${N1}[0] + vld1.64 {$Temp}, [sp,:128] + vmlal.u32 $A3xB,$Ni,${N1}[1] + sub $outer,$num,#1 + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! + vmlal.u32 $A5xB,$Ni,${N2}[1] + vshr.u64 $temp,$temp,#16 + vld1.64 {$A0xB}, [$tinptr, :128]! + vmlal.u32 $A6xB,$Ni,${N3}[0] + vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! + vmlal.u32 $A7xB,$Ni,${N3}[1] + + vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! + vadd.u64 $temp,$temp,`&Dhi("$Temp")` + veor $Z,$Z,$Z + vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! + vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! + vst1.64 {$Z}, [$toutptr,:128] + vshr.u64 $temp,$temp,#16 + + b .LNEON_outer + +.align 4 +.LNEON_outer: + vld1.32 {${Bi}[0]}, [$bptr,:32]! + sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr + vld1.32 {$A0-$A3}, [$aptr]! + veor $zero,$zero,$zero + mov $toutptr,sp + vzip.16 $Bi,$zero + sub $inner,$num,#8 + vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp + + vmlal.u32 $A0xB,$Bi,${A0}[0] + vld1.64 {$A3xB-$A4xB},[$tinptr,:256]! + vmlal.u32 $A1xB,$Bi,${A0}[1] + vmlal.u32 $A2xB,$Bi,${A1}[0] + vld1.64 {$A5xB-$A6xB},[$tinptr,:256]! + vmlal.u32 $A3xB,$Bi,${A1}[1] + + vshl.i64 $temp,`&Dhi("$A0xB")`,#16 + veor $zero,$zero,$zero + vadd.u64 $temp,$temp,`&Dlo("$A0xB")` + vld1.64 {$A7xB},[$tinptr,:128]! + vmul.u32 $Ni,$temp,$M0 + + vmlal.u32 $A4xB,$Bi,${A2}[0] + vld1.32 {$N0-$N3}, [$nptr]! + vmlal.u32 $A5xB,$Bi,${A2}[1] + vmlal.u32 $A6xB,$Bi,${A3}[0] + vzip.16 $Ni,$zero + vmlal.u32 $A7xB,$Bi,${A3}[1] + +.LNEON_inner: + vmlal.u32 $A0xB,$Ni,${N0}[0] + vld1.32 {$A0-$A3}, [$aptr]! + vmlal.u32 $A1xB,$Ni,${N0}[1] + subs $inner,$inner,#8 + vmlal.u32 $A2xB,$Ni,${N1}[0] + vmlal.u32 $A3xB,$Ni,${N1}[1] + vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vld1.64 {$A0xB}, [$tinptr, :128]! + vmlal.u32 $A5xB,$Ni,${N2}[1] + vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! + vmlal.u32 $A6xB,$Ni,${N3}[0] + vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! + vmlal.u32 $A7xB,$Ni,${N3}[1] + vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! + + vmlal.u32 $A0xB,$Bi,${A0}[0] + vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]! + vmlal.u32 $A1xB,$Bi,${A0}[1] + vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! + vmlal.u32 $A2xB,$Bi,${A1}[0] + vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]! + vmlal.u32 $A3xB,$Bi,${A1}[1] + vld1.32 {$N0-$N3}, [$nptr]! + + vmlal.u32 $A4xB,$Bi,${A2}[0] + vld1.64 {$A7xB}, [$tinptr, :128]! + vmlal.u32 $A5xB,$Bi,${A2}[1] + vmlal.u32 $A6xB,$Bi,${A3}[0] + vmlal.u32 $A7xB,$Bi,${A3}[1] + + bne .LNEON_inner + + vmlal.u32 $A0xB,$Ni,${N0}[0] + add $tinptr,sp,#16 + vmlal.u32 $A1xB,$Ni,${N0}[1] + sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr + vmlal.u32 $A2xB,$Ni,${N1}[0] + vld1.64 {$Temp}, [sp,:128] + vmlal.u32 $A3xB,$Ni,${N1}[1] + subs $outer,$outer,#1 + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! + vmlal.u32 $A5xB,$Ni,${N2}[1] + vld1.64 {$A0xB}, [$tinptr, :128]! + vshr.u64 $temp,$temp,#16 + vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! + vmlal.u32 $A6xB,$Ni,${N3}[0] + vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! + vmlal.u32 $A7xB,$Ni,${N3}[1] + + vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! + vadd.u64 $temp,$temp,`&Dhi("$Temp")` + vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! + vshr.u64 $temp,$temp,#16 + + bne .LNEON_outer + + mov $toutptr,sp + mov $inner,$num + +.LNEON_tail: + vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp + vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]! + vshr.u64 $temp,`&Dlo("$A0xB")`,#16 + vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp + vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]! + vshr.u64 $temp,`&Dhi("$A0xB")`,#16 + vld1.64 {$A7xB}, [$tinptr, :128]! + vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")` + +.LNEON_tail2: + vadd.u64 `&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp + vst1.32 {`&Dlo("$A0xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A1xB")`,#16 + vadd.u64 `&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp + vshr.u64 $temp,`&Dhi("$A1xB")`,#16 + vzip.16 `&Dlo("$A1xB")`,`&Dhi("$A1xB")` + + vadd.u64 `&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp + vst1.32 {`&Dlo("$A1xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A2xB")`,#16 + vadd.u64 `&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp + vshr.u64 $temp,`&Dhi("$A2xB")`,#16 + vzip.16 `&Dlo("$A2xB")`,`&Dhi("$A2xB")` + + vadd.u64 `&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp + vst1.32 {`&Dlo("$A2xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A3xB")`,#16 + vadd.u64 `&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp + vshr.u64 $temp,`&Dhi("$A3xB")`,#16 + vzip.16 `&Dlo("$A3xB")`,`&Dhi("$A3xB")` + + vadd.u64 `&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp + vst1.32 {`&Dlo("$A3xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A4xB")`,#16 + vadd.u64 `&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp + vshr.u64 $temp,`&Dhi("$A4xB")`,#16 + vzip.16 `&Dlo("$A4xB")`,`&Dhi("$A4xB")` + + vadd.u64 `&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp + vst1.32 {`&Dlo("$A4xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A5xB")`,#16 + vadd.u64 `&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp + vshr.u64 $temp,`&Dhi("$A5xB")`,#16 + vzip.16 `&Dlo("$A5xB")`,`&Dhi("$A5xB")` + + vadd.u64 `&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp + vst1.32 {`&Dlo("$A5xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A6xB")`,#16 + vadd.u64 `&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp + vld1.64 {$A0xB}, [$tinptr, :128]! + vshr.u64 $temp,`&Dhi("$A6xB")`,#16 + vzip.16 `&Dlo("$A6xB")`,`&Dhi("$A6xB")` + + vadd.u64 `&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp + vst1.32 {`&Dlo("$A6xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A7xB")`,#16 + vadd.u64 `&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp + vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! + vshr.u64 $temp,`&Dhi("$A7xB")`,#16 + vzip.16 `&Dlo("$A7xB")`,`&Dhi("$A7xB")` + subs $inner,$inner,#8 + vst1.32 {`&Dlo("$A7xB")`[0]}, [$toutptr, :32]! + + bne .LNEON_tail + + vst1.32 {${temp}[0]}, [$toutptr, :32] @ top-most bit + sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr + subs $aptr,sp,#0 @ clear carry flag + add $bptr,sp,$num,lsl#2 + +.LNEON_sub: + ldmia $aptr!, {r4-r7} + ldmia $nptr!, {r8-r11} + sbcs r8, r4,r8 + sbcs r9, r5,r9 + sbcs r10,r6,r10 + sbcs r11,r7,r11 + teq $aptr,$bptr @ preserves carry + stmia $rptr!, {r8-r11} + bne .LNEON_sub + + ldr r10, [$aptr] @ load top-most bit + veor q0,q0,q0 + sub r11,$bptr,sp @ this is num*4 + veor q1,q1,q1 + mov $aptr,sp + sub $rptr,$rptr,r11 @ rewind $rptr + mov $nptr,$bptr @ second 3/4th of frame + sbcs r10,r10,#0 @ result is carry flag + +.LNEON_copy_n_zap: + ldmia $aptr!, {r4-r7} + ldmia $rptr, {r8-r11} + movcc r8, r4 + vst1.64 {q0-q1}, [$nptr,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0-q1}, [$nptr,:256]! @ wipe + movcc r11,r7 + ldmia $aptr, {r4-r7} + stmia $rptr!, {r8-r11} + sub $aptr,$aptr,#16 + ldmia $rptr, {r8-r11} + movcc r8, r4 + vst1.64 {q0-q1}, [$aptr,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0-q1}, [$nptr,:256]! @ wipe + movcc r11,r7 + teq $aptr,$bptr @ preserves carry + stmia $rptr!, {r8-r11} + bne .LNEON_copy_n_zap + + sub sp,ip,#96 + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r11} + ret @ bx lr +.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon +#endif +___ +} +$code.=<<___; +.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by " .align 2 +#if __ARM_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif ___ +$code =~ s/\`([^\`]*)\`/eval $1/gem; $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +$code =~ s/\bret\b/bx lr/gm; print $code; close STDOUT; diff --git a/app/openssl/crypto/bn/asm/armv4-mont.s b/app/openssl/crypto/bn/asm/armv4-mont.s index 64c220b5..fecae15e 100644 --- a/app/openssl/crypto/bn/asm/armv4-mont.s +++ b/app/openssl/crypto/bn/asm/armv4-mont.s @@ -1,13 +1,37 @@ +#include "arm_arch.h" + .text +.code 32 + +#if __ARM_ARCH__>=7 +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-bn_mul_mont +#endif .global bn_mul_mont .type bn_mul_mont,%function -.align 2 +.align 5 bn_mul_mont: + ldr ip,[sp,#4] @ load num stmdb sp!,{r0,r2} @ sp points at argument block - ldr r0,[sp,#3*4] @ load num - cmp r0,#2 +#if __ARM_ARCH__>=7 + tst ip,#7 + bne .Lialu + adr r0,bn_mul_mont + ldr r2,.LOPENSSL_armcap + ldr r0,[r0,r2] + tst r0,#1 @ NEON available? + ldmia sp, {r0,r2} + beq .Lialu + add sp,sp,#8 + b bn_mul8x_mont_neon +.align 4 +.Lialu: +#endif + cmp ip,#2 + mov r0,ip @ load num movlt r0,#0 addlt sp,sp,#2*4 blt .Labrt @@ -137,9 +161,419 @@ bn_mul_mont: ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 -.Labrt: tst lr,#1 +.Labrt: +#if __ARM_ARCH__>=5 + bx lr @ .word 0xe12fff1e +#else + tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif .size bn_mul_mont,.-bn_mul_mont -.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by " +#if __ARM_ARCH__>=7 +.fpu neon + +.type bn_mul8x_mont_neon,%function +.align 5 +bn_mul8x_mont_neon: + mov ip,sp + stmdb sp!,{r4-r11} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load rest of parameter block + + sub r7,sp,#16 + vld1.32 {d28[0]}, [r2,:32]! + sub r7,r7,r5,lsl#4 + vld1.32 {d0-d3}, [r1]! @ can't specify :32 :-( + and r7,r7,#-64 + vld1.32 {d30[0]}, [r4,:32] + mov sp,r7 @ alloca + veor d8,d8,d8 + subs r8,r5,#8 + vzip.16 d28,d8 + + vmull.u32 q6,d28,d0[0] + vmull.u32 q7,d28,d0[1] + vmull.u32 q8,d28,d1[0] + vshl.i64 d10,d13,#16 + vmull.u32 q9,d28,d1[1] + + vadd.u64 d10,d10,d12 + veor d8,d8,d8 + vmul.u32 d29,d10,d30 + + vmull.u32 q10,d28,d2[0] + vld1.32 {d4-d7}, [r3]! + vmull.u32 q11,d28,d2[1] + vmull.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmull.u32 q13,d28,d3[1] + + bne .LNEON_1st + + @ special case for num=8, everything is in register bank... + + vmlal.u32 q6,d29,d4[0] + sub r9,r5,#1 + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vmov q5,q6 + vmlal.u32 q11,d29,d6[1] + vmov q6,q7 + vmlal.u32 q12,d29,d7[0] + vmov q7,q8 + vmlal.u32 q13,d29,d7[1] + vmov q8,q9 + vmov q9,q10 + vshr.u64 d10,d10,#16 + vmov q10,q11 + vmov q11,q12 + vadd.u64 d10,d10,d11 + vmov q12,q13 + veor q13,q13 + vshr.u64 d10,d10,#16 + + b .LNEON_outer8 + +.align 4 +.LNEON_outer8: + vld1.32 {d28[0]}, [r2,:32]! + veor d8,d8,d8 + vzip.16 d28,d8 + vadd.u64 d12,d12,d10 + + vmlal.u32 q6,d28,d0[0] + vmlal.u32 q7,d28,d0[1] + vmlal.u32 q8,d28,d1[0] + vshl.i64 d10,d13,#16 + vmlal.u32 q9,d28,d1[1] + + vadd.u64 d10,d10,d12 + veor d8,d8,d8 + subs r9,r9,#1 + vmul.u32 d29,d10,d30 + + vmlal.u32 q10,d28,d2[0] + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q13,d28,d3[1] + + vmlal.u32 q6,d29,d4[0] + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vmov q5,q6 + vmlal.u32 q11,d29,d6[1] + vmov q6,q7 + vmlal.u32 q12,d29,d7[0] + vmov q7,q8 + vmlal.u32 q13,d29,d7[1] + vmov q8,q9 + vmov q9,q10 + vshr.u64 d10,d10,#16 + vmov q10,q11 + vmov q11,q12 + vadd.u64 d10,d10,d11 + vmov q12,q13 + veor q13,q13 + vshr.u64 d10,d10,#16 + + bne .LNEON_outer8 + + vadd.u64 d12,d12,d10 + mov r7,sp + vshr.u64 d10,d12,#16 + mov r8,r5 + vadd.u64 d13,d13,d10 + add r6,sp,#16 + vshr.u64 d10,d13,#16 + vzip.16 d12,d13 + + b .LNEON_tail2 + +.align 4 +.LNEON_1st: + vmlal.u32 q6,d29,d4[0] + vld1.32 {d0-d3}, [r1]! + vmlal.u32 q7,d29,d4[1] + subs r8,r8,#8 + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vld1.32 {d4-d5}, [r3]! + vmlal.u32 q11,d29,d6[1] + vst1.64 {q6-q7}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vmlal.u32 q13,d29,d7[1] + vst1.64 {q8-q9}, [r7,:256]! + + vmull.u32 q6,d28,d0[0] + vld1.32 {d6-d7}, [r3]! + vmull.u32 q7,d28,d0[1] + vst1.64 {q10-q11}, [r7,:256]! + vmull.u32 q8,d28,d1[0] + vmull.u32 q9,d28,d1[1] + vst1.64 {q12-q13}, [r7,:256]! + + vmull.u32 q10,d28,d2[0] + vmull.u32 q11,d28,d2[1] + vmull.u32 q12,d28,d3[0] + vmull.u32 q13,d28,d3[1] + + bne .LNEON_1st + + vmlal.u32 q6,d29,d4[0] + add r6,sp,#16 + vmlal.u32 q7,d29,d4[1] + sub r1,r1,r5,lsl#2 @ rewind r1 + vmlal.u32 q8,d29,d5[0] + vld1.64 {q5}, [sp,:128] + vmlal.u32 q9,d29,d5[1] + sub r9,r5,#1 + + vmlal.u32 q10,d29,d6[0] + vst1.64 {q6-q7}, [r7,:256]! + vmlal.u32 q11,d29,d6[1] + vshr.u64 d10,d10,#16 + vld1.64 {q6}, [r6, :128]! + vmlal.u32 q12,d29,d7[0] + vst1.64 {q8-q9}, [r7,:256]! + vmlal.u32 q13,d29,d7[1] + + vst1.64 {q10-q11}, [r7,:256]! + vadd.u64 d10,d10,d11 + veor q4,q4,q4 + vst1.64 {q12-q13}, [r7,:256]! + vld1.64 {q7-q8}, [r6, :256]! + vst1.64 {q4}, [r7,:128] + vshr.u64 d10,d10,#16 + + b .LNEON_outer + +.align 4 +.LNEON_outer: + vld1.32 {d28[0]}, [r2,:32]! + sub r3,r3,r5,lsl#2 @ rewind r3 + vld1.32 {d0-d3}, [r1]! + veor d8,d8,d8 + mov r7,sp + vzip.16 d28,d8 + sub r8,r5,#8 + vadd.u64 d12,d12,d10 + + vmlal.u32 q6,d28,d0[0] + vld1.64 {q9-q10},[r6,:256]! + vmlal.u32 q7,d28,d0[1] + vmlal.u32 q8,d28,d1[0] + vld1.64 {q11-q12},[r6,:256]! + vmlal.u32 q9,d28,d1[1] + + vshl.i64 d10,d13,#16 + veor d8,d8,d8 + vadd.u64 d10,d10,d12 + vld1.64 {q13},[r6,:128]! + vmul.u32 d29,d10,d30 + + vmlal.u32 q10,d28,d2[0] + vld1.32 {d4-d7}, [r3]! + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q13,d28,d3[1] + +.LNEON_inner: + vmlal.u32 q6,d29,d4[0] + vld1.32 {d0-d3}, [r1]! + vmlal.u32 q7,d29,d4[1] + subs r8,r8,#8 + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + vst1.64 {q6-q7}, [r7,:256]! + + vmlal.u32 q10,d29,d6[0] + vld1.64 {q6}, [r6, :128]! + vmlal.u32 q11,d29,d6[1] + vst1.64 {q8-q9}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vld1.64 {q7-q8}, [r6, :256]! + vmlal.u32 q13,d29,d7[1] + vst1.64 {q10-q11}, [r7,:256]! + + vmlal.u32 q6,d28,d0[0] + vld1.64 {q9-q10}, [r6, :256]! + vmlal.u32 q7,d28,d0[1] + vst1.64 {q12-q13}, [r7,:256]! + vmlal.u32 q8,d28,d1[0] + vld1.64 {q11-q12}, [r6, :256]! + vmlal.u32 q9,d28,d1[1] + vld1.32 {d4-d7}, [r3]! + + vmlal.u32 q10,d28,d2[0] + vld1.64 {q13}, [r6, :128]! + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vmlal.u32 q13,d28,d3[1] + + bne .LNEON_inner + + vmlal.u32 q6,d29,d4[0] + add r6,sp,#16 + vmlal.u32 q7,d29,d4[1] + sub r1,r1,r5,lsl#2 @ rewind r1 + vmlal.u32 q8,d29,d5[0] + vld1.64 {q5}, [sp,:128] + vmlal.u32 q9,d29,d5[1] + subs r9,r9,#1 + + vmlal.u32 q10,d29,d6[0] + vst1.64 {q6-q7}, [r7,:256]! + vmlal.u32 q11,d29,d6[1] + vld1.64 {q6}, [r6, :128]! + vshr.u64 d10,d10,#16 + vst1.64 {q8-q9}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vld1.64 {q7-q8}, [r6, :256]! + vmlal.u32 q13,d29,d7[1] + + vst1.64 {q10-q11}, [r7,:256]! + vadd.u64 d10,d10,d11 + vst1.64 {q12-q13}, [r7,:256]! + vshr.u64 d10,d10,#16 + + bne .LNEON_outer + + mov r7,sp + mov r8,r5 + +.LNEON_tail: + vadd.u64 d12,d12,d10 + vld1.64 {q9-q10}, [r6, :256]! + vshr.u64 d10,d12,#16 + vadd.u64 d13,d13,d10 + vld1.64 {q11-q12}, [r6, :256]! + vshr.u64 d10,d13,#16 + vld1.64 {q13}, [r6, :128]! + vzip.16 d12,d13 + +.LNEON_tail2: + vadd.u64 d14,d14,d10 + vst1.32 {d12[0]}, [r7, :32]! + vshr.u64 d10,d14,#16 + vadd.u64 d15,d15,d10 + vshr.u64 d10,d15,#16 + vzip.16 d14,d15 + + vadd.u64 d16,d16,d10 + vst1.32 {d14[0]}, [r7, :32]! + vshr.u64 d10,d16,#16 + vadd.u64 d17,d17,d10 + vshr.u64 d10,d17,#16 + vzip.16 d16,d17 + + vadd.u64 d18,d18,d10 + vst1.32 {d16[0]}, [r7, :32]! + vshr.u64 d10,d18,#16 + vadd.u64 d19,d19,d10 + vshr.u64 d10,d19,#16 + vzip.16 d18,d19 + + vadd.u64 d20,d20,d10 + vst1.32 {d18[0]}, [r7, :32]! + vshr.u64 d10,d20,#16 + vadd.u64 d21,d21,d10 + vshr.u64 d10,d21,#16 + vzip.16 d20,d21 + + vadd.u64 d22,d22,d10 + vst1.32 {d20[0]}, [r7, :32]! + vshr.u64 d10,d22,#16 + vadd.u64 d23,d23,d10 + vshr.u64 d10,d23,#16 + vzip.16 d22,d23 + + vadd.u64 d24,d24,d10 + vst1.32 {d22[0]}, [r7, :32]! + vshr.u64 d10,d24,#16 + vadd.u64 d25,d25,d10 + vld1.64 {q6}, [r6, :128]! + vshr.u64 d10,d25,#16 + vzip.16 d24,d25 + + vadd.u64 d26,d26,d10 + vst1.32 {d24[0]}, [r7, :32]! + vshr.u64 d10,d26,#16 + vadd.u64 d27,d27,d10 + vld1.64 {q7-q8}, [r6, :256]! + vshr.u64 d10,d27,#16 + vzip.16 d26,d27 + subs r8,r8,#8 + vst1.32 {d26[0]}, [r7, :32]! + + bne .LNEON_tail + + vst1.32 {d10[0]}, [r7, :32] @ top-most bit + sub r3,r3,r5,lsl#2 @ rewind r3 + subs r1,sp,#0 @ clear carry flag + add r2,sp,r5,lsl#2 + +.LNEON_sub: + ldmia r1!, {r4-r7} + ldmia r3!, {r8-r11} + sbcs r8, r4,r8 + sbcs r9, r5,r9 + sbcs r10,r6,r10 + sbcs r11,r7,r11 + teq r1,r2 @ preserves carry + stmia r0!, {r8-r11} + bne .LNEON_sub + + ldr r10, [r1] @ load top-most bit + veor q0,q0,q0 + sub r11,r2,sp @ this is num*4 + veor q1,q1,q1 + mov r1,sp + sub r0,r0,r11 @ rewind r0 + mov r3,r2 @ second 3/4th of frame + sbcs r10,r10,#0 @ result is carry flag + +.LNEON_copy_n_zap: + ldmia r1!, {r4-r7} + ldmia r0, {r8-r11} + movcc r8, r4 + vst1.64 {q0-q1}, [r3,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0-q1}, [r3,:256]! @ wipe + movcc r11,r7 + ldmia r1, {r4-r7} + stmia r0!, {r8-r11} + sub r1,r1,#16 + ldmia r0, {r8-r11} + movcc r8, r4 + vst1.64 {q0-q1}, [r1,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0-q1}, [r3,:256]! @ wipe + movcc r11,r7 + teq r1,r2 @ preserves carry + stmia r0!, {r8-r11} + bne .LNEON_copy_n_zap + + sub sp,ip,#96 + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r11} + bx lr @ .word 0xe12fff1e +.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon +#endif +.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by " .align 2 +#if __ARM_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/app/openssl/crypto/bn/asm/mips3.S b/app/openssl/crypto/bn/asm/mips3.S new file mode 100644 index 00000000..dca4105c --- /dev/null +++ b/app/openssl/crypto/bn/asm/mips3.S @@ -0,0 +1,2201 @@ +.rdata +.asciiz "mips3.s, Version 1.1" +.asciiz "MIPS III/IV ISA artwork by Andy Polyakov " + +/* + * ==================================================================== + * Written by Andy Polyakov for the OpenSSL + * project. + * + * Rights for redistribution and usage in source and binary forms are + * granted according to the OpenSSL license. Warranty of any kind is + * disclaimed. + * ==================================================================== + */ + +/* + * This is my modest contributon to the OpenSSL project (see + * http://www.openssl.org/ for more information about it) and is + * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c + * module. For updates see http://fy.chalmers.se/~appro/hpe/. + * + * The module is designed to work with either of the "new" MIPS ABI(5), + * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under + * IRIX 5.x not only because it doesn't support new ABIs but also + * because 5.x kernels put R4x00 CPU into 32-bit mode and all those + * 64-bit instructions (daddu, dmultu, etc.) found below gonna only + * cause illegal instruction exception:-( + * + * In addition the code depends on preprocessor flags set up by MIPSpro + * compiler driver (either as or cc) and therefore (probably?) can't be + * compiled by the GNU assembler. GNU C driver manages fine though... + * I mean as long as -mmips-as is specified or is the default option, + * because then it simply invokes /usr/bin/as which in turn takes + * perfect care of the preprocessor definitions. Another neat feature + * offered by the MIPSpro assembler is an optimization pass. This gave + * me the opportunity to have the code looking more regular as all those + * architecture dependent instruction rescheduling details were left to + * the assembler. Cool, huh? + * + * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' + * goes way over 3 times faster! + * + * + */ +#include +#include + +#if _MIPS_ISA>=4 +#define MOVNZ(cond,dst,src) \ + movn dst,src,cond +#else +#define MOVNZ(cond,dst,src) \ + .set noreorder; \ + bnezl cond,.+8; \ + move dst,src; \ + .set reorder +#endif + +.text + +.set noat +.set reorder + +#define MINUS4 v1 + +.align 5 +LEAF(bn_mul_add_words) + .set noreorder + bgtzl a2,.L_bn_mul_add_words_proceed + ld t0,0(a1) + jr ra + move v0,zero + .set reorder + +.L_bn_mul_add_words_proceed: + li MINUS4,-4 + and ta0,a2,MINUS4 + move v0,zero + beqz ta0,.L_bn_mul_add_words_tail + +.L_bn_mul_add_words_loop: + dmultu t0,a3 + ld t1,0(a0) + ld t2,8(a1) + ld t3,8(a0) + ld ta0,16(a1) + ld ta1,16(a0) + daddu t1,v0 + sltu v0,t1,v0 /* All manuals say it "compares 32-bit + * values", but it seems to work fine + * even on 64-bit registers. */ + mflo AT + mfhi t0 + daddu t1,AT + daddu v0,t0 + sltu AT,t1,AT + sd t1,0(a0) + daddu v0,AT + + dmultu t2,a3 + ld ta2,24(a1) + ld ta3,24(a0) + daddu t3,v0 + sltu v0,t3,v0 + mflo AT + mfhi t2 + daddu t3,AT + daddu v0,t2 + sltu AT,t3,AT + sd t3,8(a0) + daddu v0,AT + + dmultu ta0,a3 + subu a2,4 + PTR_ADD a0,32 + PTR_ADD a1,32 + daddu ta1,v0 + sltu v0,ta1,v0 + mflo AT + mfhi ta0 + daddu ta1,AT + daddu v0,ta0 + sltu AT,ta1,AT + sd ta1,-16(a0) + daddu v0,AT + + + dmultu ta2,a3 + and ta0,a2,MINUS4 + daddu ta3,v0 + sltu v0,ta3,v0 + mflo AT + mfhi ta2 + daddu ta3,AT + daddu v0,ta2 + sltu AT,ta3,AT + sd ta3,-8(a0) + daddu v0,AT + .set noreorder + bgtzl ta0,.L_bn_mul_add_words_loop + ld t0,0(a1) + + bnezl a2,.L_bn_mul_add_words_tail + ld t0,0(a1) + .set reorder + +.L_bn_mul_add_words_return: + jr ra + +.L_bn_mul_add_words_tail: + dmultu t0,a3 + ld t1,0(a0) + subu a2,1 + daddu t1,v0 + sltu v0,t1,v0 + mflo AT + mfhi t0 + daddu t1,AT + daddu v0,t0 + sltu AT,t1,AT + sd t1,0(a0) + daddu v0,AT + beqz a2,.L_bn_mul_add_words_return + + ld t0,8(a1) + dmultu t0,a3 + ld t1,8(a0) + subu a2,1 + daddu t1,v0 + sltu v0,t1,v0 + mflo AT + mfhi t0 + daddu t1,AT + daddu v0,t0 + sltu AT,t1,AT + sd t1,8(a0) + daddu v0,AT + beqz a2,.L_bn_mul_add_words_return + + ld t0,16(a1) + dmultu t0,a3 + ld t1,16(a0) + daddu t1,v0 + sltu v0,t1,v0 + mflo AT + mfhi t0 + daddu t1,AT + daddu v0,t0 + sltu AT,t1,AT + sd t1,16(a0) + daddu v0,AT + jr ra +END(bn_mul_add_words) + +.align 5 +LEAF(bn_mul_words) + .set noreorder + bgtzl a2,.L_bn_mul_words_proceed + ld t0,0(a1) + jr ra + move v0,zero + .set reorder + +.L_bn_mul_words_proceed: + li MINUS4,-4 + and ta0,a2,MINUS4 + move v0,zero + beqz ta0,.L_bn_mul_words_tail + +.L_bn_mul_words_loop: + dmultu t0,a3 + ld t2,8(a1) + ld ta0,16(a1) + ld ta2,24(a1) + mflo AT + mfhi t0 + daddu v0,AT + sltu t1,v0,AT + sd v0,0(a0) + daddu v0,t1,t0 + + dmultu t2,a3 + subu a2,4 + PTR_ADD a0,32 + PTR_ADD a1,32 + mflo AT + mfhi t2 + daddu v0,AT + sltu t3,v0,AT + sd v0,-24(a0) + daddu v0,t3,t2 + + dmultu ta0,a3 + mflo AT + mfhi ta0 + daddu v0,AT + sltu ta1,v0,AT + sd v0,-16(a0) + daddu v0,ta1,ta0 + + + dmultu ta2,a3 + and ta0,a2,MINUS4 + mflo AT + mfhi ta2 + daddu v0,AT + sltu ta3,v0,AT + sd v0,-8(a0) + daddu v0,ta3,ta2 + .set noreorder + bgtzl ta0,.L_bn_mul_words_loop + ld t0,0(a1) + + bnezl a2,.L_bn_mul_words_tail + ld t0,0(a1) + .set reorder + +.L_bn_mul_words_return: + jr ra + +.L_bn_mul_words_tail: + dmultu t0,a3 + subu a2,1 + mflo AT + mfhi t0 + daddu v0,AT + sltu t1,v0,AT + sd v0,0(a0) + daddu v0,t1,t0 + beqz a2,.L_bn_mul_words_return + + ld t0,8(a1) + dmultu t0,a3 + subu a2,1 + mflo AT + mfhi t0 + daddu v0,AT + sltu t1,v0,AT + sd v0,8(a0) + daddu v0,t1,t0 + beqz a2,.L_bn_mul_words_return + + ld t0,16(a1) + dmultu t0,a3 + mflo AT + mfhi t0 + daddu v0,AT + sltu t1,v0,AT + sd v0,16(a0) + daddu v0,t1,t0 + jr ra +END(bn_mul_words) + +.align 5 +LEAF(bn_sqr_words) + .set noreorder + bgtzl a2,.L_bn_sqr_words_proceed + ld t0,0(a1) + jr ra + move v0,zero + .set reorder + +.L_bn_sqr_words_proceed: + li MINUS4,-4 + and ta0,a2,MINUS4 + move v0,zero + beqz ta0,.L_bn_sqr_words_tail + +.L_bn_sqr_words_loop: + dmultu t0,t0 + ld t2,8(a1) + ld ta0,16(a1) + ld ta2,24(a1) + mflo t1 + mfhi t0 + sd t1,0(a0) + sd t0,8(a0) + + dmultu t2,t2 + subu a2,4 + PTR_ADD a0,64 + PTR_ADD a1,32 + mflo t3 + mfhi t2 + sd t3,-48(a0) + sd t2,-40(a0) + + dmultu ta0,ta0 + mflo ta1 + mfhi ta0 + sd ta1,-32(a0) + sd ta0,-24(a0) + + + dmultu ta2,ta2 + and ta0,a2,MINUS4 + mflo ta3 + mfhi ta2 + sd ta3,-16(a0) + sd ta2,-8(a0) + + .set noreorder + bgtzl ta0,.L_bn_sqr_words_loop + ld t0,0(a1) + + bnezl a2,.L_bn_sqr_words_tail + ld t0,0(a1) + .set reorder + +.L_bn_sqr_words_return: + move v0,zero + jr ra + +.L_bn_sqr_words_tail: + dmultu t0,t0 + subu a2,1 + mflo t1 + mfhi t0 + sd t1,0(a0) + sd t0,8(a0) + beqz a2,.L_bn_sqr_words_return + + ld t0,8(a1) + dmultu t0,t0 + subu a2,1 + mflo t1 + mfhi t0 + sd t1,16(a0) + sd t0,24(a0) + beqz a2,.L_bn_sqr_words_return + + ld t0,16(a1) + dmultu t0,t0 + mflo t1 + mfhi t0 + sd t1,32(a0) + sd t0,40(a0) + jr ra +END(bn_sqr_words) + +.align 5 +LEAF(bn_add_words) + .set noreorder + bgtzl a3,.L_bn_add_words_proceed + ld t0,0(a1) + jr ra + move v0,zero + .set reorder + +.L_bn_add_words_proceed: + li MINUS4,-4 + and AT,a3,MINUS4 + move v0,zero + beqz AT,.L_bn_add_words_tail + +.L_bn_add_words_loop: + ld ta0,0(a2) + subu a3,4 + ld t1,8(a1) + and AT,a3,MINUS4 + ld t2,16(a1) + PTR_ADD a2,32 + ld t3,24(a1) + PTR_ADD a0,32 + ld ta1,-24(a2) + PTR_ADD a1,32 + ld ta2,-16(a2) + ld ta3,-8(a2) + daddu ta0,t0 + sltu t8,ta0,t0 + daddu t0,ta0,v0 + sltu v0,t0,ta0 + sd t0,-32(a0) + daddu v0,t8 + + daddu ta1,t1 + sltu t9,ta1,t1 + daddu t1,ta1,v0 + sltu v0,t1,ta1 + sd t1,-24(a0) + daddu v0,t9 + + daddu ta2,t2 + sltu t8,ta2,t2 + daddu t2,ta2,v0 + sltu v0,t2,ta2 + sd t2,-16(a0) + daddu v0,t8 + + daddu ta3,t3 + sltu t9,ta3,t3 + daddu t3,ta3,v0 + sltu v0,t3,ta3 + sd t3,-8(a0) + daddu v0,t9 + + .set noreorder + bgtzl AT,.L_bn_add_words_loop + ld t0,0(a1) + + bnezl a3,.L_bn_add_words_tail + ld t0,0(a1) + .set reorder + +.L_bn_add_words_return: + jr ra + +.L_bn_add_words_tail: + ld ta0,0(a2) + daddu ta0,t0 + subu a3,1 + sltu t8,ta0,t0 + daddu t0,ta0,v0 + sltu v0,t0,ta0 + sd t0,0(a0) + daddu v0,t8 + beqz a3,.L_bn_add_words_return + + ld t1,8(a1) + ld ta1,8(a2) + daddu ta1,t1 + subu a3,1 + sltu t9,ta1,t1 + daddu t1,ta1,v0 + sltu v0,t1,ta1 + sd t1,8(a0) + daddu v0,t9 + beqz a3,.L_bn_add_words_return + + ld t2,16(a1) + ld ta2,16(a2) + daddu ta2,t2 + sltu t8,ta2,t2 + daddu t2,ta2,v0 + sltu v0,t2,ta2 + sd t2,16(a0) + daddu v0,t8 + jr ra +END(bn_add_words) + +.align 5 +LEAF(bn_sub_words) + .set noreorder + bgtzl a3,.L_bn_sub_words_proceed + ld t0,0(a1) + jr ra + move v0,zero + .set reorder + +.L_bn_sub_words_proceed: + li MINUS4,-4 + and AT,a3,MINUS4 + move v0,zero + beqz AT,.L_bn_sub_words_tail + +.L_bn_sub_words_loop: + ld ta0,0(a2) + subu a3,4 + ld t1,8(a1) + and AT,a3,MINUS4 + ld t2,16(a1) + PTR_ADD a2,32 + ld t3,24(a1) + PTR_ADD a0,32 + ld ta1,-24(a2) + PTR_ADD a1,32 + ld ta2,-16(a2) + ld ta3,-8(a2) + sltu t8,t0,ta0 + dsubu t0,ta0 + dsubu ta0,t0,v0 + sd ta0,-32(a0) + MOVNZ (t0,v0,t8) + + sltu t9,t1,ta1 + dsubu t1,ta1 + dsubu ta1,t1,v0 + sd ta1,-24(a0) + MOVNZ (t1,v0,t9) + + + sltu t8,t2,ta2 + dsubu t2,ta2 + dsubu ta2,t2,v0 + sd ta2,-16(a0) + MOVNZ (t2,v0,t8) + + sltu t9,t3,ta3 + dsubu t3,ta3 + dsubu ta3,t3,v0 + sd ta3,-8(a0) + MOVNZ (t3,v0,t9) + + .set noreorder + bgtzl AT,.L_bn_sub_words_loop + ld t0,0(a1) + + bnezl a3,.L_bn_sub_words_tail + ld t0,0(a1) + .set reorder + +.L_bn_sub_words_return: + jr ra + +.L_bn_sub_words_tail: + ld ta0,0(a2) + subu a3,1 + sltu t8,t0,ta0 + dsubu t0,ta0 + dsubu ta0,t0,v0 + MOVNZ (t0,v0,t8) + sd ta0,0(a0) + beqz a3,.L_bn_sub_words_return + + ld t1,8(a1) + subu a3,1 + ld ta1,8(a2) + sltu t9,t1,ta1 + dsubu t1,ta1 + dsubu ta1,t1,v0 + MOVNZ (t1,v0,t9) + sd ta1,8(a0) + beqz a3,.L_bn_sub_words_return + + ld t2,16(a1) + ld ta2,16(a2) + sltu t8,t2,ta2 + dsubu t2,ta2 + dsubu ta2,t2,v0 + MOVNZ (t2,v0,t8) + sd ta2,16(a0) + jr ra +END(bn_sub_words) + +#undef MINUS4 + +.align 5 +LEAF(bn_div_3_words) + .set reorder + move a3,a0 /* we know that bn_div_words doesn't + * touch a3, ta2, ta3 and preserves a2 + * so that we can save two arguments + * and return address in registers + * instead of stack:-) + */ + ld a0,(a3) + move ta2,a1 + ld a1,-8(a3) + bne a0,a2,.L_bn_div_3_words_proceed + li v0,-1 + jr ra +.L_bn_div_3_words_proceed: + move ta3,ra + bal bn_div_words + move ra,ta3 + dmultu ta2,v0 + ld t2,-16(a3) + move ta0,zero + mfhi t1 + mflo t0 + sltu t8,t1,v1 +.L_bn_div_3_words_inner_loop: + bnez t8,.L_bn_div_3_words_inner_loop_done + sgeu AT,t2,t0 + seq t9,t1,v1 + and AT,t9 + sltu t3,t0,ta2 + daddu v1,a2 + dsubu t1,t3 + dsubu t0,ta2 + sltu t8,t1,v1 + sltu ta0,v1,a2 + or t8,ta0 + .set noreorder + beqzl AT,.L_bn_div_3_words_inner_loop + dsubu v0,1 + .set reorder +.L_bn_div_3_words_inner_loop_done: + jr ra +END(bn_div_3_words) + +.align 5 +LEAF(bn_div_words) + .set noreorder + bnezl a2,.L_bn_div_words_proceed + move v1,zero + jr ra + li v0,-1 /* I'd rather signal div-by-zero + * which can be done with 'break 7' */ + +.L_bn_div_words_proceed: + bltz a2,.L_bn_div_words_body + move t9,v1 + dsll a2,1 + bgtz a2,.-4 + addu t9,1 + + .set reorder + negu t1,t9 + li t2,-1 + dsll t2,t1 + and t2,a0 + dsrl AT,a1,t1 + .set noreorder + bnezl t2,.+8 + break 6 /* signal overflow */ + .set reorder + dsll a0,t9 + dsll a1,t9 + or a0,AT + +#define QT ta0 +#define HH ta1 +#define DH v1 +.L_bn_div_words_body: + dsrl DH,a2,32 + sgeu AT,a0,a2 + .set noreorder + bnezl AT,.+8 + dsubu a0,a2 + .set reorder + + li QT,-1 + dsrl HH,a0,32 + dsrl QT,32 /* q=0xffffffff */ + beq DH,HH,.L_bn_div_words_skip_div1 + ddivu zero,a0,DH + mflo QT +.L_bn_div_words_skip_div1: + dmultu a2,QT + dsll t3,a0,32 + dsrl AT,a1,32 + or t3,AT + mflo t0 + mfhi t1 +.L_bn_div_words_inner_loop1: + sltu t2,t3,t0 + seq t8,HH,t1 + sltu AT,HH,t1 + and t2,t8 + sltu v0,t0,a2 + or AT,t2 + .set noreorder + beqz AT,.L_bn_div_words_inner_loop1_done + dsubu t1,v0 + dsubu t0,a2 + b .L_bn_div_words_inner_loop1 + dsubu QT,1 + .set reorder +.L_bn_div_words_inner_loop1_done: + + dsll a1,32 + dsubu a0,t3,t0 + dsll v0,QT,32 + + li QT,-1 + dsrl HH,a0,32 + dsrl QT,32 /* q=0xffffffff */ + beq DH,HH,.L_bn_div_words_skip_div2 + ddivu zero,a0,DH + mflo QT +.L_bn_div_words_skip_div2: +#undef DH + dmultu a2,QT + dsll t3,a0,32 + dsrl AT,a1,32 + or t3,AT + mflo t0 + mfhi t1 +.L_bn_div_words_inner_loop2: + sltu t2,t3,t0 + seq t8,HH,t1 + sltu AT,HH,t1 + and t2,t8 + sltu v1,t0,a2 + or AT,t2 + .set noreorder + beqz AT,.L_bn_div_words_inner_loop2_done + dsubu t1,v1 + dsubu t0,a2 + b .L_bn_div_words_inner_loop2 + dsubu QT,1 + .set reorder +.L_bn_div_words_inner_loop2_done: +#undef HH + + dsubu a0,t3,t0 + or v0,QT + dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ + dsrl a2,t9 /* restore a2 */ + jr ra +#undef QT +END(bn_div_words) + +#define a_0 t0 +#define a_1 t1 +#define a_2 t2 +#define a_3 t3 +#define b_0 ta0 +#define b_1 ta1 +#define b_2 ta2 +#define b_3 ta3 + +#define a_4 s0 +#define a_5 s2 +#define a_6 s4 +#define a_7 a1 /* once we load a[7] we don't need a anymore */ +#define b_4 s1 +#define b_5 s3 +#define b_6 s5 +#define b_7 a2 /* once we load b[7] we don't need b anymore */ + +#define t_1 t8 +#define t_2 t9 + +#define c_1 v0 +#define c_2 v1 +#define c_3 a3 + +#define FRAME_SIZE 48 + +.align 5 +LEAF(bn_mul_comba8) + .set noreorder + PTR_SUB sp,FRAME_SIZE + .frame sp,64,ra + .set reorder + ld a_0,0(a1) /* If compiled with -mips3 option on + * R5000 box assembler barks on this + * line with "shouldn't have mult/div + * as last instruction in bb (R10K + * bug)" warning. If anybody out there + * has a clue about how to circumvent + * this do send me a note. + * + */ + ld b_0,0(a2) + ld a_1,8(a1) + ld a_2,16(a1) + ld a_3,24(a1) + ld b_1,8(a2) + ld b_2,16(a2) + ld b_3,24(a2) + dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ + sd s0,0(sp) + sd s1,8(sp) + sd s2,16(sp) + sd s3,24(sp) + sd s4,32(sp) + sd s5,40(sp) + mflo c_1 + mfhi c_2 + + dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ + ld a_4,32(a1) + ld a_5,40(a1) + ld a_6,48(a1) + ld a_7,56(a1) + ld b_4,32(a2) + ld b_5,40(a2) + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu c_3,t_2,AT + dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ + ld b_6,48(a2) + ld b_7,56(a2) + sd c_1,0(a0) /* r[0]=c1; */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + sd c_2,8(a0) /* r[1]=c2; */ + + dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,16(a0) /* r[2]=c3; */ + + dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu c_3,c_2,t_2 + dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,24(a0) /* r[3]=c1; */ + + dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,32(a0) /* r[4]=c2; */ + + dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,40(a0) /* r[5]=c3; */ + + dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu c_3,c_2,t_2 + dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,48(a0) /* r[6]=c1; */ + + dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,56(a0) /* r[7]=c2; */ + + dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,64(a0) /* r[8]=c3; */ + + dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu c_3,c_2,t_2 + dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,72(a0) /* r[9]=c1; */ + + dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,80(a0) /* r[10]=c2; */ + + dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,88(a0) /* r[11]=c3; */ + + dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu c_3,c_2,t_2 + dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,96(a0) /* r[12]=c1; */ + + dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,104(a0) /* r[13]=c2; */ + + dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ + ld s0,0(sp) + ld s1,8(sp) + ld s2,16(sp) + ld s3,24(sp) + ld s4,32(sp) + ld s5,40(sp) + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sd c_3,112(a0) /* r[14]=c3; */ + sd c_1,120(a0) /* r[15]=c1; */ + + PTR_ADD sp,FRAME_SIZE + + jr ra +END(bn_mul_comba8) + +.align 5 +LEAF(bn_mul_comba4) + .set reorder + ld a_0,0(a1) + ld b_0,0(a2) + ld a_1,8(a1) + ld a_2,16(a1) + dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ + ld a_3,24(a1) + ld b_1,8(a2) + ld b_2,16(a2) + ld b_3,24(a2) + mflo c_1 + mfhi c_2 + sd c_1,0(a0) + + dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu c_3,t_2,AT + dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + sd c_2,8(a0) + + dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,16(a0) + + dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu c_3,c_2,t_2 + dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,24(a0) + + dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,32(a0) + + dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,40(a0) + + dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sd c_1,48(a0) + sd c_2,56(a0) + + jr ra +END(bn_mul_comba4) + +#undef a_4 +#undef a_5 +#undef a_6 +#undef a_7 +#define a_4 b_0 +#define a_5 b_1 +#define a_6 b_2 +#define a_7 b_3 + +.align 5 +LEAF(bn_sqr_comba8) + .set reorder + ld a_0,0(a1) + ld a_1,8(a1) + ld a_2,16(a1) + ld a_3,24(a1) + + dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ + ld a_4,32(a1) + ld a_5,40(a1) + ld a_6,48(a1) + ld a_7,56(a1) + mflo c_1 + mfhi c_2 + sd c_1,0(a0) + + dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu c_3,t_2,AT + sd c_2,8(a0) + + dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,16(a0) + + dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt c_3,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,24(a0) + + dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_1,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,32(a0) + + dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_2,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_2,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,40(a0) + + dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt c_3,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,48(a0) + + dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_1,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_1,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_1,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,56(a0) + + dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_2,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_2,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,64(a0) + + dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt c_3,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,72(a0) + + dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_1,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,80(a0) + + dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_2,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,88(a0) + + dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt c_3,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,96(a0) + + dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,104(a0) + + dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sd c_3,112(a0) + sd c_1,120(a0) + + jr ra +END(bn_sqr_comba8) + +.align 5 +LEAF(bn_sqr_comba4) + .set reorder + ld a_0,0(a1) + ld a_1,8(a1) + ld a_2,16(a1) + ld a_3,24(a1) + dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ + mflo c_1 + mfhi c_2 + sd c_1,0(a0) + + dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu c_3,t_2,AT + sd c_2,8(a0) + + dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,16(a0) + + dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt c_3,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,24(a0) + + dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,32(a0) + + dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,40(a0) + + dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sd c_1,48(a0) + sd c_2,56(a0) + + jr ra +END(bn_sqr_comba4) diff --git a/app/openssl/crypto/bn/asm/pa-risc2.S b/app/openssl/crypto/bn/asm/pa-risc2.S new file mode 100644 index 00000000..f3b16290 --- /dev/null +++ b/app/openssl/crypto/bn/asm/pa-risc2.S @@ -0,0 +1,1618 @@ +; +; PA-RISC 2.0 implementation of bn_asm code, based on the +; 64-bit version of the code. This code is effectively the +; same as the 64-bit version except the register model is +; slightly different given all values must be 32-bit between +; function calls. Thus the 64-bit return values are returned +; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit +; +; +; This code is approximately 2x faster than the C version +; for RSA/DSA. +; +; See http://devresource.hp.com/ for more details on the PA-RISC +; architecture. Also see the book "PA-RISC 2.0 Architecture" +; by Gerry Kane for information on the instruction set architecture. +; +; Code written by Chris Ruemmler (with some help from the HP C +; compiler). +; +; The code compiles with HP's assembler +; + + .level 2.0N + .space $TEXT$ + .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY + +; +; Global Register definitions used for the routines. +; +; Some information about HP's runtime architecture for 32-bits. +; +; "Caller save" means the calling function must save the register +; if it wants the register to be preserved. +; "Callee save" means if a function uses the register, it must save +; the value before using it. +; +; For the floating point registers +; +; "caller save" registers: fr4-fr11, fr22-fr31 +; "callee save" registers: fr12-fr21 +; "special" registers: fr0-fr3 (status and exception registers) +; +; For the integer registers +; value zero : r0 +; "caller save" registers: r1,r19-r26 +; "callee save" registers: r3-r18 +; return register : r2 (rp) +; return values ; r28,r29 (ret0,ret1) +; Stack pointer ; r30 (sp) +; millicode return ptr ; r31 (also a caller save register) + + +; +; Arguments to the routines +; +r_ptr .reg %r26 +a_ptr .reg %r25 +b_ptr .reg %r24 +num .reg %r24 +n .reg %r23 + +; +; Note that the "w" argument for bn_mul_add_words and bn_mul_words +; is passed on the stack at a delta of -56 from the top of stack +; as the routine is entered. +; + +; +; Globals used in some routines +; + +top_overflow .reg %r23 +high_mask .reg %r22 ; value 0xffffffff80000000L + + +;------------------------------------------------------------------------------ +; +; bn_mul_add_words +; +;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, +; int num, BN_ULONG w) +; +; arg0 = r_ptr +; arg1 = a_ptr +; arg3 = num +; -56(sp) = w +; +; Local register definitions +; + +fm1 .reg %fr22 +fm .reg %fr23 +ht_temp .reg %fr24 +ht_temp_1 .reg %fr25 +lt_temp .reg %fr26 +lt_temp_1 .reg %fr27 +fm1_1 .reg %fr28 +fm_1 .reg %fr29 + +fw_h .reg %fr7L +fw_l .reg %fr7R +fw .reg %fr7 + +fht_0 .reg %fr8L +flt_0 .reg %fr8R +t_float_0 .reg %fr8 + +fht_1 .reg %fr9L +flt_1 .reg %fr9R +t_float_1 .reg %fr9 + +tmp_0 .reg %r31 +tmp_1 .reg %r21 +m_0 .reg %r20 +m_1 .reg %r19 +ht_0 .reg %r1 +ht_1 .reg %r3 +lt_0 .reg %r4 +lt_1 .reg %r5 +m1_0 .reg %r6 +m1_1 .reg %r7 +rp_val .reg %r8 +rp_val_1 .reg %r9 + +bn_mul_add_words + .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN + .proc + .callinfo frame=128 + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP ; Needed to make the loop 16-byte aligned + NOP ; needed to make the loop 16-byte aligned + + STD %r5,16(%sp) ; save r5 + NOP + STD %r6,24(%sp) ; save r6 + STD %r7,32(%sp) ; save r7 + + STD %r8,40(%sp) ; save r8 + STD %r9,48(%sp) ; save r9 + COPY %r0,%ret1 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + + CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit + LDO 128(%sp),%sp ; bump stack + + ; + ; The loop is unrolled twice, so if there is only 1 number + ; then go straight to the cleanup code. + ; + CMPIB,= 1,num,bn_mul_add_words_single_top + FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_add_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDD 8(r_ptr),rp_val_1 ; rp[1] + + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1[0] + FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] + + XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h + XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m[0] + FSTD fm_1,-40(%sp) ; -40(sp) = m[1] + + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 + + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 + + LDD -8(%sp),m_0 ; m[0] + LDD -40(%sp),m_1 ; m[1] + LDD -16(%sp),m1_0 ; m1[0] + LDD -48(%sp),m1_1 ; m1[1] + + LDD -24(%sp),ht_0 ; ht[0] + LDD -56(%sp),ht_1 ; ht[1] + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; + + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) + ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) + + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) + ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) + EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 + + EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 + ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) + ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) + + ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + + ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + + LDO -2(num),num ; num = num - 2; + ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + + ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] + ADD,DC ht_1,%r0,%ret1 ; ht[1]++ + LDO 16(a_ptr),a_ptr ; a_ptr += 2 + + STD lt_1,8(r_ptr) ; rp[1] = lt[1] + CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do + LDO 16(r_ptr),r_ptr ; r_ptr += 2 + + CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_add_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDO 8(a_ptr),a_ptr ; a_ptr++ + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 ; m1 = temp1 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD %ret1,tmp_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] + ADD,DC ht_0,%r0,%ret1 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_add_words_exit + .EXIT + + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + LDD -80(%sp),%r9 ; restore r9 + LDD -88(%sp),%r8 ; restore r8 + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +; +; arg0 = rp +; arg1 = ap +; arg3 = num +; w on stack at -56(sp) + +bn_mul_words + .proc + .callinfo frame=128 + .entry + .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP + STD %r5,16(%sp) ; save r5 + + STD %r6,24(%sp) ; save r6 + STD %r7,32(%sp) ; save r7 + COPY %r0,%ret1 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + + CMPIB,>= 0,num,bn_mul_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; See if only 1 word to do, thus just do cleanup + ; + CMPIB,= 1,num,bn_mul_words_single_top + FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l + + FSTD fm1,-16(%sp) ; -16(sp) = m1 + FSTD fm1_1,-48(%sp) ; -48(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h + + FSTD fm,-8(%sp) ; -8(sp) = m + FSTD fm_1,-40(%sp) ; -40(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h + + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt + LDD -8(%sp),m_0 + LDD -40(%sp),m_1 + + LDD -16(%sp),m1_0 + LDD -48(%sp),m1_1 + LDD -24(%sp),ht_0 + LDD -56(%sp),ht_1 + + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) + ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + EXTRD,U tmp_1,31,32,m_1 ; m>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD lt_1,m1_1,lt_1 ; lt = lt+m1; + ADD,DC ht_1,%r0,ht_1 ; ht++ + ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1); + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) + ADD,DC ht_1,%r0,ht_1 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + STD lt_1,8(r_ptr) ; rp[1] = lt + + COPY ht_1,%ret1 ; carry = ht + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_mul_words_unroll2 + LDO 16(r_ptr),r_ptr ; rp++ + + CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt= lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD %ret1,lt_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + COPY ht_0,%ret1 ; copy carry + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_words_exit + .EXIT + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND + +;---------------------------------------------------------------------------- +; +;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) +; +; arg0 = rp +; arg1 = ap +; arg2 = num +; + +bn_sqr_words + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP + STD %r5,16(%sp) ; save r5 + + CMPIB,>= 0,num,bn_sqr_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; If only 1, the goto straight to cleanup + ; + CMPIB,= 1,num,bn_sqr_words_single_top + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + +bn_sqr_words_unroll2 + FLDD 0(a_ptr),t_float_0 ; a[0] + FLDD 8(a_ptr),t_float_1 ; a[1] + XMPYU fht_0,flt_0,fm ; m[0] + XMPYU fht_1,flt_1,fm_1 ; m[1] + + FSTD fm,-24(%sp) ; store m[0] + FSTD fm_1,-56(%sp) ; store m[1] + XMPYU flt_0,flt_0,lt_temp ; lt[0] + XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] + + FSTD lt_temp,-16(%sp) ; store lt[0] + FSTD lt_temp_1,-48(%sp) ; store lt[1] + XMPYU fht_0,fht_0,ht_temp ; ht[0] + XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] + + FSTD ht_temp,-8(%sp) ; store ht[0] + FSTD ht_temp_1,-40(%sp) ; store ht[1] + LDD -24(%sp),m_0 + LDD -56(%sp),m_1 + + AND m_0,high_mask,tmp_0 ; m[0] & Mask + AND m_1,high_mask,tmp_1 ; m[1] & Mask + DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 + DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 + + LDD -16(%sp),lt_0 + LDD -48(%sp),lt_1 + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 + EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 + + LDD -8(%sp),ht_0 + LDD -40(%sp),ht_1 + ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 + ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 + + ADD lt_0,m_0,lt_0 ; lt = lt+m + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + STD ht_0,8(r_ptr) ; rp[1] = ht[1] + + ADD lt_1,m_1,lt_1 ; lt = lt+m + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_1,16(r_ptr) ; rp[2] = lt[1] + STD ht_1,24(r_ptr) ; rp[3] = ht[1] + + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_sqr_words_unroll2 + LDO 32(r_ptr),r_ptr ; rp += 4 + + CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_sqr_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,flt_0,fm ; m + FSTD fm,-24(%sp) ; store m + + XMPYU flt_0,flt_0,lt_temp ; lt + FSTD lt_temp,-16(%sp) ; store lt + + XMPYU fht_0,fht_0,ht_temp ; ht + FSTD ht_temp,-8(%sp) ; store ht + + LDD -24(%sp),m_0 ; load m + AND m_0,high_mask,tmp_0 ; m & Mask + DEPD,Z m_0,30,31,m_0 ; m << 32+1 + LDD -16(%sp),lt_0 ; lt + + LDD -8(%sp),ht_0 ; ht + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 + ADD m_0,lt_0,lt_0 ; lt = lt+m + ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 + ADD,DC ht_0,%r0,ht_0 ; ht++ + + STD lt_0,0(r_ptr) ; rp[0] = lt + STD ht_0,8(r_ptr) ; rp[1] = ht + +bn_sqr_words_exit + .EXIT + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + .PROCEND ;in=23,24,25,26,29;out=28; + + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t .reg %r22 +b .reg %r21 +l .reg %r20 + +bn_add_words + .proc + .entry + .callinfo + .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + CMPIB,>= 0,n,bn_add_words_exit + COPY %r0,%ret1 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_add_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_add_words_unroll2 + LDD 0(a_ptr),t + LDD 0(b_ptr),b + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,0(r_ptr) + + LDD 8(a_ptr),t + LDD 8(b_ptr),b + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_add_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_add_words_exit ; are we done? + +bn_add_words_single_top + LDD 0(a_ptr),t + LDD 0(b_ptr),b + + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??) + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,0(r_ptr) + +bn_add_words_exit + .EXIT + BVE (%rp) + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t1 .reg %r22 +t2 .reg %r21 +sub_tmp1 .reg %r20 +sub_tmp2 .reg %r19 + + +bn_sub_words + .proc + .callinfo + .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + CMPIB,>= 0,n,bn_sub_words_exit + COPY %r0,%ret1 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_sub_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_sub_words_unroll2 + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + STD sub_tmp1,0(r_ptr) + + LDD 8(a_ptr),t1 + LDD 8(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + STD sub_tmp1,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_sub_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? + +bn_sub_words_single_top + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + + STD sub_tmp1,0(r_ptr) + +bn_sub_words_exit + .EXIT + BVE (%rp) + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + .PROCEND ;in=23,24,25,26,29;out=28; + +;------------------------------------------------------------------------------ +; +; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) +; +; arg0 = h +; arg1 = l +; arg2 = d +; +; This is mainly just output from the HP C compiler. +; +;------------------------------------------------------------------------------ +bn_div_words + .PROC + .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN + .IMPORT BN_num_bits_word,CODE + ;--- not PIC .IMPORT __iob,DATA + ;--- not PIC .IMPORT fprintf,CODE + .IMPORT abort,CODE + .IMPORT $$div2U,MILLICODE + .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE + .ENTRY + STW %r2,-20(%r30) ;offset 0x8ec + STW,MA %r3,192(%r30) ;offset 0x8f0 + STW %r4,-188(%r30) ;offset 0x8f4 + DEPD %r5,31,32,%r6 ;offset 0x8f8 + STD %r6,-184(%r30) ;offset 0x8fc + DEPD %r7,31,32,%r8 ;offset 0x900 + STD %r8,-176(%r30) ;offset 0x904 + STW %r9,-168(%r30) ;offset 0x908 + LDD -248(%r30),%r3 ;offset 0x90c + COPY %r26,%r4 ;offset 0x910 + COPY %r24,%r5 ;offset 0x914 + DEPD %r25,31,32,%r4 ;offset 0x918 + CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c + DEPD %r23,31,32,%r5 ;offset 0x920 + MOVIB,TR -1,%r29,$00060002 ;offset 0x924 + EXTRD,U %r29,31,32,%r28 ;offset 0x928 +$0006002A + LDO -1(%r29),%r29 ;offset 0x92c + SUB %r23,%r7,%r23 ;offset 0x930 +$00060024 + SUB %r4,%r31,%r25 ;offset 0x934 + AND %r25,%r19,%r26 ;offset 0x938 + CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c + DEPD,Z %r25,31,32,%r20 ;offset 0x940 + OR %r20,%r24,%r21 ;offset 0x944 + CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948 + SUB %r31,%r2,%r31 ;offset 0x94c +$00060046 +$0006002E + DEPD,Z %r23,31,32,%r25 ;offset 0x950 + EXTRD,U %r23,31,32,%r26 ;offset 0x954 + AND %r25,%r19,%r24 ;offset 0x958 + ADD,L %r31,%r26,%r31 ;offset 0x95c + CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960 + LDO 1(%r31),%r31 ;offset 0x964 +$00060032 + CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968 + LDO -1(%r29),%r29 ;offset 0x96c + ADD,L %r4,%r3,%r4 ;offset 0x970 +$00060036 + ADDIB,=,N -1,%r8,$D0 ;offset 0x974 + SUB %r5,%r24,%r28 ;offset 0x978 +$0006003A + SUB %r4,%r31,%r24 ;offset 0x97c + SHRPD %r24,%r28,32,%r4 ;offset 0x980 + DEPD,Z %r29,31,32,%r9 ;offset 0x984 + DEPD,Z %r28,31,32,%r5 ;offset 0x988 +$0006001C + EXTRD,U %r4,31,32,%r31 ;offset 0x98c + CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990 + MOVB,TR %r6,%r29,$D1 ;offset 0x994 + STD %r29,-152(%r30) ;offset 0x998 +$0006000C + EXTRD,U %r3,31,32,%r25 ;offset 0x99c + COPY %r3,%r26 ;offset 0x9a0 + EXTRD,U %r3,31,32,%r9 ;offset 0x9a4 + EXTRD,U %r4,31,32,%r8 ;offset 0x9a8 + .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28; + B,L BN_num_bits_word,%r2 ;offset 0x9ac + EXTRD,U %r5,31,32,%r7 ;offset 0x9b0 + LDI 64,%r20 ;offset 0x9b4 + DEPD %r7,31,32,%r5 ;offset 0x9b8 + DEPD %r8,31,32,%r4 ;offset 0x9bc + DEPD %r9,31,32,%r3 ;offset 0x9c0 + CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 + COPY %r28,%r24 ;offset 0x9c8 + MTSARCM %r24 ;offset 0x9cc + DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0 + CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 +$00060012 + SUBI 64,%r24,%r31 ;offset 0x9d8 + CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc + SUB %r4,%r3,%r4 ;offset 0x9e0 +$00060016 + CMPB,= %r31,%r0,$0006001A ;offset 0x9e4 + COPY %r0,%r9 ;offset 0x9e8 + MTSARCM %r31 ;offset 0x9ec + DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0 + SUBI 64,%r31,%r26 ;offset 0x9f4 + MTSAR %r26 ;offset 0x9f8 + SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc + MTSARCM %r31 ;offset 0xa00 + DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04 +$0006001A + DEPDI,Z -1,31,32,%r19 ;offset 0xa08 + AND %r3,%r19,%r29 ;offset 0xa0c + EXTRD,U %r29,31,32,%r2 ;offset 0xa10 + DEPDI,Z -1,63,32,%r6 ;offset 0xa14 + MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 + EXTRD,U %r3,63,32,%r7 ;offset 0xa1c +$D2 + ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 + ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24 + ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 + ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; + ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c + ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30 + .CALL ; + B,L abort,%r2 ;offset 0xa34 + NOP ;offset 0xa38 + B $D3 ;offset 0xa3c + LDW -212(%r30),%r2 ;offset 0xa40 +$00060020 + COPY %r4,%r26 ;offset 0xa44 + EXTRD,U %r4,31,32,%r25 ;offset 0xa48 + COPY %r2,%r24 ;offset 0xa4c + .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) + B,L $$div2U,%r31 ;offset 0xa50 + EXTRD,U %r2,31,32,%r23 ;offset 0xa54 + DEPD %r28,31,32,%r29 ;offset 0xa58 +$00060022 + STD %r29,-152(%r30) ;offset 0xa5c +$D1 + AND %r5,%r19,%r24 ;offset 0xa60 + EXTRD,U %r24,31,32,%r24 ;offset 0xa64 + STW %r2,-160(%r30) ;offset 0xa68 + STW %r7,-128(%r30) ;offset 0xa6c + FLDD -152(%r30),%fr4 ;offset 0xa70 + FLDD -152(%r30),%fr7 ;offset 0xa74 + FLDW -160(%r30),%fr8L ;offset 0xa78 + FLDW -128(%r30),%fr5L ;offset 0xa7c + XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80 + FSTD %fr10,-136(%r30) ;offset 0xa84 + XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88 + FSTD %fr22,-144(%r30) ;offset 0xa8c + XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90 + XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94 + FSTD %fr11,-112(%r30) ;offset 0xa98 + FSTD %fr23,-120(%r30) ;offset 0xa9c + LDD -136(%r30),%r28 ;offset 0xaa0 + DEPD,Z %r28,31,32,%r31 ;offset 0xaa4 + LDD -144(%r30),%r20 ;offset 0xaa8 + ADD,L %r20,%r31,%r31 ;offset 0xaac + LDD -112(%r30),%r22 ;offset 0xab0 + DEPD,Z %r22,31,32,%r22 ;offset 0xab4 + LDD -120(%r30),%r21 ;offset 0xab8 + B $00060024 ;offset 0xabc + ADD,L %r21,%r22,%r23 ;offset 0xac0 +$D0 + OR %r9,%r29,%r29 ;offset 0xac4 +$00060040 + EXTRD,U %r29,31,32,%r28 ;offset 0xac8 +$00060002 +$L2 + LDW -212(%r30),%r2 ;offset 0xacc +$D3 + LDW -168(%r30),%r9 ;offset 0xad0 + LDD -176(%r30),%r8 ;offset 0xad4 + EXTRD,U %r8,31,32,%r7 ;offset 0xad8 + LDD -184(%r30),%r6 ;offset 0xadc + EXTRD,U %r6,31,32,%r5 ;offset 0xae0 + LDW -188(%r30),%r4 ;offset 0xae4 + BVE (%r2) ;offset 0xae8 + .EXIT + LDW,MB -192(%r30),%r3 ;offset 0xaec + .PROCEND ;in=23,25;out=28,29;fpin=105,107; + + + + +;---------------------------------------------------------------------------- +; +; Registers to hold 64-bit values to manipulate. The "L" part +; of the register corresponds to the upper 32-bits, while the "R" +; part corresponds to the lower 32-bits +; +; Note, that when using b6 and b7, the code must save these before +; using them because they are callee save registers +; +; +; Floating point registers to use to save values that +; are manipulated. These don't collide with ftemp1-6 and +; are all caller save registers +; +a0 .reg %fr22 +a0L .reg %fr22L +a0R .reg %fr22R + +a1 .reg %fr23 +a1L .reg %fr23L +a1R .reg %fr23R + +a2 .reg %fr24 +a2L .reg %fr24L +a2R .reg %fr24R + +a3 .reg %fr25 +a3L .reg %fr25L +a3R .reg %fr25R + +a4 .reg %fr26 +a4L .reg %fr26L +a4R .reg %fr26R + +a5 .reg %fr27 +a5L .reg %fr27L +a5R .reg %fr27R + +a6 .reg %fr28 +a6L .reg %fr28L +a6R .reg %fr28R + +a7 .reg %fr29 +a7L .reg %fr29L +a7R .reg %fr29R + +b0 .reg %fr30 +b0L .reg %fr30L +b0R .reg %fr30R + +b1 .reg %fr31 +b1L .reg %fr31L +b1R .reg %fr31R + +; +; Temporary floating point variables, these are all caller save +; registers +; +ftemp1 .reg %fr4 +ftemp2 .reg %fr5 +ftemp3 .reg %fr6 +ftemp4 .reg %fr7 + +; +; The B set of registers when used. +; + +b2 .reg %fr8 +b2L .reg %fr8L +b2R .reg %fr8R + +b3 .reg %fr9 +b3L .reg %fr9L +b3R .reg %fr9R + +b4 .reg %fr10 +b4L .reg %fr10L +b4R .reg %fr10R + +b5 .reg %fr11 +b5L .reg %fr11L +b5R .reg %fr11R + +b6 .reg %fr12 +b6L .reg %fr12L +b6R .reg %fr12R + +b7 .reg %fr13 +b7L .reg %fr13L +b7R .reg %fr13R + +c1 .reg %r21 ; only reg +temp1 .reg %r20 ; only reg +temp2 .reg %r19 ; only reg +temp3 .reg %r31 ; only reg + +m1 .reg %r28 +c2 .reg %r23 +high_one .reg %r1 +ht .reg %r6 +lt .reg %r5 +m .reg %r4 +c3 .reg %r3 + +SQR_ADD_C .macro A0L,A0R,C1,C2,C3 + XMPYU A0L,A0R,ftemp1 ; m + FSTD ftemp1,-24(%sp) ; store m + + XMPYU A0R,A0R,ftemp2 ; lt + FSTD ftemp2,-16(%sp) ; store lt + + XMPYU A0L,A0L,ftemp3 ; ht + FSTD ftemp3,-8(%sp) ; store ht + + LDD -24(%sp),m ; load m + AND m,high_mask,temp2 ; m & Mask + DEPD,Z m,30,31,temp3 ; m << 32+1 + LDD -16(%sp),lt ; lt + + LDD -8(%sp),ht ; ht + EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 + ADD temp3,lt,lt ; lt = lt+m + ADD,L ht,temp1,ht ; ht += temp1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; ht++ + + ADD C2,ht,C2 ; c2=c2+ht + ADD,DC C3,%r0,C3 ; c3++ +.endm + +SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 + XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,A1L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,A1R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,A1L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD ht,ht,ht ; ht=ht+ht; + ADD,DC C3,%r0,C3 ; add in carry (c3++) + + ADD lt,lt,lt ; lt=lt+lt; + ADD,DC ht,%r0,ht ; add in carry (ht++) + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) + LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + +; +;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba8 + .PROC + .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .ENTRY + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 + SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 + STD c2,56(r_ptr) ; r[7] = c2; + COPY %r0,c2 + + SQR_ADD_C a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 + SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 + STD c3,64(r_ptr) ; r[8] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 + STD c1,72(r_ptr) ; r[9] = c1; + COPY %r0,c1 + + SQR_ADD_C a5L,a5R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 + SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 + STD c2,80(r_ptr) ; r[10] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 + STD c3,88(r_ptr) ; r[11] = c3; + COPY %r0,c3 + + SQR_ADD_C a6L,a6R,c1,c2,c3 + SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 + STD c1,96(r_ptr) ; r[12] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 + STD c2,104(r_ptr) ; r[13] = c2; + COPY %r0,c2 + + SQR_ADD_C a7L,a7R,c3,c1,c2 + STD c3, 112(r_ptr) ; r[14] = c3 + STD c1, 120(r_ptr) ; r[15] = c1 + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + STD c2,56(r_ptr) ; r[7] = c2; + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + +;--------------------------------------------------------------------------- + +MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 + XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,B0L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,B0R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,B0L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + + +; +;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba8 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + FLDD 32(b_ptr),b4 + FLDD 40(b_ptr),b5 + FLDD 48(b_ptr),b6 + FLDD 56(b_ptr),b7 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 + STD c1,48(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 + STD c2,56(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 + STD c3,64(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 + STD c1,72(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 + STD c2,80(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 + STD c3,88(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 + STD c1,96(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 + STD c2,104(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 + STD c3,112(r_ptr) + STD c1,120(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + STD c1,48(r_ptr) + STD c2,56(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + +;--- not PIC .SPACE $TEXT$ +;--- not PIC .SUBSPA $CODE$ +;--- not PIC .SPACE $PRIVATE$,SORT=16 +;--- not PIC .IMPORT $global$,DATA +;--- not PIC .SPACE $TEXT$ +;--- not PIC .SUBSPA $CODE$ +;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c +;--- not PIC C$7 +;--- not PIC .ALIGN 8 +;--- not PIC .STRINGZ "Division would overflow (%d)\n" + .END diff --git a/app/openssl/crypto/bn/asm/pa-risc2W.S b/app/openssl/crypto/bn/asm/pa-risc2W.S new file mode 100644 index 00000000..a9954575 --- /dev/null +++ b/app/openssl/crypto/bn/asm/pa-risc2W.S @@ -0,0 +1,1605 @@ +; +; PA-RISC 64-bit implementation of bn_asm code +; +; This code is approximately 2x faster than the C version +; for RSA/DSA. +; +; See http://devresource.hp.com/ for more details on the PA-RISC +; architecture. Also see the book "PA-RISC 2.0 Architecture" +; by Gerry Kane for information on the instruction set architecture. +; +; Code written by Chris Ruemmler (with some help from the HP C +; compiler). +; +; The code compiles with HP's assembler +; + + .level 2.0W + .space $TEXT$ + .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY + +; +; Global Register definitions used for the routines. +; +; Some information about HP's runtime architecture for 64-bits. +; +; "Caller save" means the calling function must save the register +; if it wants the register to be preserved. +; "Callee save" means if a function uses the register, it must save +; the value before using it. +; +; For the floating point registers +; +; "caller save" registers: fr4-fr11, fr22-fr31 +; "callee save" registers: fr12-fr21 +; "special" registers: fr0-fr3 (status and exception registers) +; +; For the integer registers +; value zero : r0 +; "caller save" registers: r1,r19-r26 +; "callee save" registers: r3-r18 +; return register : r2 (rp) +; return values ; r28 (ret0,ret1) +; Stack pointer ; r30 (sp) +; global data pointer ; r27 (dp) +; argument pointer ; r29 (ap) +; millicode return ptr ; r31 (also a caller save register) + + +; +; Arguments to the routines +; +r_ptr .reg %r26 +a_ptr .reg %r25 +b_ptr .reg %r24 +num .reg %r24 +w .reg %r23 +n .reg %r23 + + +; +; Globals used in some routines +; + +top_overflow .reg %r29 +high_mask .reg %r22 ; value 0xffffffff80000000L + + +;------------------------------------------------------------------------------ +; +; bn_mul_add_words +; +;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, +; int num, BN_ULONG w) +; +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = num +; arg3 = w +; +; Local register definitions +; + +fm1 .reg %fr22 +fm .reg %fr23 +ht_temp .reg %fr24 +ht_temp_1 .reg %fr25 +lt_temp .reg %fr26 +lt_temp_1 .reg %fr27 +fm1_1 .reg %fr28 +fm_1 .reg %fr29 + +fw_h .reg %fr7L +fw_l .reg %fr7R +fw .reg %fr7 + +fht_0 .reg %fr8L +flt_0 .reg %fr8R +t_float_0 .reg %fr8 + +fht_1 .reg %fr9L +flt_1 .reg %fr9R +t_float_1 .reg %fr9 + +tmp_0 .reg %r31 +tmp_1 .reg %r21 +m_0 .reg %r20 +m_1 .reg %r19 +ht_0 .reg %r1 +ht_1 .reg %r3 +lt_0 .reg %r4 +lt_1 .reg %r5 +m1_0 .reg %r6 +m1_1 .reg %r7 +rp_val .reg %r8 +rp_val_1 .reg %r9 + +bn_mul_add_words + .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN + .proc + .callinfo frame=128 + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP ; Needed to make the loop 16-byte aligned + NOP ; Needed to make the loop 16-byte aligned + + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + STD %r7,32(%sp) ; save r7 + STD %r8,40(%sp) ; save r8 + + STD %r9,48(%sp) ; save r9 + COPY %r0,%ret0 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + STD w,56(%sp) ; store w on stack + + CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit + LDO 128(%sp),%sp ; bump stack + + ; + ; The loop is unrolled twice, so if there is only 1 number + ; then go straight to the cleanup code. + ; + CMPIB,= 1,num,bn_mul_add_words_single_top + FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_add_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDD 8(r_ptr),rp_val_1 ; rp[1] + + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1[0] + FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] + + XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h + XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m[0] + FSTD fm_1,-40(%sp) ; -40(sp) = m[1] + + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 + + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 + + LDD -8(%sp),m_0 ; m[0] + LDD -40(%sp),m_1 ; m[1] + LDD -16(%sp),m1_0 ; m1[0] + LDD -48(%sp),m1_1 ; m1[1] + + LDD -24(%sp),ht_0 ; ht[0] + LDD -56(%sp),ht_1 ; ht[1] + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; + + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) + ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) + + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) + ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) + EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 + + EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 + ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) + ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) + + ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + + ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + + LDO -2(num),num ; num = num - 2; + ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + + ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] + ADD,DC ht_1,%r0,%ret0 ; ht[1]++ + LDO 16(a_ptr),a_ptr ; a_ptr += 2 + + STD lt_1,8(r_ptr) ; rp[1] = lt[1] + CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do + LDO 16(r_ptr),r_ptr ; r_ptr += 2 + + CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_add_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDO 8(a_ptr),a_ptr ; a_ptr++ + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 ; m1 = temp1 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD %ret0,tmp_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] + ADD,DC ht_0,%r0,%ret0 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_add_words_exit + .EXIT + LDD -80(%sp),%r9 ; restore r9 + LDD -88(%sp),%r8 ; restore r8 + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +; +; arg0 = rp +; arg1 = ap +; arg2 = num +; arg3 = w + +bn_mul_words + .proc + .callinfo frame=128 + .entry + .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + STD %r7,32(%sp) ; save r7 + COPY %r0,%ret0 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + STD w,56(%sp) ; w on stack + + CMPIB,>= 0,num,bn_mul_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; See if only 1 word to do, thus just do cleanup + ; + CMPIB,= 1,num,bn_mul_words_single_top + FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l + + FSTD fm1,-16(%sp) ; -16(sp) = m1 + FSTD fm1_1,-48(%sp) ; -48(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h + + FSTD fm,-8(%sp) ; -8(sp) = m + FSTD fm_1,-40(%sp) ; -40(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h + + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt + LDD -8(%sp),m_0 + LDD -40(%sp),m_1 + + LDD -16(%sp),m1_0 + LDD -48(%sp),m1_1 + LDD -24(%sp),ht_0 + LDD -56(%sp),ht_1 + + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) + ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + EXTRD,U tmp_1,31,32,m_1 ; m>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD lt_1,m1_1,lt_1 ; lt = lt+m1; + ADD,DC ht_1,%r0,ht_1 ; ht++ + ADD %ret0,lt_0,lt_0 ; lt = lt + c (ret0); + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) + ADD,DC ht_1,%r0,ht_1 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + STD lt_1,8(r_ptr) ; rp[1] = lt + + COPY ht_1,%ret0 ; carry = ht + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_mul_words_unroll2 + LDO 16(r_ptr),r_ptr ; rp++ + + CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt= lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD %ret0,lt_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + COPY ht_0,%ret0 ; copy carry + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_words_exit + .EXIT + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) +; +; arg0 = rp +; arg1 = ap +; arg2 = num +; + +bn_sqr_words + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP + STD %r5,16(%sp) ; save r5 + + CMPIB,>= 0,num,bn_sqr_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; If only 1, the goto straight to cleanup + ; + CMPIB,= 1,num,bn_sqr_words_single_top + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + +bn_sqr_words_unroll2 + FLDD 0(a_ptr),t_float_0 ; a[0] + FLDD 8(a_ptr),t_float_1 ; a[1] + XMPYU fht_0,flt_0,fm ; m[0] + XMPYU fht_1,flt_1,fm_1 ; m[1] + + FSTD fm,-24(%sp) ; store m[0] + FSTD fm_1,-56(%sp) ; store m[1] + XMPYU flt_0,flt_0,lt_temp ; lt[0] + XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] + + FSTD lt_temp,-16(%sp) ; store lt[0] + FSTD lt_temp_1,-48(%sp) ; store lt[1] + XMPYU fht_0,fht_0,ht_temp ; ht[0] + XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] + + FSTD ht_temp,-8(%sp) ; store ht[0] + FSTD ht_temp_1,-40(%sp) ; store ht[1] + LDD -24(%sp),m_0 + LDD -56(%sp),m_1 + + AND m_0,high_mask,tmp_0 ; m[0] & Mask + AND m_1,high_mask,tmp_1 ; m[1] & Mask + DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 + DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 + + LDD -16(%sp),lt_0 + LDD -48(%sp),lt_1 + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 + EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 + + LDD -8(%sp),ht_0 + LDD -40(%sp),ht_1 + ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 + ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 + + ADD lt_0,m_0,lt_0 ; lt = lt+m + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + STD ht_0,8(r_ptr) ; rp[1] = ht[1] + + ADD lt_1,m_1,lt_1 ; lt = lt+m + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_1,16(r_ptr) ; rp[2] = lt[1] + STD ht_1,24(r_ptr) ; rp[3] = ht[1] + + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_sqr_words_unroll2 + LDO 32(r_ptr),r_ptr ; rp += 4 + + CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_sqr_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,flt_0,fm ; m + FSTD fm,-24(%sp) ; store m + + XMPYU flt_0,flt_0,lt_temp ; lt + FSTD lt_temp,-16(%sp) ; store lt + + XMPYU fht_0,fht_0,ht_temp ; ht + FSTD ht_temp,-8(%sp) ; store ht + + LDD -24(%sp),m_0 ; load m + AND m_0,high_mask,tmp_0 ; m & Mask + DEPD,Z m_0,30,31,m_0 ; m << 32+1 + LDD -16(%sp),lt_0 ; lt + + LDD -8(%sp),ht_0 ; ht + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 + ADD m_0,lt_0,lt_0 ; lt = lt+m + ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 + ADD,DC ht_0,%r0,ht_0 ; ht++ + + STD lt_0,0(r_ptr) ; rp[0] = lt + STD ht_0,8(r_ptr) ; rp[1] = ht + +bn_sqr_words_exit + .EXIT + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + .PROCEND ;in=23,24,25,26,29;out=28; + + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t .reg %r22 +b .reg %r21 +l .reg %r20 + +bn_add_words + .proc + .entry + .callinfo + .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + CMPIB,>= 0,n,bn_add_words_exit + COPY %r0,%ret0 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_add_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_add_words_unroll2 + LDD 0(a_ptr),t + LDD 0(b_ptr),b + ADD t,%ret0,t ; t = t+c; + ADD,DC %r0,%r0,%ret0 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret0,%r0,%ret0 ; c+= carry + STD l,0(r_ptr) + + LDD 8(a_ptr),t + LDD 8(b_ptr),b + ADD t,%ret0,t ; t = t+c; + ADD,DC %r0,%r0,%ret0 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret0,%r0,%ret0 ; c+= carry + STD l,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_add_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_add_words_exit ; are we done? + +bn_add_words_single_top + LDD 0(a_ptr),t + LDD 0(b_ptr),b + + ADD t,%ret0,t ; t = t+c; + ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??) + ADD t,b,l ; l = t + b[0] + ADD,DC %ret0,%r0,%ret0 ; c+= carry + STD l,0(r_ptr) + +bn_add_words_exit + .EXIT + BVE (%rp) + NOP + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t1 .reg %r22 +t2 .reg %r21 +sub_tmp1 .reg %r20 +sub_tmp2 .reg %r19 + + +bn_sub_words + .proc + .callinfo + .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + CMPIB,>= 0,n,bn_sub_words_exit + COPY %r0,%ret0 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_sub_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_sub_words_unroll2 + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; + + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret0 + STD sub_tmp1,0(r_ptr) + + LDD 8(a_ptr),t1 + LDD 8(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret0 + STD sub_tmp1,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_sub_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? + +bn_sub_words_single_top + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret0 + + STD sub_tmp1,0(r_ptr) + +bn_sub_words_exit + .EXIT + BVE (%rp) + NOP + .PROCEND ;in=23,24,25,26,29;out=28; + +;------------------------------------------------------------------------------ +; +; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) +; +; arg0 = h +; arg1 = l +; arg2 = d +; +; This is mainly just modified assembly from the compiler, thus the +; lack of variable names. +; +;------------------------------------------------------------------------------ +bn_div_words + .proc + .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .IMPORT BN_num_bits_word,CODE,NO_RELOCATION + .IMPORT __iob,DATA + .IMPORT fprintf,CODE,NO_RELOCATION + .IMPORT abort,CODE,NO_RELOCATION + .IMPORT $$div2U,MILLICODE + .entry + STD %r2,-16(%r30) + STD,MA %r3,352(%r30) + STD %r4,-344(%r30) + STD %r5,-336(%r30) + STD %r6,-328(%r30) + STD %r7,-320(%r30) + STD %r8,-312(%r30) + STD %r9,-304(%r30) + STD %r10,-296(%r30) + + STD %r27,-288(%r30) ; save gp + + COPY %r24,%r3 ; save d + COPY %r26,%r4 ; save h (high 64-bits) + LDO -1(%r0),%ret0 ; return -1 by default + + CMPB,*= %r0,%arg2,$D3 ; if (d == 0) + COPY %r25,%r5 ; save l (low 64-bits) + + LDO -48(%r30),%r29 ; create ap + .CALL ;in=26,29;out=28; + B,L BN_num_bits_word,%r2 + COPY %r3,%r26 + LDD -288(%r30),%r27 ; restore gp + LDI 64,%r21 + + CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward) + COPY %ret0,%r24 ; i + MTSARCM %r24 + DEPDI,Z -1,%sar,1,%r29 + CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<= d) + SUB %r4,%r3,%r4 ; h -= d + CMPB,= %r31,%r0,$0000001A ; if (i) + COPY %r0,%r10 ; ret = 0 + MTSARCM %r31 ; i to shift + DEPD,Z %r3,%sar,64,%r3 ; d <<= i; + SUBI 64,%r31,%r19 ; 64 - i; redundent + MTSAR %r19 ; (64 -i) to shift + SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i) + MTSARCM %r31 ; i to shift + DEPD,Z %r5,%sar,64,%r5 ; l <<= i; + +$0000001A + DEPDI,Z -1,31,32,%r19 + EXTRD,U %r3,31,32,%r6 ; dh=(d&0xfff)>>32 + EXTRD,U %r3,63,32,%r8 ; dl = d&0xffffff + LDO 2(%r0),%r9 + STD %r3,-280(%r30) ; "d" to stack + +$0000001C + DEPDI,Z -1,63,32,%r29 ; + EXTRD,U %r4,31,32,%r31 ; h >> 32 + CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div + COPY %r4,%r26 + EXTRD,U %r4,31,32,%r25 + COPY %r6,%r24 + .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) + B,L $$div2U,%r2 + EXTRD,U %r6,31,32,%r23 + DEPD %r28,31,32,%r29 +$D2 + STD %r29,-272(%r30) ; q + AND %r5,%r19,%r24 ; t & 0xffffffff00000000; + EXTRD,U %r24,31,32,%r24 ; ??? + FLDD -272(%r30),%fr7 ; q + FLDD -280(%r30),%fr8 ; d + XMPYU %fr8L,%fr7L,%fr10 + FSTD %fr10,-256(%r30) + XMPYU %fr8L,%fr7R,%fr22 + FSTD %fr22,-264(%r30) + XMPYU %fr8R,%fr7L,%fr11 + XMPYU %fr8R,%fr7R,%fr23 + FSTD %fr11,-232(%r30) + FSTD %fr23,-240(%r30) + LDD -256(%r30),%r28 + DEPD,Z %r28,31,32,%r2 + LDD -264(%r30),%r20 + ADD,L %r20,%r2,%r31 + LDD -232(%r30),%r22 + DEPD,Z %r22,31,32,%r22 + LDD -240(%r30),%r21 + B $00000024 ; enter loop + ADD,L %r21,%r22,%r23 + +$0000002A + LDO -1(%r29),%r29 + SUB %r23,%r8,%r23 +$00000024 + SUB %r4,%r31,%r25 + AND %r25,%r19,%r26 + CMPB,*<>,N %r0,%r26,$00000046 ; (forward) + DEPD,Z %r25,31,32,%r20 + OR %r20,%r24,%r21 + CMPB,*<<,N %r21,%r23,$0000002A ;(backward) + SUB %r31,%r6,%r31 +;-------------Break path--------------------- + +$00000046 + DEPD,Z %r23,31,32,%r25 ;tl + EXTRD,U %r23,31,32,%r26 ;t + AND %r25,%r19,%r24 ;tl = (tl<<32)&0xfffffff0000000L + ADD,L %r31,%r26,%r31 ;th += t; + CMPCLR,*>>= %r5,%r24,%r0 ;if (l>32)); + DEPD,Z %r29,31,32,%r10 ; ret = q<<32 + b $0000001C + DEPD,Z %r28,31,32,%r5 ; l = l << 32 + +$D1 + OR %r10,%r29,%r28 ; ret |= q +$D3 + LDD -368(%r30),%r2 +$D0 + LDD -296(%r30),%r10 + LDD -304(%r30),%r9 + LDD -312(%r30),%r8 + LDD -320(%r30),%r7 + LDD -328(%r30),%r6 + LDD -336(%r30),%r5 + LDD -344(%r30),%r4 + BVE (%r2) + .EXIT + LDD,MB -352(%r30),%r3 + +bn_div_err_case + MFIA %r6 + ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1 + LDO R'bn_div_words-bn_div_err_case(%r1),%r6 + ADDIL LT'__iob,%r27,%r1 + LDD RT'__iob(%r1),%r26 + ADDIL L'C$4-bn_div_words,%r6,%r1 + LDO R'C$4-bn_div_words(%r1),%r25 + LDO 64(%r26),%r26 + .CALL ;in=24,25,26,29;out=28; + B,L fprintf,%r2 + LDO -48(%r30),%r29 + LDD -288(%r30),%r27 + .CALL ;in=29; + B,L abort,%r2 + LDO -48(%r30),%r29 + LDD -288(%r30),%r27 + B $D0 + LDD -368(%r30),%r2 + .PROCEND ;in=24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +; Registers to hold 64-bit values to manipulate. The "L" part +; of the register corresponds to the upper 32-bits, while the "R" +; part corresponds to the lower 32-bits +; +; Note, that when using b6 and b7, the code must save these before +; using them because they are callee save registers +; +; +; Floating point registers to use to save values that +; are manipulated. These don't collide with ftemp1-6 and +; are all caller save registers +; +a0 .reg %fr22 +a0L .reg %fr22L +a0R .reg %fr22R + +a1 .reg %fr23 +a1L .reg %fr23L +a1R .reg %fr23R + +a2 .reg %fr24 +a2L .reg %fr24L +a2R .reg %fr24R + +a3 .reg %fr25 +a3L .reg %fr25L +a3R .reg %fr25R + +a4 .reg %fr26 +a4L .reg %fr26L +a4R .reg %fr26R + +a5 .reg %fr27 +a5L .reg %fr27L +a5R .reg %fr27R + +a6 .reg %fr28 +a6L .reg %fr28L +a6R .reg %fr28R + +a7 .reg %fr29 +a7L .reg %fr29L +a7R .reg %fr29R + +b0 .reg %fr30 +b0L .reg %fr30L +b0R .reg %fr30R + +b1 .reg %fr31 +b1L .reg %fr31L +b1R .reg %fr31R + +; +; Temporary floating point variables, these are all caller save +; registers +; +ftemp1 .reg %fr4 +ftemp2 .reg %fr5 +ftemp3 .reg %fr6 +ftemp4 .reg %fr7 + +; +; The B set of registers when used. +; + +b2 .reg %fr8 +b2L .reg %fr8L +b2R .reg %fr8R + +b3 .reg %fr9 +b3L .reg %fr9L +b3R .reg %fr9R + +b4 .reg %fr10 +b4L .reg %fr10L +b4R .reg %fr10R + +b5 .reg %fr11 +b5L .reg %fr11L +b5R .reg %fr11R + +b6 .reg %fr12 +b6L .reg %fr12L +b6R .reg %fr12R + +b7 .reg %fr13 +b7L .reg %fr13L +b7R .reg %fr13R + +c1 .reg %r21 ; only reg +temp1 .reg %r20 ; only reg +temp2 .reg %r19 ; only reg +temp3 .reg %r31 ; only reg + +m1 .reg %r28 +c2 .reg %r23 +high_one .reg %r1 +ht .reg %r6 +lt .reg %r5 +m .reg %r4 +c3 .reg %r3 + +SQR_ADD_C .macro A0L,A0R,C1,C2,C3 + XMPYU A0L,A0R,ftemp1 ; m + FSTD ftemp1,-24(%sp) ; store m + + XMPYU A0R,A0R,ftemp2 ; lt + FSTD ftemp2,-16(%sp) ; store lt + + XMPYU A0L,A0L,ftemp3 ; ht + FSTD ftemp3,-8(%sp) ; store ht + + LDD -24(%sp),m ; load m + AND m,high_mask,temp2 ; m & Mask + DEPD,Z m,30,31,temp3 ; m << 32+1 + LDD -16(%sp),lt ; lt + + LDD -8(%sp),ht ; ht + EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 + ADD temp3,lt,lt ; lt = lt+m + ADD,L ht,temp1,ht ; ht += temp1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; ht++ + + ADD C2,ht,C2 ; c2=c2+ht + ADD,DC C3,%r0,C3 ; c3++ +.endm + +SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 + XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,A1L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,A1R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,A1L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD ht,ht,ht ; ht=ht+ht; + ADD,DC C3,%r0,C3 ; add in carry (c3++) + + ADD lt,lt,lt ; lt=lt+lt; + ADD,DC ht,%r0,ht ; add in carry (ht++) + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) + LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + +; +;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba8 + .PROC + .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .ENTRY + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 + SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 + STD c2,56(r_ptr) ; r[7] = c2; + COPY %r0,c2 + + SQR_ADD_C a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 + SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 + STD c3,64(r_ptr) ; r[8] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 + STD c1,72(r_ptr) ; r[9] = c1; + COPY %r0,c1 + + SQR_ADD_C a5L,a5R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 + SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 + STD c2,80(r_ptr) ; r[10] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 + STD c3,88(r_ptr) ; r[11] = c3; + COPY %r0,c3 + + SQR_ADD_C a6L,a6R,c1,c2,c3 + SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 + STD c1,96(r_ptr) ; r[12] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 + STD c2,104(r_ptr) ; r[13] = c2; + COPY %r0,c2 + + SQR_ADD_C a7L,a7R,c3,c1,c2 + STD c3, 112(r_ptr) ; r[14] = c3 + STD c1, 120(r_ptr) ; r[15] = c1 + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + STD c2,56(r_ptr) ; r[7] = c2; + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + +;--------------------------------------------------------------------------- + +MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 + XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,B0L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,B0R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,B0L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + + +; +;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba8 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + FLDD 32(b_ptr),b4 + FLDD 40(b_ptr),b5 + FLDD 48(b_ptr),b6 + FLDD 56(b_ptr),b7 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 + STD c1,48(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 + STD c2,56(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 + STD c3,64(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 + STD c1,72(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 + STD c2,80(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 + STD c3,88(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 + STD c1,96(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 + STD c2,104(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 + STD c3,112(r_ptr) + STD c1,120(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + STD c1,48(r_ptr) + STD c2,56(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + + .SPACE $TEXT$ + .SUBSPA $CODE$ + .SPACE $PRIVATE$,SORT=16 + .IMPORT $global$,DATA + .SPACE $TEXT$ + .SUBSPA $CODE$ + .SUBSPA $LIT$,ACCESS=0x2c +C$4 + .ALIGN 8 + .STRINGZ "Division would overflow (%d)\n" + .END diff --git a/app/openssl/crypto/bn/bn_mont.c b/app/openssl/crypto/bn/bn_mont.c index 427b5cf4..ee8532c7 100644 --- a/app/openssl/crypto/bn/bn_mont.c +++ b/app/openssl/crypto/bn/bn_mont.c @@ -478,32 +478,38 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock, const BIGNUM *mod, BN_CTX *ctx) { - int got_write_lock = 0; BN_MONT_CTX *ret; CRYPTO_r_lock(lock); - if (!*pmont) + ret = *pmont; + CRYPTO_r_unlock(lock); + if (ret) + return ret; + + /* We don't want to serialise globally while doing our lazy-init math in + * BN_MONT_CTX_set. That punishes threads that are doing independent + * things. Instead, punish the case where more than one thread tries to + * lazy-init the same 'pmont', by having each do the lazy-init math work + * independently and only use the one from the thread that wins the race + * (the losers throw away the work they've done). */ + ret = BN_MONT_CTX_new(); + if (!ret) + return NULL; + if (!BN_MONT_CTX_set(ret, mod, ctx)) { - CRYPTO_r_unlock(lock); - CRYPTO_w_lock(lock); - got_write_lock = 1; + BN_MONT_CTX_free(ret); + return NULL; + } - if (!*pmont) - { - ret = BN_MONT_CTX_new(); - if (ret && !BN_MONT_CTX_set(ret, mod, ctx)) - BN_MONT_CTX_free(ret); - else - *pmont = ret; - } + /* The locked compare-and-set, after the local work is done. */ + CRYPTO_w_lock(lock); + if (*pmont) + { + BN_MONT_CTX_free(ret); + ret = *pmont; } - - ret = *pmont; - - if (got_write_lock) - CRYPTO_w_unlock(lock); else - CRYPTO_r_unlock(lock); - + *pmont = ret; + CRYPTO_w_unlock(lock); return ret; } diff --git a/app/openssl/crypto/cms/cms_env.c b/app/openssl/crypto/cms/cms_env.c index be20b1c0..add00bf9 100644 --- a/app/openssl/crypto/cms/cms_env.c +++ b/app/openssl/crypto/cms/cms_env.c @@ -185,6 +185,8 @@ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, if (flags & CMS_USE_KEYID) { ktri->version = 2; + if (env->version < 2) + env->version = 2; type = CMS_RECIPINFO_KEYIDENTIFIER; } else diff --git a/app/openssl/crypto/cms/cms_sd.c b/app/openssl/crypto/cms/cms_sd.c index 77fbd135..51dd33a1 100644 --- a/app/openssl/crypto/cms/cms_sd.c +++ b/app/openssl/crypto/cms/cms_sd.c @@ -158,8 +158,8 @@ static void cms_sd_set_version(CMS_SignedData *sd) if (sd->version < 3) sd->version = 3; } - else - sd->version = 1; + else if (si->version < 1) + si->version = 1; } if (sd->version < 1) diff --git a/app/openssl/crypto/cms/cms_smime.c b/app/openssl/crypto/cms/cms_smime.c index 8c56e3a8..1af9f3a6 100644 --- a/app/openssl/crypto/cms/cms_smime.c +++ b/app/openssl/crypto/cms/cms_smime.c @@ -611,7 +611,7 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) STACK_OF(CMS_RecipientInfo) *ris; CMS_RecipientInfo *ri; int i, r; - int debug = 0; + int debug = 0, ri_match = 0; ris = CMS_get0_RecipientInfos(cms); if (ris) debug = cms->d.envelopedData->encryptedContentInfo->debug; @@ -620,6 +620,7 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) ri = sk_CMS_RecipientInfo_value(ris, i); if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_TRANS) continue; + ri_match = 1; /* If we have a cert try matching RecipientInfo * otherwise try them all. */ @@ -655,7 +656,7 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) } } /* If no cert and not debugging always return success */ - if (!cert && !debug) + if (ri_match && !cert && !debug) { ERR_clear_error(); return 1; diff --git a/app/openssl/crypto/dso/dso_dlfcn.c b/app/openssl/crypto/dso/dso_dlfcn.c index 5f225480..4a56aace 100644 --- a/app/openssl/crypto/dso/dso_dlfcn.c +++ b/app/openssl/crypto/dso/dso_dlfcn.c @@ -464,7 +464,7 @@ static int dlfcn_pathbyaddr(void *addr,char *path,int sz) return len; } - ERR_add_error_data(4, "dlfcn_pathbyaddr(): ", dlerror()); + ERR_add_error_data(2, "dlfcn_pathbyaddr(): ", dlerror()); #endif return -1; } diff --git a/app/openssl/crypto/ec/ec_ameth.c b/app/openssl/crypto/ec/ec_ameth.c index 0ce45240..f715a238 100644 --- a/app/openssl/crypto/ec/ec_ameth.c +++ b/app/openssl/crypto/ec/ec_ameth.c @@ -352,6 +352,7 @@ static int eckey_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey) EC_KEY_set_enc_flags(ec_key, old_flags); OPENSSL_free(ep); ECerr(EC_F_ECKEY_PRIV_ENCODE, ERR_R_EC_LIB); + return 0; } /* restore old encoding flags */ EC_KEY_set_enc_flags(ec_key, old_flags); diff --git a/app/openssl/crypto/ec/ec_asn1.c b/app/openssl/crypto/ec/ec_asn1.c index 145807b6..e94f34e1 100644 --- a/app/openssl/crypto/ec/ec_asn1.c +++ b/app/openssl/crypto/ec/ec_asn1.c @@ -1435,8 +1435,11 @@ int i2o_ECPublicKey(EC_KEY *a, unsigned char **out) *out, buf_len, NULL)) { ECerr(EC_F_I2O_ECPUBLICKEY, ERR_R_EC_LIB); - OPENSSL_free(*out); - *out = NULL; + if (new_buffer) + { + OPENSSL_free(*out); + *out = NULL; + } return 0; } if (!new_buffer) diff --git a/app/openssl/crypto/ec/ec_lcl.h b/app/openssl/crypto/ec/ec_lcl.h index 6f714c75..dae91483 100644 --- a/app/openssl/crypto/ec/ec_lcl.h +++ b/app/openssl/crypto/ec/ec_lcl.h @@ -405,7 +405,7 @@ int ec_GF2m_simple_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, int ec_GF2m_precompute_mult(EC_GROUP *group, BN_CTX *ctx); int ec_GF2m_have_precompute_mult(const EC_GROUP *group); -#ifndef OPENSSL_EC_NISTP_64_GCC_128 +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 /* method functions in ecp_nistp224.c */ int ec_GFp_nistp224_group_init(EC_GROUP *group); int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *n, BN_CTX *); diff --git a/app/openssl/crypto/evp/bio_b64.c b/app/openssl/crypto/evp/bio_b64.c index ac6d441a..16863fe2 100644 --- a/app/openssl/crypto/evp/bio_b64.c +++ b/app/openssl/crypto/evp/bio_b64.c @@ -226,6 +226,7 @@ static int b64_read(BIO *b, char *out, int outl) else if (ctx->start) { q=p=(unsigned char *)ctx->tmp; + num = 0; for (j=0; jkey_len * 8, &gctx->ks); + aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)aesni_encrypt); gctx->ctr = (ctr128_f)aesni_ctr32_encrypt_blocks; @@ -355,19 +355,19 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, /* key_len is two AES keys */ if (enc) { - aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f)aesni_encrypt; xctx->stream = aesni_xts_encrypt; } else { - aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f)aesni_decrypt; xctx->stream = aesni_xts_decrypt; } aesni_set_encrypt_key(key + ctx->key_len/2, - ctx->key_len * 4, &xctx->ks2); + ctx->key_len * 4, &xctx->ks2.ks); xctx->xts.block2 = (block128_f)aesni_encrypt; xctx->xts.key1 = &xctx->ks1; @@ -394,7 +394,7 @@ static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) { - aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); + aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f)aesni_encrypt); cctx->str = enc?(ccm128_f)aesni_ccm64_encrypt_blocks : @@ -482,14 +482,38 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \ NULL,NULL,aes_##mode##_ctrl,NULL }; \ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return &aes_##keylen##_##mode; } - #endif -#if defined(AES_ASM) && defined(BSAES_ASM) && (defined(__arm__) || defined(__arm)) +#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__)) #include "arm_arch.h" #if __ARM_ARCH__>=7 -#define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) +# if defined(BSAES_ASM) +# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) +# endif +# define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES) +# define HWAES_set_encrypt_key aes_v8_set_encrypt_key +# define HWAES_set_decrypt_key aes_v8_set_decrypt_key +# define HWAES_encrypt aes_v8_encrypt +# define HWAES_decrypt aes_v8_decrypt +# define HWAES_cbc_encrypt aes_v8_cbc_encrypt +# define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks +#endif #endif + +#if defined(HWAES_CAPABLE) +int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +void HWAES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void HWAES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const AES_KEY *key, + unsigned char *ivec, const int enc); +void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, const unsigned char ivec[16]); #endif #define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ @@ -510,10 +534,23 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, mode = ctx->cipher->flags & EVP_CIPH_MODE; if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) +#ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) + { + ret = HWAES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); + dat->block = (block128_f)HWAES_decrypt; + dat->stream.cbc = NULL; +#ifdef HWAES_cbc_encrypt + if (mode==EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt; +#endif + } + else +#endif #ifdef BSAES_CAPABLE if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE) { - ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks); + ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); dat->block = (block128_f)AES_decrypt; dat->stream.cbc = (cbc128_f)bsaes_cbc_encrypt; } @@ -522,7 +559,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks); + ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); dat->block = (block128_f)vpaes_decrypt; dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? (cbc128_f)vpaes_cbc_encrypt : @@ -531,17 +568,37 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, else #endif { - ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks); + ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); dat->block = (block128_f)AES_decrypt; dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? (cbc128_f)AES_cbc_encrypt : NULL; } else +#ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) + { + ret = HWAES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); + dat->block = (block128_f)HWAES_encrypt; + dat->stream.cbc = NULL; +#ifdef HWAES_cbc_encrypt + if (mode==EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt; + else +#endif +#ifdef HWAES_ctr32_encrypt_blocks + if (mode==EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks; + else +#endif + (void)0; /* terminate potentially open 'else' */ + } + else +#endif #ifdef BSAES_CAPABLE if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE) { - ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks); + ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); dat->block = (block128_f)AES_encrypt; dat->stream.ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; } @@ -550,7 +607,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks); + ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); dat->block = (block128_f)vpaes_encrypt; dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? (cbc128_f)vpaes_cbc_encrypt : @@ -559,7 +616,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, else #endif { - ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks); + ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); dat->block = (block128_f)AES_encrypt; dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? (cbc128_f)AES_cbc_encrypt : @@ -830,10 +887,25 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) { do { +#ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) + { + HWAES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); + CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, + (block128_f)HWAES_encrypt); +#ifdef HWAES_ctr32_encrypt_blocks + gctx->ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks; +#else + gctx->ctr = NULL; +#endif + break; + } + else +#endif #ifdef BSAES_CAPABLE if (BSAES_CAPABLE) { - AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); + AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, (block128_f)AES_encrypt); gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; @@ -844,7 +916,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); + vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, (block128_f)vpaes_encrypt); gctx->ctr = NULL; @@ -854,7 +926,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, #endif (void)0; /* terminate potentially open 'else' */ - AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encrypt); #ifdef AES_CTR_ASM gctx->ctr = (ctr128_f)AES_ctr32_encrypt; @@ -1075,29 +1147,50 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, xctx->stream = NULL; #endif /* key_len is two AES keys */ -#if !(defined(__arm__) || defined(__arm)) /* not yet? */ +#ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) + { + if (enc) + { + HWAES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); + xctx->xts.block1 = (block128_f)HWAES_encrypt; + } + else + { + HWAES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); + xctx->xts.block1 = (block128_f)HWAES_decrypt; + } + + HWAES_set_encrypt_key(key + ctx->key_len/2, + ctx->key_len * 4, &xctx->ks2.ks); + xctx->xts.block2 = (block128_f)HWAES_encrypt; + + xctx->xts.key1 = &xctx->ks1; + break; + } + else +#endif #ifdef BSAES_CAPABLE if (BSAES_CAPABLE) xctx->stream = enc ? bsaes_xts_encrypt : bsaes_xts_decrypt; else #endif -#endif #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { if (enc) { - vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f)vpaes_encrypt; } else { - vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f)vpaes_decrypt; } vpaes_set_encrypt_key(key + ctx->key_len/2, - ctx->key_len * 4, &xctx->ks2); + ctx->key_len * 4, &xctx->ks2.ks); xctx->xts.block2 = (block128_f)vpaes_encrypt; xctx->xts.key1 = &xctx->ks1; @@ -1109,17 +1202,17 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (enc) { - AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f)AES_encrypt; } else { - AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f)AES_decrypt; } AES_set_encrypt_key(key + ctx->key_len/2, - ctx->key_len * 4, &xctx->ks2); + ctx->key_len * 4, &xctx->ks2.ks); xctx->xts.block2 = (block128_f)AES_encrypt; xctx->xts.key1 = &xctx->ks1; @@ -1227,10 +1320,23 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) do { +#ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) + { + HWAES_set_encrypt_key(key,ctx->key_len*8,&cctx->ks.ks); + + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, + &cctx->ks, (block128_f)HWAES_encrypt); + cctx->str = NULL; + cctx->key_set = 1; + break; + } + else +#endif #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks); + vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks.ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f)vpaes_encrypt); cctx->str = NULL; @@ -1238,7 +1344,7 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, break; } #endif - AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); + AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f)AES_encrypt); cctx->str = NULL; diff --git a/app/openssl/crypto/evp/encode.c b/app/openssl/crypto/evp/encode.c index 28546a84..4654bdc6 100644 --- a/app/openssl/crypto/evp/encode.c +++ b/app/openssl/crypto/evp/encode.c @@ -324,6 +324,7 @@ int EVP_DecodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl, v=EVP_DecodeBlock(out,d,n); n=0; if (v < 0) { rv=0; goto end; } + if (eof > v) { rv=-1; goto end; } ret+=(v-eof); } else diff --git a/app/openssl/crypto/evp/p_lib.c b/app/openssl/crypto/evp/p_lib.c index bd1977d7..8ee53c1d 100644 --- a/app/openssl/crypto/evp/p_lib.c +++ b/app/openssl/crypto/evp/p_lib.c @@ -202,7 +202,7 @@ EVP_PKEY *EVP_PKEY_new(void) EVP_PKEY *EVP_PKEY_dup(EVP_PKEY *pkey) { - CRYPTO_add(&pkey->references, 1, CRYPTO_LOCK_EVP_PKEY); + CRYPTO_add(&pkey->references,1,CRYPTO_LOCK_EVP_PKEY); return pkey; } diff --git a/app/openssl/crypto/modes/asm/ghash-armv4.S b/app/openssl/crypto/modes/asm/ghash-armv4.S index d66c4cbf..6c453774 100644 --- a/app/openssl/crypto/modes/asm/ghash-armv4.S +++ b/app/openssl/crypto/modes/asm/ghash-armv4.S @@ -309,99 +309,213 @@ gcm_gmult_4bit: #if __ARM_ARCH__>=7 .fpu neon +.global gcm_init_neon +.type gcm_init_neon,%function +.align 4 +gcm_init_neon: + vld1.64 d7,[r1,:64]! @ load H + vmov.i8 q8,#0xe1 + vld1.64 d6,[r1,:64] + vshl.i64 d17,#57 + vshr.u64 d16,#63 @ t0=0xc2....01 + vdup.8 q9,d7[7] + vshr.u64 d26,d6,#63 + vshr.s8 q9,#7 @ broadcast carry bit + vshl.i64 q3,q3,#1 + vand q8,q8,q9 + vorr d7,d26 @ H<<<=1 + veor q3,q3,q8 @ twisted H + vstmia r0,{q3} + + bx lr @ bx lr +.size gcm_init_neon,.-gcm_init_neon + .global gcm_gmult_neon .type gcm_gmult_neon,%function .align 4 gcm_gmult_neon: - sub r1,#16 @ point at H in GCM128_CTX - vld1.64 d29,[r0,:64]!@ load Xi - vmov.i32 d5,#0xe1 @ our irreducible polynomial - vld1.64 d28,[r0,:64]! - vshr.u64 d5,#32 - vldmia r1,{d0-d1} @ load H - veor q12,q12 + vld1.64 d7,[r0,:64]! @ load Xi + vld1.64 d6,[r0,:64]! + vmov.i64 d29,#0x0000ffffffffffff + vldmia r1,{d26-d27} @ load twisted H + vmov.i64 d30,#0x00000000ffffffff #ifdef __ARMEL__ - vrev64.8 q14,q14 + vrev64.8 q3,q3 #endif - veor q13,q13 - veor q11,q11 - mov r1,#16 - veor q10,q10 + vmov.i64 d31,#0x000000000000ffff + veor d28,d26,d27 @ Karatsuba pre-processing mov r3,#16 - veor d2,d2 - vdup.8 d4,d28[0] @ broadcast lowest byte - b .Linner_neon + b .Lgmult_neon .size gcm_gmult_neon,.-gcm_gmult_neon .global gcm_ghash_neon .type gcm_ghash_neon,%function .align 4 gcm_ghash_neon: - vld1.64 d21,[r0,:64]! @ load Xi - vmov.i32 d5,#0xe1 @ our irreducible polynomial - vld1.64 d20,[r0,:64]! - vshr.u64 d5,#32 - vldmia r0,{d0-d1} @ load H - veor q12,q12 - nop + vld1.64 d1,[r0,:64]! @ load Xi + vld1.64 d0,[r0,:64]! + vmov.i64 d29,#0x0000ffffffffffff + vldmia r1,{d26-d27} @ load twisted H + vmov.i64 d30,#0x00000000ffffffff #ifdef __ARMEL__ - vrev64.8 q10,q10 + vrev64.8 q0,q0 #endif -.Louter_neon: - vld1.64 d29,[r2]! @ load inp - veor q13,q13 - vld1.64 d28,[r2]! - veor q11,q11 - mov r1,#16 + vmov.i64 d31,#0x000000000000ffff + veor d28,d26,d27 @ Karatsuba pre-processing + +.Loop_neon: + vld1.64 d7,[r2]! @ load inp + vld1.64 d6,[r2]! #ifdef __ARMEL__ - vrev64.8 q14,q14 + vrev64.8 q3,q3 #endif - veor d2,d2 - veor q14,q10 @ inp^=Xi - veor q10,q10 - vdup.8 d4,d28[0] @ broadcast lowest byte -.Linner_neon: - subs r1,r1,#1 - vmull.p8 q9,d1,d4 @ H.lo·Xi[i] - vmull.p8 q8,d0,d4 @ H.hi·Xi[i] - vext.8 q14,q12,#1 @ IN>>=8 - - veor q10,q13 @ modulo-scheduled part - vshl.i64 d22,#48 - vdup.8 d4,d28[0] @ broadcast lowest byte - veor d3,d18,d20 - - veor d21,d22 - vuzp.8 q9,q8 - vsli.8 d2,d3,#1 @ compose the "carry" byte - vext.8 q10,q12,#1 @ Z>>=8 + veor q3,q0 @ inp^=Xi +.Lgmult_neon: + vext.8 d16, d26, d26, #1 @ A1 + vmull.p8 q8, d16, d6 @ F = A1*B + vext.8 d0, d6, d6, #1 @ B1 + vmull.p8 q0, d26, d0 @ E = A*B1 + vext.8 d18, d26, d26, #2 @ A2 + vmull.p8 q9, d18, d6 @ H = A2*B + vext.8 d22, d6, d6, #2 @ B2 + vmull.p8 q11, d26, d22 @ G = A*B2 + vext.8 d20, d26, d26, #3 @ A3 + veor q8, q8, q0 @ L = E + F + vmull.p8 q10, d20, d6 @ J = A3*B + vext.8 d0, d6, d6, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q0, d26, d0 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d6, d6, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d26, d22 @ K = A*B4 + veor q10, q10, q0 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q0, d26, d6 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q0, q0, q8 + veor q0, q0, q10 + veor d6,d6,d7 @ Karatsuba pre-processing + vext.8 d16, d28, d28, #1 @ A1 + vmull.p8 q8, d16, d6 @ F = A1*B + vext.8 d2, d6, d6, #1 @ B1 + vmull.p8 q1, d28, d2 @ E = A*B1 + vext.8 d18, d28, d28, #2 @ A2 + vmull.p8 q9, d18, d6 @ H = A2*B + vext.8 d22, d6, d6, #2 @ B2 + vmull.p8 q11, d28, d22 @ G = A*B2 + vext.8 d20, d28, d28, #3 @ A3 + veor q8, q8, q1 @ L = E + F + vmull.p8 q10, d20, d6 @ J = A3*B + vext.8 d2, d6, d6, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q1, d28, d2 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d6, d6, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d28, d22 @ K = A*B4 + veor q10, q10, q1 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q1, d28, d6 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q1, q1, q8 + veor q1, q1, q10 + vext.8 d16, d27, d27, #1 @ A1 + vmull.p8 q8, d16, d7 @ F = A1*B + vext.8 d4, d7, d7, #1 @ B1 + vmull.p8 q2, d27, d4 @ E = A*B1 + vext.8 d18, d27, d27, #2 @ A2 + vmull.p8 q9, d18, d7 @ H = A2*B + vext.8 d22, d7, d7, #2 @ B2 + vmull.p8 q11, d27, d22 @ G = A*B2 + vext.8 d20, d27, d27, #3 @ A3 + veor q8, q8, q2 @ L = E + F + vmull.p8 q10, d20, d7 @ J = A3*B + vext.8 d4, d7, d7, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q2, d27, d4 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d7, d7, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d27, d22 @ K = A*B4 + veor q10, q10, q2 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q2, d27, d7 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q2, q2, q8 + veor q2, q2, q10 + veor q1,q1,q0 @ Karatsuba post-processing + veor q1,q1,q2 + veor d1,d1,d2 + veor d4,d4,d3 @ Xh|Xl - 256-bit result - vmull.p8 q11,d2,d5 @ "carry"·0xe1 - vshr.u8 d2,d3,#7 @ save Z's bottom bit - vext.8 q13,q9,q12,#1 @ Qlo>>=8 - veor q10,q8 - bne .Linner_neon + @ equivalent of reduction_avx from ghash-x86_64.pl + vshl.i64 q9,q0,#57 @ 1st phase + vshl.i64 q10,q0,#62 + veor q10,q10,q9 @ + vshl.i64 q9,q0,#63 + veor q10, q10, q9 @ + veor d1,d1,d20 @ + veor d4,d4,d21 - veor q10,q13 @ modulo-scheduled artefact - vshl.i64 d22,#48 - veor d21,d22 + vshr.u64 q10,q0,#1 @ 2nd phase + veor q2,q2,q0 + veor q0,q0,q10 @ + vshr.u64 q10,q10,#6 + vshr.u64 q0,q0,#1 @ + veor q0,q0,q2 @ + veor q0,q0,q10 @ - @ finalization, normalize Z:Zo - vand d2,d5 @ suffices to mask the bit - vshr.u64 d3,d20,#63 - vshl.i64 q10,#1 subs r3,#16 - vorr q10,q1 @ Z=Z:Zo<<1 - bne .Louter_neon + bne .Loop_neon #ifdef __ARMEL__ - vrev64.8 q10,q10 + vrev64.8 q0,q0 #endif sub r0,#16 - vst1.64 d21,[r0,:64]! @ write out Xi - vst1.64 d20,[r0,:64] + vst1.64 d1,[r0,:64]! @ write out Xi + vst1.64 d0,[r0,:64] - .word 0xe12fff1e + bx lr @ bx lr .size gcm_ghash_neon,.-gcm_ghash_neon #endif .asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by " diff --git a/app/openssl/crypto/modes/asm/ghash-armv4.pl b/app/openssl/crypto/modes/asm/ghash-armv4.pl index e46f8e34..b79ecbcc 100644 --- a/app/openssl/crypto/modes/asm/ghash-armv4.pl +++ b/app/openssl/crypto/modes/asm/ghash-armv4.pl @@ -35,6 +35,20 @@ # Add NEON implementation featuring polynomial multiplication, i.e. no # lookup tables involved. On Cortex A8 it was measured to process one # byte in 15 cycles or 55% faster than integer-only code. +# +# April 2014 +# +# Switch to multiplication algorithm suggested in paper referred +# below and combine it with reduction algorithm from x86 module. +# Performance improvement over previous version varies from 65% on +# Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8 +# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 - +# in 9.33. +# +# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software +# Polynomial Multiplication on ARM Processors using the NEON Engine. +# +# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf # ==================================================================== # Note about "528B" variant. In ARM case it makes lesser sense to @@ -303,117 +317,160 @@ $code.=<<___; .size gcm_gmult_4bit,.-gcm_gmult_4bit ___ { -my $cnt=$Htbl; # $Htbl is used once in the very beginning - -my ($Hhi, $Hlo, $Zo, $T, $xi, $mod) = map("d$_",(0..7)); -my ($Qhi, $Qlo, $Z, $R, $zero, $Qpost, $IN) = map("q$_",(8..15)); - -# Z:Zo keeps 128-bit result shifted by 1 to the right, with bottom bit -# in Zo. Or should I say "top bit", because GHASH is specified in -# reverse bit order? Otherwise straightforward 128-bt H by one input -# byte multiplication and modulo-reduction, times 16. +my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3)); +my ($t0,$t1,$t2,$t3)=map("q$_",(8..12)); +my ($Hlo,$Hhi,$Hhl,$k48,$k32,$k16)=map("d$_",(26..31)); -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } -sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } +sub clmul64x64 { +my ($r,$a,$b)=@_; +$code.=<<___; + vext.8 $t0#lo, $a, $a, #1 @ A1 + vmull.p8 $t0, $t0#lo, $b @ F = A1*B + vext.8 $r#lo, $b, $b, #1 @ B1 + vmull.p8 $r, $a, $r#lo @ E = A*B1 + vext.8 $t1#lo, $a, $a, #2 @ A2 + vmull.p8 $t1, $t1#lo, $b @ H = A2*B + vext.8 $t3#lo, $b, $b, #2 @ B2 + vmull.p8 $t3, $a, $t3#lo @ G = A*B2 + vext.8 $t2#lo, $a, $a, #3 @ A3 + veor $t0, $t0, $r @ L = E + F + vmull.p8 $t2, $t2#lo, $b @ J = A3*B + vext.8 $r#lo, $b, $b, #3 @ B3 + veor $t1, $t1, $t3 @ M = G + H + vmull.p8 $r, $a, $r#lo @ I = A*B3 + veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8 + vand $t0#hi, $t0#hi, $k48 + vext.8 $t3#lo, $b, $b, #4 @ B4 + veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16 + vand $t1#hi, $t1#hi, $k32 + vmull.p8 $t3, $a, $t3#lo @ K = A*B4 + veor $t2, $t2, $r @ N = I + J + veor $t0#lo, $t0#lo, $t0#hi + veor $t1#lo, $t1#lo, $t1#hi + veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24 + vand $t2#hi, $t2#hi, $k16 + vext.8 $t0, $t0, $t0, #15 + veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32 + vmov.i64 $t3#hi, #0 + vext.8 $t1, $t1, $t1, #14 + veor $t2#lo, $t2#lo, $t2#hi + vmull.p8 $r, $a, $b @ D = A*B + vext.8 $t3, $t3, $t3, #12 + vext.8 $t2, $t2, $t2, #13 + veor $t0, $t0, $t1 + veor $t2, $t2, $t3 + veor $r, $r, $t0 + veor $r, $r, $t2 +___ +} $code.=<<___; #if __ARM_ARCH__>=7 .fpu neon +.global gcm_init_neon +.type gcm_init_neon,%function +.align 4 +gcm_init_neon: + vld1.64 $IN#hi,[r1,:64]! @ load H + vmov.i8 $t0,#0xe1 + vld1.64 $IN#lo,[r1,:64] + vshl.i64 $t0#hi,#57 + vshr.u64 $t0#lo,#63 @ t0=0xc2....01 + vdup.8 $t1,$IN#hi[7] + vshr.u64 $Hlo,$IN#lo,#63 + vshr.s8 $t1,#7 @ broadcast carry bit + vshl.i64 $IN,$IN,#1 + vand $t0,$t0,$t1 + vorr $IN#hi,$Hlo @ H<<<=1 + veor $IN,$IN,$t0 @ twisted H + vstmia r0,{$IN} + + ret @ bx lr +.size gcm_init_neon,.-gcm_init_neon + .global gcm_gmult_neon .type gcm_gmult_neon,%function .align 4 gcm_gmult_neon: - sub $Htbl,#16 @ point at H in GCM128_CTX - vld1.64 `&Dhi("$IN")`,[$Xi,:64]!@ load Xi - vmov.i32 $mod,#0xe1 @ our irreducible polynomial - vld1.64 `&Dlo("$IN")`,[$Xi,:64]! - vshr.u64 $mod,#32 - vldmia $Htbl,{$Hhi-$Hlo} @ load H - veor $zero,$zero + vld1.64 $IN#hi,[$Xi,:64]! @ load Xi + vld1.64 $IN#lo,[$Xi,:64]! + vmov.i64 $k48,#0x0000ffffffffffff + vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H + vmov.i64 $k32,#0x00000000ffffffff #ifdef __ARMEL__ vrev64.8 $IN,$IN #endif - veor $Qpost,$Qpost - veor $R,$R - mov $cnt,#16 - veor $Z,$Z + vmov.i64 $k16,#0x000000000000ffff + veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing mov $len,#16 - veor $Zo,$Zo - vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte - b .Linner_neon + b .Lgmult_neon .size gcm_gmult_neon,.-gcm_gmult_neon .global gcm_ghash_neon .type gcm_ghash_neon,%function .align 4 gcm_ghash_neon: - vld1.64 `&Dhi("$Z")`,[$Xi,:64]! @ load Xi - vmov.i32 $mod,#0xe1 @ our irreducible polynomial - vld1.64 `&Dlo("$Z")`,[$Xi,:64]! - vshr.u64 $mod,#32 - vldmia $Xi,{$Hhi-$Hlo} @ load H - veor $zero,$zero - nop + vld1.64 $Xl#hi,[$Xi,:64]! @ load Xi + vld1.64 $Xl#lo,[$Xi,:64]! + vmov.i64 $k48,#0x0000ffffffffffff + vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H + vmov.i64 $k32,#0x00000000ffffffff #ifdef __ARMEL__ - vrev64.8 $Z,$Z + vrev64.8 $Xl,$Xl #endif -.Louter_neon: - vld1.64 `&Dhi($IN)`,[$inp]! @ load inp - veor $Qpost,$Qpost - vld1.64 `&Dlo($IN)`,[$inp]! - veor $R,$R - mov $cnt,#16 + vmov.i64 $k16,#0x000000000000ffff + veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing + +.Loop_neon: + vld1.64 $IN#hi,[$inp]! @ load inp + vld1.64 $IN#lo,[$inp]! #ifdef __ARMEL__ vrev64.8 $IN,$IN #endif - veor $Zo,$Zo - veor $IN,$Z @ inp^=Xi - veor $Z,$Z - vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte -.Linner_neon: - subs $cnt,$cnt,#1 - vmull.p8 $Qlo,$Hlo,$xi @ H.lo·Xi[i] - vmull.p8 $Qhi,$Hhi,$xi @ H.hi·Xi[i] - vext.8 $IN,$zero,#1 @ IN>>=8 - - veor $Z,$Qpost @ modulo-scheduled part - vshl.i64 `&Dlo("$R")`,#48 - vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte - veor $T,`&Dlo("$Qlo")`,`&Dlo("$Z")` - - veor `&Dhi("$Z")`,`&Dlo("$R")` - vuzp.8 $Qlo,$Qhi - vsli.8 $Zo,$T,#1 @ compose the "carry" byte - vext.8 $Z,$zero,#1 @ Z>>=8 - - vmull.p8 $R,$Zo,$mod @ "carry"·0xe1 - vshr.u8 $Zo,$T,#7 @ save Z's bottom bit - vext.8 $Qpost,$Qlo,$zero,#1 @ Qlo>>=8 - veor $Z,$Qhi - bne .Linner_neon - - veor $Z,$Qpost @ modulo-scheduled artefact - vshl.i64 `&Dlo("$R")`,#48 - veor `&Dhi("$Z")`,`&Dlo("$R")` - - @ finalization, normalize Z:Zo - vand $Zo,$mod @ suffices to mask the bit - vshr.u64 `&Dhi(&Q("$Zo"))`,`&Dlo("$Z")`,#63 - vshl.i64 $Z,#1 + veor $IN,$Xl @ inp^=Xi +.Lgmult_neon: +___ + &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo +$code.=<<___; + veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing +___ + &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi) + &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi +$code.=<<___; + veor $Xm,$Xm,$Xl @ Karatsuba post-processing + veor $Xm,$Xm,$Xh + veor $Xl#hi,$Xl#hi,$Xm#lo + veor $Xh#lo,$Xh#lo,$Xm#hi @ Xh|Xl - 256-bit result + + @ equivalent of reduction_avx from ghash-x86_64.pl + vshl.i64 $t1,$Xl,#57 @ 1st phase + vshl.i64 $t2,$Xl,#62 + veor $t2,$t2,$t1 @ + vshl.i64 $t1,$Xl,#63 + veor $t2, $t2, $t1 @ + veor $Xl#hi,$Xl#hi,$t2#lo @ + veor $Xh#lo,$Xh#lo,$t2#hi + + vshr.u64 $t2,$Xl,#1 @ 2nd phase + veor $Xh,$Xh,$Xl + veor $Xl,$Xl,$t2 @ + vshr.u64 $t2,$t2,#6 + vshr.u64 $Xl,$Xl,#1 @ + veor $Xl,$Xl,$Xh @ + veor $Xl,$Xl,$t2 @ + subs $len,#16 - vorr $Z,`&Q("$Zo")` @ Z=Z:Zo<<1 - bne .Louter_neon + bne .Loop_neon #ifdef __ARMEL__ - vrev64.8 $Z,$Z + vrev64.8 $Xl,$Xl #endif sub $Xi,#16 - vst1.64 `&Dhi("$Z")`,[$Xi,:64]! @ write out Xi - vst1.64 `&Dlo("$Z")`,[$Xi,:64] + vst1.64 $Xl#hi,[$Xi,:64]! @ write out Xi + vst1.64 $Xl#lo,[$Xi,:64] - bx lr + ret @ bx lr .size gcm_ghash_neon,.-gcm_ghash_neon #endif ___ @@ -423,7 +480,13 @@ $code.=<<___; .align 2 ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} close STDOUT; # enforce flush diff --git a/app/openssl/crypto/modes/asm/ghashv8-armx-64.S b/app/openssl/crypto/modes/asm/ghashv8-armx-64.S new file mode 100644 index 00000000..b77b6c40 --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghashv8-armx-64.S @@ -0,0 +1,115 @@ +#include "arm_arch.h" + +.text +.arch armv8-a+crypto +.global gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + ld1 {v17.2d},[x1] //load H + movi v16.16b,#0xe1 + ext v3.16b,v17.16b,v17.16b,#8 + shl v16.2d,v16.2d,#57 + ushr v18.2d,v16.2d,#63 + ext v16.16b,v18.16b,v16.16b,#8 //t0=0xc2....01 + dup v17.4s,v17.s[1] + ushr v19.2d,v3.2d,#63 + sshr v17.4s,v17.4s,#31 //broadcast carry bit + and v19.16b,v19.16b,v16.16b + shl v3.2d,v3.2d,#1 + ext v19.16b,v19.16b,v19.16b,#8 + and v16.16b,v16.16b,v17.16b + orr v3.16b,v3.16b,v19.16b //H<<<=1 + eor v3.16b,v3.16b,v16.16b //twisted H + st1 {v3.2d},[x0] + + ret +.size gcm_init_v8,.-gcm_init_v8 + +.global gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + ld1 {v17.2d},[x0] //load Xi + movi v19.16b,#0xe1 + ld1 {v20.2d},[x1] //load twisted H + shl v19.2d,v19.2d,#57 +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + ext v21.16b,v20.16b,v20.16b,#8 + mov x3,#0 + ext v3.16b,v17.16b,v17.16b,#8 + mov x12,#0 + eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing + mov x2,x0 + b .Lgmult_v8 +.size gcm_gmult_v8,.-gcm_gmult_v8 + +.global gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + ld1 {v0.2d},[x0] //load [rotated] Xi + subs x3,x3,#16 + movi v19.16b,#0xe1 + mov x12,#16 + ld1 {v20.2d},[x1] //load twisted H + csel x12,xzr,x12,eq + ext v0.16b,v0.16b,v0.16b,#8 + shl v19.2d,v19.2d,#57 + ld1 {v17.2d},[x2],x12 //load [rotated] inp + ext v21.16b,v20.16b,v20.16b,#8 +#ifndef __ARMEB__ + rev64 v0.16b,v0.16b + rev64 v17.16b,v17.16b +#endif + eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing + ext v3.16b,v17.16b,v17.16b,#8 + b .Loop_v8 + +.align 4 +.Loop_v8: + ext v18.16b,v0.16b,v0.16b,#8 + eor v3.16b,v3.16b,v0.16b //inp^=Xi + eor v17.16b,v17.16b,v18.16b //v17.16b is rotated inp^Xi + +.Lgmult_v8: + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + subs x3,x3,#16 + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + csel x12,xzr,x12,eq + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v17.2d},[x2],x12 //load [rotated] inp + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + eor v0.16b,v1.16b,v18.16b + ext v3.16b,v17.16b,v17.16b,#8 + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + b.hs .Loop_v8 + +#ifndef __ARMEB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_ghash_v8,.-gcm_ghash_v8 +.asciz "GHASH for ARMv8, CRYPTOGAMS by " +.align 2 diff --git a/app/openssl/crypto/modes/asm/ghashv8-armx.S b/app/openssl/crypto/modes/asm/ghashv8-armx.S new file mode 100644 index 00000000..f388c54e --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghashv8-armx.S @@ -0,0 +1,116 @@ +#include "arm_arch.h" + +.text +.fpu neon +.code 32 +.global gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + vld1.64 {q9},[r1] @ load H + vmov.i8 q8,#0xe1 + vext.8 q3,q9,q9,#8 + vshl.i64 q8,q8,#57 + vshr.u64 q10,q8,#63 + vext.8 q8,q10,q8,#8 @ t0=0xc2....01 + vdup.32 q9,d18[1] + vshr.u64 q11,q3,#63 + vshr.s32 q9,q9,#31 @ broadcast carry bit + vand q11,q11,q8 + vshl.i64 q3,q3,#1 + vext.8 q11,q11,q11,#8 + vand q8,q8,q9 + vorr q3,q3,q11 @ H<<<=1 + veor q3,q3,q8 @ twisted H + vst1.64 {q3},[r0] + + bx lr +.size gcm_init_v8,.-gcm_init_v8 + +.global gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + vld1.64 {q9},[r0] @ load Xi + vmov.i8 q11,#0xe1 + vld1.64 {q12},[r1] @ load twisted H + vshl.u64 q11,q11,#57 +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vext.8 q13,q12,q12,#8 + mov r3,#0 + vext.8 q3,q9,q9,#8 + mov r12,#0 + veor q13,q13,q12 @ Karatsuba pre-processing + mov r2,r0 + b .Lgmult_v8 +.size gcm_gmult_v8,.-gcm_gmult_v8 + +.global gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + vld1.64 {q0},[r0] @ load [rotated] Xi + subs r3,r3,#16 + vmov.i8 q11,#0xe1 + mov r12,#16 + vld1.64 {q12},[r1] @ load twisted H + moveq r12,#0 + vext.8 q0,q0,q0,#8 + vshl.u64 q11,q11,#57 + vld1.64 {q9},[r2],r12 @ load [rotated] inp + vext.8 q13,q12,q12,#8 +#ifndef __ARMEB__ + vrev64.8 q0,q0 + vrev64.8 q9,q9 +#endif + veor q13,q13,q12 @ Karatsuba pre-processing + vext.8 q3,q9,q9,#8 + b .Loop_v8 + +.align 4 +.Loop_v8: + vext.8 q10,q0,q0,#8 + veor q3,q3,q0 @ inp^=Xi + veor q9,q9,q10 @ q9 is rotated inp^Xi + +.Lgmult_v8: + .byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo + veor q9,q9,q3 @ Karatsuba pre-processing + .byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi + subs r3,r3,#16 + .byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + moveq r12,#0 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + vld1.64 {q9},[r2],r12 @ load [rotated] inp + veor q1,q1,q10 + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + veor q0,q1,q10 + vext.8 q3,q9,q9,#8 + + vext.8 q10,q0,q0,#8 @ 2nd phase + .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q0,q0,q10 + bhs .Loop_v8 + +#ifndef __ARMEB__ + vrev64.8 q0,q0 +#endif + vext.8 q0,q0,q0,#8 + vst1.64 {q0},[r0] @ write out Xi + + bx lr +.size gcm_ghash_v8,.-gcm_ghash_v8 +.asciz "GHASH for ARMv8, CRYPTOGAMS by " +.align 2 diff --git a/app/openssl/crypto/modes/asm/ghashv8-armx.pl b/app/openssl/crypto/modes/asm/ghashv8-armx.pl new file mode 100644 index 00000000..69e863e7 --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghashv8-armx.pl @@ -0,0 +1,240 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication. +# +# June 2014 +# +# Initial version was developed in tight cooperation with Ard +# Biesheuvel from bits-n-pieces from +# other assembly modules. Just like aesv8-armx.pl this module +# supports both AArch32 and AArch64 execution modes. +# +# Current performance in cycles per processed byte: +# +# PMULL[2] 32-bit NEON(*) +# Apple A7 1.76 5.62 +# Cortex-A5x n/a n/a +# +# (*) presented for reference/comparison purposes; + +$flavour = shift; +open STDOUT,">".shift; + +$Xi="x0"; # argument block +$Htbl="x1"; +$inp="x2"; +$len="x3"; + +$inc="x12"; + +{ +my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3)); +my ($t0,$t1,$t2,$t3,$H,$Hhl)=map("q$_",(8..14)); + +$code=<<___; +#include "arm_arch.h" + +.text +___ +$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); +$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); + +$code.=<<___; +.global gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + vld1.64 {$t1},[x1] @ load H + vmov.i8 $t0,#0xe1 + vext.8 $IN,$t1,$t1,#8 + vshl.i64 $t0,$t0,#57 + vshr.u64 $t2,$t0,#63 + vext.8 $t0,$t2,$t0,#8 @ t0=0xc2....01 + vdup.32 $t1,${t1}[1] + vshr.u64 $t3,$IN,#63 + vshr.s32 $t1,$t1,#31 @ broadcast carry bit + vand $t3,$t3,$t0 + vshl.i64 $IN,$IN,#1 + vext.8 $t3,$t3,$t3,#8 + vand $t0,$t0,$t1 + vorr $IN,$IN,$t3 @ H<<<=1 + veor $IN,$IN,$t0 @ twisted H + vst1.64 {$IN},[x0] + + ret +.size gcm_init_v8,.-gcm_init_v8 + +.global gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + vld1.64 {$t1},[$Xi] @ load Xi + vmov.i8 $t3,#0xe1 + vld1.64 {$H},[$Htbl] @ load twisted H + vshl.u64 $t3,$t3,#57 +#ifndef __ARMEB__ + vrev64.8 $t1,$t1 +#endif + vext.8 $Hhl,$H,$H,#8 + mov $len,#0 + vext.8 $IN,$t1,$t1,#8 + mov $inc,#0 + veor $Hhl,$Hhl,$H @ Karatsuba pre-processing + mov $inp,$Xi + b .Lgmult_v8 +.size gcm_gmult_v8,.-gcm_gmult_v8 + +.global gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + vld1.64 {$Xl},[$Xi] @ load [rotated] Xi + subs $len,$len,#16 + vmov.i8 $t3,#0xe1 + mov $inc,#16 + vld1.64 {$H},[$Htbl] @ load twisted H + cclr $inc,eq + vext.8 $Xl,$Xl,$Xl,#8 + vshl.u64 $t3,$t3,#57 + vld1.64 {$t1},[$inp],$inc @ load [rotated] inp + vext.8 $Hhl,$H,$H,#8 +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl + vrev64.8 $t1,$t1 +#endif + veor $Hhl,$Hhl,$H @ Karatsuba pre-processing + vext.8 $IN,$t1,$t1,#8 + b .Loop_v8 + +.align 4 +.Loop_v8: + vext.8 $t2,$Xl,$Xl,#8 + veor $IN,$IN,$Xl @ inp^=Xi + veor $t1,$t1,$t2 @ $t1 is rotated inp^Xi + +.Lgmult_v8: + vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo + veor $t1,$t1,$IN @ Karatsuba pre-processing + vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi + subs $len,$len,#16 + vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + cclr $inc,eq + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + vld1.64 {$t1},[$inp],$inc @ load [rotated] inp + veor $Xm,$Xm,$t2 + vpmull.p64 $t2,$Xl,$t3 @ 1st phase + + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl +#ifndef __ARMEB__ + vrev64.8 $t1,$t1 +#endif + veor $Xl,$Xm,$t2 + vext.8 $IN,$t1,$t1,#8 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase + vpmull.p64 $Xl,$Xl,$t3 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + b.hs .Loop_v8 + +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl +#endif + vext.8 $Xl,$Xl,$Xl,#8 + vst1.64 {$Xl},[$Xi] @ write out Xi + + ret +.size gcm_ghash_v8,.-gcm_ghash_v8 +___ +} +$code.=<<___; +.asciz "GHASH for ARMv8, CRYPTOGAMS by " +.align 2 +___ + +if ($flavour =~ /64/) { ######## 64-bit code + sub unvmov { + my $arg=shift; + + $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o && + sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1; + } + foreach(split("\n",$code)) { + s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or + s/vmov\.i8/movi/o or # fix up legacy mnemonics + s/vmov\s+(.*)/unvmov($1)/geo or + s/vext\.8/ext/o or + s/vshr\.s/sshr\.s/o or + s/vshr/ushr/o or + s/^(\s+)v/$1/o or # strip off v prefix + s/\bbx\s+lr\b/ret/o; + + s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers + s/@\s/\/\//o; # old->new style commentary + + # fix up remainig legacy suffixes + s/\.[ui]?8(\s)/$1/o; + s/\.[uis]?32//o and s/\.16b/\.4s/go; + m/\.p64/o and s/\.16b/\.1q/o; # 1st pmull argument + m/l\.p64/o and s/\.16b/\.1d/go; # 2nd and 3rd pmull arguments + s/\.[uisp]?64//o and s/\.16b/\.2d/go; + s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; + + print $_,"\n"; + } +} else { ######## 32-bit code + sub unvdup32 { + my $arg=shift; + + $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && + sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; + } + sub unvpmullp64 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) { + my $word = 0xf2a00e00|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + $word |= 0x00010001 if ($mnemonic =~ "2"); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } + + foreach(split("\n",$code)) { + s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers + s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers + s/\/\/\s?/@ /o; # new->old style commentary + + # fix up remainig new-style suffixes + s/\],#[0-9]+/]!/o; + + s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or + s/vdup\.32\s+(.*)/unvdup32($1)/geo or + s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo or + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/^(\s+)b\./$1b/o or + s/^(\s+)ret/$1bx\tlr/o; + + print $_,"\n"; + } +} + +close STDOUT; # enforce flush diff --git a/app/openssl/crypto/modes/gcm128.c b/app/openssl/crypto/modes/gcm128.c index e1dc2b0f..79ebb66e 100644 --- a/app/openssl/crypto/modes/gcm128.c +++ b/app/openssl/crypto/modes/gcm128.c @@ -642,7 +642,7 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) #endif -#if TABLE_BITS==4 && defined(GHASH_ASM) +#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) # if !defined(I386_ONLY) && \ (defined(__i386) || defined(__i386__) || \ defined(__x86_64) || defined(__x86_64__) || \ @@ -663,13 +663,21 @@ void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]); void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); # endif -# elif defined(__arm__) || defined(__arm) +# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) # include "arm_arch.h" # if __ARM_ARCH__>=7 # define GHASH_ASM_ARM # define GCM_FUNCREF_4BIT +# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) +# if defined(__arm__) || defined(__arm) +# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) +# endif +void gcm_init_neon(u128 Htable[16],const u64 Xi[2]); void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +void gcm_init_v8(u128 Htable[16],const u64 Xi[2]); +void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); # endif # endif #endif @@ -739,10 +747,21 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) ctx->ghash = gcm_ghash_4bit; # endif # elif defined(GHASH_ASM_ARM) - if (OPENSSL_armcap_P & ARMV7_NEON) { +# ifdef PMULL_CAPABLE + if (PMULL_CAPABLE) { + gcm_init_v8(ctx->Htable,ctx->H.u); + ctx->gmult = gcm_gmult_v8; + ctx->ghash = gcm_ghash_v8; + } else +# endif +# ifdef NEON_CAPABLE + if (NEON_CAPABLE) { + gcm_init_neon(ctx->Htable,ctx->H.u); ctx->gmult = gcm_gmult_neon; ctx->ghash = gcm_ghash_neon; - } else { + } else +# endif + { gcm_init_4bit(ctx->Htable,ctx->H.u); ctx->gmult = gcm_gmult_4bit; ctx->ghash = gcm_ghash_4bit; diff --git a/app/openssl/crypto/opensslconf-32.h b/app/openssl/crypto/opensslconf-32.h index d6625489..caf6f1b8 100644 --- a/app/openssl/crypto/opensslconf-32.h +++ b/app/openssl/crypto/opensslconf-32.h @@ -53,6 +53,9 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -137,6 +140,9 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/crypto/opensslconf-64.h b/app/openssl/crypto/opensslconf-64.h index 70c5a2cb..88fb0419 100644 --- a/app/openssl/crypto/opensslconf-64.h +++ b/app/openssl/crypto/opensslconf-64.h @@ -53,6 +53,9 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -137,6 +140,9 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/crypto/opensslconf-static-32.h b/app/openssl/crypto/opensslconf-static-32.h index d6625489..caf6f1b8 100644 --- a/app/openssl/crypto/opensslconf-static-32.h +++ b/app/openssl/crypto/opensslconf-static-32.h @@ -53,6 +53,9 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -137,6 +140,9 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/crypto/opensslconf-static-64.h b/app/openssl/crypto/opensslconf-static-64.h index 70c5a2cb..88fb0419 100644 --- a/app/openssl/crypto/opensslconf-static-64.h +++ b/app/openssl/crypto/opensslconf-static-64.h @@ -53,6 +53,9 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -137,6 +140,9 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/crypto/opensslv.h b/app/openssl/crypto/opensslv.h index ebe71807..c3b6acec 100644 --- a/app/openssl/crypto/opensslv.h +++ b/app/openssl/crypto/opensslv.h @@ -25,11 +25,11 @@ * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -#define OPENSSL_VERSION_NUMBER 0x1000107fL +#define OPENSSL_VERSION_NUMBER 0x1000108fL #ifdef OPENSSL_FIPS -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g-fips 7 Apr 2014" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h-fips 5 Jun 2014" #else -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g 7 Apr 2014" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h 5 Jun 2014" #endif #define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/app/openssl/crypto/pkcs12/p12_crt.c b/app/openssl/crypto/pkcs12/p12_crt.c index a34915d0..35e8a4a8 100644 --- a/app/openssl/crypto/pkcs12/p12_crt.c +++ b/app/openssl/crypto/pkcs12/p12_crt.c @@ -96,7 +96,11 @@ PKCS12 *PKCS12_create(char *pass, char *name, EVP_PKEY *pkey, X509 *cert, nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; else #endif +#ifdef OPENSSL_NO_RC2 + nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; +#else nid_cert = NID_pbe_WithSHA1And40BitRC2_CBC; +#endif } if (!nid_key) nid_key = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; @@ -286,7 +290,11 @@ int PKCS12_add_safe(STACK_OF(PKCS7) **psafes, STACK_OF(PKCS12_SAFEBAG) *bags, free_safes = 0; if (nid_safe == 0) +#ifdef OPENSSL_NO_RC2 + nid_safe = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; +#else nid_safe = NID_pbe_WithSHA1And40BitRC2_CBC; +#endif if (nid_safe == -1) p7 = PKCS12_pack_p7data(bags); diff --git a/app/openssl/crypto/pkcs12/p12_kiss.c b/app/openssl/crypto/pkcs12/p12_kiss.c index 206b1b0b..c9b7ab61 100644 --- a/app/openssl/crypto/pkcs12/p12_kiss.c +++ b/app/openssl/crypto/pkcs12/p12_kiss.c @@ -269,7 +269,7 @@ static int parse_bag(PKCS12_SAFEBAG *bag, const char *pass, int passlen, int len, r; unsigned char *data; len = ASN1_STRING_to_UTF8(&data, fname); - if(len > 0) { + if(len >= 0) { r = X509_alias_set1(x509, data, len); OPENSSL_free(data); if (!r) diff --git a/app/openssl/crypto/pkcs7/pk7_doit.c b/app/openssl/crypto/pkcs7/pk7_doit.c index 77fda3b8..d91aa116 100644 --- a/app/openssl/crypto/pkcs7/pk7_doit.c +++ b/app/openssl/crypto/pkcs7/pk7_doit.c @@ -440,6 +440,11 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) { case NID_pkcs7_signed: data_body=PKCS7_get_octet_string(p7->d.sign->contents); + if (!PKCS7_is_detached(p7) && data_body == NULL) + { + PKCS7err(PKCS7_F_PKCS7_DATADECODE,PKCS7_R_INVALID_SIGNED_DATA_TYPE); + goto err; + } md_sk=p7->d.sign->md_algs; break; case NID_pkcs7_signedAndEnveloped: @@ -928,6 +933,7 @@ int PKCS7_SIGNER_INFO_sign(PKCS7_SIGNER_INFO *si) if (EVP_DigestSignUpdate(&mctx,abuf,alen) <= 0) goto err; OPENSSL_free(abuf); + abuf = NULL; if (EVP_DigestSignFinal(&mctx, NULL, &siglen) <= 0) goto err; abuf = OPENSSL_malloc(siglen); diff --git a/app/openssl/crypto/pkcs7/pkcs7.h b/app/openssl/crypto/pkcs7/pkcs7.h index e4d44319..04f60379 100644 --- a/app/openssl/crypto/pkcs7/pkcs7.h +++ b/app/openssl/crypto/pkcs7/pkcs7.h @@ -453,6 +453,7 @@ void ERR_load_PKCS7_strings(void); #define PKCS7_R_ERROR_SETTING_CIPHER 121 #define PKCS7_R_INVALID_MIME_TYPE 131 #define PKCS7_R_INVALID_NULL_POINTER 143 +#define PKCS7_R_INVALID_SIGNED_DATA_TYPE 155 #define PKCS7_R_MIME_NO_CONTENT_TYPE 132 #define PKCS7_R_MIME_PARSE_ERROR 133 #define PKCS7_R_MIME_SIG_PARSE_ERROR 134 diff --git a/app/openssl/crypto/pkcs7/pkcs7err.c b/app/openssl/crypto/pkcs7/pkcs7err.c index d0af32a2..f3db08e0 100644 --- a/app/openssl/crypto/pkcs7/pkcs7err.c +++ b/app/openssl/crypto/pkcs7/pkcs7err.c @@ -1,6 +1,6 @@ /* crypto/pkcs7/pkcs7err.c */ /* ==================================================================== - * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -130,6 +130,7 @@ static ERR_STRING_DATA PKCS7_str_reasons[]= {ERR_REASON(PKCS7_R_ERROR_SETTING_CIPHER),"error setting cipher"}, {ERR_REASON(PKCS7_R_INVALID_MIME_TYPE) ,"invalid mime type"}, {ERR_REASON(PKCS7_R_INVALID_NULL_POINTER),"invalid null pointer"}, +{ERR_REASON(PKCS7_R_INVALID_SIGNED_DATA_TYPE),"invalid signed data type"}, {ERR_REASON(PKCS7_R_MIME_NO_CONTENT_TYPE),"mime no content type"}, {ERR_REASON(PKCS7_R_MIME_PARSE_ERROR) ,"mime parse error"}, {ERR_REASON(PKCS7_R_MIME_SIG_PARSE_ERROR),"mime sig parse error"}, diff --git a/app/openssl/crypto/rsa/rsa_ameth.c b/app/openssl/crypto/rsa/rsa_ameth.c index 5a2062f9..4c8ecd92 100644 --- a/app/openssl/crypto/rsa/rsa_ameth.c +++ b/app/openssl/crypto/rsa/rsa_ameth.c @@ -358,7 +358,7 @@ static int rsa_pss_param_print(BIO *bp, RSA_PSS_PARAMS *pss, if (i2a_ASN1_INTEGER(bp, pss->saltLength) <= 0) goto err; } - else if (BIO_puts(bp, "0x14 (default)") <= 0) + else if (BIO_puts(bp, "14 (default)") <= 0) goto err; BIO_puts(bp, "\n"); diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.pl b/app/openssl/crypto/sha/asm/sha1-armv4-large.pl index 33da3e0e..50bd07b3 100644 --- a/app/openssl/crypto/sha/asm/sha1-armv4-large.pl +++ b/app/openssl/crypto/sha/asm/sha1-armv4-large.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -52,6 +52,20 @@ # Profiler-assisted and platform-specific optimization resulted in 10% # improvement on Cortex A8 core and 12.2 cycles per byte. +# September 2013. +# +# Add NEON implementation (see sha1-586.pl for background info). On +# Cortex A8 it was measured to process one byte in 6.7 cycles or >80% +# faster than integer-only code. Because [fully unrolled] NEON code +# is ~2.5x larger and there are some redundant instructions executed +# when processing last block, improvement is not as big for smallest +# blocks, only ~30%. Snapdragon S4 is a tad faster, 6.4 cycles per +# byte, which is also >80% faster than integer-only code. + +# May 2014. +# +# Add ARMv8 code path performing at 2.35 cpb on Apple A7. + while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; @@ -153,12 +167,22 @@ $code=<<___; #include "arm_arch.h" .text +.code 32 .global sha1_block_data_order .type sha1_block_data_order,%function -.align 2 +.align 5 sha1_block_data_order: +#if __ARM_ARCH__>=7 + sub r3,pc,#8 @ sha1_block_data_order + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA1 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif stmdb sp!,{r4-r12,lr} add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp ldmia $ctx,{$a,$b,$c,$d,$e} @@ -233,16 +257,422 @@ $code.=<<___; moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.align 2 +.size sha1_block_data_order,.-sha1_block_data_order + +.align 5 .LK_00_19: .word 0x5a827999 .LK_20_39: .word 0x6ed9eba1 .LK_40_59: .word 0x8f1bbcdc .LK_60_79: .word 0xca62c1d6 -.size sha1_block_data_order,.-sha1_block_data_order -.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " -.align 2 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha1_block_data_order +.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " +.align 5 +___ +##################################################################### +# NEON stuff +# +{{{ +my @V=($a,$b,$c,$d,$e); +my ($K_XX_XX,$Ki,$t0,$t1,$Xfer,$saved_sp)=map("r$_",(8..12,14)); +my $Xi=4; +my @X=map("q$_",(8..11,0..3)); +my @Tx=("q12","q13"); +my ($K,$zero)=("q14","q15"); +my $j=0; + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +sub body_00_19 () { + ( + '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'. + '&bic ($t0,$d,$b)', + '&add ($e,$e,$Ki)', # e+=X[i]+K + '&and ($t1,$c,$b)', + '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))', + '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27) + '&eor ($t1,$t1,$t0)', # F_00_19 + '&mov ($b,$b,"ror#2")', # b=ROR(b,2) + '&add ($e,$e,$t1);'. # e+=F_00_19 + '$j++; unshift(@V,pop(@V));' + ) +} +sub body_20_39 () { + ( + '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'. + '&eor ($t0,$b,$d)', + '&add ($e,$e,$Ki)', # e+=X[i]+K + '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15)) if ($j<79)', + '&eor ($t1,$t0,$c)', # F_20_39 + '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27) + '&mov ($b,$b,"ror#2")', # b=ROR(b,2) + '&add ($e,$e,$t1);'. # e+=F_20_39 + '$j++; unshift(@V,pop(@V));' + ) +} +sub body_40_59 () { + ( + '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'. + '&add ($e,$e,$Ki)', # e+=X[i]+K + '&and ($t0,$c,$d)', + '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))', + '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27) + '&eor ($t1,$c,$d)', + '&add ($e,$e,$t0)', + '&and ($t1,$t1,$b)', + '&mov ($b,$b,"ror#2")', # b=ROR(b,2) + '&add ($e,$e,$t1);'. # e+=F_40_59 + '$j++; unshift(@V,pop(@V));' + ) +} + +sub Xupdate_16_31 () +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e); + + &vext_8 (@X[0],@X[-4&7],@X[-3&7],8); # compose "X[-14]" in "X[0]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@Tx[1],@X[-1&7],$K); + eval(shift(@insns)); + &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0); + eval(shift(@insns)); + &vext_8 (@Tx[0],@X[-1&7],$zero,4); # "X[-3]", 3 words + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@Tx[0],@Tx[0],@X[0]); # "X[0]"^="X[-3]"^"X[-8] + eval(shift(@insns)); + eval(shift(@insns)); + &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer + &sub ($Xfer,$Xfer,64) if ($Xi%4==0); + eval(shift(@insns)); + eval(shift(@insns)); + &vext_8 (@Tx[1],$zero,@Tx[0],4); # "X[0]"<<96, extract one dword + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@Tx[0],@Tx[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsri_32 (@X[0],@Tx[0],31); # "X[0]"<<<=1 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 (@Tx[0],@Tx[1],30); + eval(shift(@insns)); + eval(shift(@insns)); + &vshl_u32 (@Tx[1],@Tx[1],2); + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@X[0],@X[0],@Tx[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@X[0],@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2 + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; push(@X,shift(@X)); # "rotate" X[] +} + +sub Xupdate_32_79 () +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e); + + &vext_8 (@Tx[0],@X[-2&7],@X[-1&7],8); # compose "X[-6]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@Tx[1],@X[-1&7],$K); + eval(shift(@insns)); + &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0); + eval(shift(@insns)); + &veor (@Tx[0],@Tx[0],@X[0]); # "X[-6]"^="X[0]" + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 (@X[0],@Tx[0],30); + eval(shift(@insns)); + eval(shift(@insns)); + &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer + &sub ($Xfer,$Xfer,64) if ($Xi%4==0); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 (@X[0],@Tx[0],2); # "X[0]"="X[-6]"<<<2 + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; push(@X,shift(@X)); # "rotate" X[] +} + +sub Xuplast_80 () +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e); + + &vadd_i32 (@Tx[1],@X[-1&7],$K); + eval(shift(@insns)); + eval(shift(@insns)); + &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); + &sub ($Xfer,$Xfer,64); + + &teq ($inp,$len); + &sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX + &subeq ($inp,$inp,64); # reload last block to avoid SEGV + &vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!"); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_8 ("{@X[-2&7]-@X[-1&7]}","[$inp]!"); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!"); # load K_00_19 + eval(shift(@insns)); + eval(shift(@insns)); + &vrev32_8 (@X[-4&7],@X[-4&7]); + + foreach (@insns) { eval; } # remaining instructions + + $Xi=0; +} + +sub Xloop() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e); + + &vrev32_8 (@X[($Xi-3)&7],@X[($Xi-3)&7]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[$Xi&7],@X[($Xi-4)&7],$K); + eval(shift(@insns)); + eval(shift(@insns)); + &vst1_32 ("{@X[$Xi&7]}","[$Xfer,:128]!");# X[]+K xfer to IALU + + foreach (@insns) { eval; } + + $Xi++; +} + +$code.=<<___; +#if __ARM_ARCH__>=7 +.fpu neon + +.type sha1_block_data_order_neon,%function +.align 4 +sha1_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp + @ dmb @ errata #451034 on early Cortex A8 + @ vstmdb sp!,{d8-d15} @ ABI specification says so + mov $saved_sp,sp + sub sp,sp,#64 @ alloca + adr $K_XX_XX,.LK_00_19 + bic sp,sp,#15 @ align for 128-bit stores + + ldmia $ctx,{$a,$b,$c,$d,$e} @ load context + mov $Xfer,sp + + vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned + veor $zero,$zero,$zero + vld1.8 {@X[-2&7]-@X[-1&7]},[$inp]! + vld1.32 {${K}\[]},[$K_XX_XX,:32]! @ load K_00_19 + vrev32.8 @X[-4&7],@X[-4&7] @ yes, even on + vrev32.8 @X[-3&7],@X[-3&7] @ big-endian... + vrev32.8 @X[-2&7],@X[-2&7] + vadd.i32 @X[0],@X[-4&7],$K + vrev32.8 @X[-1&7],@X[-1&7] + vadd.i32 @X[1],@X[-3&7],$K + vst1.32 {@X[0]},[$Xfer,:128]! + vadd.i32 @X[2],@X[-2&7],$K + vst1.32 {@X[1]},[$Xfer,:128]! + vst1.32 {@X[2]},[$Xfer,:128]! + ldr $Ki,[sp] @ big RAW stall + +.Loop_neon: +___ + &Xupdate_16_31(\&body_00_19); + &Xupdate_16_31(\&body_00_19); + &Xupdate_16_31(\&body_00_19); + &Xupdate_16_31(\&body_00_19); + &Xupdate_32_79(\&body_00_19); + &Xupdate_32_79(\&body_20_39); + &Xupdate_32_79(\&body_20_39); + &Xupdate_32_79(\&body_20_39); + &Xupdate_32_79(\&body_20_39); + &Xupdate_32_79(\&body_20_39); + &Xupdate_32_79(\&body_40_59); + &Xupdate_32_79(\&body_40_59); + &Xupdate_32_79(\&body_40_59); + &Xupdate_32_79(\&body_40_59); + &Xupdate_32_79(\&body_40_59); + &Xupdate_32_79(\&body_20_39); + &Xuplast_80(\&body_20_39); + &Xloop(\&body_20_39); + &Xloop(\&body_20_39); + &Xloop(\&body_20_39); +$code.=<<___; + ldmia $ctx,{$Ki,$t0,$t1,$Xfer} @ accumulate context + add $a,$a,$Ki + ldr $Ki,[$ctx,#16] + add $b,$b,$t0 + add $c,$c,$t1 + add $d,$d,$Xfer + moveq sp,$saved_sp + add $e,$e,$Ki + ldrne $Ki,[sp] + stmia $ctx,{$a,$b,$c,$d,$e} + addne $Xfer,sp,#3*16 + bne .Loop_neon + + @ vldmia sp!,{d8-d15} + ldmia sp!,{r4-r12,pc} +.size sha1_block_data_order_neon,.-sha1_block_data_order_neon +#endif +___ +}}} +##################################################################### +# ARMv8 stuff +# +{{{ +my ($ABCD,$E,$E0,$E1)=map("q$_",(0..3)); +my @MSG=map("q$_",(4..7)); +my @Kxx=map("q$_",(8..11)); +my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14)); + +$code.=<<___; +#if __ARM_ARCH__>=7 +.type sha1_block_data_order_armv8,%function +.align 5 +sha1_block_data_order_armv8: +.LARMv8: + vstmdb sp!,{d8-d15} @ ABI specification says so + + veor $E,$E,$E + adr r3,.LK_00_19 + vld1.32 {$ABCD},[$ctx]! + vld1.32 {$E\[0]},[$ctx] + sub $ctx,$ctx,#16 + vld1.32 {@Kxx[0]\[]},[r3,:32]! + vld1.32 {@Kxx[1]\[]},[r3,:32]! + vld1.32 {@Kxx[2]\[]},[r3,:32]! + vld1.32 {@Kxx[3]\[]},[r3,:32] + +.Loop_v8: + vld1.8 {@MSG[0]-@MSG[1]},[$inp]! + vld1.8 {@MSG[2]-@MSG[3]},[$inp]! + vrev32.8 @MSG[0],@MSG[0] + vrev32.8 @MSG[1],@MSG[1] + + vadd.i32 $W0,@Kxx[0],@MSG[0] + vrev32.8 @MSG[2],@MSG[2] + vmov $ABCD_SAVE,$ABCD @ offload + subs $len,$len,#1 + + vadd.i32 $W1,@Kxx[0],@MSG[1] + vrev32.8 @MSG[3],@MSG[3] + sha1h $E1,$ABCD @ 0 + sha1c $ABCD,$E,$W0 + vadd.i32 $W0,@Kxx[$j],@MSG[2] + sha1su0 @MSG[0],@MSG[1],@MSG[2] +___ +for ($j=0,$i=1;$i<20-3;$i++) { +my $f=("c","p","m","p")[$i/5]; +$code.=<<___; + sha1h $E0,$ABCD @ $i + sha1$f $ABCD,$E1,$W1 + vadd.i32 $W1,@Kxx[$j],@MSG[3] + sha1su1 @MSG[0],@MSG[3] +___ +$code.=<<___ if ($i<20-4); + sha1su0 @MSG[1],@MSG[2],@MSG[3] ___ + ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0); + push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0); +} +$code.=<<___; + sha1h $E0,$ABCD @ $i + sha1p $ABCD,$E1,$W1 + vadd.i32 $W1,@Kxx[$j],@MSG[3] + + sha1h $E1,$ABCD @ 18 + sha1p $ABCD,$E0,$W0 + + sha1h $E0,$ABCD @ 19 + sha1p $ABCD,$E1,$W1 + + vadd.i32 $E,$E,$E0 + vadd.i32 $ABCD,$ABCD,$ABCD_SAVE + bne .Loop_v8 + + vst1.32 {$ABCD},[$ctx]! + vst1.32 {$E\[0]},[$ctx] + + vldmia sp!,{d8-d15} + ret @ bx lr +.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 +#endif +___ +}}} +$code.=<<___; +.comm OPENSSL_armcap_P,4,4 +___ + +{ my %opcode = ( + "sha1c" => 0xf2000c40, "sha1p" => 0xf2100c40, + "sha1m" => 0xf2200c40, "sha1su0" => 0xf2300c40, + "sha1h" => 0xf3b902c0, "sha1su1" => 0xf3ba0380 ); + + sub unsha1 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } +} + +foreach (split($/,$code)) { + s/{q([0-9]+)\[\]}/sprintf "{d%d[],d%d[]}",2*$1,2*$1+1/eo or + s/{q([0-9]+)\[0\]}/sprintf "{d%d[0]}",2*$1/eo; + + s/\b(sha1\w+)\s+(q.*)/unsha1($1,$2)/geo; + + s/\bret\b/bx lr/o or + s/\bbx\s+lr\b/.word\t0xe12fff1e/o; # make it possible to compile with -march=armv4 + + print $_,$/; +} -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; close STDOUT; # enforce flush diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.s b/app/openssl/crypto/sha/asm/sha1-armv4-large.s index 639ae78a..a1562883 100644 --- a/app/openssl/crypto/sha/asm/sha1-armv4-large.s +++ b/app/openssl/crypto/sha/asm/sha1-armv4-large.s @@ -1,12 +1,22 @@ #include "arm_arch.h" .text +.code 32 .global sha1_block_data_order .type sha1_block_data_order,%function -.align 2 +.align 5 sha1_block_data_order: +#if __ARM_ARCH__>=7 + sub r3,pc,#8 @ sha1_block_data_order + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA1 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif stmdb sp!,{r4-r12,lr} add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 ldmia r0,{r3,r4,r5,r6,r7} @@ -442,11 +452,999 @@ sha1_block_data_order: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.align 2 +.size sha1_block_data_order,.-sha1_block_data_order + +.align 5 .LK_00_19: .word 0x5a827999 .LK_20_39: .word 0x6ed9eba1 .LK_40_59: .word 0x8f1bbcdc .LK_60_79: .word 0xca62c1d6 -.size sha1_block_data_order,.-sha1_block_data_order -.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " -.align 2 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha1_block_data_order +.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " +.align 5 +#if __ARM_ARCH__>=7 +.fpu neon + +.type sha1_block_data_order_neon,%function +.align 4 +sha1_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + @ dmb @ errata #451034 on early Cortex A8 + @ vstmdb sp!,{d8-d15} @ ABI specification says so + mov r14,sp + sub sp,sp,#64 @ alloca + adr r8,.LK_00_19 + bic sp,sp,#15 @ align for 128-bit stores + + ldmia r0,{r3,r4,r5,r6,r7} @ load context + mov r12,sp + + vld1.8 {q0-q1},[r1]! @ handles unaligned + veor q15,q15,q15 + vld1.8 {q2-q3},[r1]! + vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19 + vrev32.8 q0,q0 @ yes, even on + vrev32.8 q1,q1 @ big-endian... + vrev32.8 q2,q2 + vadd.i32 q8,q0,q14 + vrev32.8 q3,q3 + vadd.i32 q9,q1,q14 + vst1.32 {q8},[r12,:128]! + vadd.i32 q10,q2,q14 + vst1.32 {q9},[r12,:128]! + vst1.32 {q10},[r12,:128]! + ldr r9,[sp] @ big RAW stall + +.Loop_neon: + vext.8 q8,q0,q1,#8 + bic r10,r6,r4 + add r7,r7,r9 + and r11,r5,r4 + vadd.i32 q13,q3,q14 + ldr r9,[sp,#4] + add r7,r7,r3,ror#27 + vext.8 q12,q3,q15,#4 + eor r11,r11,r10 + mov r4,r4,ror#2 + add r7,r7,r11 + veor q8,q8,q0 + bic r10,r5,r3 + add r6,r6,r9 + veor q12,q12,q2 + and r11,r4,r3 + ldr r9,[sp,#8] + veor q12,q12,q8 + add r6,r6,r7,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q13,q15,q12,#4 + bic r10,r4,r7 + add r5,r5,r9 + vadd.i32 q8,q12,q12 + and r11,r3,r7 + ldr r9,[sp,#12] + vsri.32 q8,q12,#31 + add r5,r5,r6,ror#27 + eor r11,r11,r10 + mov r7,r7,ror#2 + vshr.u32 q12,q13,#30 + add r5,r5,r11 + bic r10,r3,r6 + vshl.u32 q13,q13,#2 + add r4,r4,r9 + and r11,r7,r6 + veor q8,q8,q12 + ldr r9,[sp,#16] + add r4,r4,r5,ror#27 + veor q8,q8,q13 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q9,q1,q2,#8 + bic r10,r7,r5 + add r3,r3,r9 + and r11,r6,r5 + vadd.i32 q13,q8,q14 + ldr r9,[sp,#20] + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r4,ror#27 + vext.8 q12,q8,q15,#4 + eor r11,r11,r10 + mov r5,r5,ror#2 + add r3,r3,r11 + veor q9,q9,q1 + bic r10,r6,r4 + add r7,r7,r9 + veor q12,q12,q3 + and r11,r5,r4 + ldr r9,[sp,#24] + veor q12,q12,q9 + add r7,r7,r3,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q13,q15,q12,#4 + bic r10,r5,r3 + add r6,r6,r9 + vadd.i32 q9,q12,q12 + and r11,r4,r3 + ldr r9,[sp,#28] + vsri.32 q9,q12,#31 + add r6,r6,r7,ror#27 + eor r11,r11,r10 + mov r3,r3,ror#2 + vshr.u32 q12,q13,#30 + add r6,r6,r11 + bic r10,r4,r7 + vshl.u32 q13,q13,#2 + add r5,r5,r9 + and r11,r3,r7 + veor q9,q9,q12 + ldr r9,[sp,#32] + add r5,r5,r6,ror#27 + veor q9,q9,q13 + eor r11,r11,r10 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q10,q2,q3,#8 + bic r10,r3,r6 + add r4,r4,r9 + and r11,r7,r6 + vadd.i32 q13,q9,q14 + ldr r9,[sp,#36] + add r4,r4,r5,ror#27 + vext.8 q12,q9,q15,#4 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + veor q10,q10,q2 + bic r10,r7,r5 + add r3,r3,r9 + veor q12,q12,q8 + and r11,r6,r5 + ldr r9,[sp,#40] + veor q12,q12,q10 + add r3,r3,r4,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q13,q15,q12,#4 + bic r10,r6,r4 + add r7,r7,r9 + vadd.i32 q10,q12,q12 + and r11,r5,r4 + ldr r9,[sp,#44] + vsri.32 q10,q12,#31 + add r7,r7,r3,ror#27 + eor r11,r11,r10 + mov r4,r4,ror#2 + vshr.u32 q12,q13,#30 + add r7,r7,r11 + bic r10,r5,r3 + vshl.u32 q13,q13,#2 + add r6,r6,r9 + and r11,r4,r3 + veor q10,q10,q12 + ldr r9,[sp,#48] + add r6,r6,r7,ror#27 + veor q10,q10,q13 + eor r11,r11,r10 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q11,q3,q8,#8 + bic r10,r4,r7 + add r5,r5,r9 + and r11,r3,r7 + vadd.i32 q13,q10,q14 + ldr r9,[sp,#52] + add r5,r5,r6,ror#27 + vext.8 q12,q10,q15,#4 + eor r11,r11,r10 + mov r7,r7,ror#2 + add r5,r5,r11 + veor q11,q11,q3 + bic r10,r3,r6 + add r4,r4,r9 + veor q12,q12,q9 + and r11,r7,r6 + ldr r9,[sp,#56] + veor q12,q12,q11 + add r4,r4,r5,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q13,q15,q12,#4 + bic r10,r7,r5 + add r3,r3,r9 + vadd.i32 q11,q12,q12 + and r11,r6,r5 + ldr r9,[sp,#60] + vsri.32 q11,q12,#31 + add r3,r3,r4,ror#27 + eor r11,r11,r10 + mov r5,r5,ror#2 + vshr.u32 q12,q13,#30 + add r3,r3,r11 + bic r10,r6,r4 + vshl.u32 q13,q13,#2 + add r7,r7,r9 + and r11,r5,r4 + veor q11,q11,q12 + ldr r9,[sp,#0] + add r7,r7,r3,ror#27 + veor q11,q11,q13 + eor r11,r11,r10 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q10,q11,#8 + bic r10,r5,r3 + add r6,r6,r9 + and r11,r4,r3 + veor q0,q0,q8 + ldr r9,[sp,#4] + add r6,r6,r7,ror#27 + veor q0,q0,q1 + eor r11,r11,r10 + mov r3,r3,ror#2 + vadd.i32 q13,q11,q14 + add r6,r6,r11 + bic r10,r4,r7 + veor q12,q12,q0 + add r5,r5,r9 + and r11,r3,r7 + vshr.u32 q0,q12,#30 + ldr r9,[sp,#8] + add r5,r5,r6,ror#27 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + eor r11,r11,r10 + mov r7,r7,ror#2 + vsli.32 q0,q12,#2 + add r5,r5,r11 + bic r10,r3,r6 + add r4,r4,r9 + and r11,r7,r6 + ldr r9,[sp,#12] + add r4,r4,r5,ror#27 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + bic r10,r7,r5 + add r3,r3,r9 + and r11,r6,r5 + ldr r9,[sp,#16] + add r3,r3,r4,ror#27 + eor r11,r11,r10 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q11,q0,#8 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#20] + veor q1,q1,q9 + eor r11,r10,r5 + add r7,r7,r3,ror#27 + veor q1,q1,q2 + mov r4,r4,ror#2 + add r7,r7,r11 + vadd.i32 q13,q0,q14 + eor r10,r3,r5 + add r6,r6,r9 + veor q12,q12,q1 + ldr r9,[sp,#24] + eor r11,r10,r4 + vshr.u32 q1,q12,#30 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + vst1.32 {q13},[r12,:128]! + add r6,r6,r11 + eor r10,r7,r4 + vsli.32 q1,q12,#2 + add r5,r5,r9 + ldr r9,[sp,#28] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#32] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q12,q0,q1,#8 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#36] + veor q2,q2,q10 + eor r11,r10,r6 + add r3,r3,r4,ror#27 + veor q2,q2,q3 + mov r5,r5,ror#2 + add r3,r3,r11 + vadd.i32 q13,q1,q14 + eor r10,r4,r6 + vld1.32 {d28[],d29[]},[r8,:32]! + add r7,r7,r9 + veor q12,q12,q2 + ldr r9,[sp,#40] + eor r11,r10,r5 + vshr.u32 q2,q12,#30 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + vst1.32 {q13},[r12,:128]! + add r7,r7,r11 + eor r10,r3,r5 + vsli.32 q2,q12,#2 + add r6,r6,r9 + ldr r9,[sp,#44] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#48] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q12,q1,q2,#8 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#52] + veor q3,q3,q11 + eor r11,r10,r7 + add r4,r4,r5,ror#27 + veor q3,q3,q8 + mov r6,r6,ror#2 + add r4,r4,r11 + vadd.i32 q13,q2,q14 + eor r10,r5,r7 + add r3,r3,r9 + veor q12,q12,q3 + ldr r9,[sp,#56] + eor r11,r10,r6 + vshr.u32 q3,q12,#30 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + vst1.32 {q13},[r12,:128]! + add r3,r3,r11 + eor r10,r4,r6 + vsli.32 q3,q12,#2 + add r7,r7,r9 + ldr r9,[sp,#60] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#0] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q12,q2,q3,#8 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#4] + veor q8,q8,q0 + eor r11,r10,r3 + add r5,r5,r6,ror#27 + veor q8,q8,q9 + mov r7,r7,ror#2 + add r5,r5,r11 + vadd.i32 q13,q3,q14 + eor r10,r6,r3 + add r4,r4,r9 + veor q12,q12,q8 + ldr r9,[sp,#8] + eor r11,r10,r7 + vshr.u32 q8,q12,#30 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + add r4,r4,r11 + eor r10,r5,r7 + vsli.32 q8,q12,#2 + add r3,r3,r9 + ldr r9,[sp,#12] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#16] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q3,q8,#8 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#20] + veor q9,q9,q1 + eor r11,r10,r4 + add r6,r6,r7,ror#27 + veor q9,q9,q10 + mov r3,r3,ror#2 + add r6,r6,r11 + vadd.i32 q13,q8,q14 + eor r10,r7,r4 + add r5,r5,r9 + veor q12,q12,q9 + ldr r9,[sp,#24] + eor r11,r10,r3 + vshr.u32 q9,q12,#30 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + vst1.32 {q13},[r12,:128]! + add r5,r5,r11 + eor r10,r6,r3 + vsli.32 q9,q12,#2 + add r4,r4,r9 + ldr r9,[sp,#28] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#32] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q8,q9,#8 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#36] + veor q10,q10,q2 + add r7,r7,r3,ror#27 + eor r11,r5,r6 + veor q10,q10,q11 + add r7,r7,r10 + and r11,r11,r4 + vadd.i32 q13,q9,q14 + mov r4,r4,ror#2 + add r7,r7,r11 + veor q12,q12,q10 + add r6,r6,r9 + and r10,r4,r5 + vshr.u32 q10,q12,#30 + ldr r9,[sp,#40] + add r6,r6,r7,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r4,r5 + add r6,r6,r10 + vsli.32 q10,q12,#2 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#44] + add r5,r5,r6,ror#27 + eor r11,r3,r4 + add r5,r5,r10 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#48] + add r4,r4,r5,ror#27 + eor r11,r7,r3 + add r4,r4,r10 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q12,q9,q10,#8 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#52] + veor q11,q11,q3 + add r3,r3,r4,ror#27 + eor r11,r6,r7 + veor q11,q11,q0 + add r3,r3,r10 + and r11,r11,r5 + vadd.i32 q13,q10,q14 + mov r5,r5,ror#2 + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r11 + veor q12,q12,q11 + add r7,r7,r9 + and r10,r5,r6 + vshr.u32 q11,q12,#30 + ldr r9,[sp,#56] + add r7,r7,r3,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r5,r6 + add r7,r7,r10 + vsli.32 q11,q12,#2 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#60] + add r6,r6,r7,ror#27 + eor r11,r4,r5 + add r6,r6,r10 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#0] + add r5,r5,r6,ror#27 + eor r11,r3,r4 + add r5,r5,r10 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q12,q10,q11,#8 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#4] + veor q0,q0,q8 + add r4,r4,r5,ror#27 + eor r11,r7,r3 + veor q0,q0,q1 + add r4,r4,r10 + and r11,r11,r6 + vadd.i32 q13,q11,q14 + mov r6,r6,ror#2 + add r4,r4,r11 + veor q12,q12,q0 + add r3,r3,r9 + and r10,r6,r7 + vshr.u32 q0,q12,#30 + ldr r9,[sp,#8] + add r3,r3,r4,ror#27 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + eor r11,r6,r7 + add r3,r3,r10 + vsli.32 q0,q12,#2 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#12] + add r7,r7,r3,ror#27 + eor r11,r5,r6 + add r7,r7,r10 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#16] + add r6,r6,r7,ror#27 + eor r11,r4,r5 + add r6,r6,r10 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q12,q11,q0,#8 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#20] + veor q1,q1,q9 + add r5,r5,r6,ror#27 + eor r11,r3,r4 + veor q1,q1,q2 + add r5,r5,r10 + and r11,r11,r7 + vadd.i32 q13,q0,q14 + mov r7,r7,ror#2 + add r5,r5,r11 + veor q12,q12,q1 + add r4,r4,r9 + and r10,r7,r3 + vshr.u32 q1,q12,#30 + ldr r9,[sp,#24] + add r4,r4,r5,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r7,r3 + add r4,r4,r10 + vsli.32 q1,q12,#2 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#28] + add r3,r3,r4,ror#27 + eor r11,r6,r7 + add r3,r3,r10 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#32] + add r7,r7,r3,ror#27 + eor r11,r5,r6 + add r7,r7,r10 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q0,q1,#8 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#36] + veor q2,q2,q10 + add r6,r6,r7,ror#27 + eor r11,r4,r5 + veor q2,q2,q3 + add r6,r6,r10 + and r11,r11,r3 + vadd.i32 q13,q1,q14 + mov r3,r3,ror#2 + add r6,r6,r11 + veor q12,q12,q2 + add r5,r5,r9 + and r10,r3,r4 + vshr.u32 q2,q12,#30 + ldr r9,[sp,#40] + add r5,r5,r6,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r3,r4 + add r5,r5,r10 + vsli.32 q2,q12,#2 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#44] + add r4,r4,r5,ror#27 + eor r11,r7,r3 + add r4,r4,r10 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#48] + add r3,r3,r4,ror#27 + eor r11,r6,r7 + add r3,r3,r10 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q1,q2,#8 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#52] + veor q3,q3,q11 + eor r11,r10,r5 + add r7,r7,r3,ror#27 + veor q3,q3,q8 + mov r4,r4,ror#2 + add r7,r7,r11 + vadd.i32 q13,q2,q14 + eor r10,r3,r5 + add r6,r6,r9 + veor q12,q12,q3 + ldr r9,[sp,#56] + eor r11,r10,r4 + vshr.u32 q3,q12,#30 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + vst1.32 {q13},[r12,:128]! + add r6,r6,r11 + eor r10,r7,r4 + vsli.32 q3,q12,#2 + add r5,r5,r9 + ldr r9,[sp,#60] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#0] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + vadd.i32 q13,q3,q14 + eor r10,r5,r7 + add r3,r3,r9 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + teq r1,r2 + sub r8,r8,#16 + subeq r1,r1,#64 + vld1.8 {q0-q1},[r1]! + ldr r9,[sp,#4] + eor r11,r10,r6 + vld1.8 {q2-q3},[r1]! + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r11 + eor r10,r4,r6 + vrev32.8 q0,q0 + add r7,r7,r9 + ldr r9,[sp,#8] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#12] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#16] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + vrev32.8 q1,q1 + eor r10,r6,r3 + add r4,r4,r9 + vadd.i32 q8,q0,q14 + ldr r9,[sp,#20] + eor r11,r10,r7 + vst1.32 {q8},[r12,:128]! + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#24] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#28] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#32] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + vrev32.8 q2,q2 + eor r10,r7,r4 + add r5,r5,r9 + vadd.i32 q9,q1,q14 + ldr r9,[sp,#36] + eor r11,r10,r3 + vst1.32 {q9},[r12,:128]! + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#40] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#44] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#48] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + vrev32.8 q3,q3 + eor r10,r3,r5 + add r6,r6,r9 + vadd.i32 q10,q2,q14 + ldr r9,[sp,#52] + eor r11,r10,r4 + vst1.32 {q10},[r12,:128]! + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#56] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#60] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + ldmia r0,{r9,r10,r11,r12} @ accumulate context + add r3,r3,r9 + ldr r9,[r0,#16] + add r4,r4,r10 + add r5,r5,r11 + add r6,r6,r12 + moveq sp,r14 + add r7,r7,r9 + ldrne r9,[sp] + stmia r0,{r3,r4,r5,r6,r7} + addne r12,sp,#3*16 + bne .Loop_neon + + @ vldmia sp!,{d8-d15} + ldmia sp!,{r4-r12,pc} +.size sha1_block_data_order_neon,.-sha1_block_data_order_neon +#endif +#if __ARM_ARCH__>=7 +.type sha1_block_data_order_armv8,%function +.align 5 +sha1_block_data_order_armv8: +.LARMv8: + vstmdb sp!,{d8-d15} @ ABI specification says so + + veor q1,q1,q1 + adr r3,.LK_00_19 + vld1.32 {q0},[r0]! + vld1.32 {d2[0]},[r0] + sub r0,r0,#16 + vld1.32 {d16[],d17[]},[r3,:32]! + vld1.32 {d18[],d19[]},[r3,:32]! + vld1.32 {d20[],d21[]},[r3,:32]! + vld1.32 {d22[],d23[]},[r3,:32] + +.Loop_v8: + vld1.8 {q4-q5},[r1]! + vld1.8 {q6-q7},[r1]! + vrev32.8 q4,q4 + vrev32.8 q5,q5 + + vadd.i32 q12,q8,q4 + vrev32.8 q6,q6 + vmov q14,q0 @ offload + subs r2,r2,#1 + + vadd.i32 q13,q8,q5 + vrev32.8 q7,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 0 + .byte 0x68,0x0c,0x02,0xf2 @ sha1c q0,q1,q12 + vadd.i32 q12,q8,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 1 + .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 + vadd.i32 q13,q8,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 2 + .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 + vadd.i32 q12,q8,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 3 + .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 + vadd.i32 q13,q9,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 4 + .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 + vadd.i32 q12,q9,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 5 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q9,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 6 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q9,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 7 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q9,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 8 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q10,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 9 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q10,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 10 + .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q10,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 11 + .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 + vadd.i32 q13,q10,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 12 + .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q10,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 13 + .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 + vadd.i32 q13,q11,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 14 + .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q11,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 15 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q11,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 16 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q11,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 17 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q11,q7 + + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 18 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 19 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + + vadd.i32 q1,q1,q2 + vadd.i32 q0,q0,q14 + bne .Loop_v8 + + vst1.32 {q0},[r0]! + vst1.32 {d2[0]},[r0] + + vldmia sp!,{d8-d15} + bx lr @ bx lr +.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 +#endif +.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha1-armv8.S b/app/openssl/crypto/sha/asm/sha1-armv8.S new file mode 100644 index 00000000..f9d12625 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha1-armv8.S @@ -0,0 +1,1211 @@ +#include "arm_arch.h" + +.text + +.globl sha1_block_data_order +.type sha1_block_data_order,%function +.align 6 +sha1_block_data_order: + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA1 + b.ne .Lv8_entry + + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp w20,w21,[x0] + ldp w22,w23,[x0,#8] + ldr w24,[x0,#16] + +.Loop: + ldr x3,[x1],#64 + movz w28,#0x7999 + sub x2,x2,#1 + movk w28,#0x5a82,lsl#16 +#ifdef __ARMEB__ + ror x3,x3,#32 +#else + rev32 x3,x3 +#endif + add w24,w24,w28 // warm it up + add w24,w24,w3 + lsr x4,x3,#32 + ldr x5,[x1,#-56] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w4 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x5,x5,#32 +#else + rev32 x5,x5 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w5 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x6,x5,#32 + ldr x7,[x1,#-48] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w6 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x7,x7,#32 +#else + rev32 x7,x7 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w7 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x8,x7,#32 + ldr x9,[x1,#-40] + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w8 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x9,x9,#32 +#else + rev32 x9,x9 +#endif + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w9 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + lsr x10,x9,#32 + ldr x11,[x1,#-32] + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w10 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x11,x11,#32 +#else + rev32 x11,x11 +#endif + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w11 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + lsr x12,x11,#32 + ldr x13,[x1,#-24] + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w12 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x13,x13,#32 +#else + rev32 x13,x13 +#endif + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w13 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + lsr x14,x13,#32 + ldr x15,[x1,#-16] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w14 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x15,x15,#32 +#else + rev32 x15,x15 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w15 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x16,x15,#32 + ldr x17,[x1,#-8] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w16 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x17,x17,#32 +#else + rev32 x17,x17 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w17 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x19,x17,#32 + eor w3,w3,w5 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w3,w3,w11 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w3,w3,w16 + ror w22,w22,#2 + add w24,w24,w19 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + eor w4,w4,w12 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + eor w4,w4,w17 + ror w21,w21,#2 + add w23,w23,w3 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + eor w5,w5,w13 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + eor w5,w5,w19 + ror w20,w20,#2 + add w22,w22,w4 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + eor w6,w6,w14 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + eor w6,w6,w3 + ror w24,w24,#2 + add w21,w21,w5 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + eor w7,w7,w15 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + eor w7,w7,w4 + ror w23,w23,#2 + add w20,w20,w6 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w7,w7,#31 + movz w28,#0xeba1 + movk w28,#0x6ed9,lsl#16 + eor w8,w8,w10 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w8,w8,w16 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w8,w8,w5 + ror w22,w22,#2 + add w24,w24,w7 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w9,w9,w6 + add w23,w23,w8 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w10,w10,w7 + add w22,w22,w9 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w11,w11,w8 + add w21,w21,w10 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w12,w12,w9 + add w20,w20,w11 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w13,w13,w10 + add w24,w24,w12 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w14,w14,w11 + add w23,w23,w13 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w15,w15,w12 + add w22,w22,w14 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w16,w16,w13 + add w21,w21,w15 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w17,w17,w14 + add w20,w20,w16 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w19,w19,w15 + add w24,w24,w17 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w3,w3,w16 + add w23,w23,w19 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w4,w4,w17 + add w22,w22,w3 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w5,w5,w19 + add w21,w21,w4 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w6,w6,w3 + add w20,w20,w5 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w7,w7,w4 + add w24,w24,w6 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w8,w8,w5 + add w23,w23,w7 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w9,w9,w6 + add w22,w22,w8 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w10,w10,w7 + add w21,w21,w9 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w11,w11,w8 + add w20,w20,w10 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w11,w11,#31 + movz w28,#0xbcdc + movk w28,#0x8f1b,lsl#16 + eor w12,w12,w14 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w12,w12,w9 + add w24,w24,w11 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w13,w13,w15 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w13,w13,w5 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w13,w13,w10 + add w23,w23,w12 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w14,w14,w16 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w14,w14,w6 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w14,w14,w11 + add w22,w22,w13 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w15,w15,w17 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w15,w15,w7 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w15,w15,w12 + add w21,w21,w14 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w15,w15,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w16,w16,w19 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w16,w16,w8 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w16,w16,w13 + add w20,w20,w15 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w16,w16,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w17,w17,w3 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w17,w17,w9 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w17,w17,w14 + add w24,w24,w16 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w17,w17,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w19,w19,w4 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w19,w19,w10 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w19,w19,w15 + add w23,w23,w17 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w19,w19,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w3,w3,w5 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w3,w3,w11 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w3,w3,w16 + add w22,w22,w19 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w3,w3,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w4,w4,w6 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w4,w4,w12 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w4,w4,w17 + add w21,w21,w3 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w4,w4,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w5,w5,w7 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w5,w5,w13 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w5,w5,w19 + add w20,w20,w4 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w5,w5,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w6,w6,w8 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w6,w6,w14 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w6,w6,w3 + add w24,w24,w5 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w6,w6,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w7,w7,w9 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w7,w7,w15 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w7,w7,w4 + add w23,w23,w6 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w7,w7,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w8,w8,w10 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w8,w8,w16 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w8,w8,w5 + add w22,w22,w7 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w8,w8,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w9,w9,w11 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w9,w9,w17 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w9,w9,w6 + add w21,w21,w8 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w9,w9,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w10,w10,w12 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w10,w10,w19 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w10,w10,w7 + add w20,w20,w9 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w10,w10,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w11,w11,w13 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w11,w11,w3 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w11,w11,w8 + add w24,w24,w10 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w11,w11,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w12,w12,w14 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w12,w12,w4 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w12,w12,w9 + add w23,w23,w11 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w13,w13,w15 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w13,w13,w5 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w13,w13,w10 + add w22,w22,w12 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w14,w14,w16 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w14,w14,w6 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w14,w14,w11 + add w21,w21,w13 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w15,w15,w17 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w15,w15,w7 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w15,w15,w12 + add w20,w20,w14 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w15,w15,#31 + movz w28,#0xc1d6 + movk w28,#0xca62,lsl#16 + orr w25,w22,w23 + and w26,w22,w23 + eor w16,w16,w19 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w16,w16,w8 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w16,w16,w13 + add w24,w24,w15 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w17,w17,w14 + add w23,w23,w16 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w19,w19,w15 + add w22,w22,w17 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w3,w3,w16 + add w21,w21,w19 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w4,w4,w17 + add w20,w20,w3 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w5,w5,w19 + add w24,w24,w4 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w6,w6,w3 + add w23,w23,w5 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w7,w7,w4 + add w22,w22,w6 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w8,w8,w5 + add w21,w21,w7 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w9,w9,w6 + add w20,w20,w8 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w10,w10,w7 + add w24,w24,w9 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w11,w11,w8 + add w23,w23,w10 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w12,w12,w9 + add w22,w22,w11 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w13,w13,w10 + add w21,w21,w12 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w14,w14,w11 + add w20,w20,w13 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w15,w15,w12 + add w24,w24,w14 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w16,w16,w13 + add w23,w23,w15 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w17,w17,w14 + add w22,w22,w16 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w19,w19,w15 + add w21,w21,w17 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w19,w19,#31 + ldp w4,w5,[x0] + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w19 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ldp w6,w7,[x0,#8] + eor w25,w24,w22 + ror w27,w21,#27 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + ldr w8,[x0,#16] + add w20,w20,w25 // e+=F(b,c,d) + add w21,w21,w5 + add w22,w22,w6 + add w20,w20,w4 + add w23,w23,w7 + add w24,w24,w8 + stp w20,w21,[x0] + stp w22,w23,[x0,#8] + str w24,[x0,#16] + cbnz x2,.Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_armv8,%function +.align 6 +sha1_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + adr x4,.Lconst + eor v1.16b,v1.16b,v1.16b + ld1 {v0.4s},[x0],#16 + ld1 {v1.s}[0],[x0] + sub x0,x0,#16 + ld1 {v16.4s-v19.4s},[x4] + +.Loop_hw: + ld1 {v4.16b-v7.16b},[x1],#64 + sub x2,x2,#1 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + + add v20.4s,v16.4s,v4.4s + rev32 v6.16b,v6.16b + orr v22.16b,v0.16b,v0.16b // offload + + add v21.4s,v16.4s,v5.4s + rev32 v7.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b + .inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0 + add v20.4s,v16.4s,v6.4s + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 1 + .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v16.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 2 + .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v16.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 3 + .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 4 + .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 5 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 6 + .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 7 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 8 + .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 9 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 10 + .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 11 + .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 12 + .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 13 + .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 14 + .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 15 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 16 + .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 17 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s + + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 18 + .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 19 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + + add v1.4s,v1.4s,v2.4s + add v0.4s,v0.4s,v22.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s},[x0],#16 + st1 {v1.s}[0],[x0] + + ldr x29,[sp],#16 + ret +.size sha1_block_armv8,.-sha1_block_armv8 +.align 6 +.Lconst: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 +.LOPENSSL_armcap_P: +.quad OPENSSL_armcap_P-. +.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by " +.align 2 +.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha1-armv8.pl b/app/openssl/crypto/sha/asm/sha1-armv8.pl new file mode 100644 index 00000000..c1f552b6 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha1-armv8.pl @@ -0,0 +1,333 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA1 for ARMv8. +# +# Performance in cycles per processed byte and improvement coefficient +# over code generated with "default" compiler: +# +# hardware-assisted software(*) +# Apple A7 2.31 4.13 (+14%) +# Cortex-A5x n/a n/a +# +# (*) Software results are presented mostly for reference purposes. + +$flavour = shift; +open STDOUT,">".shift; + +($ctx,$inp,$num)=("x0","x1","x2"); +@Xw=map("w$_",(3..17,19)); +@Xx=map("x$_",(3..17,19)); +@V=($A,$B,$C,$D,$E)=map("w$_",(20..24)); +($t0,$t1,$t2,$K)=map("w$_",(25..28)); + + +sub BODY_00_19 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=($i+2)&15; + +$code.=<<___ if ($i<15 && !($i&1)); + lsr @Xx[$i+1],@Xx[$i],#32 +___ +$code.=<<___ if ($i<14 && !($i&1)); + ldr @Xx[$i+2],[$inp,#`($i+2)*4-64`] +___ +$code.=<<___ if ($i<14 && ($i&1)); +#ifdef __ARMEB__ + ror @Xx[$i+1],@Xx[$i+1],#32 +#else + rev32 @Xx[$i+1],@Xx[$i+1] +#endif +___ +$code.=<<___ if ($i<14); + bic $t0,$d,$b + and $t1,$c,$b + ror $t2,$a,#27 + add $d,$d,$K // future e+=K + orr $t0,$t0,$t1 + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) +___ +$code.=<<___ if ($i==19); + movz $K,#0xeba1 + movk $K,#0x6ed9,lsl#16 +___ +$code.=<<___ if ($i>=14); + eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] + bic $t0,$d,$b + and $t1,$c,$b + ror $t2,$a,#27 + eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] + add $d,$d,$K // future e+=K + orr $t0,$t0,$t1 + add $e,$e,$t2 // e+=rot(a,5) + eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] + ror $b,$b,#2 + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) + ror @Xw[$j],@Xw[$j],#31 +___ +} + +sub BODY_40_59 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=($i+2)&15; + +$code.=<<___ if ($i==59); + movz $K,#0xc1d6 + movk $K,#0xca62,lsl#16 +___ +$code.=<<___; + orr $t0,$b,$c + and $t1,$b,$c + eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] + ror $t2,$a,#27 + and $t0,$t0,$d + add $d,$d,$K // future e+=K + eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] + add $e,$e,$t2 // e+=rot(a,5) + orr $t0,$t0,$t1 + ror $b,$b,#2 + eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) + ror @Xw[$j],@Xw[$j],#31 +___ +} + +sub BODY_20_39 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=($i+2)&15; + +$code.=<<___ if ($i==39); + movz $K,#0xbcdc + movk $K,#0x8f1b,lsl#16 +___ +$code.=<<___ if ($i<78); + eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] + eor $t0,$d,$b + ror $t2,$a,#27 + add $d,$d,$K // future e+=K + eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] + eor $t0,$t0,$c + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) + ror @Xw[$j],@Xw[$j],#31 +___ +$code.=<<___ if ($i==78); + ldp @Xw[1],@Xw[2],[$ctx] + eor $t0,$d,$b + ror $t2,$a,#27 + add $d,$d,$K // future e+=K + eor $t0,$t0,$c + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) +___ +$code.=<<___ if ($i==79); + ldp @Xw[3],@Xw[4],[$ctx,#8] + eor $t0,$d,$b + ror $t2,$a,#27 + eor $t0,$t0,$c + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + ldr @Xw[5],[$ctx,#16] + add $e,$e,$t0 // e+=F(b,c,d) +___ +} + +$code.=<<___; +#include "arm_arch.h" + +.text + +.globl sha1_block_data_order +.type sha1_block_data_order,%function +.align 6 +sha1_block_data_order: + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA1 + b.ne .Lv8_entry + + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp $A,$B,[$ctx] + ldp $C,$D,[$ctx,#8] + ldr $E,[$ctx,#16] + +.Loop: + ldr @Xx[0],[$inp],#64 + movz $K,#0x7999 + sub $num,$num,#1 + movk $K,#0x5a82,lsl#16 +#ifdef __ARMEB__ + ror $Xx[0],@Xx[0],#32 +#else + rev32 @Xx[0],@Xx[0] +#endif + add $E,$E,$K // warm it up + add $E,$E,@Xw[0] +___ +for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } +for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } +for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + add $B,$B,@Xw[2] + add $C,$C,@Xw[3] + add $A,$A,@Xw[1] + add $D,$D,@Xw[4] + add $E,$E,@Xw[5] + stp $A,$B,[$ctx] + stp $C,$D,[$ctx,#8] + str $E,[$ctx,#16] + cbnz $num,.Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret +.size sha1_block_data_order,.-sha1_block_data_order +___ +{{{ +my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3)); +my @MSG=map("v$_.16b",(4..7)); +my @Kxx=map("v$_.4s",(16..19)); +my ($W0,$W1)=("v20.4s","v21.4s"); +my $ABCD_SAVE="v22.16b"; + +$code.=<<___; +.type sha1_block_armv8,%function +.align 6 +sha1_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + adr x4,.Lconst + eor $E,$E,$E + ld1.32 {$ABCD},[$ctx],#16 + ld1.32 {$E}[0],[$ctx] + sub $ctx,$ctx,#16 + ld1.32 {@Kxx[0]-@Kxx[3]},[x4] + +.Loop_hw: + ld1 {@MSG[0]-@MSG[3]},[$inp],#64 + sub $num,$num,#1 + rev32 @MSG[0],@MSG[0] + rev32 @MSG[1],@MSG[1] + + add.i32 $W0,@Kxx[0],@MSG[0] + rev32 @MSG[2],@MSG[2] + orr $ABCD_SAVE,$ABCD,$ABCD // offload + + add.i32 $W1,@Kxx[0],@MSG[1] + rev32 @MSG[3],@MSG[3] + sha1h $E1,$ABCD + sha1c $ABCD,$E,$W0 // 0 + add.i32 $W0,@Kxx[$j],@MSG[2] + sha1su0 @MSG[0],@MSG[1],@MSG[2] +___ +for ($j=0,$i=1;$i<20-3;$i++) { +my $f=("c","p","m","p")[$i/5]; +$code.=<<___; + sha1h $E0,$ABCD // $i + sha1$f $ABCD,$E1,$W1 + add.i32 $W1,@Kxx[$j],@MSG[3] + sha1su1 @MSG[0],@MSG[3] +___ +$code.=<<___ if ($i<20-4); + sha1su0 @MSG[1],@MSG[2],@MSG[3] +___ + ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0); + push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0); +} +$code.=<<___; + sha1h $E0,$ABCD // $i + sha1p $ABCD,$E1,$W1 + add.i32 $W1,@Kxx[$j],@MSG[3] + + sha1h $E1,$ABCD // 18 + sha1p $ABCD,$E0,$W0 + + sha1h $E0,$ABCD // 19 + sha1p $ABCD,$E1,$W1 + + add.i32 $E,$E,$E0 + add.i32 $ABCD,$ABCD,$ABCD_SAVE + + cbnz $num,.Loop_hw + + st1.32 {$ABCD},[$ctx],#16 + st1.32 {$E}[0],[$ctx] + + ldr x29,[sp],#16 + ret +.size sha1_block_armv8,.-sha1_block_armv8 +.align 6 +.Lconst: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 +.LOPENSSL_armcap_P: +.quad OPENSSL_armcap_P-. +.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by " +.align 2 +.comm OPENSSL_armcap_P,4,4 +___ +}}} + +{ my %opcode = ( + "sha1c" => 0x5e000000, "sha1p" => 0x5e001000, + "sha1m" => 0x5e002000, "sha1su0" => 0x5e003000, + "sha1h" => 0x5e280800, "sha1su1" => 0x5e281800 ); + + sub unsha1 { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o + && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5)|($3<<16), + $mnemonic,$arg; + } +} + +foreach(split("\n",$code)) { + + s/\`([^\`]*)\`/eval($1)/geo; + + s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/geo; + + s/\.\w?32\b//o and s/\.16b/\.4s/go; + m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go; + + print $_,"\n"; +} + +close STDOUT; diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.pl b/app/openssl/crypto/sha/asm/sha256-armv4.pl index 9c84e8d9..505ca8f3 100644 --- a/app/openssl/crypto/sha/asm/sha256-armv4.pl +++ b/app/openssl/crypto/sha/asm/sha256-armv4.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -21,15 +21,27 @@ # February 2011. # # Profiler-assisted and platform-specific optimization resulted in 16% -# improvement on Cortex A8 core and ~17 cycles per processed byte. +# improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +# September 2013. +# +# Add NEON implementation. On Cortex A8 it was measured to process one +# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +# code (meaning that latter performs sub-optimally, nothing was done +# about it). + +# May 2014. +# +# Add ARMv8 code path performing at 2.0 cpb on Apple A7. while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $ctx="r0"; $t0="r0"; -$inp="r1"; $t3="r1"; +$inp="r1"; $t4="r1"; $len="r2"; $t1="r2"; -$T1="r3"; +$T1="r3"; $t3="r3"; $A="r4"; $B="r5"; $C="r6"; @@ -52,71 +64,88 @@ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___ if ($i<16); #if __ARM_ARCH__>=7 - ldr $T1,[$inp],#4 + @ ldr $t1,[$inp],#4 @ $i +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) + rev $t1,$t1 #else - ldrb $T1,[$inp,#3] @ $i + @ ldrb $t1,[$inp,#3] @ $i + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past ldrb $t2,[$inp,#2] - ldrb $t1,[$inp,#1] - ldrb $t0,[$inp],#4 - orr $T1,$T1,$t2,lsl#8 - orr $T1,$T1,$t1,lsl#16 - orr $T1,$T1,$t0,lsl#24 + ldrb $t0,[$inp,#1] + orr $t1,$t1,$t2,lsl#8 + ldrb $t2,[$inp],#4 + orr $t1,$t1,$t0,lsl#16 +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + orr $t1,$t1,$t2,lsl#24 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) #endif ___ $code.=<<___; - mov $t0,$e,ror#$Sigma1[0] ldr $t2,[$Ktbl],#4 @ *K256++ - eor $t0,$t0,$e,ror#$Sigma1[1] + add $h,$h,$t1 @ h+=X[i] + str $t1,[sp,#`$i%16`*4] eor $t1,$f,$g -#if $i>=16 - add $T1,$T1,$t3 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev $T1,$T1 -#endif -#if $i==15 - str $inp,[sp,#17*4] @ leave room for $t3 -#endif - eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e) + add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) and $t1,$t1,$e - str $T1,[sp,#`$i%16`*4] - add $T1,$T1,$t0 + add $h,$h,$t2 @ h+=K256[i] eor $t1,$t1,$g @ Ch(e,f,g) - add $T1,$T1,$h - mov $h,$a,ror#$Sigma0[0] - add $T1,$T1,$t1 - eor $h,$h,$a,ror#$Sigma0[1] - add $T1,$T1,$t2 - eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a) -#if $i>=15 - ldr $t3,[sp,#`($i+2)%16`*4] @ from BODY_16_xx + eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` + add $h,$h,$t1 @ h+=Ch(e,f,g) +#if $i==31 + and $t2,$t2,#0xff + cmp $t2,#0xf2 @ done? #endif - orr $t0,$a,$b - and $t1,$a,$b - and $t0,$t0,$c - add $h,$h,$T1 - orr $t0,$t0,$t1 @ Maj(a,b,c) - add $d,$d,$T1 - add $h,$h,$t0 +#if $i<15 +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 @ prefetch +# else + ldrb $t1,[$inp,#3] +# endif + eor $t2,$a,$b @ a^b, b^c in next round +#else + ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx + eor $t2,$a,$b @ a^b, b^c in next round + ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx +#endif + eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) + and $t3,$t3,$t2 @ (b^c)&=(a^b) + add $d,$d,$h @ d+=h + eor $t3,$t3,$b @ Maj(a,b,c) + add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) + @ add $h,$h,$t3 @ h+=Maj(a,b,c) ___ + ($t2,$t3)=($t3,$t2); } sub BODY_16_XX { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___; - @ ldr $t3,[sp,#`($i+1)%16`*4] @ $i - ldr $t2,[sp,#`($i+14)%16`*4] - mov $t0,$t3,ror#$sigma0[0] - ldr $T1,[sp,#`($i+0)%16`*4] - eor $t0,$t0,$t3,ror#$sigma0[1] - ldr $t1,[sp,#`($i+9)%16`*4] - eor $t0,$t0,$t3,lsr#$sigma0[2] @ sigma0(X[i+1]) - mov $t3,$t2,ror#$sigma1[0] - add $T1,$T1,$t0 - eor $t3,$t3,$t2,ror#$sigma1[1] - add $T1,$T1,$t1 - eor $t3,$t3,$t2,lsr#$sigma1[2] @ sigma1(X[i+14]) - @ add $T1,$T1,$t3 + @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i + @ ldr $t4,[sp,#`($i+14)%16`*4] + mov $t0,$t1,ror#$sigma0[0] + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + mov $t2,$t4,ror#$sigma1[0] + eor $t0,$t0,$t1,ror#$sigma0[1] + eor $t2,$t2,$t4,ror#$sigma1[1] + eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) + ldr $t1,[sp,#`($i+0)%16`*4] + eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) + ldr $t4,[sp,#`($i+9)%16`*4] + + add $t2,$t2,$t0 + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 + add $t1,$t1,$t2 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) + add $t1,$t1,$t4 @ X[i] ___ &BODY_00_15(@_); } @@ -147,46 +176,64 @@ K256: .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 +.word 0 @ terminator +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +.align 5 .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: sub r3,pc,#8 @ sha256_block_data_order add $len,$inp,$len,lsl#6 @ len to point at the end of inp +#if __ARM_ARCH__>=7 + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} - sub $Ktbl,r3,#256 @ K256 + sub $Ktbl,r3,#256+32 @ K256 sub sp,sp,#16*4 @ alloca(X[16]) .Loop: +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 +# else + ldrb $t1,[$inp,#3] +# endif + eor $t3,$B,$C @ magic + eor $t2,$t2,$t2 ___ for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } $code.=".Lrounds_16_xx:\n"; for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } $code.=<<___; - and $t2,$t2,#0xff - cmp $t2,#0xf2 + ldreq $t3,[sp,#16*4] @ pull ctx bne .Lrounds_16_xx - ldr $T1,[sp,#16*4] @ pull ctx - ldr $t0,[$T1,#0] - ldr $t1,[$T1,#4] - ldr $t2,[$T1,#8] + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t0,[$t3,#0] + ldr $t1,[$t3,#4] + ldr $t2,[$t3,#8] add $A,$A,$t0 - ldr $t0,[$T1,#12] + ldr $t0,[$t3,#12] add $B,$B,$t1 - ldr $t1,[$T1,#16] + ldr $t1,[$t3,#16] add $C,$C,$t2 - ldr $t2,[$T1,#20] + ldr $t2,[$t3,#20] add $D,$D,$t0 - ldr $t0,[$T1,#24] + ldr $t0,[$t3,#24] add $E,$E,$t1 - ldr $t1,[$T1,#28] + ldr $t1,[$t3,#28] add $F,$F,$t2 ldr $inp,[sp,#17*4] @ pull inp ldr $t2,[sp,#18*4] @ pull inp+len add $G,$G,$t0 add $H,$H,$t1 - stmia $T1,{$A,$B,$C,$D,$E,$F,$G,$H} + stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} cmp $inp,$t2 sub $Ktbl,$Ktbl,#256 @ rewind Ktbl bne .Loop @@ -200,12 +247,410 @@ $code.=<<___; moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.size sha256_block_data_order,.-sha256_block_data_order -.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by " +.size sha256_block_data_order,.-sha256_block_data_order +___ +###################################################################### +# NEON stuff +# +{{{ +my @X=map("q$_",(0..3)); +my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); +my $Xfer=$t4; +my $j=0; + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +sub Xupdate() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T2,$T0,$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T1,$T0,$sigma0[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T2,$T0,32-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T3,$T0,$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T2); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T3,$T0,32-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T3); # sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + while($#insns>=2) { eval(shift(@insns)); } + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub Xpreload() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vrev32_8 (@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + foreach (@insns) { eval; } # remaining instructions + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub body_00_15 () { + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. + '&add ($h,$h,$t1)', # h+=X[i]+K[i] + '&eor ($t1,$f,$g)', + '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', + '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past + '&and ($t1,$t1,$e)', + '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) + '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', + '&eor ($t1,$t1,$g)', # Ch(e,f,g) + '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) + '&eor ($t2,$a,$b)', # a^b, b^c in next round + '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) + '&add ($h,$h,$t1)', # h+=Ch(e,f,g) + '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. + '&ldr ($t1,"[$Ktbl]") if ($j==15);'. + '&ldr ($t1,"[sp,#64]") if ($j==31)', + '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) + '&add ($d,$d,$h)', # d+=h + '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) + '&eor ($t3,$t3,$b)', # Maj(a,b,c) + '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' + ) +} + +$code.=<<___; +#if __ARM_ARCH__>=7 +.fpu neon + +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + mov $t2,sp + sub sp,sp,#16*4+16 @ alloca + sub $Ktbl,r3,#256+32 @ K256 + bic sp,sp,#15 @ align for 128-bit stores + + vld1.8 {@X[0]},[$inp]! + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + vld1.32 {$T0},[$Ktbl,:128]! + vld1.32 {$T1},[$Ktbl,:128]! + vld1.32 {$T2},[$Ktbl,:128]! + vld1.32 {$T3},[$Ktbl,:128]! + vrev32.8 @X[0],@X[0] @ yes, even on + str $ctx,[sp,#64] + vrev32.8 @X[1],@X[1] @ big-endian + str $inp,[sp,#68] + mov $Xfer,sp + vrev32.8 @X[2],@X[2] + str $len,[sp,#72] + vrev32.8 @X[3],@X[3] + str $t2,[sp,#76] @ save original sp + vadd.i32 $T0,$T0,@X[0] + vadd.i32 $T1,$T1,@X[1] + vst1.32 {$T0},[$Xfer,:128]! + vadd.i32 $T2,$T2,@X[2] + vst1.32 {$T1},[$Xfer,:128]! + vadd.i32 $T3,$T3,@X[3] + vst1.32 {$T2},[$Xfer,:128]! + vst1.32 {$T3},[$Xfer,:128]! + + ldmia $ctx,{$A-$H} + sub $Xfer,$Xfer,#64 + ldr $t1,[sp,#0] + eor $t2,$t2,$t2 + eor $t3,$B,$C + b .L_00_48 + +.align 4 +.L_00_48: +___ + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); +$code.=<<___; + teq $t1,#0 @ check for K256 terminator + ldr $t1,[sp,#0] + sub $Xfer,$Xfer,#64 + bne .L_00_48 + + ldr $inp,[sp,#68] + ldr $t0,[sp,#72] + sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl + teq $inp,$t0 + subeq $inp,$inp,#64 @ avoid SEGV + vld1.8 {@X[0]},[$inp]! @ load next input block + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + strne $inp,[sp,#68] + mov $Xfer,sp +___ + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); +$code.=<<___; + ldr $t0,[$t1,#0] + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t2,[$t1,#4] + ldr $t3,[$t1,#8] + ldr $t4,[$t1,#12] + add $A,$A,$t0 @ accumulate + ldr $t0,[$t1,#16] + add $B,$B,$t2 + ldr $t2,[$t1,#20] + add $C,$C,$t3 + ldr $t3,[$t1,#24] + add $D,$D,$t4 + ldr $t4,[$t1,#28] + add $E,$E,$t0 + str $A,[$t1],#4 + add $F,$F,$t2 + str $B,[$t1],#4 + add $G,$G,$t3 + str $C,[$t1],#4 + add $H,$H,$t4 + str $D,[$t1],#4 + stmia $t1,{$E-$H} + + movne $Xfer,sp + ldrne $t1,[sp,#0] + eorne $t2,$t2,$t2 + ldreq sp,[sp,#76] @ restore original sp + eorne $t3,$B,$C + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +___ +}}} +###################################################################### +# ARMv8 stuff +# +{{{ +my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); +my @MSG=map("q$_",(8..11)); +my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); +my $Ktbl="r3"; + +$code.=<<___; +#if __ARM_ARCH__>=7 +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {$ABCD,$EFGH},[$ctx] + sub $Ktbl,r3,#sha256_block_data_order-K256 + +.Loop_v8: + vld1.8 {@MSG[0]-@MSG[1]},[$inp]! + vld1.8 {@MSG[2]-@MSG[3]},[$inp]! + vld1.32 {$W0},[$Ktbl]! + vrev32.8 @MSG[0],@MSG[0] + vrev32.8 @MSG[1],@MSG[1] + vrev32.8 @MSG[2],@MSG[2] + vrev32.8 @MSG[3],@MSG[3] + vmov $ABCD_SAVE,$ABCD @ offload + vmov $EFGH_SAVE,$EFGH + teq $inp,$len +___ +for($i=0;$i<12;$i++) { +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + sha256su0 @MSG[0],@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + sha256su1 @MSG[0],@MSG[2],@MSG[3] +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); +} +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vld1.32 {$W0},[$Ktbl]! + vadd.i32 $W1,$W1,@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vld1.32 {$W1},[$Ktbl] + vadd.i32 $W0,$W0,@MSG[2] + sub $Ktbl,$Ktbl,#256-16 @ rewind + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vadd.i32 $W1,$W1,@MSG[3] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vadd.i32 $ABCD,$ABCD,$ABCD_SAVE + vadd.i32 $EFGH,$EFGH,$EFGH_SAVE + bne .Loop_v8 + + vst1.32 {$ABCD,$EFGH},[$ctx] + + ret @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +___ +}}} +$code.=<<___; +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " .align 2 +.comm OPENSSL_armcap_P,4,4 ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; +{ my %opcode = ( + "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, + "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); + + sub unsha256 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } +} + +foreach (split($/,$code)) { + + s/\`([^\`]*)\`/eval $1/geo; + + s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; + + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} + close STDOUT; # enforce flush diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.s b/app/openssl/crypto/sha/asm/sha256-armv4.s index 9c20a63c..853d7da5 100644 --- a/app/openssl/crypto/sha/asm/sha256-armv4.s +++ b/app/openssl/crypto/sha/asm/sha256-armv4.s @@ -23,1463 +23,1721 @@ K256: .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 +.word 0 @ terminator +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +.align 5 .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: sub r3,pc,#8 @ sha256_block_data_order add r2,r1,r2,lsl#6 @ len to point at the end of inp +#if __ARM_ARCH__>=7 + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif stmdb sp!,{r0,r1,r2,r4-r11,lr} ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} - sub r14,r3,#256 @ K256 + sub r14,r3,#256+32 @ K256 sub sp,sp,#16*4 @ alloca(X[16]) .Loop: +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 0 + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) #endif - mov r0,r8,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] eor r2,r9,r10 -#if 0>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 0==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) + add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 - str r3,[sp,#0*4] - add r3,r3,r0 + add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 0>=15 - ldr r1,[sp,#2*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 0<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 1 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] eor r2,r8,r9 -#if 1>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 1==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) + add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 - str r3,[sp,#1*4] - add r3,r3,r0 + add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 1>=15 - ldr r1,[sp,#3*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 1<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 2 + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) #endif - mov r0,r6,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] eor r2,r7,r8 -#if 2>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 2==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) + add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 - str r3,[sp,#2*4] - add r3,r3,r0 + add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 2>=15 - ldr r1,[sp,#4*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 2<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 3 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] eor r2,r6,r7 -#if 3>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 3==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) + add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 - str r3,[sp,#3*4] - add r3,r3,r0 + add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 3>=15 - ldr r1,[sp,#5*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 3<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 4 + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) #endif - mov r0,r4,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] eor r2,r5,r6 -#if 4>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 4==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) + add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 - str r3,[sp,#4*4] - add r3,r3,r0 + add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 4>=15 - ldr r1,[sp,#6*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 4<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 5 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] eor r2,r4,r5 -#if 5>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 5==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) + add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 - str r3,[sp,#5*4] - add r3,r3,r0 + add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 5>=15 - ldr r1,[sp,#7*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 5<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 6 + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) #endif - mov r0,r10,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] eor r2,r11,r4 -#if 6>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 6==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) + add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 - str r3,[sp,#6*4] - add r3,r3,r0 + add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 6>=15 - ldr r1,[sp,#8*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 6<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 7 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] eor r2,r10,r11 -#if 7>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 7==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) + add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 - str r3,[sp,#7*4] - add r3,r3,r0 + add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 7>=15 - ldr r1,[sp,#9*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 7<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 8 + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) #endif - mov r0,r8,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] eor r2,r9,r10 -#if 8>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 8==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) + add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 - str r3,[sp,#8*4] - add r3,r3,r0 + add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 8>=15 - ldr r1,[sp,#10*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 8<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 9 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] eor r2,r8,r9 -#if 9>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 9==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) + add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 - str r3,[sp,#9*4] - add r3,r3,r0 + add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 9>=15 - ldr r1,[sp,#11*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 9<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 10 + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) #endif - mov r0,r6,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] eor r2,r7,r8 -#if 10>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 10==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) + add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 - str r3,[sp,#10*4] - add r3,r3,r0 + add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 10>=15 - ldr r1,[sp,#12*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 10<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 11 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] eor r2,r6,r7 -#if 11>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 11==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) + add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 - str r3,[sp,#11*4] - add r3,r3,r0 + add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 11>=15 - ldr r1,[sp,#13*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 11<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 12 + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) #endif - mov r0,r4,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] eor r2,r5,r6 -#if 12>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 12==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) + add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 - str r3,[sp,#12*4] - add r3,r3,r0 + add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 12>=15 - ldr r1,[sp,#14*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 12<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 13 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] eor r2,r4,r5 -#if 13>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 13==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) + add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 - str r3,[sp,#13*4] - add r3,r3,r0 + add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 13>=15 - ldr r1,[sp,#15*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 13<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 14 + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) #endif - mov r0,r10,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] eor r2,r11,r4 -#if 14>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 14==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) + add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 - str r3,[sp,#14*4] - add r3,r3,r0 + add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 14>=15 - ldr r1,[sp,#0*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 14<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 15 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] eor r2,r10,r11 -#if 15>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 15==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) + add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 - str r3,[sp,#15*4] - add r3,r3,r0 + add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 15>=15 - ldr r1,[sp,#1*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 15<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) .Lrounds_16_xx: - @ ldr r1,[sp,#1*4] @ 16 - ldr r12,[sp,#14*4] - mov r0,r1,ror#7 - ldr r3,[sp,#0*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#9*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r8,ror#6 + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] eor r2,r9,r10 -#if 16>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 16==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) + add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 - str r3,[sp,#0*4] - add r3,r3,r0 + add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 16>=15 - ldr r1,[sp,#2*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 - @ ldr r1,[sp,#2*4] @ 17 - ldr r12,[sp,#15*4] - mov r0,r1,ror#7 - ldr r3,[sp,#1*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#10*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 16<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] eor r2,r8,r9 -#if 17>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 17==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) + add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 - str r3,[sp,#1*4] - add r3,r3,r0 + add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 17>=15 - ldr r1,[sp,#3*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 - @ ldr r1,[sp,#3*4] @ 18 - ldr r12,[sp,#0*4] - mov r0,r1,ror#7 - ldr r3,[sp,#2*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#11*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r6,ror#6 + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 17<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] eor r2,r7,r8 -#if 18>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 18==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) + add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 - str r3,[sp,#2*4] - add r3,r3,r0 + add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 18>=15 - ldr r1,[sp,#4*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 - @ ldr r1,[sp,#4*4] @ 19 - ldr r12,[sp,#1*4] - mov r0,r1,ror#7 - ldr r3,[sp,#3*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#12*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 18<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] eor r2,r6,r7 -#if 19>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 19==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) + add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 - str r3,[sp,#3*4] - add r3,r3,r0 + add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 19>=15 - ldr r1,[sp,#5*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 - @ ldr r1,[sp,#5*4] @ 20 - ldr r12,[sp,#2*4] - mov r0,r1,ror#7 - ldr r3,[sp,#4*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#13*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r4,ror#6 + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 19<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] eor r2,r5,r6 -#if 20>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 20==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) + add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 - str r3,[sp,#4*4] - add r3,r3,r0 + add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 20>=15 - ldr r1,[sp,#6*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 - @ ldr r1,[sp,#6*4] @ 21 - ldr r12,[sp,#3*4] - mov r0,r1,ror#7 - ldr r3,[sp,#5*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#14*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 20<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] eor r2,r4,r5 -#if 21>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 21==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) + add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 - str r3,[sp,#5*4] - add r3,r3,r0 + add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 21>=15 - ldr r1,[sp,#7*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 - @ ldr r1,[sp,#7*4] @ 22 - ldr r12,[sp,#4*4] - mov r0,r1,ror#7 - ldr r3,[sp,#6*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#15*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r10,ror#6 + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 21<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] eor r2,r11,r4 -#if 22>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 22==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) + add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 - str r3,[sp,#6*4] - add r3,r3,r0 + add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 22>=15 - ldr r1,[sp,#8*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 - @ ldr r1,[sp,#8*4] @ 23 - ldr r12,[sp,#5*4] - mov r0,r1,ror#7 - ldr r3,[sp,#7*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#0*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 22<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] eor r2,r10,r11 -#if 23>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 23==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) + add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 - str r3,[sp,#7*4] - add r3,r3,r0 + add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 23>=15 - ldr r1,[sp,#9*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 - @ ldr r1,[sp,#9*4] @ 24 - ldr r12,[sp,#6*4] - mov r0,r1,ror#7 - ldr r3,[sp,#8*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#1*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r8,ror#6 + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 23<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] eor r2,r9,r10 -#if 24>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 24==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) + add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 - str r3,[sp,#8*4] - add r3,r3,r0 + add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 24>=15 - ldr r1,[sp,#10*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 - @ ldr r1,[sp,#10*4] @ 25 - ldr r12,[sp,#7*4] - mov r0,r1,ror#7 - ldr r3,[sp,#9*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#2*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 24<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] eor r2,r8,r9 -#if 25>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 25==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) + add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 - str r3,[sp,#9*4] - add r3,r3,r0 + add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 25>=15 - ldr r1,[sp,#11*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 - @ ldr r1,[sp,#11*4] @ 26 - ldr r12,[sp,#8*4] - mov r0,r1,ror#7 - ldr r3,[sp,#10*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#3*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r6,ror#6 + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 25<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] eor r2,r7,r8 -#if 26>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 26==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) + add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 - str r3,[sp,#10*4] - add r3,r3,r0 + add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 26>=15 - ldr r1,[sp,#12*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 - @ ldr r1,[sp,#12*4] @ 27 - ldr r12,[sp,#9*4] - mov r0,r1,ror#7 - ldr r3,[sp,#11*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#4*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 26<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] eor r2,r6,r7 -#if 27>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 27==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) + add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 - str r3,[sp,#11*4] - add r3,r3,r0 + add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 27>=15 - ldr r1,[sp,#13*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 - @ ldr r1,[sp,#13*4] @ 28 - ldr r12,[sp,#10*4] - mov r0,r1,ror#7 - ldr r3,[sp,#12*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#5*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r4,ror#6 + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 27<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] eor r2,r5,r6 -#if 28>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 28==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) + add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 - str r3,[sp,#12*4] - add r3,r3,r0 + add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 28>=15 - ldr r1,[sp,#14*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 - @ ldr r1,[sp,#14*4] @ 29 - ldr r12,[sp,#11*4] - mov r0,r1,ror#7 - ldr r3,[sp,#13*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#6*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 28<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] eor r2,r4,r5 -#if 29>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 29==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) + add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 - str r3,[sp,#13*4] - add r3,r3,r0 + add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 29>=15 - ldr r1,[sp,#15*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 - @ ldr r1,[sp,#15*4] @ 30 - ldr r12,[sp,#12*4] - mov r0,r1,ror#7 - ldr r3,[sp,#14*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#7*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r10,ror#6 + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 29<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] eor r2,r11,r4 -#if 30>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 30==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) + add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 - str r3,[sp,#14*4] - add r3,r3,r0 + add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 30>=15 - ldr r1,[sp,#0*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 - @ ldr r1,[sp,#0*4] @ 31 - ldr r12,[sp,#13*4] - mov r0,r1,ror#7 - ldr r3,[sp,#15*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#8*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 30<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] eor r2,r10,r11 -#if 31>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 31==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) + add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 - str r3,[sp,#15*4] - add r3,r3,r0 + add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 31>=15 - ldr r1,[sp,#1*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 - and r12,r12,#0xff - cmp r12,#0xf2 + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 31<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + ldreq r3,[sp,#16*4] @ pull ctx bne .Lrounds_16_xx - ldr r3,[sp,#16*4] @ pull ctx + add r4,r4,r12 @ h+=Maj(a,b,c) from the past ldr r0,[r3,#0] ldr r2,[r3,#4] ldr r12,[r3,#8] @@ -1512,6 +1770,921 @@ sha256_block_data_order: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.size sha256_block_data_order,.-sha256_block_data_order -.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by " +.size sha256_block_data_order,.-sha256_block_data_order +#if __ARM_ARCH__>=7 +.fpu neon + +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + mov r12,sp + sub sp,sp,#16*4+16 @ alloca + sub r14,r3,#256+32 @ K256 + bic sp,sp,#15 @ align for 128-bit stores + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! + + ldmia r0,{r4-r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + ldr r0,[r2,#0] + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8-r11} + + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +#if __ARM_ARCH__>=7 +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {q0,q1},[r0] + sub r3,r3,#sha256_block_data_order-K256 + +.Loop_v8: + vld1.8 {q8-q9},[r1]! + vld1.8 {q10-q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] + + bx lr @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " .align 2 +.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha256-armv8.S b/app/openssl/crypto/sha/asm/sha256-armv8.S new file mode 100644 index 00000000..bd43b1fe --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha256-armv8.S @@ -0,0 +1,1141 @@ +#include "arm_arch.h" + +.text + +.globl sha256_block_data_order +.type sha256_block_data_order,%function +.align 6 +sha256_block_data_order: + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA256 + b.ne .Lv8_entry + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*4 + + ldp w20,w21,[x0] // load context + ldp w22,w23,[x0,#2*4] + ldp w24,w25,[x0,#4*4] + add x2,x1,x2,lsl#6 // end of input + ldp w26,w27,[x0,#6*4] + adr x30,K256 + stp x0,x2,[x29,#96] + +.Loop: + ldp w3,w4,[x1],#2*4 + ldr w19,[x30],#4 // *K++ + eor w28,w21,w22 // magic seed + str x1,[x29,#112] +#ifndef __ARMEB__ + rev w3,w3 // 0 +#endif + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w6,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w3 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w4,w4 // 1 +#endif + ldp w5,w6,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w7,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w4 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w5,w5 // 2 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w8,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w5 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w6,w6 // 3 +#endif + ldp w7,w8,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w9,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w6 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w7,w7 // 4 +#endif + add w24,w24,w17 // h+=Sigma0(a) + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w10,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w7 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w10,ror#11 // Sigma1(e) + ror w10,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w10,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w8,w8 // 5 +#endif + ldp w9,w10,[x1],#2*4 + add w23,w23,w17 // h+=Sigma0(a) + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w11,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w8 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w11,ror#11 // Sigma1(e) + ror w11,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w11,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w9,w9 // 6 +#endif + add w22,w22,w17 // h+=Sigma0(a) + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w12,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w9 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w12,ror#11 // Sigma1(e) + ror w12,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w12,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w10,w10 // 7 +#endif + ldp w11,w12,[x1],#2*4 + add w21,w21,w17 // h+=Sigma0(a) + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + eor w13,w25,w25,ror#14 + and w17,w26,w25 + bic w28,w27,w25 + add w20,w20,w10 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w13,ror#11 // Sigma1(e) + ror w13,w21,#2 + add w20,w20,w17 // h+=Ch(e,f,g) + eor w17,w21,w21,ror#9 + add w20,w20,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w24,w24,w20 // d+=h + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w13,w17,ror#13 // Sigma0(a) + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w20,w20,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w11,w11 // 8 +#endif + add w20,w20,w17 // h+=Sigma0(a) + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w14,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w11 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w14,ror#11 // Sigma1(e) + ror w14,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w14,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w12,w12 // 9 +#endif + ldp w13,w14,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w15,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w12 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w15,ror#11 // Sigma1(e) + ror w15,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w15,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w13,w13 // 10 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w0,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w13 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w0,ror#11 // Sigma1(e) + ror w0,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w0,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w14,w14 // 11 +#endif + ldp w15,w0,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w6,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w14 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w15,w15 // 12 +#endif + add w24,w24,w17 // h+=Sigma0(a) + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w7,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w15 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w0,w0 // 13 +#endif + ldp w1,w2,[x1] + add w23,w23,w17 // h+=Sigma0(a) + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w8,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w0 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w1,w1 // 14 +#endif + ldr w6,[sp,#12] + add w22,w22,w17 // h+=Sigma0(a) + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w9,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w1 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w2,w2 // 15 +#endif + ldr w7,[sp,#0] + add w21,w21,w17 // h+=Sigma0(a) + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 +.Loop_16_xx: + ldr w8,[sp,#4] + str w11,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w10,w5,#7 + and w17,w25,w24 + ror w9,w2,#17 + bic w19,w26,w24 + ror w11,w20,#2 + add w27,w27,w3 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w10,w10,w5,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w11,w11,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w9,w9,w2,ror#19 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w11,w20,ror#22 // Sigma0(a) + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) + add w4,w4,w13 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w4,w4,w10 + add w27,w27,w17 // h+=Sigma0(a) + add w4,w4,w9 + ldr w9,[sp,#8] + str w12,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w11,w6,#7 + and w17,w24,w23 + ror w10,w3,#17 + bic w28,w25,w23 + ror w12,w27,#2 + add w26,w26,w4 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w11,w11,w6,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w12,w12,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w10,w10,w3,ror#19 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w12,w27,ror#22 // Sigma0(a) + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) + add w5,w5,w14 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w5,w5,w11 + add w26,w26,w17 // h+=Sigma0(a) + add w5,w5,w10 + ldr w10,[sp,#12] + str w13,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w12,w7,#7 + and w17,w23,w22 + ror w11,w4,#17 + bic w19,w24,w22 + ror w13,w26,#2 + add w25,w25,w5 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w12,w12,w7,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w13,w13,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w11,w11,w4,ror#19 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w13,w26,ror#22 // Sigma0(a) + eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) + add w6,w6,w15 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w6,w6,w12 + add w25,w25,w17 // h+=Sigma0(a) + add w6,w6,w11 + ldr w11,[sp,#0] + str w14,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w13,w8,#7 + and w17,w22,w21 + ror w12,w5,#17 + bic w28,w23,w21 + ror w14,w25,#2 + add w24,w24,w6 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w13,w13,w8,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w14,w14,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w12,w12,w5,ror#19 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w14,w25,ror#22 // Sigma0(a) + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) + add w7,w7,w0 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w7,w7,w13 + add w24,w24,w17 // h+=Sigma0(a) + add w7,w7,w12 + ldr w12,[sp,#4] + str w15,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w14,w9,#7 + and w17,w21,w20 + ror w13,w6,#17 + bic w19,w22,w20 + ror w15,w24,#2 + add w23,w23,w7 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w14,w14,w9,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w15,w15,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w13,w13,w6,ror#19 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w15,w24,ror#22 // Sigma0(a) + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) + add w8,w8,w1 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w8,w8,w14 + add w23,w23,w17 // h+=Sigma0(a) + add w8,w8,w13 + ldr w13,[sp,#8] + str w0,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w15,w10,#7 + and w17,w20,w27 + ror w14,w7,#17 + bic w28,w21,w27 + ror w0,w23,#2 + add w22,w22,w8 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w15,w15,w10,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w0,w0,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w14,w14,w7,ror#19 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w0,w23,ror#22 // Sigma0(a) + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) + add w9,w9,w2 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w9,w9,w15 + add w22,w22,w17 // h+=Sigma0(a) + add w9,w9,w14 + ldr w14,[sp,#12] + str w1,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w0,w11,#7 + and w17,w27,w26 + ror w15,w8,#17 + bic w19,w20,w26 + ror w1,w22,#2 + add w21,w21,w9 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w0,w0,w11,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w1,w1,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w15,w15,w8,ror#19 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w1,w22,ror#22 // Sigma0(a) + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) + add w10,w10,w3 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w10,w10,w0 + add w21,w21,w17 // h+=Sigma0(a) + add w10,w10,w15 + ldr w15,[sp,#0] + str w2,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w1,w12,#7 + and w17,w26,w25 + ror w0,w9,#17 + bic w28,w27,w25 + ror w2,w21,#2 + add w20,w20,w10 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w1,w1,w12,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w2,w2,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w0,w0,w9,ror#19 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w2,w21,ror#22 // Sigma0(a) + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) + add w11,w11,w4 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w11,w11,w1 + add w20,w20,w17 // h+=Sigma0(a) + add w11,w11,w0 + ldr w0,[sp,#4] + str w3,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w2,w13,#7 + and w17,w25,w24 + ror w1,w10,#17 + bic w19,w26,w24 + ror w3,w20,#2 + add w27,w27,w11 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w2,w2,w13,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w3,w3,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w1,w1,w10,ror#19 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w3,w20,ror#22 // Sigma0(a) + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) + add w12,w12,w5 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w12,w12,w2 + add w27,w27,w17 // h+=Sigma0(a) + add w12,w12,w1 + ldr w1,[sp,#8] + str w4,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w3,w14,#7 + and w17,w24,w23 + ror w2,w11,#17 + bic w28,w25,w23 + ror w4,w27,#2 + add w26,w26,w12 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w3,w3,w14,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w4,w4,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w2,w2,w11,ror#19 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w4,w27,ror#22 // Sigma0(a) + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) + add w13,w13,w6 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w13,w13,w3 + add w26,w26,w17 // h+=Sigma0(a) + add w13,w13,w2 + ldr w2,[sp,#12] + str w5,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w4,w15,#7 + and w17,w23,w22 + ror w3,w12,#17 + bic w19,w24,w22 + ror w5,w26,#2 + add w25,w25,w13 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w4,w4,w15,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w5,w5,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w3,w3,w12,ror#19 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w5,w26,ror#22 // Sigma0(a) + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) + add w14,w14,w7 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w14,w14,w4 + add w25,w25,w17 // h+=Sigma0(a) + add w14,w14,w3 + ldr w3,[sp,#0] + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w5,w0,#7 + and w17,w22,w21 + ror w4,w13,#17 + bic w28,w23,w21 + ror w6,w25,#2 + add w24,w24,w14 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w5,w5,w0,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w6,w6,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w4,w4,w13,ror#19 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w25,ror#22 // Sigma0(a) + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) + add w15,w15,w8 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w15,w15,w5 + add w24,w24,w17 // h+=Sigma0(a) + add w15,w15,w4 + ldr w4,[sp,#4] + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w6,w1,#7 + and w17,w21,w20 + ror w5,w14,#17 + bic w19,w22,w20 + ror w7,w24,#2 + add w23,w23,w15 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w6,w6,w1,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w7,w7,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w5,w5,w14,ror#19 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w24,ror#22 // Sigma0(a) + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) + add w0,w0,w9 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w0,w0,w6 + add w23,w23,w17 // h+=Sigma0(a) + add w0,w0,w5 + ldr w5,[sp,#8] + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w7,w2,#7 + and w17,w20,w27 + ror w6,w15,#17 + bic w28,w21,w27 + ror w8,w23,#2 + add w22,w22,w0 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w7,w7,w2,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w8,w8,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w6,w6,w15,ror#19 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w23,ror#22 // Sigma0(a) + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) + add w1,w1,w10 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w1,w1,w7 + add w22,w22,w17 // h+=Sigma0(a) + add w1,w1,w6 + ldr w6,[sp,#12] + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w8,w3,#7 + and w17,w27,w26 + ror w7,w0,#17 + bic w19,w20,w26 + ror w9,w22,#2 + add w21,w21,w1 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w8,w8,w3,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w9,w9,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w7,w7,w0,ror#19 + eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w22,ror#22 // Sigma0(a) + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) + add w2,w2,w11 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w2,w2,w8 + add w21,w21,w17 // h+=Sigma0(a) + add w2,w2,w7 + ldr w7,[sp,#0] + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 + cbnz w19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#260 // rewind + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#2*4] + add x1,x1,#14*4 // advance input pointer + ldp w7,w8,[x0,#4*4] + add w20,w20,w3 + ldp w9,w10,[x0,#6*4] + add w21,w21,w4 + add w22,w22,w5 + add w23,w23,w6 + stp w20,w21,[x0] + add w24,w24,w7 + add w25,w25,w8 + stp w22,w23,[x0,#2*4] + add w26,w26,w9 + add w27,w27,w10 + cmp x1,x2 + stp w24,w25,[x0,#4*4] + stp w26,w27,[x0,#6*4] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*4 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size sha256_block_data_order,.-sha256_block_data_order + +.align 6 +.type K256,%object +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0 //terminator +.size K256,.-K256 +.align 3 +.LOPENSSL_armcap_P: + .quad OPENSSL_armcap_P-. +.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by " +.align 2 +.type sha256_block_armv8,%function +.align 6 +sha256_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adr x3,K256 + +.Loop_hw: + ld1 {v4.16b-v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret +.size sha256_block_armv8,.-sha256_block_armv8 +.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.pl b/app/openssl/crypto/sha/asm/sha512-armv4.pl index 7faf37b1..71aa9356 100644 --- a/app/openssl/crypto/sha/asm/sha512-armv4.pl +++ b/app/openssl/crypto/sha/asm/sha512-armv4.pl @@ -565,7 +565,7 @@ $code.=<<___; bne .Loop_neon vldmia sp!,{d8-d15} @ epilogue - bx lr + ret @ bx lr #endif ___ } @@ -578,5 +578,6 @@ ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +$code =~ s/\bret\b/bx lr/gm; print $code; close STDOUT; # enforce flush diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.s b/app/openssl/crypto/sha/asm/sha512-armv4.s index 57301922..fd462771 100644 --- a/app/openssl/crypto/sha/asm/sha512-armv4.s +++ b/app/openssl/crypto/sha/asm/sha512-armv4.s @@ -1775,7 +1775,7 @@ sha512_block_data_order: bne .Loop_neon vldmia sp!,{d8-d15} @ epilogue - .word 0xe12fff1e + bx lr @ .word 0xe12fff1e #endif .size sha512_block_data_order,.-sha512_block_data_order .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " diff --git a/app/openssl/crypto/sha/asm/sha512-armv8.S b/app/openssl/crypto/sha/asm/sha512-armv8.S new file mode 100644 index 00000000..6b0d1940 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha512-armv8.S @@ -0,0 +1,1021 @@ +#include "arm_arch.h" + +.text + +.globl sha512_block_data_order +.type sha512_block_data_order,%function +.align 6 +sha512_block_data_order: + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*8 + + ldp x20,x21,[x0] // load context + ldp x22,x23,[x0,#2*8] + ldp x24,x25,[x0,#4*8] + add x2,x1,x2,lsl#7 // end of input + ldp x26,x27,[x0,#6*8] + adr x30,K512 + stp x0,x2,[x29,#96] + +.Loop: + ldp x3,x4,[x1],#2*8 + ldr x19,[x30],#8 // *K++ + eor x28,x21,x22 // magic seed + str x1,[x29,#112] +#ifndef __ARMEB__ + rev x3,x3 // 0 +#endif + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x6,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x3 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x4,x4 // 1 +#endif + ldp x5,x6,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x7,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x4 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x5,x5 // 2 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x8,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x5 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x6,x6 // 3 +#endif + ldp x7,x8,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x9,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x6 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x7,x7 // 4 +#endif + add x24,x24,x17 // h+=Sigma0(a) + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x10,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x7 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x10,ror#18 // Sigma1(e) + ror x10,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x10,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x8,x8 // 5 +#endif + ldp x9,x10,[x1],#2*8 + add x23,x23,x17 // h+=Sigma0(a) + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x11,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x8 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x11,ror#18 // Sigma1(e) + ror x11,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x11,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x9,x9 // 6 +#endif + add x22,x22,x17 // h+=Sigma0(a) + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x12,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x9 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x12,ror#18 // Sigma1(e) + ror x12,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x12,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x10,x10 // 7 +#endif + ldp x11,x12,[x1],#2*8 + add x21,x21,x17 // h+=Sigma0(a) + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + eor x13,x25,x25,ror#23 + and x17,x26,x25 + bic x28,x27,x25 + add x20,x20,x10 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x13,ror#18 // Sigma1(e) + ror x13,x21,#28 + add x20,x20,x17 // h+=Ch(e,f,g) + eor x17,x21,x21,ror#5 + add x20,x20,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x24,x24,x20 // d+=h + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x13,x17,ror#34 // Sigma0(a) + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x20,x20,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x11,x11 // 8 +#endif + add x20,x20,x17 // h+=Sigma0(a) + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x14,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x11 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x14,ror#18 // Sigma1(e) + ror x14,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x14,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x12,x12 // 9 +#endif + ldp x13,x14,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x15,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x12 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x15,ror#18 // Sigma1(e) + ror x15,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x15,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x13,x13 // 10 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x0,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x13 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x0,ror#18 // Sigma1(e) + ror x0,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x0,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x14,x14 // 11 +#endif + ldp x15,x0,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x6,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x14 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x15,x15 // 12 +#endif + add x24,x24,x17 // h+=Sigma0(a) + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x7,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x15 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x0,x0 // 13 +#endif + ldp x1,x2,[x1] + add x23,x23,x17 // h+=Sigma0(a) + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x8,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x0 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x1,x1 // 14 +#endif + ldr x6,[sp,#24] + add x22,x22,x17 // h+=Sigma0(a) + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x9,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x1 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x2,x2 // 15 +#endif + ldr x7,[sp,#0] + add x21,x21,x17 // h+=Sigma0(a) + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 +.Loop_16_xx: + ldr x8,[sp,#8] + str x11,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x10,x5,#1 + and x17,x25,x24 + ror x9,x2,#19 + bic x19,x26,x24 + ror x11,x20,#28 + add x27,x27,x3 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x10,x10,x5,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x11,x11,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x9,x9,x2,ror#61 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x11,x20,ror#39 // Sigma0(a) + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) + add x4,x4,x13 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x4,x4,x10 + add x27,x27,x17 // h+=Sigma0(a) + add x4,x4,x9 + ldr x9,[sp,#16] + str x12,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x11,x6,#1 + and x17,x24,x23 + ror x10,x3,#19 + bic x28,x25,x23 + ror x12,x27,#28 + add x26,x26,x4 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x11,x11,x6,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x12,x12,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x10,x10,x3,ror#61 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x12,x27,ror#39 // Sigma0(a) + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) + add x5,x5,x14 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x5,x5,x11 + add x26,x26,x17 // h+=Sigma0(a) + add x5,x5,x10 + ldr x10,[sp,#24] + str x13,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x12,x7,#1 + and x17,x23,x22 + ror x11,x4,#19 + bic x19,x24,x22 + ror x13,x26,#28 + add x25,x25,x5 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x12,x12,x7,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x13,x13,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x11,x11,x4,ror#61 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x13,x26,ror#39 // Sigma0(a) + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) + add x6,x6,x15 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x6,x6,x12 + add x25,x25,x17 // h+=Sigma0(a) + add x6,x6,x11 + ldr x11,[sp,#0] + str x14,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x13,x8,#1 + and x17,x22,x21 + ror x12,x5,#19 + bic x28,x23,x21 + ror x14,x25,#28 + add x24,x24,x6 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x13,x13,x8,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x14,x14,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x12,x12,x5,ror#61 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x14,x25,ror#39 // Sigma0(a) + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) + add x7,x7,x0 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x7,x7,x13 + add x24,x24,x17 // h+=Sigma0(a) + add x7,x7,x12 + ldr x12,[sp,#8] + str x15,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x14,x9,#1 + and x17,x21,x20 + ror x13,x6,#19 + bic x19,x22,x20 + ror x15,x24,#28 + add x23,x23,x7 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x14,x14,x9,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x15,x15,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x13,x13,x6,ror#61 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x15,x24,ror#39 // Sigma0(a) + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) + add x8,x8,x1 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x8,x8,x14 + add x23,x23,x17 // h+=Sigma0(a) + add x8,x8,x13 + ldr x13,[sp,#16] + str x0,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x15,x10,#1 + and x17,x20,x27 + ror x14,x7,#19 + bic x28,x21,x27 + ror x0,x23,#28 + add x22,x22,x8 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x15,x15,x10,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x0,x0,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x14,x14,x7,ror#61 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x0,x23,ror#39 // Sigma0(a) + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) + add x9,x9,x2 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x9,x9,x15 + add x22,x22,x17 // h+=Sigma0(a) + add x9,x9,x14 + ldr x14,[sp,#24] + str x1,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x0,x11,#1 + and x17,x27,x26 + ror x15,x8,#19 + bic x19,x20,x26 + ror x1,x22,#28 + add x21,x21,x9 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x0,x0,x11,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x1,x1,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x15,x15,x8,ror#61 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x1,x22,ror#39 // Sigma0(a) + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) + add x10,x10,x3 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x10,x10,x0 + add x21,x21,x17 // h+=Sigma0(a) + add x10,x10,x15 + ldr x15,[sp,#0] + str x2,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x1,x12,#1 + and x17,x26,x25 + ror x0,x9,#19 + bic x28,x27,x25 + ror x2,x21,#28 + add x20,x20,x10 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x1,x1,x12,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x2,x2,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x0,x0,x9,ror#61 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x2,x21,ror#39 // Sigma0(a) + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) + add x11,x11,x4 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x11,x11,x1 + add x20,x20,x17 // h+=Sigma0(a) + add x11,x11,x0 + ldr x0,[sp,#8] + str x3,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x2,x13,#1 + and x17,x25,x24 + ror x1,x10,#19 + bic x19,x26,x24 + ror x3,x20,#28 + add x27,x27,x11 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x2,x2,x13,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x3,x3,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x1,x1,x10,ror#61 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x3,x20,ror#39 // Sigma0(a) + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) + add x12,x12,x5 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x12,x12,x2 + add x27,x27,x17 // h+=Sigma0(a) + add x12,x12,x1 + ldr x1,[sp,#16] + str x4,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x3,x14,#1 + and x17,x24,x23 + ror x2,x11,#19 + bic x28,x25,x23 + ror x4,x27,#28 + add x26,x26,x12 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x3,x3,x14,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x4,x4,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x2,x2,x11,ror#61 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x4,x27,ror#39 // Sigma0(a) + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) + add x13,x13,x6 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x13,x13,x3 + add x26,x26,x17 // h+=Sigma0(a) + add x13,x13,x2 + ldr x2,[sp,#24] + str x5,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x4,x15,#1 + and x17,x23,x22 + ror x3,x12,#19 + bic x19,x24,x22 + ror x5,x26,#28 + add x25,x25,x13 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x4,x4,x15,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x5,x5,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x3,x3,x12,ror#61 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x5,x26,ror#39 // Sigma0(a) + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) + add x14,x14,x7 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x14,x14,x4 + add x25,x25,x17 // h+=Sigma0(a) + add x14,x14,x3 + ldr x3,[sp,#0] + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x5,x0,#1 + and x17,x22,x21 + ror x4,x13,#19 + bic x28,x23,x21 + ror x6,x25,#28 + add x24,x24,x14 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x5,x5,x0,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x6,x6,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x4,x4,x13,ror#61 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x25,ror#39 // Sigma0(a) + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) + add x15,x15,x8 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x15,x15,x5 + add x24,x24,x17 // h+=Sigma0(a) + add x15,x15,x4 + ldr x4,[sp,#8] + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x6,x1,#1 + and x17,x21,x20 + ror x5,x14,#19 + bic x19,x22,x20 + ror x7,x24,#28 + add x23,x23,x15 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x6,x6,x1,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x7,x7,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x5,x5,x14,ror#61 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x24,ror#39 // Sigma0(a) + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) + add x0,x0,x9 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x0,x0,x6 + add x23,x23,x17 // h+=Sigma0(a) + add x0,x0,x5 + ldr x5,[sp,#16] + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x7,x2,#1 + and x17,x20,x27 + ror x6,x15,#19 + bic x28,x21,x27 + ror x8,x23,#28 + add x22,x22,x0 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x7,x7,x2,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x8,x8,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x6,x6,x15,ror#61 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x23,ror#39 // Sigma0(a) + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) + add x1,x1,x10 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x1,x1,x7 + add x22,x22,x17 // h+=Sigma0(a) + add x1,x1,x6 + ldr x6,[sp,#24] + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x8,x3,#1 + and x17,x27,x26 + ror x7,x0,#19 + bic x19,x20,x26 + ror x9,x22,#28 + add x21,x21,x1 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x8,x8,x3,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x9,x9,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x7,x7,x0,ror#61 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x22,ror#39 // Sigma0(a) + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) + add x2,x2,x11 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x2,x2,x8 + add x21,x21,x17 // h+=Sigma0(a) + add x2,x2,x7 + ldr x7,[sp,#0] + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 + cbnz x19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#648 // rewind + + ldp x3,x4,[x0] + ldp x5,x6,[x0,#2*8] + add x1,x1,#14*8 // advance input pointer + ldp x7,x8,[x0,#4*8] + add x20,x20,x3 + ldp x9,x10,[x0,#6*8] + add x21,x21,x4 + add x22,x22,x5 + add x23,x23,x6 + stp x20,x21,[x0] + add x24,x24,x7 + add x25,x25,x8 + stp x22,x23,[x0,#2*8] + add x26,x26,x9 + add x27,x27,x10 + cmp x1,x2 + stp x24,x25,[x0,#4*8] + stp x26,x27,[x0,#6*8] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*8 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size sha512_block_data_order,.-sha512_block_data_order + +.align 6 +.type K512,%object +K512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + .quad 0 // terminator +.size K512,.-K512 +.align 3 +.LOPENSSL_armcap_P: + .quad OPENSSL_armcap_P-. +.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by " +.align 2 +.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha512-armv8.pl b/app/openssl/crypto/sha/asm/sha512-armv8.pl new file mode 100644 index 00000000..6935ed65 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha512-armv8.pl @@ -0,0 +1,414 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA256/512 for ARMv8. +# +# Performance in cycles per processed byte and improvement coefficient +# over code generated with "default" compiler: +# +# SHA256-hw SHA256(*) SHA512 +# Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +# Cortex-A5x n/a n/a n/a +# +# (*) Software SHA256 results are of lesser relevance, presented +# mostly for informational purposes. +# (**) The result is a trade-off: it's possible to improve it by +# 10%, but at the cost of 20% loss on Cortex-A5x. + +$flavour=shift; +$output=shift; +open STDOUT,">$output"; + +if ($output =~ /512/) { + $BITS=512; + $SZ=8; + @Sigma0=(28,34,39); + @Sigma1=(14,18,41); + @sigma0=(1, 8, 7); + @sigma1=(19,61, 6); + $rounds=80; + $reg_t="x"; +} else { + $BITS=256; + $SZ=4; + @Sigma0=( 2,13,22); + @Sigma1=( 6,11,25); + @sigma0=( 7,18, 3); + @sigma1=(17,19,10); + $rounds=64; + $reg_t="w"; +} + +$func="sha${BITS}_block_data_order"; + +($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30)); + +@X=map("$reg_t$_",(3..15,0..2)); +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27)); +($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28)); + +sub BODY_00_xx { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +my $j=($i+1)&15; +my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]); + $T0=@X[$i+3] if ($i<11); + +$code.=<<___ if ($i<16); +#ifndef __ARMEB__ + rev @X[$i],@X[$i] // $i +#endif +___ +$code.=<<___ if ($i<13 && ($i&1)); + ldp @X[$i+1],@X[$i+2],[$inp],#2*$SZ +___ +$code.=<<___ if ($i==13); + ldp @X[14],@X[15],[$inp] +___ +$code.=<<___ if ($i>=14); + ldr @X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`] +___ +$code.=<<___ if ($i>0 && $i<16); + add $a,$a,$t1 // h+=Sigma0(a) +___ +$code.=<<___ if ($i>=11); + str @X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`] +___ +# While ARMv8 specifies merged rotate-n-logical operation such as +# 'eor x,y,z,ror#n', it was found to negatively affect performance +# on Apple A7. The reason seems to be that it requires even 'y' to +# be available earlier. This means that such merged instruction is +# not necessarily best choice on critical path... On the other hand +# Cortex-A5x handles merged instructions much better than disjoint +# rotate and logical... See (**) footnote above. +$code.=<<___ if ($i<15); + ror $t0,$e,#$Sigma1[0] + add $h,$h,$t2 // h+=K[i] + eor $T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]` + and $t1,$f,$e + bic $t2,$g,$e + add $h,$h,@X[$i&15] // h+=X[i] + orr $t1,$t1,$t2 // Ch(e,f,g) + eor $t2,$a,$b // a^b, b^c in next round + eor $t0,$t0,$T0,ror#$Sigma1[1] // Sigma1(e) + ror $T0,$a,#$Sigma0[0] + add $h,$h,$t1 // h+=Ch(e,f,g) + eor $t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]` + add $h,$h,$t0 // h+=Sigma1(e) + and $t3,$t3,$t2 // (b^c)&=(a^b) + add $d,$d,$h // d+=h + eor $t3,$t3,$b // Maj(a,b,c) + eor $t1,$T0,$t1,ror#$Sigma0[1] // Sigma0(a) + add $h,$h,$t3 // h+=Maj(a,b,c) + ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round + //add $h,$h,$t1 // h+=Sigma0(a) +___ +$code.=<<___ if ($i>=15); + ror $t0,$e,#$Sigma1[0] + add $h,$h,$t2 // h+=K[i] + ror $T1,@X[($j+1)&15],#$sigma0[0] + and $t1,$f,$e + ror $T2,@X[($j+14)&15],#$sigma1[0] + bic $t2,$g,$e + ror $T0,$a,#$Sigma0[0] + add $h,$h,@X[$i&15] // h+=X[i] + eor $t0,$t0,$e,ror#$Sigma1[1] + eor $T1,$T1,@X[($j+1)&15],ror#$sigma0[1] + orr $t1,$t1,$t2 // Ch(e,f,g) + eor $t2,$a,$b // a^b, b^c in next round + eor $t0,$t0,$e,ror#$Sigma1[2] // Sigma1(e) + eor $T0,$T0,$a,ror#$Sigma0[1] + add $h,$h,$t1 // h+=Ch(e,f,g) + and $t3,$t3,$t2 // (b^c)&=(a^b) + eor $T2,$T2,@X[($j+14)&15],ror#$sigma1[1] + eor $T1,$T1,@X[($j+1)&15],lsr#$sigma0[2] // sigma0(X[i+1]) + add $h,$h,$t0 // h+=Sigma1(e) + eor $t3,$t3,$b // Maj(a,b,c) + eor $t1,$T0,$a,ror#$Sigma0[2] // Sigma0(a) + eor $T2,$T2,@X[($j+14)&15],lsr#$sigma1[2] // sigma1(X[i+14]) + add @X[$j],@X[$j],@X[($j+9)&15] + add $d,$d,$h // d+=h + add $h,$h,$t3 // h+=Maj(a,b,c) + ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round + add @X[$j],@X[$j],$T1 + add $h,$h,$t1 // h+=Sigma0(a) + add @X[$j],@X[$j],$T2 +___ + ($t2,$t3)=($t3,$t2); +} + +$code.=<<___; +#include "arm_arch.h" + +.text + +.globl $func +.type $func,%function +.align 6 +$func: +___ +$code.=<<___ if ($SZ==4); + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA256 + b.ne .Lv8_entry +___ +$code.=<<___; + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*$SZ + + ldp $A,$B,[$ctx] // load context + ldp $C,$D,[$ctx,#2*$SZ] + ldp $E,$F,[$ctx,#4*$SZ] + add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input + ldp $G,$H,[$ctx,#6*$SZ] + adr $Ktbl,K$BITS + stp $ctx,$num,[x29,#96] + +.Loop: + ldp @X[0],@X[1],[$inp],#2*$SZ + ldr $t2,[$Ktbl],#$SZ // *K++ + eor $t3,$B,$C // magic seed + str $inp,[x29,#112] +___ +for ($i=0;$i<16;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } +$code.=".Loop_16_xx:\n"; +for (;$i<32;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + cbnz $t2,.Loop_16_xx + + ldp $ctx,$num,[x29,#96] + ldr $inp,[x29,#112] + sub $Ktbl,$Ktbl,#`$SZ*($rounds+1)` // rewind + + ldp @X[0],@X[1],[$ctx] + ldp @X[2],@X[3],[$ctx,#2*$SZ] + add $inp,$inp,#14*$SZ // advance input pointer + ldp @X[4],@X[5],[$ctx,#4*$SZ] + add $A,$A,@X[0] + ldp @X[6],@X[7],[$ctx,#6*$SZ] + add $B,$B,@X[1] + add $C,$C,@X[2] + add $D,$D,@X[3] + stp $A,$B,[$ctx] + add $E,$E,@X[4] + add $F,$F,@X[5] + stp $C,$D,[$ctx,#2*$SZ] + add $G,$G,@X[6] + add $H,$H,@X[7] + cmp $inp,$num + stp $E,$F,[$ctx,#4*$SZ] + stp $G,$H,[$ctx,#6*$SZ] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*$SZ + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size $func,.-$func + +.align 6 +.type K$BITS,%object +K$BITS: +___ +$code.=<<___ if ($SZ==8); + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + .quad 0 // terminator +___ +$code.=<<___ if ($SZ==4); + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0 //terminator +___ +$code.=<<___; +.size K$BITS,.-K$BITS +.align 3 +.LOPENSSL_armcap_P: + .quad OPENSSL_armcap_P-. +.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by " +.align 2 +___ + +if ($SZ==4) { +my $Ktbl="x3"; + +my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2)); +my @MSG=map("v$_.16b",(4..7)); +my ($W0,$W1)=("v16.4s","v17.4s"); +my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b"); + +$code.=<<___; +.type sha256_block_armv8,%function +.align 6 +sha256_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1.32 {$ABCD,$EFGH},[$ctx] + adr $Ktbl,K256 + +.Loop_hw: + ld1 {@MSG[0]-@MSG[3]},[$inp],#64 + sub $num,$num,#1 + ld1.32 {$W0},[$Ktbl],#16 + rev32 @MSG[0],@MSG[0] + rev32 @MSG[1],@MSG[1] + rev32 @MSG[2],@MSG[2] + rev32 @MSG[3],@MSG[3] + orr $ABCD_SAVE,$ABCD,$ABCD // offload + orr $EFGH_SAVE,$EFGH,$EFGH +___ +for($i=0;$i<12;$i++) { +$code.=<<___; + ld1.32 {$W1},[$Ktbl],#16 + add.i32 $W0,$W0,@MSG[0] + sha256su0 @MSG[0],@MSG[1] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + sha256su1 @MSG[0],@MSG[2],@MSG[3] +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); +} +$code.=<<___; + ld1.32 {$W1},[$Ktbl],#16 + add.i32 $W0,$W0,@MSG[0] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + ld1.32 {$W0},[$Ktbl],#16 + add.i32 $W1,$W1,@MSG[1] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + ld1.32 {$W1},[$Ktbl] + add.i32 $W0,$W0,@MSG[2] + sub $Ktbl,$Ktbl,#$rounds*$SZ-16 // rewind + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + add.i32 $W1,$W1,@MSG[3] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + add.i32 $ABCD,$ABCD,$ABCD_SAVE + add.i32 $EFGH,$EFGH,$EFGH_SAVE + + cbnz $num,.Loop_hw + + st1.32 {$ABCD,$EFGH},[$ctx] + + ldr x29,[sp],#16 + ret +.size sha256_block_armv8,.-sha256_block_armv8 +___ +} + +$code.=<<___; +.comm OPENSSL_armcap_P,4,4 +___ + +{ my %opcode = ( + "sha256h" => 0x5e004000, "sha256h2" => 0x5e005000, + "sha256su0" => 0x5e282800, "sha256su1" => 0x5e006000 ); + + sub unsha256 { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o + && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5)|($3<<16), + $mnemonic,$arg; + } +} + +foreach(split("\n",$code)) { + + s/\`([^\`]*)\`/eval($1)/geo; + + s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo; + + s/\.\w?32\b//o and s/\.16b/\.4s/go; + m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go; + + print $_,"\n"; +} + +close STDOUT; diff --git a/app/openssl/crypto/srp/srp_vfy.c b/app/openssl/crypto/srp/srp_vfy.c index 4a3d13ed..fdca19ff 100644 --- a/app/openssl/crypto/srp/srp_vfy.c +++ b/app/openssl/crypto/srp/srp_vfy.c @@ -93,6 +93,9 @@ static int t_fromb64(unsigned char *a, const char *src) else a[i] = loc - b64table; ++i; } + /* if nothing valid to process we have a zero length response */ + if (i == 0) + return 0; size = i; i = size - 1; j = size; diff --git a/app/openssl/crypto/x509v3/v3_purp.c b/app/openssl/crypto/x509v3/v3_purp.c index ad688657..f59bfc18 100644 --- a/app/openssl/crypto/x509v3/v3_purp.c +++ b/app/openssl/crypto/x509v3/v3_purp.c @@ -389,8 +389,8 @@ static void x509v3_cache_extensions(X509 *x) /* Handle proxy certificates */ if((pci=X509_get_ext_d2i(x, NID_proxyCertInfo, NULL, NULL))) { if (x->ex_flags & EXFLAG_CA - || X509_get_ext_by_NID(x, NID_subject_alt_name, 0) >= 0 - || X509_get_ext_by_NID(x, NID_issuer_alt_name, 0) >= 0) { + || X509_get_ext_by_NID(x, NID_subject_alt_name, -1) >= 0 + || X509_get_ext_by_NID(x, NID_issuer_alt_name, -1) >= 0) { x->ex_flags |= EXFLAG_INVALID; } if (pci->pcPathLengthConstraint) { @@ -670,7 +670,7 @@ static int check_purpose_timestamp_sign(const X509_PURPOSE *xp, const X509 *x, return 0; /* Extended Key Usage MUST be critical */ - i_ext = X509_get_ext_by_NID((X509 *) x, NID_ext_key_usage, 0); + i_ext = X509_get_ext_by_NID((X509 *) x, NID_ext_key_usage, -1); if (i_ext >= 0) { X509_EXTENSION *ext = X509_get_ext((X509 *) x, i_ext); diff --git a/app/openssl/import_openssl.sh b/app/openssl/import_openssl.sh index 02d2ab1c..f16596bc 100755 --- a/app/openssl/import_openssl.sh +++ b/app/openssl/import_openssl.sh @@ -128,7 +128,16 @@ function default_asm_file () { function gen_asm_arm () { local OUT OUT=$(default_asm_file "$@") - $PERL_EXE "$1" > "$OUT" + $PERL_EXE "$1" void "$OUT" > "$OUT" +} + +# Generate an ARMv8 64-bit assembly file. +# $1: generator (perl script) +# $2: [optional] output file name +function gen_asm_arm64 () { + local OUT + OUT=$(default_asm_file "$@") + $PERL_EXE "$1" linux64 "$OUT" > "$OUT" } function gen_asm_mips () { @@ -177,6 +186,54 @@ function print_autogenerated_header() { echo "#" } +function run_verbose() { + echo Running: $@ + $@ +} + +function scan_opensslconf_for_flags() { + for flag in "$@"; do + awk "/^#define ${flag}$/ { print \$2 }" crypto/opensslconf.h + done +} + +CRYPTO_CONF_FLAGS=( +OPENSSL_CPUID_OBJ +DES_LONG +DES_PTR +DES_RISC1 +DES_RISC2 +DES_UNROLL +RC4_INT +RC4_CHUNK +RC4_INDEX +) + +function check_asm_flags() { + local arch="$1" + local target="$2" + local unsorted_flags + local expected_flags + local actual_flags + local defines="OPENSSL_CRYPTO_DEFINES_$arch" + + PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS $target + + unsorted_flags="$(awk '/^CFLAG=/ { sub(/^CFLAG= .*-Wall /, ""); gsub(/-D/, ""); print; }' Makefile)" + unsorted_flags="$unsorted_flags $(scan_opensslconf_for_flags "${CRYPTO_CONF_FLAGS[@]}")" + + expected_flags="$(echo $unsorted_flags | tr ' ' '\n' | sort | tr '\n' ' ')" + actual_flags="$(echo ${!defines} | tr ' ' '\n' | sort | tr '\n' ' ')" + + if [[ $actual_flags != $expected_flags ]]; then + echo ${defines} is wrong! + echo " $actual_flags" + echo Please update to: + echo " $expected_flags" + exit 1 + fi +} + # Run Configure and generate headers # $1: 32 for 32-bit arch, 64 for 64-bit arch, trusty for Trusty # $2: 1 if building for static version @@ -192,9 +249,9 @@ function generate_build_config_headers() { fi if [[ $1 == trusty ]] ; then - PERL=/usr/bin/perl ./Configure $CONFIGURE_ARGS_TRUSTY + PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS_TRUSTY else - PERL=/usr/bin/perl ./Configure $CONFIGURE_ARGS ${!configure_args_bits} ${!configure_args_stat} + PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS ${!configure_args_bits} ${!configure_args_stat} fi rm -f apps/CA.pl.bak crypto/opensslconf.h.bak @@ -424,8 +481,16 @@ function import() { declare -r OPENSSL_SOURCE=$1 untar $OPENSSL_SOURCE readonly applypatches $OPENSSL_DIR + convert_iso8859_to_utf8 $OPENSSL_DIR cd $OPENSSL_DIR + + # Check the ASM flags for each arch + check_asm_flags arm linux-armv4 + check_asm_flags arm64 linux-aarch64 + check_asm_flags x86 linux-elf + check_asm_flags x86_64 linux-x86_64 + generate_build_config_mk generate_opensslconf_h @@ -443,14 +508,23 @@ function import() { # Generate arm asm gen_asm_arm crypto/aes/asm/aes-armv4.pl + gen_asm_arm crypto/aes/asm/aesv8-armx.pl gen_asm_arm crypto/aes/asm/bsaes-armv7.pl gen_asm_arm crypto/bn/asm/armv4-gf2m.pl gen_asm_arm crypto/bn/asm/armv4-mont.pl gen_asm_arm crypto/modes/asm/ghash-armv4.pl + gen_asm_arm crypto/modes/asm/ghashv8-armx.pl gen_asm_arm crypto/sha/asm/sha1-armv4-large.pl gen_asm_arm crypto/sha/asm/sha256-armv4.pl gen_asm_arm crypto/sha/asm/sha512-armv4.pl + # Generate armv8 asm + gen_asm_arm64 crypto/aes/asm/aesv8-armx.pl crypto/aes/asm/aesv8-armx-64.S + gen_asm_arm64 crypto/modes/asm/ghashv8-armx.pl crypto/modes/asm/ghashv8-armx-64.S + gen_asm_arm64 crypto/sha/asm/sha1-armv8.pl + gen_asm_arm64 crypto/sha/asm/sha512-armv8.pl crypto/sha/asm/sha256-armv8.S + gen_asm_arm64 crypto/sha/asm/sha512-armv8.pl + # Generate mips asm gen_asm_mips crypto/aes/asm/aes-mips.pl gen_asm_mips crypto/bn/asm/mips.pl crypto/bn/asm/bn-mips.S @@ -585,7 +659,6 @@ function untar() { # Process new source tar -zxf $OPENSSL_SOURCE - convert_iso8859_to_utf8 $OPENSSL_DIR cp -RfP $OPENSSL_DIR $OPENSSL_DIR_ORIG if [ ! -z $readonly ]; then find $OPENSSL_DIR_ORIG -type f -print0 | xargs -0 chmod a-w @@ -610,12 +683,13 @@ function applypatches () { cd $dir # Apply appropriate patches - for i in $OPENSSL_PATCHES; do - if [ ! "$skip_patch" = "patches/$i" ]; then + patches=(../patches/[0-9][0-9][0-9][0-9]-*.patch) + for i in "${patches[@]}"; do + if [[ $skip_patch != ${i##*/} ]]; then echo "Applying patch $i" - patch -p1 < ../patches/$i || die "Could not apply patches/$i. Fix source and run: $0 regenerate patches/$i" + patch -p1 < $i || die "Could not apply $i. Fix source and run: $0 regenerate patches/${i##*/}" else - echo "Skiping patch $i" + echo "Skiping patch ${i##*/}" fi done diff --git a/app/openssl/include/openssl/bio.h b/app/openssl/include/openssl/bio.h index 05699ab2..d05fa22a 100644 --- a/app/openssl/include/openssl/bio.h +++ b/app/openssl/include/openssl/bio.h @@ -266,6 +266,9 @@ void BIO_clear_flags(BIO *b, int flags); #define BIO_RR_CONNECT 0x02 /* Returned from the accept BIO when an accept would have blocked */ #define BIO_RR_ACCEPT 0x03 +/* Returned from the SSL bio when the channel id retrieval code cannot find the + * private key. */ +#define BIO_RR_SSL_CHANNEL_ID_LOOKUP 0x04 /* These are passed by the BIO callback */ #define BIO_CB_FREE 0x01 diff --git a/app/openssl/include/openssl/opensslconf-32.h b/app/openssl/include/openssl/opensslconf-32.h index d6625489..caf6f1b8 100644 --- a/app/openssl/include/openssl/opensslconf-32.h +++ b/app/openssl/include/openssl/opensslconf-32.h @@ -53,6 +53,9 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -137,6 +140,9 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/include/openssl/opensslconf-64.h b/app/openssl/include/openssl/opensslconf-64.h index 70c5a2cb..88fb0419 100644 --- a/app/openssl/include/openssl/opensslconf-64.h +++ b/app/openssl/include/openssl/opensslconf-64.h @@ -53,6 +53,9 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -137,6 +140,9 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/include/openssl/opensslconf-static-32.h b/app/openssl/include/openssl/opensslconf-static-32.h index d6625489..caf6f1b8 100644 --- a/app/openssl/include/openssl/opensslconf-static-32.h +++ b/app/openssl/include/openssl/opensslconf-static-32.h @@ -53,6 +53,9 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -137,6 +140,9 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/include/openssl/opensslconf-static-64.h b/app/openssl/include/openssl/opensslconf-static-64.h index 70c5a2cb..88fb0419 100644 --- a/app/openssl/include/openssl/opensslconf-static-64.h +++ b/app/openssl/include/openssl/opensslconf-static-64.h @@ -53,6 +53,9 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -137,6 +140,9 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/include/openssl/opensslv.h b/app/openssl/include/openssl/opensslv.h index ebe71807..c3b6acec 100644 --- a/app/openssl/include/openssl/opensslv.h +++ b/app/openssl/include/openssl/opensslv.h @@ -25,11 +25,11 @@ * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -#define OPENSSL_VERSION_NUMBER 0x1000107fL +#define OPENSSL_VERSION_NUMBER 0x1000108fL #ifdef OPENSSL_FIPS -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g-fips 7 Apr 2014" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h-fips 5 Jun 2014" #else -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g 7 Apr 2014" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h 5 Jun 2014" #endif #define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/app/openssl/include/openssl/pkcs7.h b/app/openssl/include/openssl/pkcs7.h index e4d44319..04f60379 100644 --- a/app/openssl/include/openssl/pkcs7.h +++ b/app/openssl/include/openssl/pkcs7.h @@ -453,6 +453,7 @@ void ERR_load_PKCS7_strings(void); #define PKCS7_R_ERROR_SETTING_CIPHER 121 #define PKCS7_R_INVALID_MIME_TYPE 131 #define PKCS7_R_INVALID_NULL_POINTER 143 +#define PKCS7_R_INVALID_SIGNED_DATA_TYPE 155 #define PKCS7_R_MIME_NO_CONTENT_TYPE 132 #define PKCS7_R_MIME_PARSE_ERROR 133 #define PKCS7_R_MIME_SIG_PARSE_ERROR 134 diff --git a/app/openssl/include/openssl/ssl.h b/app/openssl/include/openssl/ssl.h index 54b0eb6c..a85841b3 100644 --- a/app/openssl/include/openssl/ssl.h +++ b/app/openssl/include/openssl/ssl.h @@ -544,6 +544,13 @@ struct ssl_session_st #ifndef OPENSSL_NO_SRP char *srp_username; #endif + + /* original_handshake_hash contains the handshake hash (either + * SHA-1+MD5 or SHA-2, depending on TLS version) for the original, full + * handshake that created a session. This is used by Channel IDs during + * resumption. */ + unsigned char original_handshake_hash[EVP_MAX_MD_SIZE]; + unsigned int original_handshake_hash_len; }; #endif @@ -553,7 +560,7 @@ struct ssl_session_st /* Allow initial connection to servers that don't support RI */ #define SSL_OP_LEGACY_SERVER_CONNECT 0x00000004L #define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L -#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x00000010L +#define SSL_OP_TLSEXT_PADDING 0x00000010L #define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x00000020L #define SSL_OP_SAFARI_ECDHE_ECDSA_BUG 0x00000040L #define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x00000080L @@ -562,6 +569,8 @@ struct ssl_session_st /* Hasn't done anything since OpenSSL 0.9.7h, retained for compatibility */ #define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0 +/* Refers to ancient SSLREF and SSLv2, retained for compatibility */ +#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x0 /* SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS is vestigial. Previously it disabled the * insertion of empty records in CBC mode, but the empty records were commonly @@ -648,12 +657,14 @@ struct ssl_session_st * TLS only.) "Released" buffers are put onto a free-list in the context * or just freed (depending on the context's setting for freelist_max_len). */ #define SSL_MODE_RELEASE_BUFFERS 0x00000010L + /* Send the current time in the Random fields of the ClientHello and * ServerHello records for compatibility with hypothetical implementations * that require it. */ #define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L #define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L + /* When set, clients may send application data before receipt of CCS * and Finished. This mode enables full-handshakes to 'complete' in * one RTT. */ @@ -866,6 +877,9 @@ struct ssl_ctx_st /* get client cert callback */ int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey); + /* get channel id callback */ + void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey); + /* cookie generate callback */ int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int *cookie_len); @@ -1028,6 +1042,10 @@ struct ssl_ctx_st /* If true, a client will advertise the Channel ID extension and a * server will echo it. */ char tlsext_channel_id_enabled; + /* tlsext_channel_id_enabled_new is a hack to support both old and new + * ChannelID signatures. It indicates that a client should advertise the + * new ChannelID extension number. */ + char tlsext_channel_id_enabled_new; /* The client's Channel ID private key. */ EVP_PKEY *tlsext_channel_id_private; #endif @@ -1086,6 +1104,8 @@ void SSL_CTX_set_info_callback(SSL_CTX *ctx, void (*cb)(const SSL *ssl,int type, void (*SSL_CTX_get_info_callback(SSL_CTX *ctx))(const SSL *ssl,int type,int val); void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey)); int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL *ssl, X509 **x509, EVP_PKEY **pkey); +void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey)); +void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL *ssl, EVP_PKEY **pkey); #ifndef OPENSSL_NO_ENGINE int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e); #endif @@ -1162,12 +1182,14 @@ const char *SSL_get_psk_identity(const SSL *s); #define SSL_WRITING 2 #define SSL_READING 3 #define SSL_X509_LOOKUP 4 +#define SSL_CHANNEL_ID_LOOKUP 5 /* These will only be used when doing non-blocking IO */ #define SSL_want_nothing(s) (SSL_want(s) == SSL_NOTHING) #define SSL_want_read(s) (SSL_want(s) == SSL_READING) #define SSL_want_write(s) (SSL_want(s) == SSL_WRITING) #define SSL_want_x509_lookup(s) (SSL_want(s) == SSL_X509_LOOKUP) +#define SSL_want_channel_id_lookup(s) (SSL_want(s) == SSL_CHANNEL_ID_LOOKUP) #define SSL_MAC_FLAG_READ_MAC_STREAM 1 #define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 @@ -1602,6 +1624,7 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_ERROR_ZERO_RETURN 6 #define SSL_ERROR_WANT_CONNECT 7 #define SSL_ERROR_WANT_ACCEPT 8 +#define SSL_ERROR_WANT_CHANNEL_ID_LOOKUP 9 #define SSL_CTRL_NEED_TMP_RSA 1 #define SSL_CTRL_SET_TMP_RSA 2 @@ -1739,10 +1762,11 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_set_tmp_ecdh(ssl,ecdh) \ SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh) -/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client - * IDs from clients. Returns 1 on success. */ -#define SSL_enable_tls_channel_id(ctx) \ - SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL) +/* SSL_enable_tls_channel_id either configures a TLS server to accept TLS client + * IDs from clients, or configure a client to send TLS client IDs to server. + * Returns 1 on success. */ +#define SSL_enable_tls_channel_id(s) \ + SSL_ctrl(s,SSL_CTRL_CHANNEL_ID,0,NULL) /* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to * compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on * success. */ @@ -1792,7 +1816,7 @@ int SSL_CIPHER_get_bits(const SSL_CIPHER *c,int *alg_bits); char * SSL_CIPHER_get_version(const SSL_CIPHER *c); const char * SSL_CIPHER_get_name(const SSL_CIPHER *c); unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c); -const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); +const char * SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); int SSL_get_fd(const SSL *s); int SSL_get_rfd(const SSL *s); @@ -2707,7 +2731,6 @@ void ERR_load_SSL_strings(void); #define SSL_R_WRONG_VERSION_NUMBER 267 #define SSL_R_X509_LIB 268 #define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS 269 -#define SSL_R_UNEXPECTED_CCS 388 #ifdef __cplusplus } diff --git a/app/openssl/include/openssl/ssl3.h b/app/openssl/include/openssl/ssl3.h index f205f73d..83d59bff 100644 --- a/app/openssl/include/openssl/ssl3.h +++ b/app/openssl/include/openssl/ssl3.h @@ -388,9 +388,6 @@ typedef struct ssl3_buffer_st #define TLS1_FLAGS_TLS_PADDING_BUG 0x0008 #define TLS1_FLAGS_SKIP_CERT_VERIFY 0x0010 #define TLS1_FLAGS_KEEP_HANDSHAKE 0x0020 -/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at - * this point in the handshake. If this flag is not set then received CCS - * records will cause a fatal error for the connection. */ #define SSL3_FLAGS_CCS_OK 0x0080 /* SSL3_FLAGS_SGC_RESTART_DONE is set when we @@ -558,6 +555,11 @@ typedef struct ssl3_state_st * for Channel IDs and that tlsext_channel_id will be valid after the * handshake. */ char tlsext_channel_id_valid; + /* tlsext_channel_id_new means that the updated Channel ID extension + * was negotiated. This is a temporary hack in the code to support both + * forms of Channel ID extension while we transition to the new format, + * which fixed a security issue. */ + char tlsext_channel_id_new; /* For a server: * If |tlsext_channel_id_valid| is true, then this contains the * verified Channel ID from the client: a P256 point, (x,y), where @@ -678,11 +680,11 @@ typedef struct ssl3_state_st #define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT) -#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT) #ifndef OPENSSL_NO_NEXTPROTONEG #define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT) #define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT) #endif +#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANNEL_ID_A (0x220|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANNEL_ID_B (0x221|SSL_ST_ACCEPT) #define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT) diff --git a/app/openssl/include/openssl/tls1.h b/app/openssl/include/openssl/tls1.h index ec8948d5..b9a0899e 100644 --- a/app/openssl/include/openssl/tls1.h +++ b/app/openssl/include/openssl/tls1.h @@ -259,6 +259,7 @@ extern "C" { /* This is not an IANA defined extension number */ #define TLSEXT_TYPE_channel_id 30031 +#define TLSEXT_TYPE_channel_id_new 30032 /* NameType value from RFC 3546 */ #define TLSEXT_NAMETYPE_host_name 0 @@ -531,9 +532,11 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 -/* ECDHE PSK ciphersuites from RFC 5489 */ -#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256 0x0300C037 -#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384 0x0300C038 +/* ECDHE PSK ciphersuites from RFC5489 + * SHA-2 cipher suites are omitted because they cannot be used safely with + * SSLv3. */ +#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA 0x0300C035 +#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA 0x0300C036 /* XXX * Inconsistency alert: @@ -686,9 +689,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256" #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384" -/* ECDHE PSK ciphersuites from RFC 5489 */ -#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256 "ECDHE-PSK-WITH-AES-128-CBC-SHA256" -#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384 "ECDHE-PSK-WITH-AES-256-CBC-SHA384" +/* ECDHE PSK ciphersuites from RFC5489 */ +#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA "ECDHE-PSK-AES128-CBC-SHA" +#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA "ECDHE-PSK-AES256-CBC-SHA" #define TLS_CT_RSA_SIGN 1 #define TLS_CT_DSS_SIGN 2 diff --git a/app/openssl/openssl.config b/app/openssl/openssl.config index aa028705..867711fe 100644 --- a/app/openssl/openssl.config +++ b/app/openssl/openssl.config @@ -13,6 +13,7 @@ no-md2 \ no-mdc2 \ no-rc5 \ no-rdrand \ +no-ripemd \ no-rfc3779 \ no-rsax \ no-sctp \ @@ -53,6 +54,7 @@ crypto/cast \ crypto/idea \ crypto/md2 \ crypto/rc5 \ +crypto/ripemd \ crypto/seed \ crypto/whrlpool \ demos \ @@ -94,7 +96,6 @@ README.ASN1 \ README.ENGINE \ apps/CA.com \ apps/Makefile \ -apps/Makefile.save \ apps/install-apps.com \ apps/makeapps.com \ apps/openssl-vms.cnf \ @@ -104,14 +105,10 @@ apps/vms_decc_init.c \ config \ crypto/LPdir_vms.c \ crypto/Makefile \ -crypto/Makefile.save \ crypto/aes/Makefile \ -crypto/aes/Makefile.save \ crypto/asn1/Makefile \ -crypto/asn1/Makefile.save \ crypto/bf/INSTALL \ crypto/bf/Makefile \ -crypto/bf/Makefile.save \ crypto/bf/README \ crypto/bf/VERSION \ crypto/bf/asm/readme \ @@ -121,117 +118,77 @@ crypto/bf/bfs.cpp \ crypto/bf/bfspeed.c \ crypto/bf/bftest.c \ crypto/bio/Makefile \ -crypto/bio/Makefile.save \ crypto/bio/bss_rtcp.c \ crypto/bn/Makefile \ -crypto/bn/Makefile.save \ crypto/bn/asm/vms.mar \ crypto/bn/bn_x931p.c \ crypto/bn/vms-helper.c \ crypto/buffer/Makefile \ -crypto/buffer/Makefile.save \ crypto/cmac/Makefile \ -crypto/cmac/Makefile.save \ crypto/cms/Makefile \ -crypto/cms/Makefile.save \ crypto/comp/Makefile \ -crypto/comp/Makefile.save \ crypto/conf/Makefile \ -crypto/conf/Makefile.save \ crypto/crypto-lib.com \ crypto/des/Makefile \ -crypto/des/Makefile.save \ crypto/des/des-lib.com \ crypto/dh/Makefile \ -crypto/dh/Makefile.save \ crypto/dh/dh_prn.c \ crypto/dsa/Makefile \ -crypto/dsa/Makefile.save \ crypto/dso/Makefile \ -crypto/dso/Makefile.save \ crypto/dso/dso_beos.c \ crypto/dso/dso_vms.c \ crypto/dso/dso_win32.c \ crypto/ec/Makefile \ -crypto/ec/Makefile.save \ crypto/ec/ecp_nistp224.c \ crypto/ec/ecp_nistp256.c \ crypto/ec/ecp_nistp521.c \ crypto/ec/ecp_nistputil.c \ crypto/ecdh/Makefile \ -crypto/ecdh/Makefile.save \ crypto/ecdsa/Makefile \ -crypto/ecdsa/Makefile.save \ crypto/engine/Makefile \ -crypto/engine/Makefile.save \ crypto/engine/eng_rdrand.c \ crypto/engine/eng_rsax.c \ crypto/err/Makefile \ -crypto/err/Makefile.save \ crypto/evp/Makefile \ -crypto/evp/Makefile.save \ crypto/evp/evp_fips.c \ crypto/evp/m_md2.c \ crypto/evp/m_sha.c \ crypto/fips_err.h \ crypto/fips_ers.c \ crypto/hmac/Makefile \ -crypto/hmac/Makefile.save \ crypto/install-crypto.com \ crypto/jpake/Makefile \ crypto/krb5/Makefile \ -crypto/krb5/Makefile.save \ crypto/lhash/Makefile \ -crypto/lhash/Makefile.save \ crypto/md4/Makefile \ -crypto/md4/Makefile.save \ crypto/md5/Makefile \ -crypto/md5/Makefile.save \ crypto/mdc2/Makefile \ -crypto/mdc2/Makefile.save \ crypto/modes/Makefile \ -crypto/modes/Makefile.save \ crypto/modes/cts128.c \ crypto/modes/modes.h \ crypto/o_fips.c \ crypto/objects/Makefile \ -crypto/objects/Makefile.save \ crypto/ocsp/Makefile \ -crypto/ocsp/Makefile.save \ crypto/pem/Makefile \ -crypto/pem/Makefile.save \ crypto/pkcs12/Makefile \ -crypto/pkcs12/Makefile.save \ crypto/pkcs7/Makefile \ -crypto/pkcs7/Makefile.save \ crypto/pkcs7/bio_pk7.c \ crypto/ppccap.c \ crypto/pqueue/Makefile \ -crypto/pqueue/Makefile.save \ crypto/rand/Makefile \ -crypto/rand/Makefile.save \ crypto/rand/rand_vms.c \ crypto/rc2/Makefile \ -crypto/rc2/Makefile.save \ crypto/rc4/Makefile \ -crypto/rc4/Makefile.save \ -crypto/ripemd/Makefile \ -crypto/ripemd/Makefile.save \ crypto/rsa/Makefile \ -crypto/rsa/Makefile.save \ crypto/sha/Makefile \ -crypto/sha/Makefile.save \ crypto/sha/sha_one.c \ crypto/srp/Makefile \ -crypto/srp/Makefile.save \ crypto/srp/srptest.c \ crypto/stack/Makefile \ -crypto/stack/Makefile.save \ crypto/store/Makefile \ crypto/threads/pthreads-vms.com \ crypto/threads/win32.bat \ crypto/ts/Makefile \ -crypto/ts/Makefile.save \ crypto/ts/ts.h \ crypto/ts/ts_asn1.c \ crypto/ts/ts_conf.c \ @@ -244,14 +201,10 @@ crypto/ts/ts_rsp_utils.c \ crypto/ts/ts_rsp_verify.c \ crypto/ts/ts_verify_ctx.c \ crypto/txt_db/Makefile \ -crypto/txt_db/Makefile.save \ crypto/ui/Makefile \ -crypto/ui/Makefile.save \ crypto/vms_rms.h crypto/x509/Makefile \ -crypto/x509/Makefile.save \ crypto/x509v3/Makefile \ -crypto/x509v3/Makefile.save \ include/openssl/camellia.h \ include/openssl/cast.h \ include/openssl/idea.h \ @@ -263,11 +216,11 @@ makevms.com \ openssl.doxy \ openssl.spec \ ssl/Makefile \ -ssl/Makefile.save \ +ssl/heartbeat_test.c \ ssl/install-ssl.com \ ssl/ssl-lib.com \ ssl/ssl_task.c \ -" +" NEEDED_SOURCES="\ apps \ @@ -285,19 +238,24 @@ NO_WINDOWS_BRAINDEATH \ " OPENSSL_CRYPTO_DEFINES_arm="\ +AES_ASM \ +BSAES_ASM \ +DES_UNROLL \ +GHASH_ASM \ OPENSSL_BN_ASM_GF2m \ OPENSSL_BN_ASM_MONT \ OPENSSL_CPUID_OBJ \ -GHASH_ASM \ -AES_ASM \ -BSAES_ASM \ SHA1_ASM \ SHA256_ASM \ SHA512_ASM \ " OPENSSL_CRYPTO_DEFINES_arm64="\ -OPENSSL_NO_ASM \ +DES_UNROLL \ +OPENSSL_CPUID_OBJ \ +SHA1_ASM \ +SHA256_ASM \ +SHA512_ASM \ " OPENSSL_CRYPTO_DEFINES_mips="\ @@ -308,39 +266,40 @@ SHA256_ASM \ " OPENSSL_CRYPTO_DEFINES_x86="\ -OPENSSL_IA32_SSE2 \ +AES_ASM \ +DES_PTR \ +DES_RISC1 \ +DES_UNROLL \ +GHASH_ASM \ +MD5_ASM \ OPENSSL_BN_ASM_GF2m \ OPENSSL_BN_ASM_MONT \ OPENSSL_BN_ASM_PART_WORDS \ -AES_ASM \ -VPAES_ASM \ -GHASH_ASM \ +OPENSSL_CPUID_OBJ \ +OPENSSL_IA32_SSE2 \ +RC4_INDEX \ +RMD160_ASM \ SHA1_ASM \ SHA256_ASM \ SHA512_ASM \ -MD5_ASM \ -DES_PTR \ -DES_RISC1 \ -DES_UNROLL \ -OPENSSL_CPUID_OBJ \ +VPAES_ASM \ " OPENSSL_CRYPTO_DEFINES_x86_64="\ -OPENSSL_BN_ASM_GF2m \ -OPENSSL_BN_ASM_MONT \ -OPENSSL_BN_ASM_MONT5 \ AES_ASM \ -VPAES_ASM \ BSAES_ASM \ +DES_UNROLL \ GHASH_ASM \ +MD5_ASM \ +OPENSSL_BN_ASM_GF2m \ +OPENSSL_BN_ASM_MONT \ +OPENSSL_BN_ASM_MONT5 \ +OPENSSL_CPUID_OBJ \ +OPENSSL_IA32_SSE2 \ SHA1_ASM \ SHA256_ASM \ SHA512_ASM \ -MD5_ASM \ -DES_PTR \ -DES_RISC1 \ -DES_UNROLL \ -OPENSSL_CPUID_OBJ \ +VPAES_ASM \ " OPENSSL_CRYPTO_INCLUDES="\ @@ -676,7 +635,6 @@ crypto/evp/m_md4.c \ crypto/evp/m_md5.c \ crypto/evp/m_mdc2.c \ crypto/evp/m_null.c \ -crypto/evp/m_ripemd.c \ crypto/evp/m_sha1.c \ crypto/evp/m_sigver.c \ crypto/evp/m_wp.c \ @@ -775,8 +733,6 @@ crypto/rc2/rc2ofb64.c \ crypto/rc4/rc4_enc.c \ crypto/rc4/rc4_skey.c \ crypto/rc4/rc4_utl.c \ -crypto/ripemd/rmd_dgst.c \ -crypto/ripemd/rmd_one.c \ crypto/rsa/rsa_ameth.c \ crypto/rsa/rsa_asn1.c \ crypto/rsa/rsa_chk.c \ @@ -873,12 +829,14 @@ crypto/x509v3/v3err.c \ OPENSSL_CRYPTO_SOURCES_arm="\ crypto/aes/asm/aes-armv4.S \ +crypto/aes/asm/aesv8-armx.S \ crypto/aes/asm/bsaes-armv7.S \ crypto/armcap.c \ crypto/armv4cpuid.S \ crypto/bn/asm/armv4-gf2m.S \ crypto/bn/asm/armv4-mont.S \ crypto/modes/asm/ghash-armv4.S \ +crypto/modes/asm/ghashv8-armx.S \ crypto/sha/asm/sha1-armv4-large.S \ crypto/sha/asm/sha256-armv4.S \ crypto/sha/asm/sha512-armv4.S \ @@ -890,6 +848,13 @@ crypto/mem_clr.c \ " OPENSSL_CRYPTO_SOURCES_arm64="\ +crypto/armcap.c \ +crypto/arm64cpuid.S \ +crypto/aes/asm/aesv8-armx-64.S \ +crypto/modes/asm/ghashv8-armx-64.S \ +crypto/sha/asm/sha1-armv8.S \ +crypto/sha/asm/sha256-armv8.S \ +crypto/sha/asm/sha512-armv8.S \ " OPENSSL_CRYPTO_SOURCES_EXCLUDES_arm64="\ @@ -1082,23 +1047,4 @@ apps/version.c \ apps/x509.c \ " -OPENSSL_PATCHES="\ -progs.patch \ -handshake_cutthrough.patch \ -jsse.patch \ -channelid.patch \ -eng_dyn_dirs.patch \ -fix_clang_build.patch \ -tls12_digests.patch \ -alpn.patch \ -cbc_record_splitting.patch \ -dsa_nonce.patch \ -ecdhe_psk.patch \ -wincrypt.patch \ -tls_psk_hint.patch \ -arm_asm.patch \ -psk_client_callback_128_byte_id_bug.patch \ -early_ccs.patch \ -" - source ./openssl.trusty.config diff --git a/app/openssl/openssl.version b/app/openssl/openssl.version index 2e849911..ab2e62bf 100644 --- a/app/openssl/openssl.version +++ b/app/openssl/openssl.version @@ -1 +1 @@ -OPENSSL_VERSION=1.0.1g +OPENSSL_VERSION=1.0.1h diff --git a/app/openssl/patches/README b/app/openssl/patches/README index 2ff69282..13e9bd8b 100644 --- a/app/openssl/patches/README +++ b/app/openssl/patches/README @@ -53,6 +53,19 @@ ecdhe_psk.patch Adds support for ECDHE Pre-Shared Key (PSK) TLS cipher suites. +ecdhe_psk_part2.patch + +Removes ECHDE-PSK cipher suites with SHA-2 because they cannot be used with +SSLv3 (and there's no way to express that in OpenSSL's configuration). Adds +SHA-1 based ECDHE-PSK AES-CBC cipher suites instead. + +arm_asm.patch + +Adds newer ARM assembly pack with BSAES for ARMv7 and acceleration for ARMv8 +Based on branch available at: +https://git.linaro.org/people/ard.biesheuvel/openssl.git/shortlog/refs/heads/openssl-1.0.1f-with-arm-patches +c7b582ef23eb6f4386664e841e6e406d984c38d3^..cb8b1ab03e5c179a719afe83f03fecb1c2c78730 + tls_psk_hint.patch Fixes issues with TLS-PSK identity hint implementation where diff --git a/app/openssl/ssl/bio_ssl.c b/app/openssl/ssl/bio_ssl.c index e9552cae..06a13de4 100644 --- a/app/openssl/ssl/bio_ssl.c +++ b/app/openssl/ssl/bio_ssl.c @@ -206,6 +206,10 @@ static int ssl_read(BIO *b, char *out, int outl) BIO_set_retry_special(b); retry_reason=BIO_RR_SSL_X509_LOOKUP; break; + case SSL_ERROR_WANT_CHANNEL_ID_LOOKUP: + BIO_set_retry_special(b); + retry_reason=BIO_RR_SSL_CHANNEL_ID_LOOKUP; + break; case SSL_ERROR_WANT_ACCEPT: BIO_set_retry_special(b); retry_reason=BIO_RR_ACCEPT; @@ -280,6 +284,10 @@ static int ssl_write(BIO *b, const char *out, int outl) BIO_set_retry_special(b); retry_reason=BIO_RR_SSL_X509_LOOKUP; break; + case SSL_ERROR_WANT_CHANNEL_ID_LOOKUP: + BIO_set_retry_special(b); + retry_reason=BIO_RR_SSL_CHANNEL_ID_LOOKUP; + break; case SSL_ERROR_WANT_CONNECT: BIO_set_retry_special(b); retry_reason=BIO_RR_CONNECT; diff --git a/app/openssl/ssl/d1_both.c b/app/openssl/ssl/d1_both.c index 2e8cf681..04aa2310 100644 --- a/app/openssl/ssl/d1_both.c +++ b/app/openssl/ssl/d1_both.c @@ -627,7 +627,16 @@ dtls1_reassemble_fragment(SSL *s, struct hm_header_st* msg_hdr, int *ok) frag->msg_header.frag_off = 0; } else + { frag = (hm_fragment*) item->data; + if (frag->msg_header.msg_len != msg_hdr->msg_len) + { + item = NULL; + frag = NULL; + goto err; + } + } + /* If message is already reassembled, this must be a * retransmit and can be dropped. @@ -674,8 +683,8 @@ dtls1_reassemble_fragment(SSL *s, struct hm_header_st* msg_hdr, int *ok) item = pitem_new(seq64be, frag); if (item == NULL) { - goto err; i = -1; + goto err; } pqueue_insert(s->d1->buffered_messages, item); @@ -784,6 +793,7 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) int i,al; struct hm_header_st msg_hdr; + redo: /* see if we have the required fragment already */ if ((frag_len = dtls1_retrieve_buffered_fragment(s,max,ok)) || *ok) { @@ -842,8 +852,7 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) s->msg_callback_arg); s->init_num = 0; - return dtls1_get_message_fragment(s, st1, stn, - max, ok); + goto redo; } else /* Incorrectly formated Hello request */ { diff --git a/app/openssl/ssl/d1_lib.c b/app/openssl/ssl/d1_lib.c index 106939f2..6bde16fa 100644 --- a/app/openssl/ssl/d1_lib.c +++ b/app/openssl/ssl/d1_lib.c @@ -176,9 +176,12 @@ static void dtls1_clear_queues(SSL *s) while ( (item = pqueue_pop(s->d1->buffered_app_data.q)) != NULL) { - frag = (hm_fragment *)item->data; - OPENSSL_free(frag->fragment); - OPENSSL_free(frag); + rdata = (DTLS1_RECORD_DATA *) item->data; + if (rdata->rbuf.buf) + { + OPENSSL_free(rdata->rbuf.buf); + } + OPENSSL_free(item->data); pitem_free(item); } } diff --git a/app/openssl/ssl/d1_pkt.c b/app/openssl/ssl/d1_pkt.c index 5b84e97c..363fc8c8 100644 --- a/app/openssl/ssl/d1_pkt.c +++ b/app/openssl/ssl/d1_pkt.c @@ -241,14 +241,6 @@ dtls1_buffer_record(SSL *s, record_pqueue *queue, unsigned char *priority) } #endif - /* insert should not fail, since duplicates are dropped */ - if (pqueue_insert(queue->q, item) == NULL) - { - OPENSSL_free(rdata); - pitem_free(item); - return(0); - } - s->packet = NULL; s->packet_length = 0; memset(&(s->s3->rbuf), 0, sizeof(SSL3_BUFFER)); @@ -261,7 +253,16 @@ dtls1_buffer_record(SSL *s, record_pqueue *queue, unsigned char *priority) pitem_free(item); return(0); } - + + /* insert should not fail, since duplicates are dropped */ + if (pqueue_insert(queue->q, item) == NULL) + { + SSLerr(SSL_F_DTLS1_BUFFER_RECORD, ERR_R_INTERNAL_ERROR); + OPENSSL_free(rdata); + pitem_free(item); + return(0); + } + return(1); } diff --git a/app/openssl/ssl/d1_srvr.c b/app/openssl/ssl/d1_srvr.c index 09f47627..c181db6d 100644 --- a/app/openssl/ssl/d1_srvr.c +++ b/app/openssl/ssl/d1_srvr.c @@ -1356,6 +1356,7 @@ int dtls1_send_server_key_exchange(SSL *s) (unsigned char *)encodedPoint, encodedlen); OPENSSL_free(encodedPoint); + encodedPoint = NULL; p += encodedlen; } #endif diff --git a/app/openssl/ssl/s3_both.c b/app/openssl/ssl/s3_both.c index d9e18a31..607990d0 100644 --- a/app/openssl/ssl/s3_both.c +++ b/app/openssl/ssl/s3_both.c @@ -561,7 +561,7 @@ long ssl3_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok) #endif /* Feed this message into MAC computation. */ - if (*(unsigned char*)s->init_buf->data != SSL3_MT_ENCRYPTED_EXTENSIONS) + if (*((unsigned char*) s->init_buf->data) != SSL3_MT_ENCRYPTED_EXTENSIONS) ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4); if (s->msg_callback) s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, s->init_buf->data, (size_t)s->init_num + 4, s, s->msg_callback_arg); diff --git a/app/openssl/ssl/s3_clnt.c b/app/openssl/ssl/s3_clnt.c index 5e15b75c..486f538b 100644 --- a/app/openssl/ssl/s3_clnt.c +++ b/app/openssl/ssl/s3_clnt.c @@ -215,24 +215,12 @@ int ssl3_connect(SSL *s) } #endif -// BEGIN android-added -#if 0 -/* Send app data in separate packet, otherwise, some particular site - * (only one site so far) closes the socket. http://b/2511073 - * Note: there is a very small chance that two TCP packets - * could be arriving at server combined into a single TCP packet, - * then trigger that site to break. We haven't encounter that though. - */ -// END android-added if (SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) { /* Send app data along with CCS/Finished */ s->s3->flags |= SSL3_FLAGS_DELAY_CLIENT_FINISHED; } -// BEGIN android-added -#endif -// END android-added for (;;) { state=s->state; @@ -558,7 +546,20 @@ int ssl3_connect(SSL *s) } else { - if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) && SSL_get_cipher_bits(s, NULL) >= 128 + /* This is a non-resumption handshake. If it + * involves ChannelID, then record the + * handshake hashes at this point in the + * session so that any resumption of this + * session with ChannelID can sign those + * hashes. */ + if (s->s3->tlsext_channel_id_new) + { + ret = tls1_record_handshake_hashes_for_channel_id(s); + if (ret <= 0) + goto end; + } + if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) + && ssl3_can_cutthrough(s) && s->s3->previous_server_finished_len == 0 /* no cutthrough on renegotiation (would complicate the state machine) */ ) { @@ -607,6 +608,7 @@ int ssl3_connect(SSL *s) case SSL3_ST_CR_FINISHED_A: case SSL3_ST_CR_FINISHED_B: + s->s3->flags |= SSL3_FLAGS_CCS_OK; ret=ssl3_get_finished(s,SSL3_ST_CR_FINISHED_A, SSL3_ST_CR_FINISHED_B); @@ -2302,7 +2304,7 @@ int ssl3_get_server_done(SSL *s) int ssl3_send_client_key_exchange(SSL *s) { unsigned char *p,*d; - int n; + int n = 0; unsigned long alg_k; unsigned long alg_a; #ifndef OPENSSL_NO_RSA @@ -2688,6 +2690,13 @@ int ssl3_send_client_key_exchange(SSL *s) unsigned int i; #endif + if (s->session->sess_cert == NULL) + { + ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_UNEXPECTED_MESSAGE); + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,SSL_R_UNEXPECTED_MESSAGE); + goto err; + } + /* Did we send out the client's * ECDH share for use in premaster * computation as part of client certificate? @@ -3027,7 +3036,7 @@ int ssl3_send_client_key_exchange(SSL *s) } } #endif - else if (!(alg_k & SSL_kPSK)) + else if (!(alg_k & SSL_kPSK) || ((alg_k & SSL_kPSK) && !(alg_a & SSL_aPSK))) { ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); @@ -3491,10 +3500,29 @@ int ssl3_send_channel_id(SSL *s) if (s->state != SSL3_ST_CW_CHANNEL_ID_A) return ssl3_do_write(s, SSL3_RT_HANDSHAKE); + if (!s->tlsext_channel_id_private && s->ctx->channel_id_cb) + { + EVP_PKEY *key = NULL; + s->ctx->channel_id_cb(s, &key); + if (key != NULL) + { + s->tlsext_channel_id_private = key; + } + } + if (!s->tlsext_channel_id_private) + { + s->rwstate=SSL_CHANNEL_ID_LOOKUP; + return (-1); + } + s->rwstate=SSL_NOTHING; + d = (unsigned char *)s->init_buf->data; *(d++)=SSL3_MT_ENCRYPTED_EXTENSIONS; l2n3(2 + 2 + TLSEXT_CHANNEL_ID_SIZE, d); - s2n(TLSEXT_TYPE_channel_id, d); + if (s->s3->tlsext_channel_id_new) + s2n(TLSEXT_TYPE_channel_id_new, d); + else + s2n(TLSEXT_TYPE_channel_id, d); s2n(TLSEXT_CHANNEL_ID_SIZE, d); EVP_MD_CTX_init(&md_ctx); @@ -3505,9 +3533,9 @@ int ssl3_send_channel_id(SSL *s) SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY); goto err; } - // i2d_PublicKey will produce an ANSI X9.62 public key which, for a - // P-256 key, is 0x04 (meaning uncompressed) followed by the x and y - // field elements as 32-byte, big-endian numbers. + /* i2d_PublicKey will produce an ANSI X9.62 public key which, for a + * P-256 key, is 0x04 (meaning uncompressed) followed by the x and y + * field elements as 32-byte, big-endian numbers. */ if (public_key_len != 65) { SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CHANNEL_ID_NOT_P256); @@ -3553,14 +3581,14 @@ int ssl3_send_channel_id(SSL *s) } derp = der_sig; - sig = d2i_ECDSA_SIG(NULL, (const unsigned char**)&derp, sig_len); + sig = d2i_ECDSA_SIG(NULL, (const unsigned char**) &derp, sig_len); if (sig == NULL) { SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_D2I_ECDSA_SIG); goto err; } - // The first byte of public_key will be 0x4, denoting an uncompressed key. + /* The first byte of public_key will be 0x4, denoting an uncompressed key. */ memcpy(d, public_key + 1, 64); d += 64; memset(d, 0, 2 * 32); diff --git a/app/openssl/ssl/s3_enc.c b/app/openssl/ssl/s3_enc.c index 90fbb180..53b94b7c 100644 --- a/app/openssl/ssl/s3_enc.c +++ b/app/openssl/ssl/s3_enc.c @@ -728,7 +728,7 @@ int n_ssl3_mac(SSL *ssl, unsigned char *md, int send) } t=EVP_MD_CTX_size(hash); - if (t < 0) + if (t < 0 || t > 20) return -1; md_size=t; npad=(48/md_size)*md_size; diff --git a/app/openssl/ssl/s3_lib.c b/app/openssl/ssl/s3_lib.c index f84da7f5..896d1e19 100644 --- a/app/openssl/ssl/s3_lib.c +++ b/app/openssl/ssl/s3_lib.c @@ -2828,35 +2828,34 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ #ifndef OPENSSL_NO_PSK /* ECDH PSK ciphersuites from RFC 5489 */ - - /* Cipher C037 */ + /* Cipher C035 */ { 1, - TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256, - TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256, + TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA, + TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA, SSL_kEECDH, SSL_aPSK, SSL_AES128, - SSL_SHA256, + SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA256, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, 128, 128, }, - /* Cipher C038 */ + /* Cipher C036 */ { 1, - TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384, - TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384, + TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA, + TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA, SSL_kEECDH, SSL_aPSK, SSL_AES256, - SSL_SHA384, + SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA384, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, 256, 256, }, @@ -3412,8 +3411,6 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg) break; #endif case SSL_CTRL_CHANNEL_ID: - if (!s->server) - break; s->tlsext_channel_id_enabled = 1; ret = 1; break; @@ -3429,7 +3426,7 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg) } if (s->tlsext_channel_id_private) EVP_PKEY_free(s->tlsext_channel_id_private); - s->tlsext_channel_id_private = (EVP_PKEY*) parg; + s->tlsext_channel_id_private = EVP_PKEY_dup((EVP_PKEY*) parg); ret = 1; break; @@ -3744,7 +3741,7 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg) } if (ctx->tlsext_channel_id_private) EVP_PKEY_free(ctx->tlsext_channel_id_private); - ctx->tlsext_channel_id_private = (EVP_PKEY*) parg; + ctx->tlsext_channel_id_private = EVP_PKEY_dup((EVP_PKEY*) parg); break; default: diff --git a/app/openssl/ssl/s3_pkt.c b/app/openssl/ssl/s3_pkt.c index 75997ac2..60c4f1a4 100644 --- a/app/openssl/ssl/s3_pkt.c +++ b/app/openssl/ssl/s3_pkt.c @@ -110,6 +110,7 @@ */ #include +#include #include #define USE_SOCKETS #include "ssl_locl.h" @@ -580,10 +581,11 @@ int ssl3_do_compress(SSL *ssl) int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) { const unsigned char *buf=buf_; - unsigned int tot,n,nw; - int i; + unsigned int n,nw; + int i,tot; s->rwstate=SSL_NOTHING; + OPENSSL_assert(s->s3->wnum <= INT_MAX); tot=s->s3->wnum; s->s3->wnum=0; @@ -598,6 +600,22 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) } } + /* ensure that if we end up with a smaller value of data to write + * out than the the original len from a write which didn't complete + * for non-blocking I/O and also somehow ended up avoiding + * the check for this in ssl3_write_pending/SSL_R_BAD_WRITE_RETRY as + * it must never be possible to end up with (len-tot) as a large + * number that will then promptly send beyond the end of the users + * buffer ... so we trap and report the error in a way the user + * will notice + */ + if (len < tot) + { + SSLerr(SSL_F_SSL3_WRITE_BYTES,SSL_R_BAD_LENGTH); + return(-1); + } + + n=(len-tot); for (;;) { @@ -668,9 +686,6 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, SSL3_BUFFER *wb=&(s->s3->wbuf); SSL_SESSION *sess; - if (wb->buf == NULL) - if (!ssl3_setup_write_buffer(s)) - return -1; /* first check if there is a SSL3_BUFFER still being written * out. This will happen with non blocking IO */ @@ -686,6 +701,10 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, /* if it went, fall through and send more stuff */ } + if (wb->buf == NULL) + if (!ssl3_setup_write_buffer(s)) + return -1; + if (len == 0) return 0; @@ -1067,7 +1086,7 @@ start: { s->rstate=SSL_ST_READ_HEADER; rr->off=0; - if (s->mode & SSL_MODE_RELEASE_BUFFERS) + if (s->mode & SSL_MODE_RELEASE_BUFFERS && s->s3->rbuf.left == 0) ssl3_release_read_buffer(s); } } @@ -1312,10 +1331,12 @@ start: if (!(s->s3->flags & SSL3_FLAGS_CCS_OK)) { al=SSL_AD_UNEXPECTED_MESSAGE; - SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_UNEXPECTED_CCS); + SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_CCS_RECEIVED_EARLY); goto f_err; } + s->s3->flags &= ~SSL3_FLAGS_CCS_OK; + rr->length=0; if (s->msg_callback) @@ -1450,12 +1471,7 @@ int ssl3_do_change_cipher_spec(SSL *s) if (s->s3->tmp.key_block == NULL) { - if (s->session->master_key_length == 0) - { - SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_UNEXPECTED_CCS); - return (0); - } - if (s->session == NULL) + if (s->session == NULL || s->session->master_key_length == 0) { /* might happen if dtls1_read_bytes() calls this */ SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_CCS_RECEIVED_EARLY); diff --git a/app/openssl/ssl/s3_srvr.c b/app/openssl/ssl/s3_srvr.c index 1976efa7..f83c9366 100644 --- a/app/openssl/ssl/s3_srvr.c +++ b/app/openssl/ssl/s3_srvr.c @@ -675,8 +675,8 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_CERT_VRFY_A: case SSL3_ST_SR_CERT_VRFY_B: - /* we should decide if we expected this one */ s->s3->flags |= SSL3_FLAGS_CCS_OK; + /* we should decide if we expected this one */ ret=ssl3_get_cert_verify(s); if (ret <= 0) goto end; @@ -694,7 +694,6 @@ int ssl3_accept(SSL *s) channel_id = s->s3->tlsext_channel_id_valid; #endif - s->s3->flags |= SSL3_FLAGS_CCS_OK; if (next_proto_neg) s->state=SSL3_ST_SR_NEXT_PROTO_A; else if (channel_id) @@ -729,6 +728,7 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_FINISHED_A: case SSL3_ST_SR_FINISHED_B: + s->s3->flags |= SSL3_FLAGS_CCS_OK; ret=ssl3_get_finished(s,SSL3_ST_SR_FINISHED_A, SSL3_ST_SR_FINISHED_B); if (ret <= 0) goto end; @@ -740,6 +740,15 @@ int ssl3_accept(SSL *s) #endif else s->state=SSL3_ST_SW_CHANGE_A; + /* If this is a full handshake with ChannelID then + * record the hashshake hashes in |s->session| in case + * we need them to verify a ChannelID signature on a + * resumption of this session in the future. */ + if (!s->hit && s->s3->tlsext_channel_id_new) + { + ret = tls1_record_handshake_hashes_for_channel_id(s); + if (ret <= 0) goto end; + } s->init_num=0; break; @@ -1468,6 +1477,22 @@ int ssl3_send_server_hello(SSL *s) if (s->state == SSL3_ST_SW_SRVR_HELLO_A) { + /* We only accept ChannelIDs on connections with ECDHE in order + * to avoid a known attack while we fix ChannelID itself. */ + if (s->s3 && + s->s3->tlsext_channel_id_valid && + (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kEECDH) == 0) + s->s3->tlsext_channel_id_valid = 0; + + /* If this is a resumption and the original handshake didn't + * support ChannelID then we didn't record the original + * handshake hashes in the session and so cannot resume with + * ChannelIDs. */ + if (s->hit && + s->s3->tlsext_channel_id_new && + s->session->original_handshake_hash_len == 0) + s->s3->tlsext_channel_id_valid = 0; + buf=(unsigned char *)s->init_buf->data; #ifdef OPENSSL_NO_TLSEXT p=s->s3->server_random; @@ -2143,6 +2168,11 @@ int ssl3_send_certificate_request(SSL *s) s->init_num=n+4; s->init_off=0; #ifdef NETSCAPE_HANG_BUG + if (!BUF_MEM_grow_clean(buf, s->init_num + 4)) + { + SSLerr(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST,ERR_R_BUF_LIB); + goto err; + } p=(unsigned char *)s->init_buf->data + s->init_num; /* do the header */ @@ -2885,6 +2915,8 @@ int ssl3_get_client_key_exchange(SSL *s) unsigned char premaster_secret[32], *start; size_t outlen=32, inlen; unsigned long alg_a; + int Ttag, Tclass; + long Tlen; /* Get our certificate private key*/ alg_a = s->s3->tmp.new_cipher->algorithm_auth; @@ -2906,28 +2938,16 @@ int ssl3_get_client_key_exchange(SSL *s) ERR_clear_error(); } /* Decrypt session key */ - if ((*p!=( V_ASN1_SEQUENCE| V_ASN1_CONSTRUCTED))) - { - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); - goto gerr; - } - if (p[1] == 0x81) - { - start = p+3; - inlen = p[2]; - } - else if (p[1] < 0x80) - { - start = p+2; - inlen = p[1]; - } - else + if (ASN1_get_object((const unsigned char **)&p, &Tlen, &Ttag, &Tclass, n) != V_ASN1_CONSTRUCTED || + Ttag != V_ASN1_SEQUENCE || + Tclass != V_ASN1_UNIVERSAL) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); goto gerr; } + start = p; + inlen = Tlen; if (EVP_PKEY_decrypt(pkey_ctx,premaster_secret,&outlen,start,inlen) <=0) - { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); goto gerr; @@ -3675,6 +3695,7 @@ int ssl3_get_channel_id(SSL *s) EC_POINT* point = NULL; ECDSA_SIG sig; BIGNUM x, y; + unsigned short expected_extension_type; if (s->state == SSL3_ST_SR_CHANNEL_ID_A && s->init_num == 0) { @@ -3732,7 +3753,11 @@ int ssl3_get_channel_id(SSL *s) n2s(p, extension_type); n2s(p, extension_len); - if (extension_type != TLSEXT_TYPE_channel_id || + expected_extension_type = TLSEXT_TYPE_channel_id; + if (s->s3->tlsext_channel_id_new) + expected_extension_type = TLSEXT_TYPE_channel_id_new; + + if (extension_type != expected_extension_type || extension_len != TLSEXT_CHANNEL_ID_SIZE) { SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_INVALID_MESSAGE); diff --git a/app/openssl/ssl/ssl.h b/app/openssl/ssl/ssl.h index 54b0eb6c..a85841b3 100644 --- a/app/openssl/ssl/ssl.h +++ b/app/openssl/ssl/ssl.h @@ -544,6 +544,13 @@ struct ssl_session_st #ifndef OPENSSL_NO_SRP char *srp_username; #endif + + /* original_handshake_hash contains the handshake hash (either + * SHA-1+MD5 or SHA-2, depending on TLS version) for the original, full + * handshake that created a session. This is used by Channel IDs during + * resumption. */ + unsigned char original_handshake_hash[EVP_MAX_MD_SIZE]; + unsigned int original_handshake_hash_len; }; #endif @@ -553,7 +560,7 @@ struct ssl_session_st /* Allow initial connection to servers that don't support RI */ #define SSL_OP_LEGACY_SERVER_CONNECT 0x00000004L #define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L -#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x00000010L +#define SSL_OP_TLSEXT_PADDING 0x00000010L #define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x00000020L #define SSL_OP_SAFARI_ECDHE_ECDSA_BUG 0x00000040L #define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x00000080L @@ -562,6 +569,8 @@ struct ssl_session_st /* Hasn't done anything since OpenSSL 0.9.7h, retained for compatibility */ #define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0 +/* Refers to ancient SSLREF and SSLv2, retained for compatibility */ +#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x0 /* SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS is vestigial. Previously it disabled the * insertion of empty records in CBC mode, but the empty records were commonly @@ -648,12 +657,14 @@ struct ssl_session_st * TLS only.) "Released" buffers are put onto a free-list in the context * or just freed (depending on the context's setting for freelist_max_len). */ #define SSL_MODE_RELEASE_BUFFERS 0x00000010L + /* Send the current time in the Random fields of the ClientHello and * ServerHello records for compatibility with hypothetical implementations * that require it. */ #define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L #define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L + /* When set, clients may send application data before receipt of CCS * and Finished. This mode enables full-handshakes to 'complete' in * one RTT. */ @@ -866,6 +877,9 @@ struct ssl_ctx_st /* get client cert callback */ int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey); + /* get channel id callback */ + void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey); + /* cookie generate callback */ int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int *cookie_len); @@ -1028,6 +1042,10 @@ struct ssl_ctx_st /* If true, a client will advertise the Channel ID extension and a * server will echo it. */ char tlsext_channel_id_enabled; + /* tlsext_channel_id_enabled_new is a hack to support both old and new + * ChannelID signatures. It indicates that a client should advertise the + * new ChannelID extension number. */ + char tlsext_channel_id_enabled_new; /* The client's Channel ID private key. */ EVP_PKEY *tlsext_channel_id_private; #endif @@ -1086,6 +1104,8 @@ void SSL_CTX_set_info_callback(SSL_CTX *ctx, void (*cb)(const SSL *ssl,int type, void (*SSL_CTX_get_info_callback(SSL_CTX *ctx))(const SSL *ssl,int type,int val); void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey)); int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL *ssl, X509 **x509, EVP_PKEY **pkey); +void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey)); +void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL *ssl, EVP_PKEY **pkey); #ifndef OPENSSL_NO_ENGINE int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e); #endif @@ -1162,12 +1182,14 @@ const char *SSL_get_psk_identity(const SSL *s); #define SSL_WRITING 2 #define SSL_READING 3 #define SSL_X509_LOOKUP 4 +#define SSL_CHANNEL_ID_LOOKUP 5 /* These will only be used when doing non-blocking IO */ #define SSL_want_nothing(s) (SSL_want(s) == SSL_NOTHING) #define SSL_want_read(s) (SSL_want(s) == SSL_READING) #define SSL_want_write(s) (SSL_want(s) == SSL_WRITING) #define SSL_want_x509_lookup(s) (SSL_want(s) == SSL_X509_LOOKUP) +#define SSL_want_channel_id_lookup(s) (SSL_want(s) == SSL_CHANNEL_ID_LOOKUP) #define SSL_MAC_FLAG_READ_MAC_STREAM 1 #define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 @@ -1602,6 +1624,7 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_ERROR_ZERO_RETURN 6 #define SSL_ERROR_WANT_CONNECT 7 #define SSL_ERROR_WANT_ACCEPT 8 +#define SSL_ERROR_WANT_CHANNEL_ID_LOOKUP 9 #define SSL_CTRL_NEED_TMP_RSA 1 #define SSL_CTRL_SET_TMP_RSA 2 @@ -1739,10 +1762,11 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_set_tmp_ecdh(ssl,ecdh) \ SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh) -/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client - * IDs from clients. Returns 1 on success. */ -#define SSL_enable_tls_channel_id(ctx) \ - SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL) +/* SSL_enable_tls_channel_id either configures a TLS server to accept TLS client + * IDs from clients, or configure a client to send TLS client IDs to server. + * Returns 1 on success. */ +#define SSL_enable_tls_channel_id(s) \ + SSL_ctrl(s,SSL_CTRL_CHANNEL_ID,0,NULL) /* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to * compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on * success. */ @@ -1792,7 +1816,7 @@ int SSL_CIPHER_get_bits(const SSL_CIPHER *c,int *alg_bits); char * SSL_CIPHER_get_version(const SSL_CIPHER *c); const char * SSL_CIPHER_get_name(const SSL_CIPHER *c); unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c); -const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); +const char * SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); int SSL_get_fd(const SSL *s); int SSL_get_rfd(const SSL *s); @@ -2707,7 +2731,6 @@ void ERR_load_SSL_strings(void); #define SSL_R_WRONG_VERSION_NUMBER 267 #define SSL_R_X509_LIB 268 #define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS 269 -#define SSL_R_UNEXPECTED_CCS 388 #ifdef __cplusplus } diff --git a/app/openssl/ssl/ssl3.h b/app/openssl/ssl/ssl3.h index f205f73d..83d59bff 100644 --- a/app/openssl/ssl/ssl3.h +++ b/app/openssl/ssl/ssl3.h @@ -388,9 +388,6 @@ typedef struct ssl3_buffer_st #define TLS1_FLAGS_TLS_PADDING_BUG 0x0008 #define TLS1_FLAGS_SKIP_CERT_VERIFY 0x0010 #define TLS1_FLAGS_KEEP_HANDSHAKE 0x0020 -/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at - * this point in the handshake. If this flag is not set then received CCS - * records will cause a fatal error for the connection. */ #define SSL3_FLAGS_CCS_OK 0x0080 /* SSL3_FLAGS_SGC_RESTART_DONE is set when we @@ -558,6 +555,11 @@ typedef struct ssl3_state_st * for Channel IDs and that tlsext_channel_id will be valid after the * handshake. */ char tlsext_channel_id_valid; + /* tlsext_channel_id_new means that the updated Channel ID extension + * was negotiated. This is a temporary hack in the code to support both + * forms of Channel ID extension while we transition to the new format, + * which fixed a security issue. */ + char tlsext_channel_id_new; /* For a server: * If |tlsext_channel_id_valid| is true, then this contains the * verified Channel ID from the client: a P256 point, (x,y), where @@ -678,11 +680,11 @@ typedef struct ssl3_state_st #define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT) -#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT) #ifndef OPENSSL_NO_NEXTPROTONEG #define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT) #define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT) #endif +#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANNEL_ID_A (0x220|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANNEL_ID_B (0x221|SSL_ST_ACCEPT) #define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT) diff --git a/app/openssl/ssl/ssl_asn1.c b/app/openssl/ssl/ssl_asn1.c index 38540be1..f83e18f8 100644 --- a/app/openssl/ssl/ssl_asn1.c +++ b/app/openssl/ssl/ssl_asn1.c @@ -117,12 +117,13 @@ typedef struct ssl_session_asn1_st #ifndef OPENSSL_NO_SRP ASN1_OCTET_STRING srp_username; #endif /* OPENSSL_NO_SRP */ + ASN1_OCTET_STRING original_handshake_hash; } SSL_SESSION_ASN1; int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) { #define LSIZE2 (sizeof(long)*2) - int v1=0,v2=0,v3=0,v4=0,v5=0,v7=0,v8=0; + int v1=0,v2=0,v3=0,v4=0,v5=0,v7=0,v8=0,v14=0; unsigned char buf[4],ibuf1[LSIZE2],ibuf2[LSIZE2]; unsigned char ibuf3[LSIZE2],ibuf4[LSIZE2],ibuf5[LSIZE2]; #ifndef OPENSSL_NO_TLSEXT @@ -272,6 +273,13 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) a.psk_identity.type=V_ASN1_OCTET_STRING; a.psk_identity.data=(unsigned char *)(in->psk_identity); } + + if (in->original_handshake_hash_len > 0) + { + a.original_handshake_hash.length = in->original_handshake_hash_len; + a.original_handshake_hash.type = V_ASN1_OCTET_STRING; + a.original_handshake_hash.data = in->original_handshake_hash; + } #endif /* OPENSSL_NO_PSK */ #ifndef OPENSSL_NO_SRP if (in->srp_username) @@ -325,6 +333,8 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) if (in->srp_username) M_ASN1_I2D_len_EXP_opt(&(a.srp_username), i2d_ASN1_OCTET_STRING,12,v12); #endif /* OPENSSL_NO_SRP */ + if (in->original_handshake_hash_len > 0) + M_ASN1_I2D_len_EXP_opt(&(a.original_handshake_hash),i2d_ASN1_OCTET_STRING,14,v14); M_ASN1_I2D_seq_total(); @@ -373,6 +383,8 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) if (in->srp_username) M_ASN1_I2D_put_EXP_opt(&(a.srp_username), i2d_ASN1_OCTET_STRING,12,v12); #endif /* OPENSSL_NO_SRP */ + if (in->original_handshake_hash_len > 0) + M_ASN1_I2D_put_EXP_opt(&(a.original_handshake_hash),i2d_ASN1_OCTET_STRING,14,v14); M_ASN1_I2D_finish(); } @@ -408,6 +420,7 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, if (os.length != 3) { c.error=SSL_R_CIPHER_CODE_WRONG_LENGTH; + c.line=__LINE__; goto err; } id=0x02000000L| @@ -420,6 +433,7 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, if (os.length != 2) { c.error=SSL_R_CIPHER_CODE_WRONG_LENGTH; + c.line=__LINE__; goto err; } id=0x03000000L| @@ -429,6 +443,7 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, else { c.error=SSL_R_UNKNOWN_SSL_VERSION; + c.line=__LINE__; goto err; } @@ -521,6 +536,7 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, if (os.length > SSL_MAX_SID_CTX_LENGTH) { c.error=SSL_R_BAD_LENGTH; + c.line=__LINE__; goto err; } else @@ -638,5 +654,16 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, ret->srp_username=NULL; #endif /* OPENSSL_NO_SRP */ + os.length=0; + os.data=NULL; + M_ASN1_D2I_get_EXP_opt(osp,d2i_ASN1_OCTET_STRING,14); + if (os.data && os.length < (int)sizeof(ret->original_handshake_hash)) + { + memcpy(ret->original_handshake_hash, os.data, os.length); + ret->original_handshake_hash_len = os.length; + OPENSSL_free(os.data); + os.data = NULL; + } + M_ASN1_D2I_Finish(a,SSL_SESSION_free,SSL_F_D2I_SSL_SESSION); } diff --git a/app/openssl/ssl/ssl_err.c b/app/openssl/ssl/ssl_err.c index bddd7949..ac0aad9b 100644 --- a/app/openssl/ssl/ssl_err.c +++ b/app/openssl/ssl/ssl_err.c @@ -553,7 +553,7 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_TLSV1_UNRECOGNIZED_NAME),"tlsv1 unrecognized name"}, {ERR_REASON(SSL_R_TLSV1_UNSUPPORTED_EXTENSION),"tlsv1 unsupported extension"}, {ERR_REASON(SSL_R_TLS_CLIENT_CERT_REQ_WITH_ANON_CIPHER),"tls client cert req with anon cipher"}, -{ERR_REASON(SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT),"peer does not accept heartbearts"}, +{ERR_REASON(SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT),"peer does not accept heartbeats"}, {ERR_REASON(SSL_R_TLS_HEARTBEAT_PENDING) ,"heartbeat request already pending"}, {ERR_REASON(SSL_R_TLS_ILLEGAL_EXPORTER_LABEL),"tls illegal exporter label"}, {ERR_REASON(SSL_R_TLS_INVALID_ECPOINTFORMAT_LIST),"tls invalid ecpointformat list"}, @@ -604,7 +604,6 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_WRONG_VERSION_NUMBER) ,"wrong version number"}, {ERR_REASON(SSL_R_X509_LIB) ,"x509 lib"}, {ERR_REASON(SSL_R_X509_VERIFICATION_SETUP_PROBLEMS),"x509 verification setup problems"}, -{ERR_REASON(SSL_R_UNEXPECTED_CCS),"unexpected CCS"}, {0,NULL} }; diff --git a/app/openssl/ssl/ssl_lib.c b/app/openssl/ssl/ssl_lib.c index 8d2c3a76..3de68a78 100644 --- a/app/openssl/ssl/ssl_lib.c +++ b/app/openssl/ssl/ssl_lib.c @@ -1403,6 +1403,10 @@ char *SSL_get_shared_ciphers(const SSL *s,char *buf,int len) p=buf; sk=s->session->ciphers; + + if (sk_SSL_CIPHER_num(sk) == 0) + return NULL; + for (i=0; iversion >= SSL3_VERSION && s->s3->in_read_app_data == 0 && /* cutthrough only applies to write() */ (SSL_get_mode((SSL*)s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) && /* cutthrough enabled */ - SSL_get_cipher_bits(s, NULL) >= 128 && /* strong cipher choosen */ + ssl3_can_cutthrough(s) && /* cutthrough allowed */ s->s3->previous_server_finished_len == 0 && /* not a renegotiation handshake */ (s->state == SSL3_ST_CR_SESSION_TICKET_A || /* ready to write app-data*/ s->state == SSL3_ST_CR_FINISHED_A)); } +int ssl3_can_cutthrough(const SSL *s) + { + const SSL_CIPHER *c; + + /* require a strong enough cipher */ + if (SSL_get_cipher_bits(s, NULL) < 128) + return 0; + + /* require ALPN or NPN extension */ + if (!s->s3->alpn_selected +#ifndef OPENSSL_NO_NEXTPROTONEG + && !s->s3->next_proto_neg_seen +#endif + ) + { + return 0; + } + + /* require a forward-secret cipher */ + c = SSL_get_current_cipher(s); + if (!c || (c->algorithm_mkey != SSL_kEDH && + c->algorithm_mkey != SSL_kEECDH)) + { + return 0; + } + + return 1; + } + /* Allocates new EVP_MD_CTX and sets pointer to it into given pointer * vairable, freeing EVP_MD_CTX previously stored in that variable, if * any. If EVP_MD pointer is passed, initializes ctx with this md diff --git a/app/openssl/ssl/ssl_locl.h b/app/openssl/ssl/ssl_locl.h index f79ab009..6b7731a4 100644 --- a/app/openssl/ssl/ssl_locl.h +++ b/app/openssl/ssl/ssl_locl.h @@ -1070,6 +1070,7 @@ void ssl_free_wbio_buffer(SSL *s); int tls1_change_cipher_state(SSL *s, int which); int tls1_setup_key_block(SSL *s); int tls1_enc(SSL *s, int snd); +int tls1_handshake_digest(SSL *s, unsigned char *out, size_t out_len); int tls1_final_finish_mac(SSL *s, const char *str, int slen, unsigned char *p); int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *p); @@ -1126,8 +1127,10 @@ int tls12_get_sigid(const EVP_PKEY *pk); const EVP_MD *tls12_get_hash(unsigned char hash_alg); int tls1_channel_id_hash(EVP_MD_CTX *ctx, SSL *s); +int tls1_record_handshake_hashes_for_channel_id(SSL *s); #endif +int ssl3_can_cutthrough(const SSL *s); EVP_MD_CTX* ssl_replace_hash(EVP_MD_CTX **hash,const EVP_MD *md) ; void ssl_clear_hash_ctx(EVP_MD_CTX **hash); int ssl_add_serverhello_renegotiate_ext(SSL *s, unsigned char *p, int *len, diff --git a/app/openssl/ssl/ssl_sess.c b/app/openssl/ssl/ssl_sess.c index ec088404..7d170852 100644 --- a/app/openssl/ssl/ssl_sess.c +++ b/app/openssl/ssl/ssl_sess.c @@ -1144,6 +1144,17 @@ int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL * ssl, X509 ** x509 , EVP_PK return ctx->client_cert_cb; } +void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, + void (*cb)(SSL *ssl, EVP_PKEY **pkey)) + { + ctx->channel_id_cb=cb; + } + +void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL * ssl, EVP_PKEY **pkey) + { + return ctx->channel_id_cb; + } + #ifndef OPENSSL_NO_ENGINE int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e) { diff --git a/app/openssl/ssl/t1_enc.c b/app/openssl/ssl/t1_enc.c index 2ed2e076..22dd3cab 100644 --- a/app/openssl/ssl/t1_enc.c +++ b/app/openssl/ssl/t1_enc.c @@ -895,54 +895,79 @@ int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *out) return((int)ret); } +/* tls1_handshake_digest calculates the current handshake hash and writes it to + * |out|, which has space for |out_len| bytes. It returns the number of bytes + * written or -1 in the event of an error. This function works on a copy of the + * underlying digests so can be called multiple times and prior to the final + * update etc. */ +int tls1_handshake_digest(SSL *s, unsigned char *out, size_t out_len) + { + const EVP_MD *md; + EVP_MD_CTX ctx; + int i, err = 0, len = 0; + long mask; + + EVP_MD_CTX_init(&ctx); + + for (i = 0; ssl_get_handshake_digest(i, &mask, &md); i++) + { + int hash_size; + unsigned int digest_len; + EVP_MD_CTX *hdgst = s->s3->handshake_dgst[i]; + + if ((mask & ssl_get_algorithm2(s)) == 0) + continue; + + hash_size = EVP_MD_size(md); + if (!hdgst || hash_size < 0 || (size_t)hash_size > out_len) + { + err = 1; + break; + } + + if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) || + !EVP_DigestFinal_ex(&ctx, out, &digest_len) || + digest_len != (unsigned int)hash_size) /* internal error */ + { + err = 1; + break; + } + out += digest_len; + out_len -= digest_len; + len += digest_len; + } + + EVP_MD_CTX_cleanup(&ctx); + + if (err != 0) + return -1; + return len; + } + int tls1_final_finish_mac(SSL *s, const char *str, int slen, unsigned char *out) { - unsigned int i; - EVP_MD_CTX ctx; unsigned char buf[2*EVP_MAX_MD_SIZE]; - unsigned char *q,buf2[12]; - int idx; - long mask; + unsigned char buf2[12]; int err=0; - const EVP_MD *md; + int digests_len; - q=buf; - - if (s->s3->handshake_buffer) + if (s->s3->handshake_buffer) if (!ssl3_digest_cached_records(s)) return 0; - EVP_MD_CTX_init(&ctx); - - for (idx=0;ssl_get_handshake_digest(idx,&mask,&md);idx++) + digests_len = tls1_handshake_digest(s, buf, sizeof(buf)); + if (digests_len < 0) { - if (mask & ssl_get_algorithm2(s)) - { - int hashsize = EVP_MD_size(md); - EVP_MD_CTX *hdgst = s->s3->handshake_dgst[idx]; - if (!hdgst || hashsize < 0 || hashsize > (int)(sizeof buf - (size_t)(q-buf))) - { - /* internal error: 'buf' is too small for this cipersuite! */ - err = 1; - } - else - { - if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) || - !EVP_DigestFinal_ex(&ctx,q,&i) || - (i != (unsigned int)hashsize)) - err = 1; - q+=hashsize; - } - } + err = 1; + digests_len = 0; } - + if (!tls1_PRF(ssl_get_algorithm2(s), - str,slen, buf,(int)(q-buf), NULL,0, NULL,0, NULL,0, + str,slen, buf, digests_len, NULL,0, NULL,0, NULL,0, s->session->master_key,s->session->master_key_length, out,buf2,sizeof buf2)) err = 1; - EVP_MD_CTX_cleanup(&ctx); if (err) return 0; @@ -1048,14 +1073,10 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send) if (!stream_mac) EVP_MD_CTX_cleanup(&hmac); #ifdef TLS_DEBUG -printf("sec="); -{unsigned int z; for (z=0; zlength; z++) printf("%02X ",rec->data[z]); printf("\n"); } #endif if (ssl->version != DTLS1_VERSION && ssl->version != DTLS1_BAD_VER) @@ -1185,7 +1206,7 @@ int tls1_export_keying_material(SSL *s, unsigned char *out, size_t olen, if (memcmp(val, TLS_MD_KEY_EXPANSION_CONST, TLS_MD_KEY_EXPANSION_CONST_SIZE) == 0) goto err1; - rv = tls1_PRF(s->s3->tmp.new_cipher->algorithm2, + rv = tls1_PRF(ssl_get_algorithm2(s), val, vallen, NULL, 0, NULL, 0, diff --git a/app/openssl/ssl/t1_lib.c b/app/openssl/ssl/t1_lib.c index 369e09f4..122a25f5 100644 --- a/app/openssl/ssl/t1_lib.c +++ b/app/openssl/ssl/t1_lib.c @@ -617,6 +617,8 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha #ifndef OPENSSL_NO_HEARTBEATS /* Add Heartbeat extension */ + if ((limit - ret - 4 - 1) < 0) + return NULL; s2n(TLSEXT_TYPE_heartbeat,ret); s2n(1,ret); /* Set mode: @@ -647,7 +649,10 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha * support for Channel ID. */ if (limit - ret - 4 < 0) return NULL; - s2n(TLSEXT_TYPE_channel_id,ret); + if (s->ctx->tlsext_channel_id_enabled_new) + s2n(TLSEXT_TYPE_channel_id_new,ret); + else + s2n(TLSEXT_TYPE_channel_id,ret); s2n(0,ret); } @@ -683,36 +688,35 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha ret += el; } #endif - -#ifdef TLSEXT_TYPE_padding /* Add padding to workaround bugs in F5 terminators. * See https://tools.ietf.org/html/draft-agl-tls-padding-03 * * NB: because this code works out the length of all existing * extensions it MUST always appear last. */ - { - int hlen = ret - (unsigned char *)s->init_buf->data; - /* The code in s23_clnt.c to build ClientHello messages includes the - * 5-byte record header in the buffer, while the code in s3_clnt.c does - * not. */ - if (s->state == SSL23_ST_CW_CLNT_HELLO_A) - hlen -= 5; - if (hlen > 0xff && hlen < 0x200) + if (s->options & SSL_OP_TLSEXT_PADDING) { - hlen = 0x200 - hlen; - if (hlen >= 4) - hlen -= 4; - else - hlen = 0; + int hlen = ret - (unsigned char *)s->init_buf->data; + /* The code in s23_clnt.c to build ClientHello messages + * includes the 5-byte record header in the buffer, while + * the code in s3_clnt.c does not. + */ + if (s->state == SSL23_ST_CW_CLNT_HELLO_A) + hlen -= 5; + if (hlen > 0xff && hlen < 0x200) + { + hlen = 0x200 - hlen; + if (hlen >= 4) + hlen -= 4; + else + hlen = 0; - s2n(TLSEXT_TYPE_padding, ret); - s2n(hlen, ret); - memset(ret, 0, hlen); - ret += hlen; + s2n(TLSEXT_TYPE_padding, ret); + s2n(hlen, ret); + memset(ret, 0, hlen); + ret += hlen; + } } - } -#endif if ((extdatalen = ret-p-2)== 0) return p; @@ -867,6 +871,8 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha /* Add Heartbeat extension if we've received one */ if (s->tlsext_heartbeat & SSL_TLSEXT_HB_ENABLED) { + if ((limit - ret - 4 - 1) < 0) + return NULL; s2n(TLSEXT_TYPE_heartbeat,ret); s2n(1,ret); /* Set mode: @@ -909,7 +915,10 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha { if (limit - ret - 4 < 0) return NULL; - s2n(TLSEXT_TYPE_channel_id,ret); + if (s->s3->tlsext_channel_id_new) + s2n(TLSEXT_TYPE_channel_id_new,ret); + else + s2n(TLSEXT_TYPE_channel_id,ret); s2n(0,ret); } @@ -1572,6 +1581,13 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in else if (type == TLSEXT_TYPE_channel_id && s->tlsext_channel_id_enabled) s->s3->tlsext_channel_id_valid = 1; + else if (type == TLSEXT_TYPE_channel_id_new && + s->tlsext_channel_id_enabled) + { + s->s3->tlsext_channel_id_valid = 1; + s->s3->tlsext_channel_id_new = 1; + } + else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation && s->ctx->alpn_select_cb && s->s3->tmp.finish_md_len == 0) @@ -1821,6 +1837,12 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in else if (type == TLSEXT_TYPE_channel_id) s->s3->tlsext_channel_id_valid = 1; + else if (type == TLSEXT_TYPE_channel_id_new) + { + s->s3->tlsext_channel_id_valid = 1; + s->s3->tlsext_channel_id_new = 1; + } + else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation) { unsigned len; @@ -2908,6 +2930,17 @@ tls1_channel_id_hash(EVP_MD_CTX *md, SSL *s) EVP_DigestUpdate(md, kClientIDMagic, sizeof(kClientIDMagic)); + if (s->hit && s->s3->tlsext_channel_id_new) + { + static const char kResumptionMagic[] = "Resumption"; + EVP_DigestUpdate(md, kResumptionMagic, + sizeof(kResumptionMagic)); + if (s->session->original_handshake_hash_len == 0) + return 0; + EVP_DigestUpdate(md, s->session->original_handshake_hash, + s->session->original_handshake_hash_len); + } + EVP_MD_CTX_init(&ctx); for (i = 0; i < SSL_MAX_DIGEST; i++) { @@ -2922,3 +2955,29 @@ tls1_channel_id_hash(EVP_MD_CTX *md, SSL *s) return 1; } #endif + +/* tls1_record_handshake_hashes_for_channel_id records the current handshake + * hashes in |s->session| so that Channel ID resumptions can sign that data. */ +int tls1_record_handshake_hashes_for_channel_id(SSL *s) + { + int digest_len; + /* This function should never be called for a resumed session because + * the handshake hashes that we wish to record are for the original, + * full handshake. */ + if (s->hit) + return -1; + /* It only makes sense to call this function if Channel IDs have been + * negotiated. */ + if (!s->s3->tlsext_channel_id_new) + return -1; + + digest_len = tls1_handshake_digest( + s, s->session->original_handshake_hash, + sizeof(s->session->original_handshake_hash)); + if (digest_len < 0) + return -1; + + s->session->original_handshake_hash_len = digest_len; + + return 1; + } diff --git a/app/openssl/ssl/tls1.h b/app/openssl/ssl/tls1.h index ec8948d5..b9a0899e 100644 --- a/app/openssl/ssl/tls1.h +++ b/app/openssl/ssl/tls1.h @@ -259,6 +259,7 @@ extern "C" { /* This is not an IANA defined extension number */ #define TLSEXT_TYPE_channel_id 30031 +#define TLSEXT_TYPE_channel_id_new 30032 /* NameType value from RFC 3546 */ #define TLSEXT_NAMETYPE_host_name 0 @@ -531,9 +532,11 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 -/* ECDHE PSK ciphersuites from RFC 5489 */ -#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256 0x0300C037 -#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384 0x0300C038 +/* ECDHE PSK ciphersuites from RFC5489 + * SHA-2 cipher suites are omitted because they cannot be used safely with + * SSLv3. */ +#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA 0x0300C035 +#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA 0x0300C036 /* XXX * Inconsistency alert: @@ -686,9 +689,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256" #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384" -/* ECDHE PSK ciphersuites from RFC 5489 */ -#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256 "ECDHE-PSK-WITH-AES-128-CBC-SHA256" -#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384 "ECDHE-PSK-WITH-AES-256-CBC-SHA384" +/* ECDHE PSK ciphersuites from RFC5489 */ +#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA "ECDHE-PSK-AES128-CBC-SHA" +#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA "ECDHE-PSK-AES256-CBC-SHA" #define TLS_CT_RSA_SIGN 1 #define TLS_CT_DSS_SIGN 2 diff --git a/app/openvpn/doc/android.txt b/app/openvpn/doc/android.txt index 871e3997..cf8b3c79 100644 --- a/app/openvpn/doc/android.txt +++ b/app/openvpn/doc/android.txt @@ -2,7 +2,7 @@ This file documents the support in OpenVPN for Android 4.0 and up. This support is primarily used in the "OpenVPN for Android" app (http://code.google.com/p/ics-openvpn/). For building see the developer -README: http://code.google.com/p/ics-openvpn/source/browse/README.txt. +README: http://code.google.com/p/ics-openvpn/source/browse/doc/README.txt. Android provides the VPNService API (http://developer.android.com/reference/android/net/VpnService.html) @@ -74,3 +74,12 @@ are not specific to Android but are rarely used on other platform. For example using SIGUSR1 and management-hold to restart, pause, continue the VPN on network changes or the external key management --management-external-key option and inline files. + +Due to a bug in Android 4.4-4.4.2 there the Android Control will also +query what action the daemon should take when opening the fd. The GUI +should compare the last configuration of the tun device with the current +tun configuration and reply with either + +- NOACTION: Keep using the old fd +- OPEN_AFTER_CLOSE: First close the old fd and then open a new to workaround the bug +- OPEN_BEFORE_CLOSE: the normal behaviour when the VPN configuration changed diff --git a/app/openvpn/doc/openvpn.8 b/app/openvpn/doc/openvpn.8 index aee0bc83..f2911c0e 100644 --- a/app/openvpn/doc/openvpn.8 +++ b/app/openvpn/doc/openvpn.8 @@ -1011,13 +1011,6 @@ table (not supported on all OSes). address if OpenVPN is being run in client mode, and is undefined in server mode. .\"********************************************************* .TP -.B \-\-max-routes n -Allow a maximum number of n -.B \-\-route -options to be specified, either in the local configuration file, -or pulled from an OpenVPN server. By default, n=100. -.\"********************************************************* -.TP .B \-\-route-gateway gw|'dhcp' Specify a default gateway .B gw diff --git a/app/openvpn/openvpn.sln b/app/openvpn/openvpn.sln index 90c01b89..f832e7a4 100644 --- a/app/openvpn/openvpn.sln +++ b/app/openvpn/openvpn.sln @@ -1,38 +1,38 @@ - -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual C++ Express 2010 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "openvpnserv", "src\openvpnserv\openvpnserv.vcxproj", "{9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "openvpn", "src\openvpn\openvpn.vcxproj", "{29DF226E-4D4E-440F-ADAF-5829CFD4CA94}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "msvc-generate", "build\msvc\msvc-generate\msvc-generate.vcxproj", "{8598C2C8-34C4-47A1-99B0-7C295A890615}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compat", "src\compat\compat.vcxproj", "{4B2E2719-E661-45D7-9203-F6F456B22F19}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Debug|Win32.ActiveCfg = Debug|Win32 - {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Debug|Win32.Build.0 = Debug|Win32 - {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Release|Win32.ActiveCfg = Release|Win32 - {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Release|Win32.Build.0 = Release|Win32 - {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Debug|Win32.ActiveCfg = Debug|Win32 - {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Debug|Win32.Build.0 = Debug|Win32 - {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Release|Win32.ActiveCfg = Release|Win32 - {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Release|Win32.Build.0 = Release|Win32 - {8598C2C8-34C4-47A1-99B0-7C295A890615}.Debug|Win32.ActiveCfg = Debug|Win32 - {8598C2C8-34C4-47A1-99B0-7C295A890615}.Debug|Win32.Build.0 = Debug|Win32 - {8598C2C8-34C4-47A1-99B0-7C295A890615}.Release|Win32.ActiveCfg = Release|Win32 - {8598C2C8-34C4-47A1-99B0-7C295A890615}.Release|Win32.Build.0 = Release|Win32 - {4B2E2719-E661-45D7-9203-F6F456B22F19}.Debug|Win32.ActiveCfg = Debug|Win32 - {4B2E2719-E661-45D7-9203-F6F456B22F19}.Debug|Win32.Build.0 = Debug|Win32 - {4B2E2719-E661-45D7-9203-F6F456B22F19}.Release|Win32.ActiveCfg = Release|Win32 - {4B2E2719-E661-45D7-9203-F6F456B22F19}.Release|Win32.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual C++ Express 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "openvpnserv", "src\openvpnserv\openvpnserv.vcxproj", "{9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "openvpn", "src\openvpn\openvpn.vcxproj", "{29DF226E-4D4E-440F-ADAF-5829CFD4CA94}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "msvc-generate", "build\msvc\msvc-generate\msvc-generate.vcxproj", "{8598C2C8-34C4-47A1-99B0-7C295A890615}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compat", "src\compat\compat.vcxproj", "{4B2E2719-E661-45D7-9203-F6F456B22F19}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Debug|Win32.ActiveCfg = Debug|Win32 + {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Debug|Win32.Build.0 = Debug|Win32 + {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Release|Win32.ActiveCfg = Release|Win32 + {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Release|Win32.Build.0 = Release|Win32 + {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Debug|Win32.ActiveCfg = Debug|Win32 + {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Debug|Win32.Build.0 = Debug|Win32 + {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Release|Win32.ActiveCfg = Release|Win32 + {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Release|Win32.Build.0 = Release|Win32 + {8598C2C8-34C4-47A1-99B0-7C295A890615}.Debug|Win32.ActiveCfg = Debug|Win32 + {8598C2C8-34C4-47A1-99B0-7C295A890615}.Debug|Win32.Build.0 = Debug|Win32 + {8598C2C8-34C4-47A1-99B0-7C295A890615}.Release|Win32.ActiveCfg = Release|Win32 + {8598C2C8-34C4-47A1-99B0-7C295A890615}.Release|Win32.Build.0 = Release|Win32 + {4B2E2719-E661-45D7-9203-F6F456B22F19}.Debug|Win32.ActiveCfg = Debug|Win32 + {4B2E2719-E661-45D7-9203-F6F456B22F19}.Debug|Win32.Build.0 = Debug|Win32 + {4B2E2719-E661-45D7-9203-F6F456B22F19}.Release|Win32.ActiveCfg = Release|Win32 + {4B2E2719-E661-45D7-9203-F6F456B22F19}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/app/openvpn/sample/sample-keys/pkcs12.p12 b/app/openvpn/sample/sample-keys/pkcs12.p12 index 8df2ccb5..253d4081 100644 Binary files a/app/openvpn/sample/sample-keys/pkcs12.p12 and b/app/openvpn/sample/sample-keys/pkcs12.p12 differ diff --git a/app/openvpn/src/compat/compat.vcxproj b/app/openvpn/src/compat/compat.vcxproj index 42979c11..d872fa75 100644 --- a/app/openvpn/src/compat/compat.vcxproj +++ b/app/openvpn/src/compat/compat.vcxproj @@ -1,87 +1,87 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - {4B2E2719-E661-45D7-9203-F6F456B22F19} - compat - Win32Proj - - - - StaticLibrary - MultiByte - true - - - StaticLibrary - MultiByte - - - - - - - - - - - - - <_ProjectFileVersion>10.0.30319.1 - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - - - - Disabled - $(SOURCEBASE);$(SOURCEBASE)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) - WIN32;_DEBUG;_LIB;$(CPPFLAGS);%(PreprocessorDefinitions) - true - EnableFastChecks - MultiThreadedDebugDLL - - - Level3 - EditAndContinue - - - - - MaxSpeed - true - $(SOURCEBASE);$(SOURCEBASE)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) - WIN32;NDEBUG;_LIB;$(CPPFLAGS);%(PreprocessorDefinitions) - MultiThreadedDLL - true - - - Level3 - ProgramDatabase - - - - - - - - - - - - - - - - + + + + + Debug + Win32 + + + Release + Win32 + + + + {4B2E2719-E661-45D7-9203-F6F456B22F19} + compat + Win32Proj + + + + StaticLibrary + MultiByte + true + + + StaticLibrary + MultiByte + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + + + + Disabled + $(SOURCEBASE);$(SOURCEBASE)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_LIB;$(CPPFLAGS);%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebugDLL + + + Level3 + EditAndContinue + + + + + MaxSpeed + true + $(SOURCEBASE);$(SOURCEBASE)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_LIB;$(CPPFLAGS);%(PreprocessorDefinitions) + MultiThreadedDLL + true + + + Level3 + ProgramDatabase + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/app/openvpn/src/compat/compat.vcxproj.filters b/app/openvpn/src/compat/compat.vcxproj.filters index 00bb0ffa..9576c512 100644 --- a/app/openvpn/src/compat/compat.vcxproj.filters +++ b/app/openvpn/src/compat/compat.vcxproj.filters @@ -1,42 +1,42 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hpp;hxx;hm;inl;inc;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav - - - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - - - Header Files - - + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + \ No newline at end of file diff --git a/app/openvpn/src/openvpn/openvpn.vcxproj b/app/openvpn/src/openvpn/openvpn.vcxproj index 3b2340ee..452876fc 100755 --- a/app/openvpn/src/openvpn/openvpn.vcxproj +++ b/app/openvpn/src/openvpn/openvpn.vcxproj @@ -1,263 +1,263 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - {29DF226E-4D4E-440F-ADAF-5829CFD4CA94} - openvpn - Win32Proj - - - - Application - true - Unicode - - - Application - Unicode - - - - - - - - - - - - - <_ProjectFileVersion>10.0.30319.1 - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - true - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - false - - - - Disabled - $(SOURCEBASE);$(SOURCEBASE)/src/compat;$(SOURCEBASE)/include;$(TAP_WINDOWS_HOME)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) - WIN32;_DEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) - true - EnableFastChecks - MultiThreadedDebugDLL - - - Level3 - EditAndContinue - UNICODE - - - $(SOURCEBASE);%(AdditionalIncludeDirectories) - - - libeay32.lib;ssleay32.lib;lzo2.lib;pkcs11-helper.dll.lib;gdi32.lib;ws2_32.lib;wininet.lib;crypt32.lib;iphlpapi.lib;winmm.lib;%(AdditionalDependencies) - $(OPENSSL_HOME)/lib;$(LZO_HOME)/lib;$(PKCS11H_HOME)/lib;%(AdditionalLibraryDirectories) - true - Console - MachineX86 - - - - - MaxSpeed - true - $(SOURCEBASE);$(SOURCEBASE)/src/compat;$(SOURCEBASE)/include;$(TAP_WINDOWS_HOME)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) - WIN32;NDEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) - MultiThreadedDLL - true - - - Level3 - ProgramDatabase - UNICODE - - - $(SOURCEBASE);%(AdditionalIncludeDirectories) - - - libeay32.lib;ssleay32.lib;lzo2.lib;pkcs11-helper.dll.lib;gdi32.lib;ws2_32.lib;wininet.lib;crypt32.lib;iphlpapi.lib;winmm.lib;%(AdditionalDependencies) - $(OPENSSL_HOME)/lib;$(LZO_HOME)/lib;$(PKCS11H_HOME)/lib;%(AdditionalLibraryDirectories) - true - Console - true - true - MachineX86 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {8598c2c8-34c4-47a1-99b0-7c295a890615} - false - - - {4b2e2719-e661-45d7-9203-f6f456b22f19} - false - - - - - + + + + + Debug + Win32 + + + Release + Win32 + + + + {29DF226E-4D4E-440F-ADAF-5829CFD4CA94} + openvpn + Win32Proj + + + + Application + true + Unicode + + + Application + Unicode + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + false + + + + Disabled + $(SOURCEBASE);$(SOURCEBASE)/src/compat;$(SOURCEBASE)/include;$(TAP_WINDOWS_HOME)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebugDLL + + + Level3 + EditAndContinue + UNICODE + + + $(SOURCEBASE);%(AdditionalIncludeDirectories) + + + libeay32.lib;ssleay32.lib;lzo2.lib;pkcs11-helper.dll.lib;gdi32.lib;ws2_32.lib;wininet.lib;crypt32.lib;iphlpapi.lib;winmm.lib;%(AdditionalDependencies) + $(OPENSSL_HOME)/lib;$(LZO_HOME)/lib;$(PKCS11H_HOME)/lib;%(AdditionalLibraryDirectories) + true + Console + MachineX86 + + + + + MaxSpeed + true + $(SOURCEBASE);$(SOURCEBASE)/src/compat;$(SOURCEBASE)/include;$(TAP_WINDOWS_HOME)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) + MultiThreadedDLL + true + + + Level3 + ProgramDatabase + UNICODE + + + $(SOURCEBASE);%(AdditionalIncludeDirectories) + + + libeay32.lib;ssleay32.lib;lzo2.lib;pkcs11-helper.dll.lib;gdi32.lib;ws2_32.lib;wininet.lib;crypt32.lib;iphlpapi.lib;winmm.lib;%(AdditionalDependencies) + $(OPENSSL_HOME)/lib;$(LZO_HOME)/lib;$(PKCS11H_HOME)/lib;%(AdditionalLibraryDirectories) + true + Console + true + true + MachineX86 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8598c2c8-34c4-47a1-99b0-7c295a890615} + false + + + {4b2e2719-e661-45d7-9203-f6f456b22f19} + false + + + + + \ No newline at end of file diff --git a/app/openvpn/src/openvpn/openvpn.vcxproj.filters b/app/openvpn/src/openvpn/openvpn.vcxproj.filters index 40336ba8..ec5e676c 100644 --- a/app/openvpn/src/openvpn/openvpn.vcxproj.filters +++ b/app/openvpn/src/openvpn/openvpn.vcxproj.filters @@ -1,458 +1,458 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hpp;hxx;hm;inl;inc;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav - - - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - - - Resource Files - - + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Resource Files + + \ No newline at end of file diff --git a/app/openvpn/src/openvpnserv/openvpnserv.vcxproj b/app/openvpn/src/openvpnserv/openvpnserv.vcxproj index 0b75ed01..f2c00718 100644 --- a/app/openvpn/src/openvpnserv/openvpnserv.vcxproj +++ b/app/openvpn/src/openvpnserv/openvpnserv.vcxproj @@ -1,112 +1,112 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD} - openvpnserv - Win32Proj - - - - Application - MultiByte - true - - - Application - MultiByte - - - - - - - - - - - - - <_ProjectFileVersion>10.0.30319.1 - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - true - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - false - - - - Disabled - $(SOURCEBASE);%(AdditionalIncludeDirectories) - WIN32;_DEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) - true - EnableFastChecks - MultiThreadedDebugDLL - - - Level3 - EditAndContinue - - - $(SOURCEBASE);%(AdditionalIncludeDirectories) - - - true - Console - MachineX86 - - - - - MaxSpeed - true - $(SOURCEBASE);%(AdditionalIncludeDirectories) - WIN32;NDEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) - MultiThreadedDLL - true - - - Level3 - ProgramDatabase - - - $(SOURCEBASE);%(AdditionalIncludeDirectories) - - - true - Console - true - true - MachineX86 - - - - - - - - - - - - - - - {8598c2c8-34c4-47a1-99b0-7c295a890615} - false - - - - - + + + + + Debug + Win32 + + + Release + Win32 + + + + {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD} + openvpnserv + Win32Proj + + + + Application + MultiByte + true + + + Application + MultiByte + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + false + + + + Disabled + $(SOURCEBASE);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebugDLL + + + Level3 + EditAndContinue + + + $(SOURCEBASE);%(AdditionalIncludeDirectories) + + + true + Console + MachineX86 + + + + + MaxSpeed + true + $(SOURCEBASE);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) + MultiThreadedDLL + true + + + Level3 + ProgramDatabase + + + $(SOURCEBASE);%(AdditionalIncludeDirectories) + + + true + Console + true + true + MachineX86 + + + + + + + + + + + + + + + {8598c2c8-34c4-47a1-99b0-7c295a890615} + false + + + + + \ No newline at end of file diff --git a/app/openvpn/src/openvpnserv/openvpnserv.vcxproj.filters b/app/openvpn/src/openvpnserv/openvpnserv.vcxproj.filters index 0c89b4f4..a6f8ecc6 100644 --- a/app/openvpn/src/openvpnserv/openvpnserv.vcxproj.filters +++ b/app/openvpn/src/openvpnserv/openvpnserv.vcxproj.filters @@ -1,35 +1,35 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hpp;hxx;hm;inl;inc;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav - - - - - Source Files - - - Source Files - - - - - Header Files - - - - - Resource Files - - + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav + + + + + Source Files + + + Source Files + + + + + Header Files + + + + + Resource Files + + \ No newline at end of file diff --git a/app/ovpnlibs/assets/nopievpn.arm64-v8a b/app/ovpnlibs/assets/nopievpn.arm64-v8a new file mode 100755 index 00000000..0816917c Binary files /dev/null and b/app/ovpnlibs/assets/nopievpn.arm64-v8a differ diff --git a/app/ovpnlibs/assets/nopievpn.armeabi b/app/ovpnlibs/assets/nopievpn.armeabi new file mode 100755 index 00000000..c4d430ca Binary files /dev/null and b/app/ovpnlibs/assets/nopievpn.armeabi differ diff --git a/app/ovpnlibs/assets/nopievpn.armeabi-v7a b/app/ovpnlibs/assets/nopievpn.armeabi-v7a new file mode 100755 index 00000000..c77575b6 Binary files /dev/null and b/app/ovpnlibs/assets/nopievpn.armeabi-v7a differ diff --git a/app/ovpnlibs/assets/nopievpn.mips b/app/ovpnlibs/assets/nopievpn.mips new file mode 100755 index 00000000..1c186383 Binary files /dev/null and b/app/ovpnlibs/assets/nopievpn.mips differ diff --git a/app/ovpnlibs/assets/pievpn.arm64-v8a b/app/ovpnlibs/assets/pievpn.arm64-v8a new file mode 100755 index 00000000..0816917c Binary files /dev/null and b/app/ovpnlibs/assets/pievpn.arm64-v8a differ diff --git a/app/ovpnlibs/assets/pievpn.armeabi b/app/ovpnlibs/assets/pievpn.armeabi new file mode 100755 index 00000000..c4d430ca Binary files /dev/null and b/app/ovpnlibs/assets/pievpn.armeabi differ diff --git a/app/ovpnlibs/assets/pievpn.armeabi-v7a b/app/ovpnlibs/assets/pievpn.armeabi-v7a new file mode 100755 index 00000000..c77575b6 Binary files /dev/null and b/app/ovpnlibs/assets/pievpn.armeabi-v7a differ diff --git a/app/ovpnlibs/assets/pievpn.mips b/app/ovpnlibs/assets/pievpn.mips new file mode 100755 index 00000000..c975d015 Binary files /dev/null and b/app/ovpnlibs/assets/pievpn.mips differ -- cgit v1.2.3 From 6fa7a801199bcd19f6f1629fe91ae873d754f41c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Parm=C3=A9nides=20GV?= Date: Tue, 23 Sep 2014 18:18:26 +0200 Subject: Gitignore ovpnlibs --- .gitignore | 2 +- app/ovpnlibs/assets/nopievpn.arm64-v8a | Bin 5368 -> 0 bytes app/ovpnlibs/assets/nopievpn.armeabi | Bin 5240 -> 0 bytes app/ovpnlibs/assets/nopievpn.armeabi-v7a | Bin 5248 -> 0 bytes app/ovpnlibs/assets/nopievpn.mips | Bin 5276 -> 0 bytes app/ovpnlibs/assets/pievpn.arm64-v8a | Bin 5368 -> 0 bytes app/ovpnlibs/assets/pievpn.armeabi | Bin 5240 -> 0 bytes app/ovpnlibs/assets/pievpn.armeabi-v7a | Bin 5248 -> 0 bytes app/ovpnlibs/assets/pievpn.mips | Bin 5276 -> 0 bytes 9 files changed, 1 insertion(+), 1 deletion(-) delete mode 100755 app/ovpnlibs/assets/nopievpn.arm64-v8a delete mode 100755 app/ovpnlibs/assets/nopievpn.armeabi delete mode 100755 app/ovpnlibs/assets/nopievpn.armeabi-v7a delete mode 100755 app/ovpnlibs/assets/nopievpn.mips delete mode 100755 app/ovpnlibs/assets/pievpn.arm64-v8a delete mode 100755 app/ovpnlibs/assets/pievpn.armeabi delete mode 100755 app/ovpnlibs/assets/pievpn.armeabi-v7a delete mode 100755 app/ovpnlibs/assets/pievpn.mips diff --git a/.gitignore b/.gitignore index fa1293d0..804337f0 100644 --- a/.gitignore +++ b/.gitignore @@ -75,9 +75,9 @@ gradle.properties* build .gradle jniLibs -bitmask_android/ovpnlibs/ G* *.dot /ics-openvpn/ /TAGS /ics-openvpn +app/ovpnlibs diff --git a/app/ovpnlibs/assets/nopievpn.arm64-v8a b/app/ovpnlibs/assets/nopievpn.arm64-v8a deleted file mode 100755 index 0816917c..00000000 Binary files a/app/ovpnlibs/assets/nopievpn.arm64-v8a and /dev/null differ diff --git a/app/ovpnlibs/assets/nopievpn.armeabi b/app/ovpnlibs/assets/nopievpn.armeabi deleted file mode 100755 index c4d430ca..00000000 Binary files a/app/ovpnlibs/assets/nopievpn.armeabi and /dev/null differ diff --git a/app/ovpnlibs/assets/nopievpn.armeabi-v7a b/app/ovpnlibs/assets/nopievpn.armeabi-v7a deleted file mode 100755 index c77575b6..00000000 Binary files a/app/ovpnlibs/assets/nopievpn.armeabi-v7a and /dev/null differ diff --git a/app/ovpnlibs/assets/nopievpn.mips b/app/ovpnlibs/assets/nopievpn.mips deleted file mode 100755 index 1c186383..00000000 Binary files a/app/ovpnlibs/assets/nopievpn.mips and /dev/null differ diff --git a/app/ovpnlibs/assets/pievpn.arm64-v8a b/app/ovpnlibs/assets/pievpn.arm64-v8a deleted file mode 100755 index 0816917c..00000000 Binary files a/app/ovpnlibs/assets/pievpn.arm64-v8a and /dev/null differ diff --git a/app/ovpnlibs/assets/pievpn.armeabi b/app/ovpnlibs/assets/pievpn.armeabi deleted file mode 100755 index c4d430ca..00000000 Binary files a/app/ovpnlibs/assets/pievpn.armeabi and /dev/null differ diff --git a/app/ovpnlibs/assets/pievpn.armeabi-v7a b/app/ovpnlibs/assets/pievpn.armeabi-v7a deleted file mode 100755 index c77575b6..00000000 Binary files a/app/ovpnlibs/assets/pievpn.armeabi-v7a and /dev/null differ diff --git a/app/ovpnlibs/assets/pievpn.mips b/app/ovpnlibs/assets/pievpn.mips deleted file mode 100755 index c975d015..00000000 Binary files a/app/ovpnlibs/assets/pievpn.mips and /dev/null differ -- cgit v1.2.3 From 644fd02cf8da95b0b5a99fb9f2142628dd27f7c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Parm=C3=A9nides=20GV?= Date: Tue, 23 Sep 2014 20:07:44 +0200 Subject: Remove connectedCheck test before assembleRelease. --- app/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/build.gradle b/app/build.gradle index 78f2f791..1057da44 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -156,7 +156,7 @@ task updateIcsOpenVpn( type: Copy, dependsOn: 'mergeUntranslatable') { } into '.' } -assembleRelease.dependsOn connectedCheck +//assembleRelease.dependsOn connectedCheck afterEvaluate { //installRelease.dependsOn connectedCheck } -- cgit v1.2.3 From 394451dbae3e71282611058e00b5fd16c865f147 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Parm=C3=A9nides=20GV?= Date: Fri, 26 Sep 2014 09:46:26 +0200 Subject: Revert "Updated native subprojects from ics-openvpn." This reverts commit d0e7ba3029b2fd42582413aa95773fe7dbdede90. I'll postpone this work for the next cycle, it's not trivial because it doesn't link properly. --- app/jni/Android.mk | 13 +- app/jni/Application.mk | 2 +- app/lzo/B/00README.TXT | 102 +- app/lzo/B/clean.bat | 8 +- app/lzo/B/done.bat | 4 +- app/lzo/B/dos16/bc.bat | 78 +- app/lzo/B/dos16/bc.rsp | 134 +- app/lzo/B/dos16/bc_286.bat | 84 +- app/lzo/B/dos16/bc_pp.bat | 84 +- app/lzo/B/dos16/dm.bat | 78 +- app/lzo/B/dos16/mc.bat | 104 +- app/lzo/B/dos16/mc_qc.bat | 104 +- app/lzo/B/dos16/qc.bat | 80 +- app/lzo/B/dos16/sc.bat | 106 +- app/lzo/B/dos16/tc.bat | 78 +- app/lzo/B/dos16/vc.bat | 78 +- app/lzo/B/dos16/vc_qc.bat | 78 +- app/lzo/B/dos16/wc.bat | 78 +- app/lzo/B/dos16/wc.rsp | 134 +- app/lzo/B/dos32/bc_pp.bat | 84 +- app/lzo/B/dos32/dj2.bat | 90 +- app/lzo/B/dos32/dj2.opt | 12 +- app/lzo/B/dos32/dm.bat | 86 +- app/lzo/B/dos32/emx.bat | 90 +- app/lzo/B/dos32/highc.bat | 78 +- app/lzo/B/dos32/highc.rsp | 146 +- app/lzo/B/dos32/ndp.bat | 80 +- app/lzo/B/dos32/ndp.rsp | 134 +- app/lzo/B/dos32/sc.bat | 102 +- app/lzo/B/dos32/wc.bat | 78 +- app/lzo/B/dos32/zc.bat | 102 +- app/lzo/B/os2/emx.bat | 90 +- app/lzo/B/os2/wc.bat | 84 +- app/lzo/B/os2/zc.bat | 102 +- app/lzo/B/os2_16/mc.bat | 86 +- app/lzo/B/os2_16/wc.bat | 84 +- app/lzo/B/prepare.bat | 20 +- app/lzo/B/src.rsp | 134 +- app/lzo/B/unset.bat | 20 +- app/lzo/B/win16/bc.bat | 84 +- app/lzo/B/win16/dm.bat | 86 +- app/lzo/B/win16/mc.bat | 112 +- app/lzo/B/win16/sc.bat | 106 +- app/lzo/B/win16/vc.bat | 86 +- app/lzo/B/win16/wc.bat | 84 +- app/lzo/B/win32/bc.bat | 84 +- app/lzo/B/win32/bc.rsp | 146 +- app/lzo/B/win32/cygwin.bat | 90 +- app/lzo/B/win32/cygwin.rsp | 146 +- app/lzo/B/win32/dm.bat | 84 +- app/lzo/B/win32/ic.bat | 84 +- app/lzo/B/win32/lccwin32.bat | 118 +- app/lzo/B/win32/mingw.bat | 90 +- app/lzo/B/win32/mwerks.bat | 84 +- app/lzo/B/win32/pellesc.bat | 84 +- app/lzo/B/win32/pgi.bat | 86 +- app/lzo/B/win32/pw32.bat | 90 +- app/lzo/B/win32/rsxnt.bat | 90 +- app/lzo/B/win32/sc.bat | 112 +- app/lzo/B/win32/vc.bat | 84 +- app/lzo/B/win32/vc.rsp | 146 +- app/lzo/B/win32/vc_dll.bat | 84 +- app/lzo/B/win32/vc_dll.def | 14 +- app/lzo/B/win32/wc.bat | 78 +- app/lzo/B/win32/wc.rsp | 146 +- app/lzo/B/win64/ic.bat | 84 +- app/lzo/B/win64/ic_dll.bat | 84 +- app/lzo/B/win64/vc.bat | 84 +- app/lzo/B/win64/vc.rsp | 134 +- app/lzo/B/win64/vc_dll.bat | 84 +- app/lzo/B/win64/vc_dll.def | 2 +- app/lzo/autoconf/shtool | 12 +- app/openssl/Apps-config-host.mk | 2 +- app/openssl/Apps-config-target.mk | 2 +- app/openssl/Apps.mk | 9 +- app/openssl/Crypto-config-host.mk | 28 +- app/openssl/Crypto-config-target.mk | 28 +- app/openssl/Crypto-config-trusty.mk | 2 +- app/openssl/Crypto.mk | 29 +- app/openssl/Ssl-config-host.mk | 2 +- app/openssl/Ssl-config-target.mk | 2 +- app/openssl/Ssl.mk | 14 +- app/openssl/apps/enc.c | 6 - app/openssl/apps/ocsp.c | 22 +- app/openssl/apps/req.c | 15 +- app/openssl/apps/s_cb.c | 4 - app/openssl/apps/s_socket.c | 5 +- app/openssl/apps/smime.c | 4 +- app/openssl/build-config-32.mk | 4 +- app/openssl/build-config-64.mk | 4 +- app/openssl/build-config-trusty.mk | 2 +- app/openssl/check-all-builds.sh | 16 +- app/openssl/crypto/aes/asm/aes-armv4.pl | 139 +- app/openssl/crypto/aes/asm/aes-armv4.s | 160 +- app/openssl/crypto/aes/asm/aesv8-armx-64.S | 761 ---- app/openssl/crypto/aes/asm/aesv8-armx.S | 767 ---- app/openssl/crypto/aes/asm/aesv8-armx.pl | 980 ----- app/openssl/crypto/arm64cpuid.S | 46 - app/openssl/crypto/arm_arch.h | 19 +- app/openssl/crypto/armcap.c | 85 +- app/openssl/crypto/armv4cpuid.S | 82 +- app/openssl/crypto/asn1/a_strnid.c | 2 +- app/openssl/crypto/bio/bio.h | 3 - app/openssl/crypto/bio/bss_dgram.c | 9 +- app/openssl/crypto/bn/asm/armv4-gf2m.S | 106 +- app/openssl/crypto/bn/asm/armv4-gf2m.pl | 139 +- app/openssl/crypto/bn/asm/armv4-mont.pl | 483 +-- app/openssl/crypto/bn/asm/armv4-mont.s | 444 +-- app/openssl/crypto/bn/asm/mips3.S | 2201 ------------ app/openssl/crypto/bn/asm/pa-risc2.S | 1618 --------- app/openssl/crypto/bn/asm/pa-risc2W.S | 1605 --------- app/openssl/crypto/bn/bn_mont.c | 46 +- app/openssl/crypto/cms/cms_env.c | 2 - app/openssl/crypto/cms/cms_sd.c | 4 +- app/openssl/crypto/cms/cms_smime.c | 5 +- app/openssl/crypto/dso/dso_dlfcn.c | 2 +- app/openssl/crypto/ec/ec_ameth.c | 1 - app/openssl/crypto/ec/ec_asn1.c | 7 +- app/openssl/crypto/ec/ec_lcl.h | 2 +- app/openssl/crypto/evp/bio_b64.c | 1 - app/openssl/crypto/evp/e_aes.c | 170 +- app/openssl/crypto/evp/encode.c | 1 - app/openssl/crypto/evp/p_lib.c | 2 +- app/openssl/crypto/modes/asm/ghash-armv4.S | 248 +- app/openssl/crypto/modes/asm/ghash-armv4.pl | 229 +- app/openssl/crypto/modes/asm/ghashv8-armx-64.S | 115 - app/openssl/crypto/modes/asm/ghashv8-armx.S | 116 - app/openssl/crypto/modes/asm/ghashv8-armx.pl | 240 -- app/openssl/crypto/modes/gcm128.c | 27 +- app/openssl/crypto/opensslconf-32.h | 6 - app/openssl/crypto/opensslconf-64.h | 6 - app/openssl/crypto/opensslconf-static-32.h | 6 - app/openssl/crypto/opensslconf-static-64.h | 6 - app/openssl/crypto/opensslv.h | 6 +- app/openssl/crypto/pkcs12/p12_crt.c | 8 - app/openssl/crypto/pkcs12/p12_kiss.c | 2 +- app/openssl/crypto/pkcs7/pk7_doit.c | 6 - app/openssl/crypto/pkcs7/pkcs7.h | 1 - app/openssl/crypto/pkcs7/pkcs7err.c | 3 +- app/openssl/crypto/rsa/rsa_ameth.c | 2 +- app/openssl/crypto/sha/asm/sha1-armv4-large.pl | 446 +-- app/openssl/crypto/sha/asm/sha1-armv4-large.s | 1008 +----- app/openssl/crypto/sha/asm/sha1-armv8.S | 1211 ------- app/openssl/crypto/sha/asm/sha1-armv8.pl | 333 -- app/openssl/crypto/sha/asm/sha256-armv4.pl | 585 +-- app/openssl/crypto/sha/asm/sha256-armv4.s | 3729 +++++++------------- app/openssl/crypto/sha/asm/sha256-armv8.S | 1141 ------ app/openssl/crypto/sha/asm/sha512-armv4.pl | 3 +- app/openssl/crypto/sha/asm/sha512-armv4.s | 2 +- app/openssl/crypto/sha/asm/sha512-armv8.S | 1021 ------ app/openssl/crypto/sha/asm/sha512-armv8.pl | 414 --- app/openssl/crypto/srp/srp_vfy.c | 3 - app/openssl/crypto/x509v3/v3_purp.c | 6 +- app/openssl/import_openssl.sh | 90 +- app/openssl/include/openssl/bio.h | 3 - app/openssl/include/openssl/opensslconf-32.h | 6 - app/openssl/include/openssl/opensslconf-64.h | 6 - .../include/openssl/opensslconf-static-32.h | 6 - .../include/openssl/opensslconf-static-64.h | 6 - app/openssl/include/openssl/opensslv.h | 6 +- app/openssl/include/openssl/pkcs7.h | 1 - app/openssl/include/openssl/ssl.h | 37 +- app/openssl/include/openssl/ssl3.h | 10 +- app/openssl/include/openssl/tls1.h | 15 +- app/openssl/openssl.config | 136 +- app/openssl/openssl.version | 2 +- app/openssl/patches/README | 13 - app/openssl/ssl/bio_ssl.c | 8 - app/openssl/ssl/d1_both.c | 15 +- app/openssl/ssl/d1_lib.c | 9 +- app/openssl/ssl/d1_pkt.c | 19 +- app/openssl/ssl/d1_srvr.c | 1 - app/openssl/ssl/s3_both.c | 2 +- app/openssl/ssl/s3_clnt.c | 70 +- app/openssl/ssl/s3_enc.c | 2 +- app/openssl/ssl/s3_lib.c | 31 +- app/openssl/ssl/s3_pkt.c | 42 +- app/openssl/ssl/s3_srvr.c | 65 +- app/openssl/ssl/ssl.h | 37 +- app/openssl/ssl/ssl3.h | 10 +- app/openssl/ssl/ssl_asn1.c | 29 +- app/openssl/ssl/ssl_err.c | 3 +- app/openssl/ssl/ssl_lib.c | 39 +- app/openssl/ssl/ssl_locl.h | 3 - app/openssl/ssl/ssl_sess.c | 11 - app/openssl/ssl/t1_enc.c | 99 +- app/openssl/ssl/t1_lib.c | 105 +- app/openssl/ssl/tls1.h | 15 +- app/openvpn/doc/android.txt | 11 +- app/openvpn/doc/openvpn.8 | 7 + app/openvpn/openvpn.sln | 76 +- app/openvpn/sample/sample-keys/pkcs12.p12 | Bin 2685 -> 4756 bytes app/openvpn/src/compat/compat.vcxproj | 172 +- app/openvpn/src/compat/compat.vcxproj.filters | 82 +- app/openvpn/src/openvpn/openvpn.vcxproj | 524 +-- app/openvpn/src/openvpn/openvpn.vcxproj.filters | 914 ++--- app/openvpn/src/openvpnserv/openvpnserv.vcxproj | 222 +- .../src/openvpnserv/openvpnserv.vcxproj.filters | 68 +- 198 files changed, 6253 insertions(+), 23907 deletions(-) delete mode 100644 app/openssl/crypto/aes/asm/aesv8-armx-64.S delete mode 100644 app/openssl/crypto/aes/asm/aesv8-armx.S delete mode 100644 app/openssl/crypto/aes/asm/aesv8-armx.pl delete mode 100644 app/openssl/crypto/arm64cpuid.S delete mode 100644 app/openssl/crypto/bn/asm/mips3.S delete mode 100644 app/openssl/crypto/bn/asm/pa-risc2.S delete mode 100644 app/openssl/crypto/bn/asm/pa-risc2W.S delete mode 100644 app/openssl/crypto/modes/asm/ghashv8-armx-64.S delete mode 100644 app/openssl/crypto/modes/asm/ghashv8-armx.S delete mode 100644 app/openssl/crypto/modes/asm/ghashv8-armx.pl delete mode 100644 app/openssl/crypto/sha/asm/sha1-armv8.S delete mode 100644 app/openssl/crypto/sha/asm/sha1-armv8.pl delete mode 100644 app/openssl/crypto/sha/asm/sha256-armv8.S delete mode 100644 app/openssl/crypto/sha/asm/sha512-armv8.S delete mode 100644 app/openssl/crypto/sha/asm/sha512-armv8.pl diff --git a/app/jni/Android.mk b/app/jni/Android.mk index 8418c9b8..0c906f97 100644 --- a/app/jni/Android.mk +++ b/app/jni/Android.mk @@ -13,18 +13,15 @@ include snappy/Android.mk include openssl/Android.mk -ifeq ($(TARGET_ARCH),mips) - USE_BREAKPAD=0 -endif -ifeq ($(TARGET_ARCH),mips64) - USE_BREAKPAD=0 -endif - ifneq ($(WITH_BREAKPAD),0) + ifneq ($(TARGET_ARCH),mips) WITH_BREAKPAD=1 include google-breakpad/android/google_breakpad/Android.mk -else + else WITH_BREAKPAD=0 + endif +else +WITH_BREAKPAD=0 endif ifeq ($(WITH_POLAR),1) diff --git a/app/jni/Application.mk b/app/jni/Application.mk index 21718248..718e79a8 100644 --- a/app/jni/Application.mk +++ b/app/jni/Application.mk @@ -1,4 +1,4 @@ -APP_ABI := arm64-v8a armeabi armeabi-v7a mips x86 x86_64 +APP_ABI := all APP_PLATFORM := android-14 APP_STL:=stlport_shared diff --git a/app/lzo/B/00README.TXT b/app/lzo/B/00README.TXT index 17d8ef8e..b5352a86 100644 --- a/app/lzo/B/00README.TXT +++ b/app/lzo/B/00README.TXT @@ -1,51 +1,51 @@ -Simple make drivers for DOS, Windows, OS/2 and other systems -============================================================ - -This directory contains a bunch of simple build scripts - I've tried -to make them as foolproof as possible. - -To build LZO for your system type 'b\OS\COMPILER' in the base directory, -e.g. 'b\win32\vc.bat' will build the Win32 Visual C/C++ version. - -After building do a basic test by running 'lzotest.exe -mlzo COPYING'. -util\check.sh is an example of a more thorough test driver. - -Please send me your additional/improved versions. - - -Overview: ---------- - -b\dos32\bc_pp.bat Borland C/C++ (1) -b\dos32\dj2.bat djgpp v2 + gcc (1) -b\dos32\dm.bat Digital Mars C/C++ (1) -b\dos32\emx.bat emx + gcc (1) -b\dos32\sc.bat Symantec C/C++ (1) -b\dos32\wc.bat Watcom C/C++ (1) - -b\os2\emx.bat emx + gcc (1) -b\os2\wc.bat Watcom C/C++ (1) - -b\win32\bc.bat Borland C/C++ (1) -b\win32\cygwin.bat Cygwin + gcc (1) -b\win32\dm.bat Digital Mars C/C++ (1) -b\win32\ic.bat Intel C/C++ (1) -b\win32\mingw.bat MinGW + gcc (1) -b\win32\mwerks.bat Metrowerks CodeWarrior C/C++ (1) -b\win32\pellesc.bat Pelles C (1) -b\win32\pgi.bat Portland Group PGI C/C++ (1) -b\win32\rsxnt.bat rsxnt + gcc (1) -b\win32\sc.bat Symantec C/C++ (1) -b\win32\vc.bat Visual C/C++ (1) -b\win32\vc_dll.bat Visual C/C++ (DLL version) (1) -b\win32\wc.bat Watcom C/C++ (1) - -b\win64\ic.bat Intel C/C++ (Itanium) -b\win64\ic_dll.bat Intel C/C++ (Itanium) (DLL version) -b\win64\vc.bat Visual C/C++ (AMD64 or Itanium) -b\win64\vc_dll.bat Visual C/C++ (AMD64 or Itanium) (DLL version) - - -Notes: - (1) includes support for i386 assembler versions - +Simple make drivers for DOS, Windows, OS/2 and other systems +============================================================ + +This directory contains a bunch of simple build scripts - I've tried +to make them as foolproof as possible. + +To build LZO for your system type 'b\OS\COMPILER' in the base directory, +e.g. 'b\win32\vc.bat' will build the Win32 Visual C/C++ version. + +After building do a basic test by running 'lzotest.exe -mlzo COPYING'. +util\check.sh is an example of a more thorough test driver. + +Please send me your additional/improved versions. + + +Overview: +--------- + +b\dos32\bc_pp.bat Borland C/C++ (1) +b\dos32\dj2.bat djgpp v2 + gcc (1) +b\dos32\dm.bat Digital Mars C/C++ (1) +b\dos32\emx.bat emx + gcc (1) +b\dos32\sc.bat Symantec C/C++ (1) +b\dos32\wc.bat Watcom C/C++ (1) + +b\os2\emx.bat emx + gcc (1) +b\os2\wc.bat Watcom C/C++ (1) + +b\win32\bc.bat Borland C/C++ (1) +b\win32\cygwin.bat Cygwin + gcc (1) +b\win32\dm.bat Digital Mars C/C++ (1) +b\win32\ic.bat Intel C/C++ (1) +b\win32\mingw.bat MinGW + gcc (1) +b\win32\mwerks.bat Metrowerks CodeWarrior C/C++ (1) +b\win32\pellesc.bat Pelles C (1) +b\win32\pgi.bat Portland Group PGI C/C++ (1) +b\win32\rsxnt.bat rsxnt + gcc (1) +b\win32\sc.bat Symantec C/C++ (1) +b\win32\vc.bat Visual C/C++ (1) +b\win32\vc_dll.bat Visual C/C++ (DLL version) (1) +b\win32\wc.bat Watcom C/C++ (1) + +b\win64\ic.bat Intel C/C++ (Itanium) +b\win64\ic_dll.bat Intel C/C++ (Itanium) (DLL version) +b\win64\vc.bat Visual C/C++ (AMD64 or Itanium) +b\win64\vc_dll.bat Visual C/C++ (AMD64 or Itanium) (DLL version) + + +Notes: + (1) includes support for i386 assembler versions + diff --git a/app/lzo/B/clean.bat b/app/lzo/B/clean.bat index 1f7de592..453e9479 100644 --- a/app/lzo/B/clean.bat +++ b/app/lzo/B/clean.bat @@ -1,4 +1,4 @@ -@del /q *.def *.err *.exp *.map *.o *.obj *.res *.tds > nul 2> nul -@del /q liblzo2.a lzo2.a lzo2.dll lzo2.lib > nul 2> nul -@del /q a.exe a.out dict.exe lzopack.exe precomp.exe precomp2.exe simple.exe > nul 2> nul -@del /q lzotest.exe testmini.exe > nul 2> nul +@del /q *.def *.err *.exp *.map *.o *.obj *.res *.tds > nul 2> nul +@del /q liblzo2.a lzo2.a lzo2.dll lzo2.lib > nul 2> nul +@del /q a.exe a.out dict.exe lzopack.exe precomp.exe precomp2.exe simple.exe > nul 2> nul +@del /q lzotest.exe testmini.exe > nul 2> nul diff --git a/app/lzo/B/done.bat b/app/lzo/B/done.bat index b9bf6aa0..0ae243d4 100644 --- a/app/lzo/B/done.bat +++ b/app/lzo/B/done.bat @@ -1,2 +1,2 @@ -@echo // -@echo // Building LZO was successful. All done. +@echo // +@echo // Building LZO was successful. All done. diff --git a/app/lzo/B/dos16/bc.bat b/app/lzo/B/dos16/bc.bat index 292b793a..bfcf38c6 100644 --- a/app/lzo/B/dos16/bc.bat +++ b/app/lzo/B/dos16/bc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Borland C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc -ml -f- -set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -f -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -f -ls -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Borland C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc -ml -f- +set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -f -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -f -ls -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/bc.rsp b/app/lzo/B/dos16/bc.rsp index 8a1d83e4..39b28f9f 100644 --- a/app/lzo/B/dos16/bc.rsp +++ b/app/lzo/B/dos16/bc.rsp @@ -1,67 +1,67 @@ -+lzo1.obj & -+lzo1_99.obj & -+lzo1a.obj & -+lzo1a_99.obj & -+lzo1b_1.obj & -+lzo1b_2.obj & -+lzo1b_3.obj & -+lzo1b_4.obj & -+lzo1b_5.obj & -+lzo1b_6.obj & -+lzo1b_7.obj & -+lzo1b_8.obj & -+lzo1b_9.obj & -+lzo1b_99.obj & -+lzo1b_9x.obj & -+lzo1b_cc.obj & -+lzo1b_d1.obj & -+lzo1b_d2.obj & -+lzo1b_rr.obj & -+lzo1b_xx.obj & -+lzo1c_1.obj & -+lzo1c_2.obj & -+lzo1c_3.obj & -+lzo1c_4.obj & -+lzo1c_5.obj & -+lzo1c_6.obj & -+lzo1c_7.obj & -+lzo1c_8.obj & -+lzo1c_9.obj & -+lzo1c_99.obj & -+lzo1c_9x.obj & -+lzo1c_cc.obj & -+lzo1c_d1.obj & -+lzo1c_d2.obj & -+lzo1c_rr.obj & -+lzo1c_xx.obj & -+lzo1f_1.obj & -+lzo1f_9x.obj & -+lzo1f_d1.obj & -+lzo1f_d2.obj & -+lzo1x_1.obj & -+lzo1x_1k.obj & -+lzo1x_1l.obj & -+lzo1x_1o.obj & -+lzo1x_9x.obj & -+lzo1x_d1.obj & -+lzo1x_d2.obj & -+lzo1x_d3.obj & -+lzo1x_o.obj & -+lzo1y_1.obj & -+lzo1y_9x.obj & -+lzo1y_d1.obj & -+lzo1y_d2.obj & -+lzo1y_d3.obj & -+lzo1y_o.obj & -+lzo1z_9x.obj & -+lzo1z_d1.obj & -+lzo1z_d2.obj & -+lzo1z_d3.obj & -+lzo2a_9x.obj & -+lzo2a_d1.obj & -+lzo2a_d2.obj & -+lzo_crc.obj & -+lzo_init.obj & -+lzo_ptr.obj & -+lzo_str.obj & -+lzo_util.obj ++lzo1.obj & ++lzo1_99.obj & ++lzo1a.obj & ++lzo1a_99.obj & ++lzo1b_1.obj & ++lzo1b_2.obj & ++lzo1b_3.obj & ++lzo1b_4.obj & ++lzo1b_5.obj & ++lzo1b_6.obj & ++lzo1b_7.obj & ++lzo1b_8.obj & ++lzo1b_9.obj & ++lzo1b_99.obj & ++lzo1b_9x.obj & ++lzo1b_cc.obj & ++lzo1b_d1.obj & ++lzo1b_d2.obj & ++lzo1b_rr.obj & ++lzo1b_xx.obj & ++lzo1c_1.obj & ++lzo1c_2.obj & ++lzo1c_3.obj & ++lzo1c_4.obj & ++lzo1c_5.obj & ++lzo1c_6.obj & ++lzo1c_7.obj & ++lzo1c_8.obj & ++lzo1c_9.obj & ++lzo1c_99.obj & ++lzo1c_9x.obj & ++lzo1c_cc.obj & ++lzo1c_d1.obj & ++lzo1c_d2.obj & ++lzo1c_rr.obj & ++lzo1c_xx.obj & ++lzo1f_1.obj & ++lzo1f_9x.obj & ++lzo1f_d1.obj & ++lzo1f_d2.obj & ++lzo1x_1.obj & ++lzo1x_1k.obj & ++lzo1x_1l.obj & ++lzo1x_1o.obj & ++lzo1x_9x.obj & ++lzo1x_d1.obj & ++lzo1x_d2.obj & ++lzo1x_d3.obj & ++lzo1x_o.obj & ++lzo1y_1.obj & ++lzo1y_9x.obj & ++lzo1y_d1.obj & ++lzo1y_d2.obj & ++lzo1y_d3.obj & ++lzo1y_o.obj & ++lzo1z_9x.obj & ++lzo1z_d1.obj & ++lzo1z_d2.obj & ++lzo1z_d3.obj & ++lzo2a_9x.obj & ++lzo2a_d1.obj & ++lzo2a_d2.obj & ++lzo_crc.obj & ++lzo_init.obj & ++lzo_ptr.obj & ++lzo_str.obj & ++lzo_util.obj diff --git a/app/lzo/B/dos16/bc_286.bat b/app/lzo/B/dos16/bc_286.bat index 025a48ea..e68fd4e2 100644 --- a/app/lzo/B/dos16/bc_286.bat +++ b/app/lzo/B/dos16/bc_286.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Borland C/C++ + Pharlap 286DOS-Extender -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc286 -ml -2 -set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples -DWITH_TIMER examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iminilzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Borland C/C++ + Pharlap 286DOS-Extender +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc286 -ml -2 +set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples -DWITH_TIMER examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iminilzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/bc_pp.bat b/app/lzo/B/dos16/bc_pp.bat index 6c0aac37..2a09ba01 100644 --- a/app/lzo/B/dos16/bc_pp.bat +++ b/app/lzo/B/dos16/bc_pp.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Borland C/C++ + Borland PowerPack 1.0 (DPMI16) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc -ml -2 -WX -set CF=-O1 -d -w -w-sig %CFI% -Iinclude\lzo -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples -DWITH_TIMER examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iminilzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Borland C/C++ + Borland PowerPack 1.0 (DPMI16) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc -ml -2 -WX +set CF=-O1 -d -w -w-sig %CFI% -Iinclude\lzo +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples -DWITH_TIMER examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iminilzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/dm.bat b/app/lzo/B/dos16/dm.bat index 1ec6feca..24335982 100644 --- a/app/lzo/B/dos16/dm.bat +++ b/app/lzo/B/dos16/dm.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Digital Mars C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=dmc -ml -set CF=-o -w- %CFI% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Digital Mars C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=dmc -ml +set CF=-o -w- %CFI% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/mc.bat b/app/lzo/B/dos16/mc.bat index 4f34a36f..077401af 100644 --- a/app/lzo/B/dos16/mc.bat +++ b/app/lzo/B/dos16/mc.bat @@ -1,52 +1,52 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Microsoft C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -AL -set CF=-O -Gf -W3 %CFI% -set LF=/map - -@REM %CC% %CF% -c src\*.c -for %%f in (src\*.c) do %CC% %CF% -c %%f -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -link %LF% dict.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -link %LF% lzopack.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -link %LF% precomp.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -link %LF% precomp2.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -link %LF% simple.obj,,,%BLIB%; -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -link %LF% lzotest.obj,,,%BLIB%; -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Microsoft C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -AL +set CF=-O -Gf -W3 %CFI% +set LF=/map + +@REM %CC% %CF% -c src\*.c +for %%f in (src\*.c) do %CC% %CF% -c %%f +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +link %LF% dict.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +link %LF% lzopack.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +link %LF% precomp.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +link %LF% precomp2.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +link %LF% simple.obj,,,%BLIB%; +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +link %LF% lzotest.obj,,,%BLIB%; +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/mc_qc.bat b/app/lzo/B/dos16/mc_qc.bat index 1c2ce940..cb814dd7 100644 --- a/app/lzo/B/dos16/mc_qc.bat +++ b/app/lzo/B/dos16/mc_qc.bat @@ -1,52 +1,52 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Microsoft C/C++ (QuickC) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -qc -AL -set CF=-O -Gf -W3 %CFI% -set LF=/map - -@REM %CC% %CF% -c src\*.c -for %%f in (src\*.c) do %CC% %CF% -c %%f -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -link %LF% dict.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -link %LF% lzopack.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -link %LF% precomp.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -link %LF% precomp2.obj,,,%BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -link %LF% simple.obj,,,%BLIB%; -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -link %LF% lzotest.obj,,,%BLIB%; -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Microsoft C/C++ (QuickC) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -qc -AL +set CF=-O -Gf -W3 %CFI% +set LF=/map + +@REM %CC% %CF% -c src\*.c +for %%f in (src\*.c) do %CC% %CF% -c %%f +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +link %LF% dict.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +link %LF% lzopack.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +link %LF% precomp.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +link %LF% precomp2.obj,,,%BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +link %LF% simple.obj,,,%BLIB%; +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +link %LF% lzotest.obj,,,%BLIB%; +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/qc.bat b/app/lzo/B/dos16/qc.bat index 82734fd7..e51166ec 100644 --- a/app/lzo/B/dos16/qc.bat +++ b/app/lzo/B/dos16/qc.bat @@ -1,40 +1,40 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Microsoft QuickC -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=qcl -nologo -AL -set CF=-O -Gf -W3 %CFI% -set LF=%BLIB% -Fm - -@REM %CC% %CF% -c src\*.c -for %%f in (src\*.c) do %CC% %CF% -c %%f -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Microsoft QuickC +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=qcl -nologo -AL +set CF=-O -Gf -W3 %CFI% +set LF=%BLIB% -Fm + +@REM %CC% %CF% -c src\*.c +for %%f in (src\*.c) do %CC% %CF% -c %%f +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/sc.bat b/app/lzo/B/dos16/sc.bat index 056c04a9..52960232 100644 --- a/app/lzo/B/dos16/sc.bat +++ b/app/lzo/B/dos16/sc.bat @@ -1,53 +1,53 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Symantec C/C++ -@echo // -@echo // NOTE: LZO breaks the optimizer, so we disable optimizations -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=sc -ml -set CF=-w- %CFI% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Symantec C/C++ +@echo // +@echo // NOTE: LZO breaks the optimizer, so we disable optimizations +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=sc -ml +set CF=-w- %CFI% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/tc.bat b/app/lzo/B/dos16/tc.bat index a3c32770..c397ae1b 100644 --- a/app/lzo/B/dos16/tc.bat +++ b/app/lzo/B/dos16/tc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Turbo C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=tcc -ml -f- -set CF=-O -G -w -w-rch -w-sig %CFI% -Iinclude\lzo -set LF=%BLIB% - -%CC% %CF% -Isrc -c src\*.c -@if errorlevel 1 goto error -tlib %BLIB% @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -f -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -f -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Turbo C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=tcc -ml -f- +set CF=-O -G -w -w-rch -w-sig %CFI% -Iinclude\lzo +set LF=%BLIB% + +%CC% %CF% -Isrc -c src\*.c +@if errorlevel 1 goto error +tlib %BLIB% @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -f -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -f -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/vc.bat b/app/lzo/B/dos16/vc.bat index b0617859..17ec4ccd 100644 --- a/app/lzo/B/dos16/vc.bat +++ b/app/lzo/B/dos16/vc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Microsoft Visual C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -AL -set CF=-O -Gf -Gs -Gy -W3 %CFI% -set LF=%BLIB% -Fm - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Microsoft Visual C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -AL +set CF=-O -Gf -Gs -Gy -W3 %CFI% +set LF=%BLIB% -Fm + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/vc_qc.bat b/app/lzo/B/dos16/vc_qc.bat index 329b092f..09fc0d3d 100644 --- a/app/lzo/B/dos16/vc_qc.bat +++ b/app/lzo/B/dos16/vc_qc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Microsoft Visual C/C++ (QuickC) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -qc -AL -set CF=-O -Gf -Gy -W3 %CFI% -set LF=%BLIB% -Fm - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Microsoft Visual C/C++ (QuickC) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -qc -AL +set CF=-O -Gf -Gy -W3 %CFI% +set LF=%BLIB% -Fm + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/wc.bat b/app/lzo/B/dos16/wc.bat index c92135c0..4effecc6 100644 --- a/app/lzo/B/dos16/wc.bat +++ b/app/lzo/B/dos16/wc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 16-bit -@echo // Watcom C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl -zq -ml -bt#dos -l#dos -set CF=-ox %CFI% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 16-bit +@echo // Watcom C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl -zq -ml -bt#dos -l#dos +set CF=-ox %CFI% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos16/wc.rsp b/app/lzo/B/dos16/wc.rsp index c8e1c9ff..f36b8cba 100644 --- a/app/lzo/B/dos16/wc.rsp +++ b/app/lzo/B/dos16/wc.rsp @@ -1,67 +1,67 @@ -+'lzo1.obj' -+'lzo1_99.obj' -+'lzo1a.obj' -+'lzo1a_99.obj' -+'lzo1b_1.obj' -+'lzo1b_2.obj' -+'lzo1b_3.obj' -+'lzo1b_4.obj' -+'lzo1b_5.obj' -+'lzo1b_6.obj' -+'lzo1b_7.obj' -+'lzo1b_8.obj' -+'lzo1b_9.obj' -+'lzo1b_99.obj' -+'lzo1b_9x.obj' -+'lzo1b_cc.obj' -+'lzo1b_d1.obj' -+'lzo1b_d2.obj' -+'lzo1b_rr.obj' -+'lzo1b_xx.obj' -+'lzo1c_1.obj' -+'lzo1c_2.obj' -+'lzo1c_3.obj' -+'lzo1c_4.obj' -+'lzo1c_5.obj' -+'lzo1c_6.obj' -+'lzo1c_7.obj' -+'lzo1c_8.obj' -+'lzo1c_9.obj' -+'lzo1c_99.obj' -+'lzo1c_9x.obj' -+'lzo1c_cc.obj' -+'lzo1c_d1.obj' -+'lzo1c_d2.obj' -+'lzo1c_rr.obj' -+'lzo1c_xx.obj' -+'lzo1f_1.obj' -+'lzo1f_9x.obj' -+'lzo1f_d1.obj' -+'lzo1f_d2.obj' -+'lzo1x_1.obj' -+'lzo1x_1k.obj' -+'lzo1x_1l.obj' -+'lzo1x_1o.obj' -+'lzo1x_9x.obj' -+'lzo1x_d1.obj' -+'lzo1x_d2.obj' -+'lzo1x_d3.obj' -+'lzo1x_o.obj' -+'lzo1y_1.obj' -+'lzo1y_9x.obj' -+'lzo1y_d1.obj' -+'lzo1y_d2.obj' -+'lzo1y_d3.obj' -+'lzo1y_o.obj' -+'lzo1z_9x.obj' -+'lzo1z_d1.obj' -+'lzo1z_d2.obj' -+'lzo1z_d3.obj' -+'lzo2a_9x.obj' -+'lzo2a_d1.obj' -+'lzo2a_d2.obj' -+'lzo_crc.obj' -+'lzo_init.obj' -+'lzo_ptr.obj' -+'lzo_str.obj' -+'lzo_util.obj' ++'lzo1.obj' ++'lzo1_99.obj' ++'lzo1a.obj' ++'lzo1a_99.obj' ++'lzo1b_1.obj' ++'lzo1b_2.obj' ++'lzo1b_3.obj' ++'lzo1b_4.obj' ++'lzo1b_5.obj' ++'lzo1b_6.obj' ++'lzo1b_7.obj' ++'lzo1b_8.obj' ++'lzo1b_9.obj' ++'lzo1b_99.obj' ++'lzo1b_9x.obj' ++'lzo1b_cc.obj' ++'lzo1b_d1.obj' ++'lzo1b_d2.obj' ++'lzo1b_rr.obj' ++'lzo1b_xx.obj' ++'lzo1c_1.obj' ++'lzo1c_2.obj' ++'lzo1c_3.obj' ++'lzo1c_4.obj' ++'lzo1c_5.obj' ++'lzo1c_6.obj' ++'lzo1c_7.obj' ++'lzo1c_8.obj' ++'lzo1c_9.obj' ++'lzo1c_99.obj' ++'lzo1c_9x.obj' ++'lzo1c_cc.obj' ++'lzo1c_d1.obj' ++'lzo1c_d2.obj' ++'lzo1c_rr.obj' ++'lzo1c_xx.obj' ++'lzo1f_1.obj' ++'lzo1f_9x.obj' ++'lzo1f_d1.obj' ++'lzo1f_d2.obj' ++'lzo1x_1.obj' ++'lzo1x_1k.obj' ++'lzo1x_1l.obj' ++'lzo1x_1o.obj' ++'lzo1x_9x.obj' ++'lzo1x_d1.obj' ++'lzo1x_d2.obj' ++'lzo1x_d3.obj' ++'lzo1x_o.obj' ++'lzo1y_1.obj' ++'lzo1y_9x.obj' ++'lzo1y_d1.obj' ++'lzo1y_d2.obj' ++'lzo1y_d3.obj' ++'lzo1y_o.obj' ++'lzo1z_9x.obj' ++'lzo1z_d1.obj' ++'lzo1z_d2.obj' ++'lzo1z_d3.obj' ++'lzo2a_9x.obj' ++'lzo2a_d1.obj' ++'lzo2a_d2.obj' ++'lzo_crc.obj' ++'lzo_init.obj' ++'lzo_ptr.obj' ++'lzo_str.obj' ++'lzo_util.obj' diff --git a/app/lzo/B/dos32/bc_pp.bat b/app/lzo/B/dos32/bc_pp.bat index 77063464..dbaf460c 100644 --- a/app/lzo/B/dos32/bc_pp.bat +++ b/app/lzo/B/dos32/bc_pp.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Borland C/C++ + Borland PowerPack 1.0 (DPMI32) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc32 -WX -set CF=-O2 -w -w-aus %CFI% -Iinclude\lzo %CFASM% -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -ls -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Borland C/C++ + Borland PowerPack 1.0 (DPMI32) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc32 -WX +set CF=-O2 -w -w-aus %CFI% -Iinclude\lzo %CFASM% +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -ls -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/dj2.bat b/app/lzo/B/dos32/dj2.bat index 85928d02..02da9232 100644 --- a/app/lzo/B/dos32/dj2.bat +++ b/app/lzo/B/dos32/dj2.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // djgpp2 + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=lib%BNAME%.a -set CC=gcc -set CF=@b/dos32/dj2.opt %CFI% %CFASM% -set LF=%BLIB% -s - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% @b/win32/cygwin.rsp -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -s -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // djgpp2 + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=lib%BNAME%.a +set CC=gcc +set CF=@b/dos32/dj2.opt %CFI% %CFASM% +set LF=%BLIB% -s + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% @b/win32/cygwin.rsp +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -s -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/dj2.opt b/app/lzo/B/dos32/dj2.opt index 9cd02cd0..782eda58 100644 --- a/app/lzo/B/dos32/dj2.opt +++ b/app/lzo/B/dos32/dj2.opt @@ -1,6 +1,6 @@ --O2 --fomit-frame-pointer --Wall --Wcast-align --Wcast-qual --Wwrite-strings +-O2 +-fomit-frame-pointer +-Wall +-Wcast-align +-Wcast-qual +-Wwrite-strings diff --git a/app/lzo/B/dos32/dm.bat b/app/lzo/B/dos32/dm.bat index 9384bed5..3328e1f3 100644 --- a/app/lzo/B/dos32/dm.bat +++ b/app/lzo/B/dos32/dm.bat @@ -1,43 +1,43 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Digital Mars C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=dmc -mx -set CF=-o -w- %CFI% %CFASM% -set LF=%BLIB% x32.lib - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -set LF=x32.lib -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Digital Mars C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=dmc -mx +set CF=-o -w- %CFI% %CFASM% +set LF=%BLIB% x32.lib + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +set LF=x32.lib +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/emx.bat b/app/lzo/B/dos32/emx.bat index 04423424..631dceb3 100644 --- a/app/lzo/B/dos32/emx.bat +++ b/app/lzo/B/dos32/emx.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // emx + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=%BNAME%.a -set CC=gcc -set CF=@b/dos32/dj2.opt %CFI% %CFASM% -set LF=%BLIB% -s - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% @b/win32/cygwin.rsp -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // emx + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=%BNAME%.a +set CC=gcc +set CF=@b/dos32/dj2.opt %CFI% %CFASM% +set LF=%BLIB% -s + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% @b/win32/cygwin.rsp +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/highc.bat b/app/lzo/B/dos32/highc.bat index a9881164..83d4cdc0 100644 --- a/app/lzo/B/dos32/highc.bat +++ b/app/lzo/B/dos32/highc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // MetaWare High C/C++ (using Pharlap DOS extender) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=hc386 -set CF=-O3 -w4 %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -w1 -c src\*.c -@if errorlevel 1 goto error -386lib %BLIB% -nobanner @b\dos32\highc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // MetaWare High C/C++ (using Pharlap DOS extender) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=hc386 +set CF=-O3 -w4 %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -w1 -c src\*.c +@if errorlevel 1 goto error +386lib %BLIB% -nobanner @b\dos32\highc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/highc.rsp b/app/lzo/B/dos32/highc.rsp index d477b962..d17a52ae 100644 --- a/app/lzo/B/dos32/highc.rsp +++ b/app/lzo/B/dos32/highc.rsp @@ -1,73 +1,73 @@ --create lzo1.obj --create lzo1_99.obj --create lzo1a.obj --create lzo1a_99.obj --create lzo1b_1.obj --create lzo1b_2.obj --create lzo1b_3.obj --create lzo1b_4.obj --create lzo1b_5.obj --create lzo1b_6.obj --create lzo1b_7.obj --create lzo1b_8.obj --create lzo1b_9.obj --create lzo1b_99.obj --create lzo1b_9x.obj --create lzo1b_cc.obj --create lzo1b_d1.obj --create lzo1b_d2.obj --create lzo1b_rr.obj --create lzo1b_xx.obj --create lzo1c_1.obj --create lzo1c_2.obj --create lzo1c_3.obj --create lzo1c_4.obj --create lzo1c_5.obj --create lzo1c_6.obj --create lzo1c_7.obj --create lzo1c_8.obj --create lzo1c_9.obj --create lzo1c_99.obj --create lzo1c_9x.obj --create lzo1c_cc.obj --create lzo1c_d1.obj --create lzo1c_d2.obj --create lzo1c_rr.obj --create lzo1c_xx.obj --create lzo1f_1.obj --create lzo1f_9x.obj --create lzo1f_d1.obj --create lzo1f_d2.obj --create lzo1x_1.obj --create lzo1x_1k.obj --create lzo1x_1l.obj --create lzo1x_1o.obj --create lzo1x_9x.obj --create lzo1x_d1.obj --create lzo1x_d2.obj --create lzo1x_d3.obj --create lzo1x_o.obj --create lzo1y_1.obj --create lzo1y_9x.obj --create lzo1y_d1.obj --create lzo1y_d2.obj --create lzo1y_d3.obj --create lzo1y_o.obj --create lzo1z_9x.obj --create lzo1z_d1.obj --create lzo1z_d2.obj --create lzo1z_d3.obj --create lzo2a_9x.obj --create lzo2a_d1.obj --create lzo2a_d2.obj --create lzo_crc.obj --create lzo_init.obj --create lzo_ptr.obj --create lzo_str.obj --create lzo_util.obj --create asm\i386\obj\omf32\lzo1c_s1.obj --create asm\i386\obj\omf32\lzo1f_f1.obj --create asm\i386\obj\omf32\lzo1x_f1.obj --create asm\i386\obj\omf32\lzo1x_s1.obj --create asm\i386\obj\omf32\lzo1y_f1.obj --create asm\i386\obj\omf32\lzo1y_s1.obj +-create lzo1.obj +-create lzo1_99.obj +-create lzo1a.obj +-create lzo1a_99.obj +-create lzo1b_1.obj +-create lzo1b_2.obj +-create lzo1b_3.obj +-create lzo1b_4.obj +-create lzo1b_5.obj +-create lzo1b_6.obj +-create lzo1b_7.obj +-create lzo1b_8.obj +-create lzo1b_9.obj +-create lzo1b_99.obj +-create lzo1b_9x.obj +-create lzo1b_cc.obj +-create lzo1b_d1.obj +-create lzo1b_d2.obj +-create lzo1b_rr.obj +-create lzo1b_xx.obj +-create lzo1c_1.obj +-create lzo1c_2.obj +-create lzo1c_3.obj +-create lzo1c_4.obj +-create lzo1c_5.obj +-create lzo1c_6.obj +-create lzo1c_7.obj +-create lzo1c_8.obj +-create lzo1c_9.obj +-create lzo1c_99.obj +-create lzo1c_9x.obj +-create lzo1c_cc.obj +-create lzo1c_d1.obj +-create lzo1c_d2.obj +-create lzo1c_rr.obj +-create lzo1c_xx.obj +-create lzo1f_1.obj +-create lzo1f_9x.obj +-create lzo1f_d1.obj +-create lzo1f_d2.obj +-create lzo1x_1.obj +-create lzo1x_1k.obj +-create lzo1x_1l.obj +-create lzo1x_1o.obj +-create lzo1x_9x.obj +-create lzo1x_d1.obj +-create lzo1x_d2.obj +-create lzo1x_d3.obj +-create lzo1x_o.obj +-create lzo1y_1.obj +-create lzo1y_9x.obj +-create lzo1y_d1.obj +-create lzo1y_d2.obj +-create lzo1y_d3.obj +-create lzo1y_o.obj +-create lzo1z_9x.obj +-create lzo1z_d1.obj +-create lzo1z_d2.obj +-create lzo1z_d3.obj +-create lzo2a_9x.obj +-create lzo2a_d1.obj +-create lzo2a_d2.obj +-create lzo_crc.obj +-create lzo_init.obj +-create lzo_ptr.obj +-create lzo_str.obj +-create lzo_util.obj +-create asm\i386\obj\omf32\lzo1c_s1.obj +-create asm\i386\obj\omf32\lzo1f_f1.obj +-create asm\i386\obj\omf32\lzo1x_f1.obj +-create asm\i386\obj\omf32\lzo1x_s1.obj +-create asm\i386\obj\omf32\lzo1y_f1.obj +-create asm\i386\obj\omf32\lzo1y_s1.obj diff --git a/app/lzo/B/dos32/ndp.bat b/app/lzo/B/dos32/ndp.bat index 2b203118..c2c1beba 100644 --- a/app/lzo/B/dos32/ndp.bat +++ b/app/lzo/B/dos32/ndp.bat @@ -1,40 +1,40 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Microway NDP C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=mx486 -set CF=-ansi -on %CFI% -set LF=%BLIB% -bind -map - -@REM %CC% %CF% -Isrc -c src\*.c -for %%f in (src\*.c) do %CC% %CF% -Isrc -c %%f -@if errorlevel 1 goto error -ndplib %BLIB% @b\dos32\ndp.rsp -@if errorlevel 1 goto error - -%CC% %CF% -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Dconst= -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Microway NDP C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=mx486 +set CF=-ansi -on %CFI% +set LF=%BLIB% -bind -map + +@REM %CC% %CF% -Isrc -c src\*.c +for %%f in (src\*.c) do %CC% %CF% -Isrc -c %%f +@if errorlevel 1 goto error +ndplib %BLIB% @b\dos32\ndp.rsp +@if errorlevel 1 goto error + +%CC% %CF% -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Dconst= -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/ndp.rsp b/app/lzo/B/dos32/ndp.rsp index c62b19eb..24c1d438 100644 --- a/app/lzo/B/dos32/ndp.rsp +++ b/app/lzo/B/dos32/ndp.rsp @@ -1,67 +1,67 @@ --add lzo1.obj --add lzo1_99.obj --add lzo1a.obj --add lzo1a_99.obj --add lzo1b_1.obj --add lzo1b_2.obj --add lzo1b_3.obj --add lzo1b_4.obj --add lzo1b_5.obj --add lzo1b_6.obj --add lzo1b_7.obj --add lzo1b_8.obj --add lzo1b_9.obj --add lzo1b_99.obj --add lzo1b_9x.obj --add lzo1b_cc.obj --add lzo1b_d1.obj --add lzo1b_d2.obj --add lzo1b_rr.obj --add lzo1b_xx.obj --add lzo1c_1.obj --add lzo1c_2.obj --add lzo1c_3.obj --add lzo1c_4.obj --add lzo1c_5.obj --add lzo1c_6.obj --add lzo1c_7.obj --add lzo1c_8.obj --add lzo1c_9.obj --add lzo1c_99.obj --add lzo1c_9x.obj --add lzo1c_cc.obj --add lzo1c_d1.obj --add lzo1c_d2.obj --add lzo1c_rr.obj --add lzo1c_xx.obj --add lzo1f_1.obj --add lzo1f_9x.obj --add lzo1f_d1.obj --add lzo1f_d2.obj --add lzo1x_1.obj --add lzo1x_1k.obj --add lzo1x_1l.obj --add lzo1x_1o.obj --add lzo1x_9x.obj --add lzo1x_d1.obj --add lzo1x_d2.obj --add lzo1x_d3.obj --add lzo1x_o.obj --add lzo1y_1.obj --add lzo1y_9x.obj --add lzo1y_d1.obj --add lzo1y_d2.obj --add lzo1y_d3.obj --add lzo1y_o.obj --add lzo1z_9x.obj --add lzo1z_d1.obj --add lzo1z_d2.obj --add lzo1z_d3.obj --add lzo2a_9x.obj --add lzo2a_d1.obj --add lzo2a_d2.obj --add lzo_crc.obj --add lzo_init.obj --add lzo_ptr.obj --add lzo_str.obj --add lzo_util.obj +-add lzo1.obj +-add lzo1_99.obj +-add lzo1a.obj +-add lzo1a_99.obj +-add lzo1b_1.obj +-add lzo1b_2.obj +-add lzo1b_3.obj +-add lzo1b_4.obj +-add lzo1b_5.obj +-add lzo1b_6.obj +-add lzo1b_7.obj +-add lzo1b_8.obj +-add lzo1b_9.obj +-add lzo1b_99.obj +-add lzo1b_9x.obj +-add lzo1b_cc.obj +-add lzo1b_d1.obj +-add lzo1b_d2.obj +-add lzo1b_rr.obj +-add lzo1b_xx.obj +-add lzo1c_1.obj +-add lzo1c_2.obj +-add lzo1c_3.obj +-add lzo1c_4.obj +-add lzo1c_5.obj +-add lzo1c_6.obj +-add lzo1c_7.obj +-add lzo1c_8.obj +-add lzo1c_9.obj +-add lzo1c_99.obj +-add lzo1c_9x.obj +-add lzo1c_cc.obj +-add lzo1c_d1.obj +-add lzo1c_d2.obj +-add lzo1c_rr.obj +-add lzo1c_xx.obj +-add lzo1f_1.obj +-add lzo1f_9x.obj +-add lzo1f_d1.obj +-add lzo1f_d2.obj +-add lzo1x_1.obj +-add lzo1x_1k.obj +-add lzo1x_1l.obj +-add lzo1x_1o.obj +-add lzo1x_9x.obj +-add lzo1x_d1.obj +-add lzo1x_d2.obj +-add lzo1x_d3.obj +-add lzo1x_o.obj +-add lzo1y_1.obj +-add lzo1y_9x.obj +-add lzo1y_d1.obj +-add lzo1y_d2.obj +-add lzo1y_d3.obj +-add lzo1y_o.obj +-add lzo1z_9x.obj +-add lzo1z_d1.obj +-add lzo1z_d2.obj +-add lzo1z_d3.obj +-add lzo2a_9x.obj +-add lzo2a_d1.obj +-add lzo2a_d2.obj +-add lzo_crc.obj +-add lzo_init.obj +-add lzo_ptr.obj +-add lzo_str.obj +-add lzo_util.obj diff --git a/app/lzo/B/dos32/sc.bat b/app/lzo/B/dos32/sc.bat index 44fed188..5751fa3b 100644 --- a/app/lzo/B/dos32/sc.bat +++ b/app/lzo/B/dos32/sc.bat @@ -1,51 +1,51 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Symantec C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=sc -mx -set CF=-o -w- %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Symantec C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=sc -mx +set CF=-o -w- %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/wc.bat b/app/lzo/B/dos32/wc.bat index 21de11dc..8817b19c 100644 --- a/app/lzo/B/dos32/wc.bat +++ b/app/lzo/B/dos32/wc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Watcom C/C++ (using DOS/4G extender) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl386 -zq -mf -5r -bt#dos -l#dos4g -set CF=-ox -zc %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\win32\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Watcom C/C++ (using DOS/4G extender) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl386 -zq -mf -5r -bt#dos -l#dos4g +set CF=-ox -zc %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\win32\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/dos32/zc.bat b/app/lzo/B/dos32/zc.bat index 193502f9..159e99ca 100644 --- a/app/lzo/B/dos32/zc.bat +++ b/app/lzo/B/dos32/zc.bat @@ -1,51 +1,51 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // DOS 32-bit -@echo // Zortech C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=ztc -b -v0 -mx -set CF=-o -w- -r %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -zorlib %BLIB% @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // DOS 32-bit +@echo // Zortech C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=ztc -b -v0 -mx +set CF=-o -w- -r %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +zorlib %BLIB% @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/os2/emx.bat b/app/lzo/B/os2/emx.bat index 2367d65c..35e26fc5 100644 --- a/app/lzo/B/os2/emx.bat +++ b/app/lzo/B/os2/emx.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // OS/2 32-bit -@echo // emx + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=%BNAME%.a -set CC=gcc -set CF=@b/dos32/dj2.opt %CFI% %CFASM% -set LF=%BLIB% -s - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% @b/win32/cygwin.rsp -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // OS/2 32-bit +@echo // emx + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=%BNAME%.a +set CC=gcc +set CF=@b/dos32/dj2.opt %CFI% %CFASM% +set LF=%BLIB% -s + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% @b/win32/cygwin.rsp +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/os2/wc.bat b/app/lzo/B/os2/wc.bat index 44b43f50..44ca3ab1 100644 --- a/app/lzo/B/os2/wc.bat +++ b/app/lzo/B/os2/wc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // OS/2 32-bit -@echo // Watcom C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl386 -zq -mf -5r -bt#os2 -l#os2v2 -set CF=-ox -zc %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\win32\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // OS/2 32-bit +@echo // Watcom C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl386 -zq -mf -5r -bt#os2 -l#os2v2 +set CF=-ox -zc %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\win32\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/os2/zc.bat b/app/lzo/B/os2/zc.bat index 5e4e7f68..db751a67 100644 --- a/app/lzo/B/os2/zc.bat +++ b/app/lzo/B/os2/zc.bat @@ -1,51 +1,51 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // OS/2 32-bit -@echo // Zortech C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=ztc -b -v0 -mf -set CF=-o -w- -r %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -zorlib %BLIB% @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // OS/2 32-bit +@echo // Zortech C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=ztc -b -v0 -mf +set CF=-o -w- -r %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +zorlib %BLIB% @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/os2_16/mc.bat b/app/lzo/B/os2_16/mc.bat index 95742d41..378d43f0 100644 --- a/app/lzo/B/os2_16/mc.bat +++ b/app/lzo/B/os2_16/mc.bat @@ -1,43 +1,43 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // OS/2 16-bit -@echo // Microsoft C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -AL -G2 -set CF=-D__OS2__ -O -Gf -Gs -W3 %CFI% -set LF=%BLIB% -Lp -Fm /link /stack:8096 - -@REM %CC% %CF% -c src\*.c -for %%f in (src\*.c) do %CC% %CF% -c %%f -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // OS/2 16-bit +@echo // Microsoft C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -AL -G2 +set CF=-D__OS2__ -O -Gf -Gs -W3 %CFI% +set LF=%BLIB% -Lp -Fm /link /stack:8096 + +@REM %CC% %CF% -c src\*.c +for %%f in (src\*.c) do %CC% %CF% -c %%f +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/os2_16/wc.bat b/app/lzo/B/os2_16/wc.bat index bbe17430..fcf7fc99 100644 --- a/app/lzo/B/os2_16/wc.bat +++ b/app/lzo/B/os2_16/wc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // OS/2 16-bit -@echo // Watcom C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl -zq -ml -2 -bt#os2 -l#os2 -set CF=-ox %CFI% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // OS/2 16-bit +@echo // Watcom C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl -zq -ml -2 -bt#os2 -l#os2 +set CF=-ox %CFI% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/prepare.bat b/app/lzo/B/prepare.bat index 34bac191..5f70fb77 100644 --- a/app/lzo/B/prepare.bat +++ b/app/lzo/B/prepare.bat @@ -1,10 +1,10 @@ -@call b\unset.bat -@call b\clean.bat - -@set CFI=-Iinclude -I. -Isrc -@set CFASM=-DLZO_USE_ASM -@set BNAME=lzo2 -@set BLIB=lzo2.lib -@set BDLL=lzo2.dll - -@echo Compiling, please be patient... +@call b\unset.bat +@call b\clean.bat + +@set CFI=-Iinclude -I. -Isrc +@set CFASM=-DLZO_USE_ASM +@set BNAME=lzo2 +@set BLIB=lzo2.lib +@set BDLL=lzo2.dll + +@echo Compiling, please be patient... diff --git a/app/lzo/B/src.rsp b/app/lzo/B/src.rsp index 2a1dbce6..26fd0110 100644 --- a/app/lzo/B/src.rsp +++ b/app/lzo/B/src.rsp @@ -1,67 +1,67 @@ -src/lzo1.c -src/lzo1_99.c -src/lzo1a.c -src/lzo1a_99.c -src/lzo1b_1.c -src/lzo1b_2.c -src/lzo1b_3.c -src/lzo1b_4.c -src/lzo1b_5.c -src/lzo1b_6.c -src/lzo1b_7.c -src/lzo1b_8.c -src/lzo1b_9.c -src/lzo1b_99.c -src/lzo1b_9x.c -src/lzo1b_cc.c -src/lzo1b_d1.c -src/lzo1b_d2.c -src/lzo1b_rr.c -src/lzo1b_xx.c -src/lzo1c_1.c -src/lzo1c_2.c -src/lzo1c_3.c -src/lzo1c_4.c -src/lzo1c_5.c -src/lzo1c_6.c -src/lzo1c_7.c -src/lzo1c_8.c -src/lzo1c_9.c -src/lzo1c_99.c -src/lzo1c_9x.c -src/lzo1c_cc.c -src/lzo1c_d1.c -src/lzo1c_d2.c -src/lzo1c_rr.c -src/lzo1c_xx.c -src/lzo1f_1.c -src/lzo1f_9x.c -src/lzo1f_d1.c -src/lzo1f_d2.c -src/lzo1x_1.c -src/lzo1x_1k.c -src/lzo1x_1l.c -src/lzo1x_1o.c -src/lzo1x_9x.c -src/lzo1x_d1.c -src/lzo1x_d2.c -src/lzo1x_d3.c -src/lzo1x_o.c -src/lzo1y_1.c -src/lzo1y_9x.c -src/lzo1y_d1.c -src/lzo1y_d2.c -src/lzo1y_d3.c -src/lzo1y_o.c -src/lzo1z_9x.c -src/lzo1z_d1.c -src/lzo1z_d2.c -src/lzo1z_d3.c -src/lzo2a_9x.c -src/lzo2a_d1.c -src/lzo2a_d2.c -src/lzo_crc.c -src/lzo_init.c -src/lzo_ptr.c -src/lzo_str.c -src/lzo_util.c +src/lzo1.c +src/lzo1_99.c +src/lzo1a.c +src/lzo1a_99.c +src/lzo1b_1.c +src/lzo1b_2.c +src/lzo1b_3.c +src/lzo1b_4.c +src/lzo1b_5.c +src/lzo1b_6.c +src/lzo1b_7.c +src/lzo1b_8.c +src/lzo1b_9.c +src/lzo1b_99.c +src/lzo1b_9x.c +src/lzo1b_cc.c +src/lzo1b_d1.c +src/lzo1b_d2.c +src/lzo1b_rr.c +src/lzo1b_xx.c +src/lzo1c_1.c +src/lzo1c_2.c +src/lzo1c_3.c +src/lzo1c_4.c +src/lzo1c_5.c +src/lzo1c_6.c +src/lzo1c_7.c +src/lzo1c_8.c +src/lzo1c_9.c +src/lzo1c_99.c +src/lzo1c_9x.c +src/lzo1c_cc.c +src/lzo1c_d1.c +src/lzo1c_d2.c +src/lzo1c_rr.c +src/lzo1c_xx.c +src/lzo1f_1.c +src/lzo1f_9x.c +src/lzo1f_d1.c +src/lzo1f_d2.c +src/lzo1x_1.c +src/lzo1x_1k.c +src/lzo1x_1l.c +src/lzo1x_1o.c +src/lzo1x_9x.c +src/lzo1x_d1.c +src/lzo1x_d2.c +src/lzo1x_d3.c +src/lzo1x_o.c +src/lzo1y_1.c +src/lzo1y_9x.c +src/lzo1y_d1.c +src/lzo1y_d2.c +src/lzo1y_d3.c +src/lzo1y_o.c +src/lzo1z_9x.c +src/lzo1z_d1.c +src/lzo1z_d2.c +src/lzo1z_d3.c +src/lzo2a_9x.c +src/lzo2a_d1.c +src/lzo2a_d2.c +src/lzo_crc.c +src/lzo_init.c +src/lzo_ptr.c +src/lzo_str.c +src/lzo_util.c diff --git a/app/lzo/B/unset.bat b/app/lzo/B/unset.bat index 21672dd6..98f13786 100644 --- a/app/lzo/B/unset.bat +++ b/app/lzo/B/unset.bat @@ -1,10 +1,10 @@ -@set CC= -@set CF= -@set CFI= -@set CFX= -@set CFASM= -@set LF= -@set BNAME= -@set BLIB= -@set BDLL= -@set BECHO= +@set CC= +@set CF= +@set CFI= +@set CFX= +@set CFASM= +@set LF= +@set BNAME= +@set BLIB= +@set BDLL= +@set BECHO= diff --git a/app/lzo/B/win16/bc.bat b/app/lzo/B/win16/bc.bat index f252ada8..06f64bda 100644 --- a/app/lzo/B/win16/bc.bat +++ b/app/lzo/B/win16/bc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Borland C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc -ml -2 -tW -h -set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Borland C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc -ml -2 -tW -h +set CF=-O1 -d -w -w-rch -w-sig %CFI% -Iinclude\lzo +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win16/dm.bat b/app/lzo/B/win16/dm.bat index 8009a996..d844ef83 100644 --- a/app/lzo/B/win16/dm.bat +++ b/app/lzo/B/win16/dm.bat @@ -1,43 +1,43 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Digital Mars C/C++ (using WINIO) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=dmc -ml -2 -W -set CF=-o -w- %CFI% -set LF=%BLIB% libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -set LF=libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Digital Mars C/C++ (using WINIO) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=dmc -ml -2 -W +set CF=-o -w- %CFI% +set LF=%BLIB% libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +set LF=libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win16/mc.bat b/app/lzo/B/win16/mc.bat index 4404e421..26af7b6e 100644 --- a/app/lzo/B/win16/mc.bat +++ b/app/lzo/B/win16/mc.bat @@ -1,56 +1,56 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Microsoft C/C++ (using QuickWin) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -AL -G2 -Mq -set CF=-O -Gf -W3 %CFI% -set LF=/seg:256 /stack:8096 /nod:llibce /map - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -link %LF% dict.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -link %LF% lzopack.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -link %LF% precomp.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -link %LF% precomp2.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -link %LF% simple.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -link %LF% lzotest.obj,,,llibcewq.lib libw.lib %BLIB%; -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo -c minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error -link %LF% testmini.obj minilzo.obj,,,llibcewq.lib libw.lib; -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Microsoft C/C++ (using QuickWin) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -AL -G2 -Mq +set CF=-O -Gf -W3 %CFI% +set LF=/seg:256 /stack:8096 /nod:llibce /map + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +link %LF% dict.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +link %LF% lzopack.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +link %LF% precomp.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +link %LF% precomp2.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +link %LF% simple.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +link %LF% lzotest.obj,,,llibcewq.lib libw.lib %BLIB%; +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo -c minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error +link %LF% testmini.obj minilzo.obj,,,llibcewq.lib libw.lib; +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win16/sc.bat b/app/lzo/B/win16/sc.bat index 0dab9e3c..89dd73c5 100644 --- a/app/lzo/B/win16/sc.bat +++ b/app/lzo/B/win16/sc.bat @@ -1,53 +1,53 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Symantec C/C++ (using WINIO) -@echo // -@echo // NOTE: LZO breaks the optimizer, so we disable optimizations -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=sc -ml -2 -W -set CF=-w- %CFI% -set LF=%BLIB% libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Symantec C/C++ (using WINIO) +@echo // +@echo // NOTE: LZO breaks the optimizer, so we disable optimizations +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=sc -ml -2 -W +set CF=-w- %CFI% +set LF=%BLIB% libw.lib commdlg.lib lwindos.lib /L/map/stack:8096 + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\dos16\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win16/vc.bat b/app/lzo/B/win16/vc.bat index 2c35cfc4..74962326 100644 --- a/app/lzo/B/win16/vc.bat +++ b/app/lzo/B/win16/vc.bat @@ -1,43 +1,43 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Microsoft Visual C/C++ (using QuickWin) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -AL -G2 -Mq -set CF=-O -Gf -Gs -Gy -W3 %CFI% -set LF=%BLIB% -Fm /link /seg:256 - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib /nologo %BLIB% @b\dos16\bc.rsp; -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -set LF=-Fm /link /seg:256 -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Microsoft Visual C/C++ (using QuickWin) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -AL -G2 -Mq +set CF=-O -Gf -Gs -Gy -W3 %CFI% +set LF=%BLIB% -Fm /link /seg:256 + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib /nologo %BLIB% @b\dos16\bc.rsp; +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +set LF=-Fm /link /seg:256 +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win16/wc.bat b/app/lzo/B/win16/wc.bat index b5dbf22a..be06828a 100644 --- a/app/lzo/B/win16/wc.bat +++ b/app/lzo/B/win16/wc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 16-bit -@echo // Watcom C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl -zq -ml -2 -bw -bt#windows -l#windows -set CF=-ox %CFI% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2011 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 16-bit +@echo // Watcom C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl -zq -ml -2 -bw -bt#windows -l#windows +set CF=-ox %CFI% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\dos16\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/bc.bat b/app/lzo/B/win32/bc.bat index 811acdbb..21fafa0e 100644 --- a/app/lzo/B/win32/bc.bat +++ b/app/lzo/B/win32/bc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Borland C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=bcc32 -set CF=-O2 -w -w-aus %CFI% -Iinclude\lzo %CFASM% -set LF=%BLIB% - -%CC% %CF% -Isrc -c @b\src.rsp -@if errorlevel 1 goto error -tlib %BLIB% @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -ls -Iexamples examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -ls -Iexamples examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Borland C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=bcc32 +set CF=-O2 -w -w-aus %CFI% -Iinclude\lzo %CFASM% +set LF=%BLIB% + +%CC% %CF% -Isrc -c @b\src.rsp +@if errorlevel 1 goto error +tlib %BLIB% @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -ls -Iexamples examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -ls -Iexamples examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Ilzotest lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -ls -Iminilzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/bc.rsp b/app/lzo/B/win32/bc.rsp index b48a182d..d62b5b38 100644 --- a/app/lzo/B/win32/bc.rsp +++ b/app/lzo/B/win32/bc.rsp @@ -1,73 +1,73 @@ -+lzo1.obj & -+lzo1_99.obj & -+lzo1a.obj & -+lzo1a_99.obj & -+lzo1b_1.obj & -+lzo1b_2.obj & -+lzo1b_3.obj & -+lzo1b_4.obj & -+lzo1b_5.obj & -+lzo1b_6.obj & -+lzo1b_7.obj & -+lzo1b_8.obj & -+lzo1b_9.obj & -+lzo1b_99.obj & -+lzo1b_9x.obj & -+lzo1b_cc.obj & -+lzo1b_d1.obj & -+lzo1b_d2.obj & -+lzo1b_rr.obj & -+lzo1b_xx.obj & -+lzo1c_1.obj & -+lzo1c_2.obj & -+lzo1c_3.obj & -+lzo1c_4.obj & -+lzo1c_5.obj & -+lzo1c_6.obj & -+lzo1c_7.obj & -+lzo1c_8.obj & -+lzo1c_9.obj & -+lzo1c_99.obj & -+lzo1c_9x.obj & -+lzo1c_cc.obj & -+lzo1c_d1.obj & -+lzo1c_d2.obj & -+lzo1c_rr.obj & -+lzo1c_xx.obj & -+lzo1f_1.obj & -+lzo1f_9x.obj & -+lzo1f_d1.obj & -+lzo1f_d2.obj & -+lzo1x_1.obj & -+lzo1x_1k.obj & -+lzo1x_1l.obj & -+lzo1x_1o.obj & -+lzo1x_9x.obj & -+lzo1x_d1.obj & -+lzo1x_d2.obj & -+lzo1x_d3.obj & -+lzo1x_o.obj & -+lzo1y_1.obj & -+lzo1y_9x.obj & -+lzo1y_d1.obj & -+lzo1y_d2.obj & -+lzo1y_d3.obj & -+lzo1y_o.obj & -+lzo1z_9x.obj & -+lzo1z_d1.obj & -+lzo1z_d2.obj & -+lzo1z_d3.obj & -+lzo2a_9x.obj & -+lzo2a_d1.obj & -+lzo2a_d2.obj & -+lzo_crc.obj & -+lzo_init.obj & -+lzo_ptr.obj & -+lzo_str.obj & -+lzo_util.obj & -+asm\i386\obj\omf32\lzo1c_s1.obj & -+asm\i386\obj\omf32\lzo1f_f1.obj & -+asm\i386\obj\omf32\lzo1x_f1.obj & -+asm\i386\obj\omf32\lzo1x_s1.obj & -+asm\i386\obj\omf32\lzo1y_f1.obj & -+asm\i386\obj\omf32\lzo1y_s1.obj ++lzo1.obj & ++lzo1_99.obj & ++lzo1a.obj & ++lzo1a_99.obj & ++lzo1b_1.obj & ++lzo1b_2.obj & ++lzo1b_3.obj & ++lzo1b_4.obj & ++lzo1b_5.obj & ++lzo1b_6.obj & ++lzo1b_7.obj & ++lzo1b_8.obj & ++lzo1b_9.obj & ++lzo1b_99.obj & ++lzo1b_9x.obj & ++lzo1b_cc.obj & ++lzo1b_d1.obj & ++lzo1b_d2.obj & ++lzo1b_rr.obj & ++lzo1b_xx.obj & ++lzo1c_1.obj & ++lzo1c_2.obj & ++lzo1c_3.obj & ++lzo1c_4.obj & ++lzo1c_5.obj & ++lzo1c_6.obj & ++lzo1c_7.obj & ++lzo1c_8.obj & ++lzo1c_9.obj & ++lzo1c_99.obj & ++lzo1c_9x.obj & ++lzo1c_cc.obj & ++lzo1c_d1.obj & ++lzo1c_d2.obj & ++lzo1c_rr.obj & ++lzo1c_xx.obj & ++lzo1f_1.obj & ++lzo1f_9x.obj & ++lzo1f_d1.obj & ++lzo1f_d2.obj & ++lzo1x_1.obj & ++lzo1x_1k.obj & ++lzo1x_1l.obj & ++lzo1x_1o.obj & ++lzo1x_9x.obj & ++lzo1x_d1.obj & ++lzo1x_d2.obj & ++lzo1x_d3.obj & ++lzo1x_o.obj & ++lzo1y_1.obj & ++lzo1y_9x.obj & ++lzo1y_d1.obj & ++lzo1y_d2.obj & ++lzo1y_d3.obj & ++lzo1y_o.obj & ++lzo1z_9x.obj & ++lzo1z_d1.obj & ++lzo1z_d2.obj & ++lzo1z_d3.obj & ++lzo2a_9x.obj & ++lzo2a_d1.obj & ++lzo2a_d2.obj & ++lzo_crc.obj & ++lzo_init.obj & ++lzo_ptr.obj & ++lzo_str.obj & ++lzo_util.obj & ++asm\i386\obj\omf32\lzo1c_s1.obj & ++asm\i386\obj\omf32\lzo1f_f1.obj & ++asm\i386\obj\omf32\lzo1x_f1.obj & ++asm\i386\obj\omf32\lzo1x_s1.obj & ++asm\i386\obj\omf32\lzo1y_f1.obj & ++asm\i386\obj\omf32\lzo1y_s1.obj diff --git a/app/lzo/B/win32/cygwin.bat b/app/lzo/B/win32/cygwin.bat index 57533e0e..35baf058 100644 --- a/app/lzo/B/win32/cygwin.bat +++ b/app/lzo/B/win32/cygwin.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // cygwin + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=lib%BNAME%.a -set CC=gcc -set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% -set LF=%BLIB% -lwinmm -s - -%CC% %CF% -c src/*.c -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% @b/win32/cygwin.rsp -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // cygwin + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=lib%BNAME%.a +set CC=gcc +set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% +set LF=%BLIB% -lwinmm -s + +%CC% %CF% -c src/*.c +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% @b/win32/cygwin.rsp +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/cygwin.rsp b/app/lzo/B/win32/cygwin.rsp index 62eab38c..d6b3da0b 100644 --- a/app/lzo/B/win32/cygwin.rsp +++ b/app/lzo/B/win32/cygwin.rsp @@ -1,73 +1,73 @@ -lzo1.o -lzo1_99.o -lzo1a.o -lzo1a_99.o -lzo1b_1.o -lzo1b_2.o -lzo1b_3.o -lzo1b_4.o -lzo1b_5.o -lzo1b_6.o -lzo1b_7.o -lzo1b_8.o -lzo1b_9.o -lzo1b_99.o -lzo1b_9x.o -lzo1b_cc.o -lzo1b_d1.o -lzo1b_d2.o -lzo1b_rr.o -lzo1b_xx.o -lzo1c_1.o -lzo1c_2.o -lzo1c_3.o -lzo1c_4.o -lzo1c_5.o -lzo1c_6.o -lzo1c_7.o -lzo1c_8.o -lzo1c_9.o -lzo1c_99.o -lzo1c_9x.o -lzo1c_cc.o -lzo1c_d1.o -lzo1c_d2.o -lzo1c_rr.o -lzo1c_xx.o -lzo1f_1.o -lzo1f_9x.o -lzo1f_d1.o -lzo1f_d2.o -lzo1x_1.o -lzo1x_1k.o -lzo1x_1l.o -lzo1x_1o.o -lzo1x_9x.o -lzo1x_d1.o -lzo1x_d2.o -lzo1x_d3.o -lzo1x_o.o -lzo1y_1.o -lzo1y_9x.o -lzo1y_d1.o -lzo1y_d2.o -lzo1y_d3.o -lzo1y_o.o -lzo1z_9x.o -lzo1z_d1.o -lzo1z_d2.o -lzo1z_d3.o -lzo2a_9x.o -lzo2a_d1.o -lzo2a_d2.o -lzo_crc.o -lzo_init.o -lzo_ptr.o -lzo_str.o -lzo_util.o -lzo1c_s1.o -lzo1f_f1.o -lzo1x_f1.o -lzo1x_s1.o -lzo1y_f1.o -lzo1y_s1.o +lzo1.o +lzo1_99.o +lzo1a.o +lzo1a_99.o +lzo1b_1.o +lzo1b_2.o +lzo1b_3.o +lzo1b_4.o +lzo1b_5.o +lzo1b_6.o +lzo1b_7.o +lzo1b_8.o +lzo1b_9.o +lzo1b_99.o +lzo1b_9x.o +lzo1b_cc.o +lzo1b_d1.o +lzo1b_d2.o +lzo1b_rr.o +lzo1b_xx.o +lzo1c_1.o +lzo1c_2.o +lzo1c_3.o +lzo1c_4.o +lzo1c_5.o +lzo1c_6.o +lzo1c_7.o +lzo1c_8.o +lzo1c_9.o +lzo1c_99.o +lzo1c_9x.o +lzo1c_cc.o +lzo1c_d1.o +lzo1c_d2.o +lzo1c_rr.o +lzo1c_xx.o +lzo1f_1.o +lzo1f_9x.o +lzo1f_d1.o +lzo1f_d2.o +lzo1x_1.o +lzo1x_1k.o +lzo1x_1l.o +lzo1x_1o.o +lzo1x_9x.o +lzo1x_d1.o +lzo1x_d2.o +lzo1x_d3.o +lzo1x_o.o +lzo1y_1.o +lzo1y_9x.o +lzo1y_d1.o +lzo1y_d2.o +lzo1y_d3.o +lzo1y_o.o +lzo1z_9x.o +lzo1z_d1.o +lzo1z_d2.o +lzo1z_d3.o +lzo2a_9x.o +lzo2a_d1.o +lzo2a_d2.o +lzo_crc.o +lzo_init.o +lzo_ptr.o +lzo_str.o +lzo_util.o +lzo1c_s1.o +lzo1f_f1.o +lzo1x_f1.o +lzo1x_s1.o +lzo1y_f1.o +lzo1y_s1.o diff --git a/app/lzo/B/win32/dm.bat b/app/lzo/B/win32/dm.bat index 5c65dfe4..3e152a6a 100644 --- a/app/lzo/B/win32/dm.bat +++ b/app/lzo/B/win32/dm.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Digital Mars C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=dmc -mn -set CF=-o -w- %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Digital Mars C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=dmc -mn +set CF=-o -w- %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/ic.bat b/app/lzo/B/win32/ic.bat index f9f6f5a5..9fe53821 100644 --- a/app/lzo/B/win32/ic.bat +++ b/app/lzo/B/win32/ic.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Intel C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=icl -nologo -MT -set CF=-O2 -GF -W3 -Qvec-report0 %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -link -lib -nologo -out:%BLIB% @b\win32\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Intel C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=icl -nologo -MT +set CF=-O2 -GF -W3 -Qvec-report0 %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +link -lib -nologo -out:%BLIB% @b\win32\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/lccwin32.bat b/app/lzo/B/win32/lccwin32.bat index e01ee98a..1b5265f0 100644 --- a/app/lzo/B/win32/lccwin32.bat +++ b/app/lzo/B/win32/lccwin32.bat @@ -1,59 +1,59 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // lcc-win32 -@echo // -@echo // NOTE: some lcc-win32 versions are buggy, so we disable optimizations -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=lcc -set CF=-O -A %CFI% -Iinclude\lzo %CFASM% -set CF=-A %CFI% -Iinclude\lzo %CFASM% -set LF=%BLIB% winmm.lib - -for %%f in (src\*.c) do %CC% %CF% -c %%f -@if errorlevel 1 goto error -lcclib /out:%BLIB% @b\win32\vc.rsp -@if errorlevel 1 goto error - -%CC% -c %CF% examples\dict.c -@if errorlevel 1 goto error -lc dict.obj %LF% -@if errorlevel 1 goto error -%CC% -c %CF% examples\lzopack.c -@if errorlevel 1 goto error -lc lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% -c %CF% examples\precomp.c -@if errorlevel 1 goto error -lc precomp.obj %LF% -@if errorlevel 1 goto error -%CC% -c %CF% examples\precomp2.c -@if errorlevel 1 goto error -lc precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% -c %CF% examples\simple.c -@if errorlevel 1 goto error -lc simple.obj %LF% -@if errorlevel 1 goto error - -%CC% -c %CF% lzotest\lzotest.c -@if errorlevel 1 goto error -lc lzotest.obj %LF% -@if errorlevel 1 goto error - -%CC% -c %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error -lc testmini.obj minilzo.obj -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // lcc-win32 +@echo // +@echo // NOTE: some lcc-win32 versions are buggy, so we disable optimizations +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=lcc +set CF=-O -A %CFI% -Iinclude\lzo %CFASM% +set CF=-A %CFI% -Iinclude\lzo %CFASM% +set LF=%BLIB% winmm.lib + +for %%f in (src\*.c) do %CC% %CF% -c %%f +@if errorlevel 1 goto error +lcclib /out:%BLIB% @b\win32\vc.rsp +@if errorlevel 1 goto error + +%CC% -c %CF% examples\dict.c +@if errorlevel 1 goto error +lc dict.obj %LF% +@if errorlevel 1 goto error +%CC% -c %CF% examples\lzopack.c +@if errorlevel 1 goto error +lc lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% -c %CF% examples\precomp.c +@if errorlevel 1 goto error +lc precomp.obj %LF% +@if errorlevel 1 goto error +%CC% -c %CF% examples\precomp2.c +@if errorlevel 1 goto error +lc precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% -c %CF% examples\simple.c +@if errorlevel 1 goto error +lc simple.obj %LF% +@if errorlevel 1 goto error + +%CC% -c %CF% lzotest\lzotest.c +@if errorlevel 1 goto error +lc lzotest.obj %LF% +@if errorlevel 1 goto error + +%CC% -c %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error +lc testmini.obj minilzo.obj +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/mingw.bat b/app/lzo/B/win32/mingw.bat index 81830ae6..3bba6e07 100644 --- a/app/lzo/B/win32/mingw.bat +++ b/app/lzo/B/win32/mingw.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // MinGW + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=lib%BNAME%.a -set CC=gcc -set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% -set LF=%BLIB% -lwinmm -s - -%CC% %CF% -c src/*.c -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% *.o -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // MinGW + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=lib%BNAME%.a +set CC=gcc +set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% +set LF=%BLIB% -lwinmm -s + +%CC% %CF% -c src/*.c +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% *.o +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/mwerks.bat b/app/lzo/B/win32/mwerks.bat index 8aad1488..3cc27428 100644 --- a/app/lzo/B/win32/mwerks.bat +++ b/app/lzo/B/win32/mwerks.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Metrowerks CodeWarrior C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=mwcc -gccinc -set CF=-opt full %CFI% %CFASM% -set LF=%BLIB% -lwinmm.lib - -%CC% -w on %CF% -w nounusedexpr -c @b\src.rsp -@if errorlevel 1 goto error -mwld -library -o %BLIB% @b\win32\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Metrowerks CodeWarrior C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=mwcc -gccinc +set CF=-opt full %CFI% %CFASM% +set LF=%BLIB% -lwinmm.lib + +%CC% -w on %CF% -w nounusedexpr -c @b\src.rsp +@if errorlevel 1 goto error +mwld -library -o %BLIB% @b\win32\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/pellesc.bat b/app/lzo/B/win32/pellesc.bat index 7123f1b1..5e92ab94 100644 --- a/app/lzo/B/win32/pellesc.bat +++ b/app/lzo/B/win32/pellesc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Pelles C -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cc -Ze -Go -set CF=-O2 -W2 %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -polib -out:%BLIB% @b\win32\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Pelles C +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cc -Ze -Go +set CF=-O2 -W2 %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +polib -out:%BLIB% @b\win32\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/pgi.bat b/app/lzo/B/win32/pgi.bat index 82e17727..079a7142 100644 --- a/app/lzo/B/win32/pgi.bat +++ b/app/lzo/B/win32/pgi.bat @@ -1,43 +1,43 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Portland Group PGI C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=lib%BNAME%.a -set CC=pgcc -set CF=-fast %CFI% %CFASM% -set LF=%BLIB% -lwinmm - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -ar rcs %BLIB% *.o asm/i386/obj/win32/*.obj -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Portland Group PGI C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=lib%BNAME%.a +set CC=pgcc +set CF=-fast %CFI% %CFASM% +set LF=%BLIB% -lwinmm + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +ar rcs %BLIB% *.o asm/i386/obj/win32/*.obj +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/pw32.bat b/app/lzo/B/win32/pw32.bat index 583ca493..a156242f 100644 --- a/app/lzo/B/win32/pw32.bat +++ b/app/lzo/B/win32/pw32.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // PW32 + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=lib%BNAME%.a -set CC=gcc -set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% -set LF=%BLIB% -lwinmm -s - -%CC% %CF% -c src/*.c -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% *.o -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // PW32 + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=lib%BNAME%.a +set CC=gcc +set CF=-O2 -fomit-frame-pointer -Wall %CFI% %CFASM% +set LF=%BLIB% -lwinmm -s + +%CC% %CF% -c src/*.c +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% *.o +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/rsxnt.bat b/app/lzo/B/win32/rsxnt.bat index 3077f6f4..1b9d87e6 100644 --- a/app/lzo/B/win32/rsxnt.bat +++ b/app/lzo/B/win32/rsxnt.bat @@ -1,45 +1,45 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // rsxnt + gcc -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set BLIB=%BNAME%.a -set CC=gcc -Zwin32 -Zsys -mprobe -set CF=@b/dos32/dj2.opt %CFI% %CFASM% -set LF=%BLIB% -s - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S -@if errorlevel 1 goto error -ar rcs %BLIB% @b/win32/cygwin.rsp -@if errorlevel 1 goto error - -%CC% %CF% -o dict.exe examples/dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp.exe examples/precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% -o simple.exe examples/simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // rsxnt + gcc +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set BLIB=%BNAME%.a +set CC=gcc -Zwin32 -Zsys -mprobe +set CF=@b/dos32/dj2.opt %CFI% %CFASM% +set LF=%BLIB% -s + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -x assembler-with-cpp -c asm/i386/src_gas/*.S +@if errorlevel 1 goto error +ar rcs %BLIB% @b/win32/cygwin.rsp +@if errorlevel 1 goto error + +%CC% %CF% -o dict.exe examples/dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o lzopack.exe examples/lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp.exe examples/precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o precomp2.exe examples/precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% -o simple.exe examples/simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -o lzotest.exe lzotest/lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude/lzo -o testmini.exe minilzo/testmini.c minilzo/minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/sc.bat b/app/lzo/B/win32/sc.bat index b1f5c463..38edbc7f 100644 --- a/app/lzo/B/win32/sc.bat +++ b/app/lzo/B/win32/sc.bat @@ -1,56 +1,56 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Symantec C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=sc -mn -set CF=-o -w- %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -lib %BLIB% /b /c /n /noi @b\win32\bc.rsp -@if errorlevel 1 goto error - -%CC% %CF% -c examples\dict.c -@if errorlevel 1 goto error -%CC% dict.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\lzopack.c -@if errorlevel 1 goto error -%CC% lzopack.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp.c -@if errorlevel 1 goto error -%CC% precomp.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\precomp2.c -@if errorlevel 1 goto error -%CC% precomp2.obj %LF% -@if errorlevel 1 goto error -%CC% %CF% -c examples\simple.c -@if errorlevel 1 goto error -%CC% simple.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -c lzotest\lzotest.c -@if errorlevel 1 goto error -%CC% lzotest.obj %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo -c minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error -%CC% testmini.obj minilzo.obj -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Symantec C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=sc -mn +set CF=-o -w- %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +lib %BLIB% /b /c /n /noi @b\win32\bc.rsp +@if errorlevel 1 goto error + +%CC% %CF% -c examples\dict.c +@if errorlevel 1 goto error +%CC% dict.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\lzopack.c +@if errorlevel 1 goto error +%CC% lzopack.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp.c +@if errorlevel 1 goto error +%CC% precomp.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\precomp2.c +@if errorlevel 1 goto error +%CC% precomp2.obj %LF% +@if errorlevel 1 goto error +%CC% %CF% -c examples\simple.c +@if errorlevel 1 goto error +%CC% simple.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -c lzotest\lzotest.c +@if errorlevel 1 goto error +%CC% lzotest.obj %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo -c minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error +%CC% testmini.obj minilzo.obj +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/vc.bat b/app/lzo/B/win32/vc.bat index 0b7d1f65..db278b58 100644 --- a/app/lzo/B/win32/vc.bat +++ b/app/lzo/B/win32/vc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Microsoft Visual C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -MT -set CF=-O2 -GF -W3 %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -link -lib -nologo -out:%BLIB% @b\win32\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Microsoft Visual C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -MT +set CF=-O2 -GF -W3 %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +link -lib -nologo -out:%BLIB% @b\win32\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/vc.rsp b/app/lzo/B/win32/vc.rsp index 690bec1c..1cf09fe2 100644 --- a/app/lzo/B/win32/vc.rsp +++ b/app/lzo/B/win32/vc.rsp @@ -1,73 +1,73 @@ -.\lzo1.obj -.\lzo1_99.obj -.\lzo1a.obj -.\lzo1a_99.obj -.\lzo1b_1.obj -.\lzo1b_2.obj -.\lzo1b_3.obj -.\lzo1b_4.obj -.\lzo1b_5.obj -.\lzo1b_6.obj -.\lzo1b_7.obj -.\lzo1b_8.obj -.\lzo1b_9.obj -.\lzo1b_99.obj -.\lzo1b_9x.obj -.\lzo1b_cc.obj -.\lzo1b_d1.obj -.\lzo1b_d2.obj -.\lzo1b_rr.obj -.\lzo1b_xx.obj -.\lzo1c_1.obj -.\lzo1c_2.obj -.\lzo1c_3.obj -.\lzo1c_4.obj -.\lzo1c_5.obj -.\lzo1c_6.obj -.\lzo1c_7.obj -.\lzo1c_8.obj -.\lzo1c_9.obj -.\lzo1c_99.obj -.\lzo1c_9x.obj -.\lzo1c_cc.obj -.\lzo1c_d1.obj -.\lzo1c_d2.obj -.\lzo1c_rr.obj -.\lzo1c_xx.obj -.\lzo1f_1.obj -.\lzo1f_9x.obj -.\lzo1f_d1.obj -.\lzo1f_d2.obj -.\lzo1x_1.obj -.\lzo1x_1k.obj -.\lzo1x_1l.obj -.\lzo1x_1o.obj -.\lzo1x_9x.obj -.\lzo1x_d1.obj -.\lzo1x_d2.obj -.\lzo1x_d3.obj -.\lzo1x_o.obj -.\lzo1y_1.obj -.\lzo1y_9x.obj -.\lzo1y_d1.obj -.\lzo1y_d2.obj -.\lzo1y_d3.obj -.\lzo1y_o.obj -.\lzo1z_9x.obj -.\lzo1z_d1.obj -.\lzo1z_d2.obj -.\lzo1z_d3.obj -.\lzo2a_9x.obj -.\lzo2a_d1.obj -.\lzo2a_d2.obj -.\lzo_crc.obj -.\lzo_init.obj -.\lzo_ptr.obj -.\lzo_str.obj -.\lzo_util.obj -.\asm\i386\obj\win32\lzo1c_s1.obj -.\asm\i386\obj\win32\lzo1f_f1.obj -.\asm\i386\obj\win32\lzo1x_f1.obj -.\asm\i386\obj\win32\lzo1x_s1.obj -.\asm\i386\obj\win32\lzo1y_f1.obj -.\asm\i386\obj\win32\lzo1y_s1.obj +.\lzo1.obj +.\lzo1_99.obj +.\lzo1a.obj +.\lzo1a_99.obj +.\lzo1b_1.obj +.\lzo1b_2.obj +.\lzo1b_3.obj +.\lzo1b_4.obj +.\lzo1b_5.obj +.\lzo1b_6.obj +.\lzo1b_7.obj +.\lzo1b_8.obj +.\lzo1b_9.obj +.\lzo1b_99.obj +.\lzo1b_9x.obj +.\lzo1b_cc.obj +.\lzo1b_d1.obj +.\lzo1b_d2.obj +.\lzo1b_rr.obj +.\lzo1b_xx.obj +.\lzo1c_1.obj +.\lzo1c_2.obj +.\lzo1c_3.obj +.\lzo1c_4.obj +.\lzo1c_5.obj +.\lzo1c_6.obj +.\lzo1c_7.obj +.\lzo1c_8.obj +.\lzo1c_9.obj +.\lzo1c_99.obj +.\lzo1c_9x.obj +.\lzo1c_cc.obj +.\lzo1c_d1.obj +.\lzo1c_d2.obj +.\lzo1c_rr.obj +.\lzo1c_xx.obj +.\lzo1f_1.obj +.\lzo1f_9x.obj +.\lzo1f_d1.obj +.\lzo1f_d2.obj +.\lzo1x_1.obj +.\lzo1x_1k.obj +.\lzo1x_1l.obj +.\lzo1x_1o.obj +.\lzo1x_9x.obj +.\lzo1x_d1.obj +.\lzo1x_d2.obj +.\lzo1x_d3.obj +.\lzo1x_o.obj +.\lzo1y_1.obj +.\lzo1y_9x.obj +.\lzo1y_d1.obj +.\lzo1y_d2.obj +.\lzo1y_d3.obj +.\lzo1y_o.obj +.\lzo1z_9x.obj +.\lzo1z_d1.obj +.\lzo1z_d2.obj +.\lzo1z_d3.obj +.\lzo2a_9x.obj +.\lzo2a_d1.obj +.\lzo2a_d2.obj +.\lzo_crc.obj +.\lzo_init.obj +.\lzo_ptr.obj +.\lzo_str.obj +.\lzo_util.obj +.\asm\i386\obj\win32\lzo1c_s1.obj +.\asm\i386\obj\win32\lzo1f_f1.obj +.\asm\i386\obj\win32\lzo1x_f1.obj +.\asm\i386\obj\win32\lzo1x_s1.obj +.\asm\i386\obj\win32\lzo1y_f1.obj +.\asm\i386\obj\win32\lzo1y_s1.obj diff --git a/app/lzo/B/win32/vc_dll.bat b/app/lzo/B/win32/vc_dll.bat index 72788c0a..c6ba0863 100644 --- a/app/lzo/B/win32/vc_dll.bat +++ b/app/lzo/B/win32/vc_dll.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Microsoft Visual C/C++ (DLL) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -MT -set CF=-O2 -GF -W3 %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -LD -Fe%BDLL% @b\win32\vc.rsp /link /map /def:b\win32\vc_dll.def -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Microsoft Visual C/C++ (DLL) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -MT +set CF=-O2 -GF -W3 %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -LD -Fe%BDLL% @b\win32\vc.rsp /link /map /def:b\win32\vc_dll.def +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/vc_dll.def b/app/lzo/B/win32/vc_dll.def index 215fb86d..a5abd35a 100644 --- a/app/lzo/B/win32/vc_dll.def +++ b/app/lzo/B/win32/vc_dll.def @@ -1,7 +1,7 @@ -EXPORTS - _lzo1c_decompress_asm=lzo1c_decompress_asm - _lzo1f_decompress_asm_fast=lzo1f_decompress_asm_fast - _lzo1x_decompress_asm=lzo1x_decompress_asm - _lzo1x_decompress_asm_fast=lzo1x_decompress_asm_fast - _lzo1y_decompress_asm=lzo1y_decompress_asm - _lzo1y_decompress_asm_fast=lzo1y_decompress_asm_fast +EXPORTS + _lzo1c_decompress_asm=lzo1c_decompress_asm + _lzo1f_decompress_asm_fast=lzo1f_decompress_asm_fast + _lzo1x_decompress_asm=lzo1x_decompress_asm + _lzo1x_decompress_asm_fast=lzo1x_decompress_asm_fast + _lzo1y_decompress_asm=lzo1y_decompress_asm + _lzo1y_decompress_asm_fast=lzo1y_decompress_asm_fast diff --git a/app/lzo/B/win32/wc.bat b/app/lzo/B/win32/wc.bat index 64e34a23..ab9f5f44 100644 --- a/app/lzo/B/win32/wc.bat +++ b/app/lzo/B/win32/wc.bat @@ -1,39 +1,39 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 32-bit -@echo // Watcom C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=wcl386 -zq -mf -5r -bt#nt -l#nt -set CF=-ox -zc %CFI% %CFASM% -set LF=%BLIB% - -%CC% %CF% -c src\*.c -@if errorlevel 1 goto error -wlib -q -b -n -t %BLIB% @b\win32\wc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 32-bit +@echo // Watcom C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=wcl386 -zq -mf -5r -bt#nt -l#nt +set CF=-ox -zc %CFI% %CFASM% +set LF=%BLIB% + +%CC% %CF% -c src\*.c +@if errorlevel 1 goto error +wlib -q -b -n -t %BLIB% @b\win32\wc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win32/wc.rsp b/app/lzo/B/win32/wc.rsp index 503667ba..12760d98 100644 --- a/app/lzo/B/win32/wc.rsp +++ b/app/lzo/B/win32/wc.rsp @@ -1,73 +1,73 @@ -+'lzo1.obj' -+'lzo1_99.obj' -+'lzo1a.obj' -+'lzo1a_99.obj' -+'lzo1b_1.obj' -+'lzo1b_2.obj' -+'lzo1b_3.obj' -+'lzo1b_4.obj' -+'lzo1b_5.obj' -+'lzo1b_6.obj' -+'lzo1b_7.obj' -+'lzo1b_8.obj' -+'lzo1b_9.obj' -+'lzo1b_99.obj' -+'lzo1b_9x.obj' -+'lzo1b_cc.obj' -+'lzo1b_d1.obj' -+'lzo1b_d2.obj' -+'lzo1b_rr.obj' -+'lzo1b_xx.obj' -+'lzo1c_1.obj' -+'lzo1c_2.obj' -+'lzo1c_3.obj' -+'lzo1c_4.obj' -+'lzo1c_5.obj' -+'lzo1c_6.obj' -+'lzo1c_7.obj' -+'lzo1c_8.obj' -+'lzo1c_9.obj' -+'lzo1c_99.obj' -+'lzo1c_9x.obj' -+'lzo1c_cc.obj' -+'lzo1c_d1.obj' -+'lzo1c_d2.obj' -+'lzo1c_rr.obj' -+'lzo1c_xx.obj' -+'lzo1f_1.obj' -+'lzo1f_9x.obj' -+'lzo1f_d1.obj' -+'lzo1f_d2.obj' -+'lzo1x_1.obj' -+'lzo1x_1k.obj' -+'lzo1x_1l.obj' -+'lzo1x_1o.obj' -+'lzo1x_9x.obj' -+'lzo1x_d1.obj' -+'lzo1x_d2.obj' -+'lzo1x_d3.obj' -+'lzo1x_o.obj' -+'lzo1y_1.obj' -+'lzo1y_9x.obj' -+'lzo1y_d1.obj' -+'lzo1y_d2.obj' -+'lzo1y_d3.obj' -+'lzo1y_o.obj' -+'lzo1z_9x.obj' -+'lzo1z_d1.obj' -+'lzo1z_d2.obj' -+'lzo1z_d3.obj' -+'lzo2a_9x.obj' -+'lzo2a_d1.obj' -+'lzo2a_d2.obj' -+'lzo_crc.obj' -+'lzo_init.obj' -+'lzo_ptr.obj' -+'lzo_str.obj' -+'lzo_util.obj' -+'asm\i386\obj\omf32\lzo1c_s1.obj' -+'asm\i386\obj\omf32\lzo1f_f1.obj' -+'asm\i386\obj\omf32\lzo1x_f1.obj' -+'asm\i386\obj\omf32\lzo1x_s1.obj' -+'asm\i386\obj\omf32\lzo1y_f1.obj' -+'asm\i386\obj\omf32\lzo1y_s1.obj' ++'lzo1.obj' ++'lzo1_99.obj' ++'lzo1a.obj' ++'lzo1a_99.obj' ++'lzo1b_1.obj' ++'lzo1b_2.obj' ++'lzo1b_3.obj' ++'lzo1b_4.obj' ++'lzo1b_5.obj' ++'lzo1b_6.obj' ++'lzo1b_7.obj' ++'lzo1b_8.obj' ++'lzo1b_9.obj' ++'lzo1b_99.obj' ++'lzo1b_9x.obj' ++'lzo1b_cc.obj' ++'lzo1b_d1.obj' ++'lzo1b_d2.obj' ++'lzo1b_rr.obj' ++'lzo1b_xx.obj' ++'lzo1c_1.obj' ++'lzo1c_2.obj' ++'lzo1c_3.obj' ++'lzo1c_4.obj' ++'lzo1c_5.obj' ++'lzo1c_6.obj' ++'lzo1c_7.obj' ++'lzo1c_8.obj' ++'lzo1c_9.obj' ++'lzo1c_99.obj' ++'lzo1c_9x.obj' ++'lzo1c_cc.obj' ++'lzo1c_d1.obj' ++'lzo1c_d2.obj' ++'lzo1c_rr.obj' ++'lzo1c_xx.obj' ++'lzo1f_1.obj' ++'lzo1f_9x.obj' ++'lzo1f_d1.obj' ++'lzo1f_d2.obj' ++'lzo1x_1.obj' ++'lzo1x_1k.obj' ++'lzo1x_1l.obj' ++'lzo1x_1o.obj' ++'lzo1x_9x.obj' ++'lzo1x_d1.obj' ++'lzo1x_d2.obj' ++'lzo1x_d3.obj' ++'lzo1x_o.obj' ++'lzo1y_1.obj' ++'lzo1y_9x.obj' ++'lzo1y_d1.obj' ++'lzo1y_d2.obj' ++'lzo1y_d3.obj' ++'lzo1y_o.obj' ++'lzo1z_9x.obj' ++'lzo1z_d1.obj' ++'lzo1z_d2.obj' ++'lzo1z_d3.obj' ++'lzo2a_9x.obj' ++'lzo2a_d1.obj' ++'lzo2a_d2.obj' ++'lzo_crc.obj' ++'lzo_init.obj' ++'lzo_ptr.obj' ++'lzo_str.obj' ++'lzo_util.obj' ++'asm\i386\obj\omf32\lzo1c_s1.obj' ++'asm\i386\obj\omf32\lzo1f_f1.obj' ++'asm\i386\obj\omf32\lzo1x_f1.obj' ++'asm\i386\obj\omf32\lzo1x_s1.obj' ++'asm\i386\obj\omf32\lzo1y_f1.obj' ++'asm\i386\obj\omf32\lzo1y_s1.obj' diff --git a/app/lzo/B/win64/ic.bat b/app/lzo/B/win64/ic.bat index 154d5e4d..cdc9762f 100644 --- a/app/lzo/B/win64/ic.bat +++ b/app/lzo/B/win64/ic.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 64-bit (Itanium) -@echo // Intel C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=icl -nologo -MT -set CF=-O2 -GF -W3 -Qvec-report0 %CFI% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -link -lib -nologo -out:%BLIB% @b\win64\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 64-bit (Itanium) +@echo // Intel C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=icl -nologo -MT +set CF=-O2 -GF -W3 -Qvec-report0 %CFI% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +link -lib -nologo -out:%BLIB% @b\win64\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win64/ic_dll.bat b/app/lzo/B/win64/ic_dll.bat index 620fadcc..ec806084 100644 --- a/app/lzo/B/win64/ic_dll.bat +++ b/app/lzo/B/win64/ic_dll.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 64-bit (Itanium) -@echo // Intel C/C++ (DLL) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=icl -nologo -MT -set CF=-O2 -GF -W3 %CFI% -set LF=%BLIB% - -%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -LD -Fe%BDLL% @b\win64\vc.rsp /link /map /def:b\win64\vc_dll.def -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 64-bit (Itanium) +@echo // Intel C/C++ (DLL) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=icl -nologo -MT +set CF=-O2 -GF -W3 %CFI% +set LF=%BLIB% + +%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -LD -Fe%BDLL% @b\win64\vc.rsp /link /map /def:b\win64\vc_dll.def +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win64/vc.bat b/app/lzo/B/win64/vc.bat index 1aada926..03694ca9 100644 --- a/app/lzo/B/win64/vc.bat +++ b/app/lzo/B/win64/vc.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 64-bit (AMD64 or Itanium) -@echo // Microsoft Visual C/C++ -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -MT -set CF=-O2 -GF -W3 %CFI% -set LF=%BLIB% - -%CC% %CF% -c @b\src.rsp -@if errorlevel 1 goto error -link -lib -nologo -out:%BLIB% @b\win64\vc.rsp -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 64-bit (AMD64 or Itanium) +@echo // Microsoft Visual C/C++ +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -MT +set CF=-O2 -GF -W3 %CFI% +set LF=%BLIB% + +%CC% %CF% -c @b\src.rsp +@if errorlevel 1 goto error +link -lib -nologo -out:%BLIB% @b\win64\vc.rsp +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win64/vc.rsp b/app/lzo/B/win64/vc.rsp index 182de60c..da684c94 100644 --- a/app/lzo/B/win64/vc.rsp +++ b/app/lzo/B/win64/vc.rsp @@ -1,67 +1,67 @@ -.\lzo1.obj -.\lzo1_99.obj -.\lzo1a.obj -.\lzo1a_99.obj -.\lzo1b_1.obj -.\lzo1b_2.obj -.\lzo1b_3.obj -.\lzo1b_4.obj -.\lzo1b_5.obj -.\lzo1b_6.obj -.\lzo1b_7.obj -.\lzo1b_8.obj -.\lzo1b_9.obj -.\lzo1b_99.obj -.\lzo1b_9x.obj -.\lzo1b_cc.obj -.\lzo1b_d1.obj -.\lzo1b_d2.obj -.\lzo1b_rr.obj -.\lzo1b_xx.obj -.\lzo1c_1.obj -.\lzo1c_2.obj -.\lzo1c_3.obj -.\lzo1c_4.obj -.\lzo1c_5.obj -.\lzo1c_6.obj -.\lzo1c_7.obj -.\lzo1c_8.obj -.\lzo1c_9.obj -.\lzo1c_99.obj -.\lzo1c_9x.obj -.\lzo1c_cc.obj -.\lzo1c_d1.obj -.\lzo1c_d2.obj -.\lzo1c_rr.obj -.\lzo1c_xx.obj -.\lzo1f_1.obj -.\lzo1f_9x.obj -.\lzo1f_d1.obj -.\lzo1f_d2.obj -.\lzo1x_1.obj -.\lzo1x_1k.obj -.\lzo1x_1l.obj -.\lzo1x_1o.obj -.\lzo1x_9x.obj -.\lzo1x_d1.obj -.\lzo1x_d2.obj -.\lzo1x_d3.obj -.\lzo1x_o.obj -.\lzo1y_1.obj -.\lzo1y_9x.obj -.\lzo1y_d1.obj -.\lzo1y_d2.obj -.\lzo1y_d3.obj -.\lzo1y_o.obj -.\lzo1z_9x.obj -.\lzo1z_d1.obj -.\lzo1z_d2.obj -.\lzo1z_d3.obj -.\lzo2a_9x.obj -.\lzo2a_d1.obj -.\lzo2a_d2.obj -.\lzo_crc.obj -.\lzo_init.obj -.\lzo_ptr.obj -.\lzo_str.obj -.\lzo_util.obj +.\lzo1.obj +.\lzo1_99.obj +.\lzo1a.obj +.\lzo1a_99.obj +.\lzo1b_1.obj +.\lzo1b_2.obj +.\lzo1b_3.obj +.\lzo1b_4.obj +.\lzo1b_5.obj +.\lzo1b_6.obj +.\lzo1b_7.obj +.\lzo1b_8.obj +.\lzo1b_9.obj +.\lzo1b_99.obj +.\lzo1b_9x.obj +.\lzo1b_cc.obj +.\lzo1b_d1.obj +.\lzo1b_d2.obj +.\lzo1b_rr.obj +.\lzo1b_xx.obj +.\lzo1c_1.obj +.\lzo1c_2.obj +.\lzo1c_3.obj +.\lzo1c_4.obj +.\lzo1c_5.obj +.\lzo1c_6.obj +.\lzo1c_7.obj +.\lzo1c_8.obj +.\lzo1c_9.obj +.\lzo1c_99.obj +.\lzo1c_9x.obj +.\lzo1c_cc.obj +.\lzo1c_d1.obj +.\lzo1c_d2.obj +.\lzo1c_rr.obj +.\lzo1c_xx.obj +.\lzo1f_1.obj +.\lzo1f_9x.obj +.\lzo1f_d1.obj +.\lzo1f_d2.obj +.\lzo1x_1.obj +.\lzo1x_1k.obj +.\lzo1x_1l.obj +.\lzo1x_1o.obj +.\lzo1x_9x.obj +.\lzo1x_d1.obj +.\lzo1x_d2.obj +.\lzo1x_d3.obj +.\lzo1x_o.obj +.\lzo1y_1.obj +.\lzo1y_9x.obj +.\lzo1y_d1.obj +.\lzo1y_d2.obj +.\lzo1y_d3.obj +.\lzo1y_o.obj +.\lzo1z_9x.obj +.\lzo1z_d1.obj +.\lzo1z_d2.obj +.\lzo1z_d3.obj +.\lzo2a_9x.obj +.\lzo2a_d1.obj +.\lzo2a_d2.obj +.\lzo_crc.obj +.\lzo_init.obj +.\lzo_ptr.obj +.\lzo_str.obj +.\lzo_util.obj diff --git a/app/lzo/B/win64/vc_dll.bat b/app/lzo/B/win64/vc_dll.bat index 63ea5fc8..f21361a7 100644 --- a/app/lzo/B/win64/vc_dll.bat +++ b/app/lzo/B/win64/vc_dll.bat @@ -1,42 +1,42 @@ -@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer -@echo // -@echo // Windows 64-bit (AMD64 or Itanium) -@echo // Microsoft Visual C/C++ (DLL) -@echo // -@call b\prepare.bat -@if "%BECHO%"=="n" echo off - - -set CC=cl -nologo -MT -set CF=-O2 -GF -W3 %CFI% -set LF=%BLIB% - -%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp -@if errorlevel 1 goto error -%CC% -LD -Fe%BDLL% @b\win64\vc.rsp /link /map /def:b\win64\vc_dll.def -@if errorlevel 1 goto error - -%CC% %CF% examples\dict.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\lzopack.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\precomp2.c %LF% -@if errorlevel 1 goto error -%CC% %CF% examples\simple.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% lzotest\lzotest.c %LF% -@if errorlevel 1 goto error - -%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c -@if errorlevel 1 goto error - - -@call b\done.bat -@goto end -:error -@echo ERROR during build! -:end -@call b\unset.bat +@echo // Copyright (C) 1996-2014 Markus F.X.J. Oberhumer +@echo // +@echo // Windows 64-bit (AMD64 or Itanium) +@echo // Microsoft Visual C/C++ (DLL) +@echo // +@call b\prepare.bat +@if "%BECHO%"=="n" echo off + + +set CC=cl -nologo -MT +set CF=-O2 -GF -W3 %CFI% +set LF=%BLIB% + +%CC% %CF% -D__LZO_EXPORT1#__declspec(dllexport) -c @b\src.rsp +@if errorlevel 1 goto error +%CC% -LD -Fe%BDLL% @b\win64\vc.rsp /link /map /def:b\win64\vc_dll.def +@if errorlevel 1 goto error + +%CC% %CF% examples\dict.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\lzopack.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\precomp2.c %LF% +@if errorlevel 1 goto error +%CC% %CF% examples\simple.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% lzotest\lzotest.c %LF% +@if errorlevel 1 goto error + +%CC% %CF% -Iinclude\lzo minilzo\testmini.c minilzo\minilzo.c +@if errorlevel 1 goto error + + +@call b\done.bat +@goto end +:error +@echo ERROR during build! +:end +@call b\unset.bat diff --git a/app/lzo/B/win64/vc_dll.def b/app/lzo/B/win64/vc_dll.def index e80a637a..aabd2247 100644 --- a/app/lzo/B/win64/vc_dll.def +++ b/app/lzo/B/win64/vc_dll.def @@ -1 +1 @@ -EXPORTS +EXPORTS diff --git a/app/lzo/autoconf/shtool b/app/lzo/autoconf/shtool index a1f4e7ee..78c0b125 100755 --- a/app/lzo/autoconf/shtool +++ b/app/lzo/autoconf/shtool @@ -3102,9 +3102,9 @@ platform ) sed -e 's/^://' \ -e 's;\[\([^]]*\)\];\1;g' \ -e 's;<\([^>]*\)>;\1;g' \ - -e "s; ;§§;g" \ + -e "s; ;��;g" \ -e "s;/;%%;g" \ - -e "s;§§;${opt_S};g" \ + -e "s;��;${opt_S};g" \ -e "s;%%;${opt_C};g"` eval "${var_uc}_V=\"\${val_V}\"" ;; @@ -3115,9 +3115,9 @@ platform ) sed -e 's/^://' \ -e 's;\[\([^]]*\)\];;g' \ -e 's;<\([^>]*\)>;\1;g' \ - -e "s; ;§§;g" \ + -e "s; ;��;g" \ -e "s;/;%%;g" \ - -e "s;§§;${opt_S};g" \ + -e "s;��;${opt_S};g" \ -e "s;%%;${opt_C};g"` eval "${var_uc}_N=\"\${val_N}\"" ;; @@ -3128,9 +3128,9 @@ platform ) sed -e 's/^://' \ -e 's;\[\([^]]*\)\];;g' \ -e 's;[^<]*<\([^>]*\)>[^<]*;\1;g' \ - -e "s; ;§§;g" \ + -e "s; ;��;g" \ -e "s;/;%%;g" \ - -e "s;§§;${opt_S};g" \ + -e "s;��;${opt_S};g" \ -e "s;%%;${opt_C};g"` eval "${var_uc}_C=\"\${val_C}\"" ;; diff --git a/app/openssl/Apps-config-host.mk b/app/openssl/Apps-config-host.mk index 5c1604e0..37dcb78b 100644 --- a/app/openssl/Apps-config-host.mk +++ b/app/openssl/Apps-config-host.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz # # This script will append to the following variables: # diff --git a/app/openssl/Apps-config-target.mk b/app/openssl/Apps-config-target.mk index 0c567d4d..bccd250d 100644 --- a/app/openssl/Apps-config-target.mk +++ b/app/openssl/Apps-config-target.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz # # This script will append to the following variables: # diff --git a/app/openssl/Apps.mk b/app/openssl/Apps.mk index b2d871c1..3fb94dbe 100644 --- a/app/openssl/Apps.mk +++ b/app/openssl/Apps.mk @@ -1,12 +1,9 @@ # Copyright 2006 The Android Open Source Project -LOCAL_PATH := $(call my-dir) +LOCAL_PATH:= $(call my-dir) include $(CLEAR_VARS) -LOCAL_MODULE := openssl -LOCAL_MULTILIB := both -LOCAL_MODULE_STEM_32 := openssl -LOCAL_MODULE_STEM_64 := openssl64 +LOCAL_MODULE:= openssl LOCAL_CLANG := true LOCAL_MODULE_TAGS := optional LOCAL_SHARED_LIBRARIES := libssl libcrypto @@ -16,7 +13,7 @@ LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/A include $(BUILD_EXECUTABLE) include $(CLEAR_VARS) -LOCAL_MODULE := openssl +LOCAL_MODULE:= openssl LOCAL_MODULE_TAGS := optional LOCAL_SHARED_LIBRARIES := libssl-host libcrypto-host include $(LOCAL_PATH)/Apps-config-host.mk diff --git a/app/openssl/Crypto-config-host.mk b/app/openssl/Crypto-config-host.mk index 5b643792..a377fec4 100644 --- a/app/openssl/Crypto-config-host.mk +++ b/app/openssl/Crypto-config-host.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz # # This script will append to the following variables: # @@ -332,6 +332,7 @@ common_src_files := \ crypto/evp/m_md5.c \ crypto/evp/m_mdc2.c \ crypto/evp/m_null.c \ + crypto/evp/m_ripemd.c \ crypto/evp/m_sha1.c \ crypto/evp/m_sigver.c \ crypto/evp/m_wp.c \ @@ -437,6 +438,8 @@ common_src_files := \ crypto/rc4/rc4_enc.c \ crypto/rc4/rc4_skey.c \ crypto/rc4/rc4_utl.c \ + crypto/ripemd/rmd_dgst.c \ + crypto/ripemd/rmd_one.c \ crypto/rsa/rsa_ameth.c \ crypto/rsa/rsa_asn1.c \ crypto/rsa/rsa_chk.c \ @@ -543,7 +546,6 @@ common_c_includes := \ arm_cflags := \ -DAES_ASM \ -DBSAES_ASM \ - -DDES_UNROLL \ -DGHASH_ASM \ -DOPENSSL_BN_ASM_GF2m \ -DOPENSSL_BN_ASM_MONT \ @@ -554,14 +556,12 @@ arm_cflags := \ arm_src_files := \ crypto/aes/asm/aes-armv4.S \ - crypto/aes/asm/aesv8-armx.S \ crypto/aes/asm/bsaes-armv7.S \ crypto/armcap.c \ crypto/armv4cpuid.S \ crypto/bn/asm/armv4-gf2m.S \ crypto/bn/asm/armv4-mont.S \ crypto/modes/asm/ghash-armv4.S \ - crypto/modes/asm/ghashv8-armx.S \ crypto/sha/asm/sha1-armv4-large.S \ crypto/sha/asm/sha256-armv4.S \ crypto/sha/asm/sha512-armv4.S \ @@ -571,20 +571,9 @@ arm_exclude_files := \ crypto/mem_clr.c \ arm64_cflags := \ - -DDES_UNROLL \ - -DOPENSSL_CPUID_OBJ \ - -DSHA1_ASM \ - -DSHA256_ASM \ - -DSHA512_ASM \ + -DOPENSSL_NO_ASM \ -arm64_src_files := \ - crypto/aes/asm/aesv8-armx-64.S \ - crypto/arm64cpuid.S \ - crypto/armcap.c \ - crypto/modes/asm/ghashv8-armx-64.S \ - crypto/sha/asm/sha1-armv8.S \ - crypto/sha/asm/sha256-armv8.S \ - crypto/sha/asm/sha512-armv8.S \ +arm64_src_files := arm64_exclude_files := @@ -600,8 +589,6 @@ x86_cflags := \ -DOPENSSL_BN_ASM_PART_WORDS \ -DOPENSSL_CPUID_OBJ \ -DOPENSSL_IA32_SSE2 \ - -DRC4_INDEX \ - -DRMD160_ASM \ -DSHA1_ASM \ -DSHA256_ASM \ -DSHA512_ASM \ @@ -637,6 +624,8 @@ x86_exclude_files := \ x86_64_cflags := \ -DAES_ASM \ -DBSAES_ASM \ + -DDES_PTR \ + -DDES_RISC1 \ -DDES_UNROLL \ -DGHASH_ASM \ -DMD5_ASM \ @@ -644,7 +633,6 @@ x86_64_cflags := \ -DOPENSSL_BN_ASM_MONT \ -DOPENSSL_BN_ASM_MONT5 \ -DOPENSSL_CPUID_OBJ \ - -DOPENSSL_IA32_SSE2 \ -DSHA1_ASM \ -DSHA256_ASM \ -DSHA512_ASM \ diff --git a/app/openssl/Crypto-config-target.mk b/app/openssl/Crypto-config-target.mk index bd29dfe5..2c5b01e5 100644 --- a/app/openssl/Crypto-config-target.mk +++ b/app/openssl/Crypto-config-target.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz # # This script will append to the following variables: # @@ -332,6 +332,7 @@ common_src_files := \ crypto/evp/m_md5.c \ crypto/evp/m_mdc2.c \ crypto/evp/m_null.c \ + crypto/evp/m_ripemd.c \ crypto/evp/m_sha1.c \ crypto/evp/m_sigver.c \ crypto/evp/m_wp.c \ @@ -437,6 +438,8 @@ common_src_files := \ crypto/rc4/rc4_enc.c \ crypto/rc4/rc4_skey.c \ crypto/rc4/rc4_utl.c \ + crypto/ripemd/rmd_dgst.c \ + crypto/ripemd/rmd_one.c \ crypto/rsa/rsa_ameth.c \ crypto/rsa/rsa_asn1.c \ crypto/rsa/rsa_chk.c \ @@ -543,7 +546,6 @@ common_c_includes := \ arm_cflags := \ -DAES_ASM \ -DBSAES_ASM \ - -DDES_UNROLL \ -DGHASH_ASM \ -DOPENSSL_BN_ASM_GF2m \ -DOPENSSL_BN_ASM_MONT \ @@ -554,14 +556,12 @@ arm_cflags := \ arm_src_files := \ crypto/aes/asm/aes-armv4.S \ - crypto/aes/asm/aesv8-armx.S \ crypto/aes/asm/bsaes-armv7.S \ crypto/armcap.c \ crypto/armv4cpuid.S \ crypto/bn/asm/armv4-gf2m.S \ crypto/bn/asm/armv4-mont.S \ crypto/modes/asm/ghash-armv4.S \ - crypto/modes/asm/ghashv8-armx.S \ crypto/sha/asm/sha1-armv4-large.S \ crypto/sha/asm/sha256-armv4.S \ crypto/sha/asm/sha512-armv4.S \ @@ -571,20 +571,9 @@ arm_exclude_files := \ crypto/mem_clr.c \ arm64_cflags := \ - -DDES_UNROLL \ - -DOPENSSL_CPUID_OBJ \ - -DSHA1_ASM \ - -DSHA256_ASM \ - -DSHA512_ASM \ + -DOPENSSL_NO_ASM \ -arm64_src_files := \ - crypto/aes/asm/aesv8-armx-64.S \ - crypto/arm64cpuid.S \ - crypto/armcap.c \ - crypto/modes/asm/ghashv8-armx-64.S \ - crypto/sha/asm/sha1-armv8.S \ - crypto/sha/asm/sha256-armv8.S \ - crypto/sha/asm/sha512-armv8.S \ +arm64_src_files := arm64_exclude_files := @@ -600,8 +589,6 @@ x86_cflags := \ -DOPENSSL_BN_ASM_PART_WORDS \ -DOPENSSL_CPUID_OBJ \ -DOPENSSL_IA32_SSE2 \ - -DRC4_INDEX \ - -DRMD160_ASM \ -DSHA1_ASM \ -DSHA256_ASM \ -DSHA512_ASM \ @@ -637,6 +624,8 @@ x86_exclude_files := \ x86_64_cflags := \ -DAES_ASM \ -DBSAES_ASM \ + -DDES_PTR \ + -DDES_RISC1 \ -DDES_UNROLL \ -DGHASH_ASM \ -DMD5_ASM \ @@ -644,7 +633,6 @@ x86_64_cflags := \ -DOPENSSL_BN_ASM_MONT \ -DOPENSSL_BN_ASM_MONT5 \ -DOPENSSL_CPUID_OBJ \ - -DOPENSSL_IA32_SSE2 \ -DSHA1_ASM \ -DSHA256_ASM \ -DSHA512_ASM \ diff --git a/app/openssl/Crypto-config-trusty.mk b/app/openssl/Crypto-config-trusty.mk index 59915986..dc5b12c2 100644 --- a/app/openssl/Crypto-config-trusty.mk +++ b/app/openssl/Crypto-config-trusty.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz # # This script will append to the following variables: # diff --git a/app/openssl/Crypto.mk b/app/openssl/Crypto.mk index 6565f97c..4214b91e 100644 --- a/app/openssl/Crypto.mk +++ b/app/openssl/Crypto.mk @@ -9,7 +9,7 @@ LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) LOCAL_SDK_VERSION := 9 LOCAL_MODULE_TAGS := optional -LOCAL_MODULE := libcrypto_static +LOCAL_MODULE:= libcrypto_static LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk include $(LOCAL_PATH)/Crypto-config-target.mk include $(LOCAL_PATH)/android-config.mk @@ -31,7 +31,7 @@ LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) # in the NDK. ifeq (,$(TARGET_BUILD_APPS)) LOCAL_CLANG := true -ifeq ($(HOST_OS), darwin_does_not_wrok) +ifeq ($(HOST_OS), darwin_XXX) LOCAL_ASFLAGS += -no-integrated-as LOCAL_CFLAGS += -no-integrated-as endif @@ -41,7 +41,7 @@ endif LOCAL_LDFLAGS += -ldl LOCAL_MODULE_TAGS := optional -LOCAL_MODULE := libcrypto +LOCAL_MODULE:= libcrypto LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk include $(LOCAL_PATH)/Crypto-config-target.mk include $(LOCAL_PATH)/android-config.mk @@ -50,16 +50,16 @@ include $(BUILD_SHARED_LIBRARY) ####################################### # host shared library -#include $(CLEAR_VARS) -#LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) -#LOCAL_CFLAGS += -DPURIFY -#LOCAL_LDLIBS += -ldl -#LOCAL_MODULE_TAGS := optional -#LOCAL_MODULE := libcrypto-host -#LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk -#include $(LOCAL_PATH)/Crypto-config-host.mk -#include $(LOCAL_PATH)/android-config.mk -#include $(BUILD_HOST_SHARED_LIBRARY) +# include $(CLEAR_VARS) +# LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) +# LOCAL_CFLAGS += -DPURIFY +# LOCAL_LDLIBS += -ldl +# LOCAL_MODULE_TAGS := optional +# LOCAL_MODULE:= libcrypto-host +# LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk +# include $(LOCAL_PATH)/Crypto-config-host.mk +# include $(LOCAL_PATH)/android-config.mk +# include $(BUILD_HOST_SHARED_LIBRARY) ######################################## # host static library, which is used by some SDK tools. @@ -69,9 +69,8 @@ include $(BUILD_SHARED_LIBRARY) # LOCAL_CFLAGS += -DPURIFY # LOCAL_LDLIBS += -ldl # LOCAL_MODULE_TAGS := optional -# LOCAL_MODULE := libcrypto_static +# LOCAL_MODULE:= libcrypto_static # LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk # include $(LOCAL_PATH)/Crypto-config-host.mk # include $(LOCAL_PATH)/android-config.mk # include $(BUILD_HOST_STATIC_LIBRARY) - diff --git a/app/openssl/Ssl-config-host.mk b/app/openssl/Ssl-config-host.mk index 57ea3775..95035487 100644 --- a/app/openssl/Ssl-config-host.mk +++ b/app/openssl/Ssl-config-host.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz # # This script will append to the following variables: # diff --git a/app/openssl/Ssl-config-target.mk b/app/openssl/Ssl-config-target.mk index c08a971d..32439d3f 100644 --- a/app/openssl/Ssl-config-target.mk +++ b/app/openssl/Ssl-config-target.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz # # This script will append to the following variables: # diff --git a/app/openssl/Ssl.mk b/app/openssl/Ssl.mk index 6c04950a..8ce82d9b 100644 --- a/app/openssl/Ssl.mk +++ b/app/openssl/Ssl.mk @@ -12,7 +12,7 @@ LOCAL_CFLAGS += $(target_c_flags) LOCAL_C_INCLUDES += $(target_c_includes) LOCAL_SHARED_LIBRARIES = $(log_shared_libraries) LOCAL_MODULE_TAGS := optional -LOCAL_MODULE := libssl_static +LOCAL_MODULE:= libssl_static LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk include $(LOCAL_PATH)/Ssl-config-target.mk include $(LOCAL_PATH)/android-config.mk @@ -35,20 +35,19 @@ endif LOCAL_SHARED_LIBRARIES += libcrypto $(log_shared_libraries) LOCAL_MODULE_TAGS := optional -LOCAL_MODULE := libssl +LOCAL_MODULE:= libssl LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk include $(LOCAL_PATH)/Ssl-config-target.mk include $(LOCAL_PATH)/android-config.mk include $(LOCAL_PATH)/ndk-build.mk include $(BUILD_SHARED_LIBRARY) - # ####################################### # # host shared library # include $(CLEAR_VARS) # LOCAL_SHARED_LIBRARIES += libcrypto-host $(log_shared_libraries) # LOCAL_MODULE_TAGS := optional -# LOCAL_MODULE := libssl-host +# LOCAL_MODULE:= libssl-host # LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk # include $(LOCAL_PATH)/Ssl-config-host.mk # include $(LOCAL_PATH)/android-config.mk @@ -57,12 +56,9 @@ include $(BUILD_SHARED_LIBRARY) # ####################################### # # ssltest # include $(CLEAR_VARS) -# LOCAL_SRC_FILES := ssl/ssltest.c +# LOCAL_SRC_FILES:= ssl/ssltest.c # LOCAL_SHARED_LIBRARIES := libssl libcrypto $(log_shared_libraries) -# LOCAL_MODULE := ssltest -# LOCAL_MULTILIB := both -# LOCAL_MODULE_STEM_32 := ssltest -# LOCAL_MODULE_STEM_64 := ssltest64 +# LOCAL_MODULE:= ssltest # LOCAL_MODULE_TAGS := optional # LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk # include $(LOCAL_PATH)/Ssl-config-host.mk diff --git a/app/openssl/apps/enc.c b/app/openssl/apps/enc.c index 19ea3df9..719acc32 100644 --- a/app/openssl/apps/enc.c +++ b/app/openssl/apps/enc.c @@ -331,12 +331,6 @@ bad: setup_engine(bio_err, engine, 0); #endif - if (cipher && EVP_CIPHER_flags(cipher) & EVP_CIPH_FLAG_AEAD_CIPHER) - { - BIO_printf(bio_err, "AEAD ciphers not supported by the enc utility\n"); - goto end; - } - if (md && (dgst=EVP_get_digestbyname(md)) == NULL) { BIO_printf(bio_err,"%s is an unsupported message digest type\n",md); diff --git a/app/openssl/apps/ocsp.c b/app/openssl/apps/ocsp.c index 767f12c6..83c5a767 100644 --- a/app/openssl/apps/ocsp.c +++ b/app/openssl/apps/ocsp.c @@ -127,7 +127,6 @@ int MAIN(int argc, char **argv) ENGINE *e = NULL; char **args; char *host = NULL, *port = NULL, *path = "/"; - char *thost = NULL, *tport = NULL, *tpath = NULL; char *reqin = NULL, *respin = NULL; char *reqout = NULL, *respout = NULL; char *signfile = NULL, *keyfile = NULL; @@ -205,12 +204,6 @@ int MAIN(int argc, char **argv) } else if (!strcmp(*args, "-url")) { - if (thost) - OPENSSL_free(thost); - if (tport) - OPENSSL_free(tport); - if (tpath) - OPENSSL_free(tpath); if (args[1]) { args++; @@ -219,9 +212,6 @@ int MAIN(int argc, char **argv) BIO_printf(bio_err, "Error parsing URL\n"); badarg = 1; } - thost = host; - tport = port; - tpath = path; } else badarg = 1; } @@ -930,12 +920,12 @@ end: sk_X509_pop_free(verify_other, X509_free); sk_CONF_VALUE_pop_free(headers, X509V3_conf_free); - if (thost) - OPENSSL_free(thost); - if (tport) - OPENSSL_free(tport); - if (tpath) - OPENSSL_free(tpath); + if (use_ssl != -1) + { + OPENSSL_free(host); + OPENSSL_free(port); + OPENSSL_free(path); + } OPENSSL_EXIT(ret); } diff --git a/app/openssl/apps/req.c b/app/openssl/apps/req.c index d41385d7..5e034a85 100644 --- a/app/openssl/apps/req.c +++ b/app/openssl/apps/req.c @@ -1489,13 +1489,7 @@ start: #ifdef CHARSET_EBCDIC ebcdic2ascii(buf, buf, i); #endif - if(!req_check_len(i, n_min, n_max)) - { - if (batch || value) - return 0; - goto start; - } - + if(!req_check_len(i, n_min, n_max)) goto start; if (!X509_NAME_add_entry_by_NID(n,nid, chtype, (unsigned char *) buf, -1,-1,mval)) goto err; ret=1; @@ -1554,12 +1548,7 @@ start: #ifdef CHARSET_EBCDIC ebcdic2ascii(buf, buf, i); #endif - if(!req_check_len(i, n_min, n_max)) - { - if (batch || value) - return 0; - goto start; - } + if(!req_check_len(i, n_min, n_max)) goto start; if(!X509_REQ_add1_attr_by_NID(req, nid, chtype, (unsigned char *)buf, -1)) { diff --git a/app/openssl/apps/s_cb.c b/app/openssl/apps/s_cb.c index 146a9607..84c3b447 100644 --- a/app/openssl/apps/s_cb.c +++ b/app/openssl/apps/s_cb.c @@ -747,10 +747,6 @@ void MS_CALLBACK tlsext_cb(SSL *s, int client_server, int type, break; #endif - case TLSEXT_TYPE_padding: - extname = "TLS padding"; - break; - default: extname = "unknown"; break; diff --git a/app/openssl/apps/s_socket.c b/app/openssl/apps/s_socket.c index 94eb40f3..380efdb1 100644 --- a/app/openssl/apps/s_socket.c +++ b/app/openssl/apps/s_socket.c @@ -274,7 +274,7 @@ static int init_client_ip(int *sock, unsigned char ip[4], int port, int type) { i=0; i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i)); - if (i < 0) { closesocket(s); perror("keepalive"); return(0); } + if (i < 0) { perror("keepalive"); return(0); } } #endif @@ -450,7 +450,6 @@ redoit: if ((*host=(char *)OPENSSL_malloc(strlen(h1->h_name)+1)) == NULL) { perror("OPENSSL_malloc"); - closesocket(ret); return(0); } BUF_strlcpy(*host,h1->h_name,strlen(h1->h_name)+1); @@ -459,13 +458,11 @@ redoit: if (h2 == NULL) { BIO_printf(bio_err,"gethostbyname failure\n"); - closesocket(ret); return(0); } if (h2->h_addrtype != AF_INET) { BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n"); - closesocket(ret); return(0); } } diff --git a/app/openssl/apps/smime.c b/app/openssl/apps/smime.c index d1fe32d3..c583f8a0 100644 --- a/app/openssl/apps/smime.c +++ b/app/openssl/apps/smime.c @@ -541,8 +541,8 @@ int MAIN(int argc, char **argv) { if (!cipher) { -#ifndef OPENSSL_NO_DES - cipher = EVP_des_ede3_cbc(); +#ifndef OPENSSL_NO_RC2 + cipher = EVP_rc2_40_cbc(); #else BIO_printf(bio_err, "No cipher selected\n"); goto end; diff --git a/app/openssl/build-config-32.mk b/app/openssl/build-config-32.mk index d035f1e4..4f7484b9 100644 --- a/app/openssl/build-config-32.mk +++ b/app/openssl/build-config-32.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz # openssl_cflags_32 := \ -DOPENSSL_THREADS \ @@ -24,7 +24,6 @@ openssl_cflags_32 := \ -DOPENSSL_NO_RC5 \ -DOPENSSL_NO_RDRAND \ -DOPENSSL_NO_RFC3779 \ - -DOPENSSL_NO_RIPEMD \ -DOPENSSL_NO_RSAX \ -DOPENSSL_NO_SCTP \ -DOPENSSL_NO_SEED \ @@ -53,7 +52,6 @@ openssl_cflags_static_32 := \ -DOPENSSL_NO_RC5 \ -DOPENSSL_NO_RDRAND \ -DOPENSSL_NO_RFC3779 \ - -DOPENSSL_NO_RIPEMD \ -DOPENSSL_NO_RSAX \ -DOPENSSL_NO_SCTP \ -DOPENSSL_NO_SEED \ diff --git a/app/openssl/build-config-64.mk b/app/openssl/build-config-64.mk index 45a8141d..c0e6f6de 100644 --- a/app/openssl/build-config-64.mk +++ b/app/openssl/build-config-64.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz # openssl_cflags_64 := \ -DOPENSSL_THREADS \ @@ -24,7 +24,6 @@ openssl_cflags_64 := \ -DOPENSSL_NO_RC5 \ -DOPENSSL_NO_RDRAND \ -DOPENSSL_NO_RFC3779 \ - -DOPENSSL_NO_RIPEMD \ -DOPENSSL_NO_RSAX \ -DOPENSSL_NO_SCTP \ -DOPENSSL_NO_SEED \ @@ -53,7 +52,6 @@ openssl_cflags_static_64 := \ -DOPENSSL_NO_RC5 \ -DOPENSSL_NO_RDRAND \ -DOPENSSL_NO_RFC3779 \ - -DOPENSSL_NO_RIPEMD \ -DOPENSSL_NO_RSAX \ -DOPENSSL_NO_SCTP \ -DOPENSSL_NO_SEED \ diff --git a/app/openssl/build-config-trusty.mk b/app/openssl/build-config-trusty.mk index 4d6fb58c..e5809a3b 100644 --- a/app/openssl/build-config-trusty.mk +++ b/app/openssl/build-config-trusty.mk @@ -1,6 +1,6 @@ # Auto-generated - DO NOT EDIT! # To regenerate, edit openssl.config, then run: -# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz # openssl_cflags_trusty := \ -DL_ENDIAN \ diff --git a/app/openssl/check-all-builds.sh b/app/openssl/check-all-builds.sh index 9743872a..cff2ba5d 100755 --- a/app/openssl/check-all-builds.sh +++ b/app/openssl/check-all-builds.sh @@ -143,7 +143,7 @@ esac # NOTE: x86_64 is not ready yet, while the toolchain is in # prebuilts/ it doesn't have a sysroot which means it requires # a platform build to get Bionic and stuff. -ANDROID_ARCHS="arm arm64 x86 x86_64 mips" +ANDROID_ARCHS="arm x86 mips" BUILD_TYPES= for ARCH in $ANDROID_ARCHS; do @@ -311,14 +311,11 @@ get_build_arch () { # Out: GNU configuration target (e.g. arm-linux-androideabi) get_build_arch_target () { case $1 in - arm64) - echo "aarch64-linux-android" - ;; arm) echo "arm-linux-androideabi" ;; x86) - echo "x86_64-linux-android" + echo "i686-linux-android" ;; x86_64) echo "x86_64-linux-android" @@ -332,8 +329,8 @@ get_build_arch_target () { esac } -GCC_VERSION=4.8 -CLANG_VERSION=3.2 +GCC_VERSION=4.7 +CLANG_VERSION=3.1 get_prebuilt_gcc_dir_for_arch () { local arch=$1 @@ -344,9 +341,6 @@ get_prebuilt_gcc_dir_for_arch () { x86_64) arch=x86 ;; - arm64) - arch=aarch64 - ;; esac echo "$ANDROID_BUILD_TOP/prebuilts/gcc/$ANDROID_HOST_TAG/$arch/$target-$GCC_VERSION" } @@ -403,7 +397,7 @@ get_build_compiler () { # Force -m32 flag when needed for 32-bit builds. case $1 in - *-x86|*-generic32) + *-linux-x86|*-darwin-x86|*-generic32) result="$result -m32" ;; esac diff --git a/app/openssl/crypto/aes/asm/aes-armv4.pl b/app/openssl/crypto/aes/asm/aes-armv4.pl index 4f891708..86b86c4a 100644 --- a/app/openssl/crypto/aes/asm/aes-armv4.pl +++ b/app/openssl/crypto/aes/asm/aes-armv4.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -51,23 +51,9 @@ $key="r11"; $rounds="r12"; $code=<<___; -#ifndef __KERNEL__ -# include "arm_arch.h" -#else -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -#endif - +#include "arm_arch.h" .text -#if __ARM_ARCH__<7 -.code 32 -#else -.syntax unified -# ifdef __thumb2__ -.thumb -# else .code 32 -# endif -#endif .type AES_Te,%object .align 5 @@ -181,11 +167,7 @@ AES_Te: .type AES_encrypt,%function .align 5 AES_encrypt: -#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_encrypt -#else - adr r3,AES_encrypt -#endif stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 @@ -427,21 +409,11 @@ _armv4_AES_encrypt: .align 5 private_AES_set_encrypt_key: _armv4_AES_set_encrypt_key: -#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_set_encrypt_key -#else - adr r3,private_AES_set_encrypt_key -#endif teq r0,#0 -#if __ARM_ARCH__>=7 - itt eq @ Thumb2 thing, sanity check in ARM -#endif moveq r0,#-1 beq .Labrt teq r2,#0 -#if __ARM_ARCH__>=7 - itt eq @ Thumb2 thing, sanity check in ARM -#endif moveq r0,#-1 beq .Labrt @@ -450,9 +422,6 @@ _armv4_AES_set_encrypt_key: teq r1,#192 beq .Lok teq r1,#256 -#if __ARM_ARCH__>=7 - itt ne @ Thumb2 thing, sanity check in ARM -#endif movne r0,#-1 bne .Labrt @@ -607,9 +576,6 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-16] subs $rounds,$rounds,#1 str $s3,[$key,#-12] -#if __ARM_ARCH__>=7 - itt eq @ Thumb2 thing, sanity check in ARM -#endif subeq r2,$key,#216 beq .Ldone @@ -679,9 +645,6 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-24] subs $rounds,$rounds,#1 str $s3,[$key,#-20] -#if __ARM_ARCH__>=7 - itt eq @ Thumb2 thing, sanity check in ARM -#endif subeq r2,$key,#256 beq .Ldone @@ -711,17 +674,11 @@ _armv4_AES_set_encrypt_key: str $i3,[$key,#-4] b .L256_loop -.align 2 .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} -.Labrt: -#if __ARM_ARCH__>=5 - ret @ bx lr -#else - tst lr,#1 +.Labrt: tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) -#endif .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key .global private_AES_set_decrypt_key @@ -731,57 +688,34 @@ private_AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr bl _armv4_AES_set_encrypt_key teq r0,#0 - ldr lr,[sp],#4 @ pop lr + ldrne lr,[sp],#4 @ pop lr bne .Labrt - mov r0,r2 @ AES_set_encrypt_key preserves r2, - mov r1,r2 @ which is AES_KEY *key - b _armv4_AES_set_enc2dec_key -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key + stmdb sp!,{r4-r12} -@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) -.global AES_set_enc2dec_key -.type AES_set_enc2dec_key,%function -.align 5 -AES_set_enc2dec_key: -_armv4_AES_set_enc2dec_key: - stmdb sp!,{r4-r12,lr} - - ldr $rounds,[r0,#240] - mov $i1,r0 @ input - add $i2,r0,$rounds,lsl#4 - mov $key,r1 @ ouput - add $tbl,r1,$rounds,lsl#4 - str $rounds,[r1,#240] - -.Linv: ldr $s0,[$i1],#16 - ldr $s1,[$i1,#-12] - ldr $s2,[$i1,#-8] - ldr $s3,[$i1,#-4] - ldr $t1,[$i2],#-16 - ldr $t2,[$i2,#16+4] - ldr $t3,[$i2,#16+8] - ldr $i3,[$i2,#16+12] - str $s0,[$tbl],#-16 - str $s1,[$tbl,#16+4] - str $s2,[$tbl,#16+8] - str $s3,[$tbl,#16+12] - str $t1,[$key],#16 - str $t2,[$key,#-12] - str $t3,[$key,#-8] - str $i3,[$key,#-4] - teq $i1,$i2 - bne .Linv + ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2, + mov $key,r2 @ which is AES_KEY *key + mov $i1,r2 + add $i2,r2,$rounds,lsl#4 - ldr $s0,[$i1] +.Linv: ldr $s0,[$i1] ldr $s1,[$i1,#4] ldr $s2,[$i1,#8] ldr $s3,[$i1,#12] - str $s0,[$key] - str $s1,[$key,#4] - str $s2,[$key,#8] - str $s3,[$key,#12] - sub $key,$key,$rounds,lsl#3 + ldr $t1,[$i2] + ldr $t2,[$i2,#4] + ldr $t3,[$i2,#8] + ldr $i3,[$i2,#12] + str $s0,[$i2],#-16 + str $s1,[$i2,#16+4] + str $s2,[$i2,#16+8] + str $s3,[$i2,#16+12] + str $t1,[$i1],#16 + str $t2,[$i1,#-12] + str $t3,[$i1,#-8] + str $i3,[$i1,#-4] + teq $i1,$i2 + bne .Linv ___ $mask80=$i1; $mask1b=$i2; @@ -839,7 +773,7 @@ $code.=<<___; moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.size AES_set_enc2dec_key,.-AES_set_enc2dec_key +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key .type AES_Td,%object .align 5 @@ -949,11 +883,7 @@ AES_Td: .type AES_decrypt,%function .align 5 AES_decrypt: -#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_decrypt -#else - adr r3,AES_decrypt -#endif stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 @@ -1150,9 +1080,8 @@ _armv4_AES_decrypt: ldrb $t3,[$tbl,$i3] @ Td4[s0>>0] and $i3,lr,$s1,lsr#8 - add $s1,$tbl,$s1,lsr#24 ldrb $i1,[$tbl,$i1] @ Td4[s1>>0] - ldrb $s1,[$s1] @ Td4[s1>>24] + ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24] ldrb $i2,[$tbl,$i2] @ Td4[s1>>16] eor $s0,$i1,$s0,lsl#24 ldrb $i3,[$tbl,$i3] @ Td4[s1>>8] @@ -1165,8 +1094,7 @@ _armv4_AES_decrypt: ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] and $i3,lr,$s2,lsr#16 - add $s2,$tbl,$s2,lsr#24 - ldrb $s2,[$s2] @ Td4[s2>>24] + ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] eor $s0,$s0,$i1,lsl#8 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] eor $s1,$i2,$s1,lsl#16 @@ -1178,9 +1106,8 @@ _armv4_AES_decrypt: ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] and $i3,lr,$s3 @ i2 - add $s3,$tbl,$s3,lsr#24 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] - ldrb $s3,[$s3] @ Td4[s3>>24] + ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] eor $s0,$s0,$i1,lsl#16 ldr $i1,[$key,#0] eor $s1,$s1,$i2,lsl#8 @@ -1203,15 +1130,5 @@ _armv4_AES_decrypt: ___ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -$code =~ s/\bret\b/bx\tlr/gm; - -open SELF,$0; -while() { - next if (/^#!/); - last if (!s/^#/@/ and !/^$/); - print; -} -close SELF; - print $code; close STDOUT; # enforce flush diff --git a/app/openssl/crypto/aes/asm/aes-armv4.s b/app/openssl/crypto/aes/asm/aes-armv4.s index 333a5227..2697d4ce 100644 --- a/app/openssl/crypto/aes/asm/aes-armv4.s +++ b/app/openssl/crypto/aes/asm/aes-armv4.s @@ -1,53 +1,6 @@ - -@ ==================================================================== -@ Written by Andy Polyakov for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see http://www.openssl.org/~appro/cryptogams/. -@ ==================================================================== - -@ AES for ARMv4 - -@ January 2007. -@ -@ Code uses single 1K S-box and is >2 times faster than code generated -@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which -@ allows to merge logical or arithmetic operation with shift or rotate -@ in one instruction and emit combined result every cycle. The module -@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit -@ key [on single-issue Xscale PXA250 core]. - -@ May 2007. -@ -@ AES_set_[en|de]crypt_key is added. - -@ July 2010. -@ -@ Rescheduling for dual-issue pipeline resulted in 12% improvement on -@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. - -@ February 2011. -@ -@ Profiler-assisted and platform-specific optimization resulted in 16% -@ improvement on Cortex A8 core and ~21.5 cycles per byte. - -#ifndef __KERNEL__ -# include "arm_arch.h" -#else -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -#endif - +#include "arm_arch.h" .text -#if __ARM_ARCH__<7 -.code 32 -#else -.syntax unified -# ifdef __thumb2__ -.thumb -# else .code 32 -# endif -#endif .type AES_Te,%object .align 5 @@ -161,11 +114,7 @@ AES_Te: .type AES_encrypt,%function .align 5 AES_encrypt: -#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_encrypt -#else - adr r3,AES_encrypt -#endif stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 @@ -407,21 +356,11 @@ _armv4_AES_encrypt: .align 5 private_AES_set_encrypt_key: _armv4_AES_set_encrypt_key: -#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_set_encrypt_key -#else - adr r3,private_AES_set_encrypt_key -#endif teq r0,#0 -#if __ARM_ARCH__>=7 - itt eq @ Thumb2 thing, sanity check in ARM -#endif moveq r0,#-1 beq .Labrt teq r2,#0 -#if __ARM_ARCH__>=7 - itt eq @ Thumb2 thing, sanity check in ARM -#endif moveq r0,#-1 beq .Labrt @@ -430,9 +369,6 @@ _armv4_AES_set_encrypt_key: teq r1,#192 beq .Lok teq r1,#256 -#if __ARM_ARCH__>=7 - itt ne @ Thumb2 thing, sanity check in ARM -#endif movne r0,#-1 bne .Labrt @@ -587,9 +523,6 @@ _armv4_AES_set_encrypt_key: str r2,[r11,#-16] subs r12,r12,#1 str r3,[r11,#-12] -#if __ARM_ARCH__>=7 - itt eq @ Thumb2 thing, sanity check in ARM -#endif subeq r2,r11,#216 beq .Ldone @@ -659,9 +592,6 @@ _armv4_AES_set_encrypt_key: str r2,[r11,#-24] subs r12,r12,#1 str r3,[r11,#-20] -#if __ARM_ARCH__>=7 - itt eq @ Thumb2 thing, sanity check in ARM -#endif subeq r2,r11,#256 beq .Ldone @@ -691,17 +621,11 @@ _armv4_AES_set_encrypt_key: str r9,[r11,#-4] b .L256_loop -.align 2 .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} -.Labrt: -#if __ARM_ARCH__>=5 - bx lr @ .word 0xe12fff1e -#else - tst lr,#1 +.Labrt: tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key .global private_AES_set_decrypt_key @@ -711,57 +635,34 @@ private_AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr bl _armv4_AES_set_encrypt_key teq r0,#0 - ldr lr,[sp],#4 @ pop lr + ldrne lr,[sp],#4 @ pop lr bne .Labrt - mov r0,r2 @ AES_set_encrypt_key preserves r2, - mov r1,r2 @ which is AES_KEY *key - b _armv4_AES_set_enc2dec_key -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key - -@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) -.global AES_set_enc2dec_key -.type AES_set_enc2dec_key,%function -.align 5 -AES_set_enc2dec_key: -_armv4_AES_set_enc2dec_key: - stmdb sp!,{r4-r12,lr} - - ldr r12,[r0,#240] - mov r7,r0 @ input - add r8,r0,r12,lsl#4 - mov r11,r1 @ ouput - add r10,r1,r12,lsl#4 - str r12,[r1,#240] + stmdb sp!,{r4-r12} -.Linv: ldr r0,[r7],#16 - ldr r1,[r7,#-12] - ldr r2,[r7,#-8] - ldr r3,[r7,#-4] - ldr r4,[r8],#-16 - ldr r5,[r8,#16+4] - ldr r6,[r8,#16+8] - ldr r9,[r8,#16+12] - str r0,[r10],#-16 - str r1,[r10,#16+4] - str r2,[r10,#16+8] - str r3,[r10,#16+12] - str r4,[r11],#16 - str r5,[r11,#-12] - str r6,[r11,#-8] - str r9,[r11,#-4] - teq r7,r8 - bne .Linv + ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, + mov r11,r2 @ which is AES_KEY *key + mov r7,r2 + add r8,r2,r12,lsl#4 - ldr r0,[r7] +.Linv: ldr r0,[r7] ldr r1,[r7,#4] ldr r2,[r7,#8] ldr r3,[r7,#12] - str r0,[r11] - str r1,[r11,#4] - str r2,[r11,#8] - str r3,[r11,#12] - sub r11,r11,r12,lsl#3 + ldr r4,[r8] + ldr r5,[r8,#4] + ldr r6,[r8,#8] + ldr r9,[r8,#12] + str r0,[r8],#-16 + str r1,[r8,#16+4] + str r2,[r8,#16+8] + str r3,[r8,#16+12] + str r4,[r7],#16 + str r5,[r7,#-12] + str r6,[r7,#-8] + str r9,[r7,#-4] + teq r7,r8 + bne .Linv ldr r0,[r11,#16]! @ prefetch tp1 mov r7,#0x80 mov r8,#0x1b @@ -814,7 +715,7 @@ _armv4_AES_set_enc2dec_key: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.size AES_set_enc2dec_key,.-AES_set_enc2dec_key +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key .type AES_Td,%object .align 5 @@ -924,11 +825,7 @@ AES_Td: .type AES_decrypt,%function .align 5 AES_decrypt: -#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_decrypt -#else - adr r3,AES_decrypt -#endif stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 @@ -1125,9 +1022,8 @@ _armv4_AES_decrypt: ldrb r6,[r10,r9] @ Td4[s0>>0] and r9,lr,r1,lsr#8 - add r1,r10,r1,lsr#24 ldrb r7,[r10,r7] @ Td4[s1>>0] - ldrb r1,[r1] @ Td4[s1>>24] + ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24] ldrb r8,[r10,r8] @ Td4[s1>>16] eor r0,r7,r0,lsl#24 ldrb r9,[r10,r9] @ Td4[s1>>8] @@ -1140,8 +1036,7 @@ _armv4_AES_decrypt: ldrb r8,[r10,r8] @ Td4[s2>>0] and r9,lr,r2,lsr#16 - add r2,r10,r2,lsr#24 - ldrb r2,[r2] @ Td4[s2>>24] + ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24] eor r0,r0,r7,lsl#8 ldrb r9,[r10,r9] @ Td4[s2>>16] eor r1,r8,r1,lsl#16 @@ -1153,9 +1048,8 @@ _armv4_AES_decrypt: ldrb r8,[r10,r8] @ Td4[s3>>8] and r9,lr,r3 @ i2 - add r3,r10,r3,lsr#24 ldrb r9,[r10,r9] @ Td4[s3>>0] - ldrb r3,[r3] @ Td4[s3>>24] + ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24] eor r0,r0,r7,lsl#16 ldr r7,[r11,#0] eor r1,r1,r8,lsl#8 diff --git a/app/openssl/crypto/aes/asm/aesv8-armx-64.S b/app/openssl/crypto/aes/asm/aesv8-armx-64.S deleted file mode 100644 index be0a13df..00000000 --- a/app/openssl/crypto/aes/asm/aesv8-armx-64.S +++ /dev/null @@ -1,761 +0,0 @@ -#include "arm_arch.h" - -#if __ARM_ARCH__>=7 -.text -.arch armv8-a+crypto -.align 5 -rcon: -.long 0x01,0x01,0x01,0x01 -.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat -.long 0x1b,0x1b,0x1b,0x1b - -.globl aes_v8_set_encrypt_key -.type aes_v8_set_encrypt_key,%function -.align 5 -aes_v8_set_encrypt_key: -.Lenc_key: - stp x29,x30,[sp,#-16]! - add x29,sp,#0 - adr x3,rcon - cmp w1,#192 - - eor v0.16b,v0.16b,v0.16b - ld1 {v3.16b},[x0],#16 - mov w1,#8 // reuse w1 - ld1 {v1.4s,v2.4s},[x3],#32 - - b.lt .Loop128 - b.eq .L192 - b .L256 - -.align 4 -.Loop128: - tbl v6.16b,{v3.16b},v2.16b - ext v5.16b,v0.16b,v3.16b,#12 - st1 {v3.4s},[x2],#16 - aese v6.16b,v0.16b - subs w1,w1,#1 - - eor v3.16b,v3.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v3.16b,v3.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v6.16b,v6.16b,v1.16b - eor v3.16b,v3.16b,v5.16b - shl v1.16b,v1.16b,#1 - eor v3.16b,v3.16b,v6.16b - b.ne .Loop128 - - ld1 {v1.4s},[x3] - - tbl v6.16b,{v3.16b},v2.16b - ext v5.16b,v0.16b,v3.16b,#12 - st1 {v3.4s},[x2],#16 - aese v6.16b,v0.16b - - eor v3.16b,v3.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v3.16b,v3.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v6.16b,v6.16b,v1.16b - eor v3.16b,v3.16b,v5.16b - shl v1.16b,v1.16b,#1 - eor v3.16b,v3.16b,v6.16b - - tbl v6.16b,{v3.16b},v2.16b - ext v5.16b,v0.16b,v3.16b,#12 - st1 {v3.4s},[x2],#16 - aese v6.16b,v0.16b - - eor v3.16b,v3.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v3.16b,v3.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v6.16b,v6.16b,v1.16b - eor v3.16b,v3.16b,v5.16b - eor v3.16b,v3.16b,v6.16b - st1 {v3.4s},[x2] - add x2,x2,#0x50 - - mov w12,#10 - b .Ldone - -.align 4 -.L192: - ld1 {v4.8b},[x0],#8 - movi v6.16b,#8 // borrow v6.16b - st1 {v3.4s},[x2],#16 - sub v2.16b,v2.16b,v6.16b // adjust the mask - -.Loop192: - tbl v6.16b,{v4.16b},v2.16b - ext v5.16b,v0.16b,v3.16b,#12 - st1 {v4.8b},[x2],#8 - aese v6.16b,v0.16b - subs w1,w1,#1 - - eor v3.16b,v3.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v3.16b,v3.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v3.16b,v3.16b,v5.16b - - dup v5.4s,v3.s[3] - eor v5.16b,v5.16b,v4.16b - eor v6.16b,v6.16b,v1.16b - ext v4.16b,v0.16b,v4.16b,#12 - shl v1.16b,v1.16b,#1 - eor v4.16b,v4.16b,v5.16b - eor v3.16b,v3.16b,v6.16b - eor v4.16b,v4.16b,v6.16b - st1 {v3.4s},[x2],#16 - b.ne .Loop192 - - mov w12,#12 - add x2,x2,#0x20 - b .Ldone - -.align 4 -.L256: - ld1 {v4.16b},[x0] - mov w1,#7 - mov w12,#14 - st1 {v3.4s},[x2],#16 - -.Loop256: - tbl v6.16b,{v4.16b},v2.16b - ext v5.16b,v0.16b,v3.16b,#12 - st1 {v4.4s},[x2],#16 - aese v6.16b,v0.16b - subs w1,w1,#1 - - eor v3.16b,v3.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v3.16b,v3.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v6.16b,v6.16b,v1.16b - eor v3.16b,v3.16b,v5.16b - shl v1.16b,v1.16b,#1 - eor v3.16b,v3.16b,v6.16b - st1 {v3.4s},[x2],#16 - b.eq .Ldone - - dup v6.4s,v3.s[3] // just splat - ext v5.16b,v0.16b,v4.16b,#12 - aese v6.16b,v0.16b - - eor v4.16b,v4.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v4.16b,v4.16b,v5.16b - ext v5.16b,v0.16b,v5.16b,#12 - eor v4.16b,v4.16b,v5.16b - - eor v4.16b,v4.16b,v6.16b - b .Loop256 - -.Ldone: - str w12,[x2] - - eor x0,x0,x0 // return value - ldr x29,[sp],#16 - ret -.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key - -.globl aes_v8_set_decrypt_key -.type aes_v8_set_decrypt_key,%function -.align 5 -aes_v8_set_decrypt_key: - stp x29,x30,[sp,#-16]! - add x29,sp,#0 - bl .Lenc_key - - sub x2,x2,#240 // restore original x2 - mov x4,#-16 - add x0,x2,x12,lsl#4 // end of key schedule - - ld1 {v0.4s},[x2] - ld1 {v1.4s},[x0] - st1 {v0.4s},[x0],x4 - st1 {v1.4s},[x2],#16 - -.Loop_imc: - ld1 {v0.4s},[x2] - ld1 {v1.4s},[x0] - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - st1 {v0.4s},[x0],x4 - st1 {v1.4s},[x2],#16 - cmp x0,x2 - b.hi .Loop_imc - - ld1 {v0.4s},[x2] - aesimc v0.16b,v0.16b - st1 {v0.4s},[x0] - - eor x0,x0,x0 // return value - ldp x29,x30,[sp],#16 - ret -.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key -.globl aes_v8_encrypt -.type aes_v8_encrypt,%function -.align 5 -aes_v8_encrypt: - ldr w3,[x2,#240] - ld1 {v0.4s},[x2],#16 - ld1 {v2.16b},[x0] - sub w3,w3,#2 - ld1 {v1.4s},[x2],#16 - -.Loop_enc: - aese v2.16b,v0.16b - ld1 {v0.4s},[x2],#16 - aesmc v2.16b,v2.16b - subs w3,w3,#2 - aese v2.16b,v1.16b - ld1 {v1.4s},[x2],#16 - aesmc v2.16b,v2.16b - b.gt .Loop_enc - - aese v2.16b,v0.16b - ld1 {v0.4s},[x2] - aesmc v2.16b,v2.16b - aese v2.16b,v1.16b - eor v2.16b,v2.16b,v0.16b - - st1 {v2.16b},[x1] - ret -.size aes_v8_encrypt,.-aes_v8_encrypt -.globl aes_v8_decrypt -.type aes_v8_decrypt,%function -.align 5 -aes_v8_decrypt: - ldr w3,[x2,#240] - ld1 {v0.4s},[x2],#16 - ld1 {v2.16b},[x0] - sub w3,w3,#2 - ld1 {v1.4s},[x2],#16 - -.Loop_dec: - aesd v2.16b,v0.16b - ld1 {v0.4s},[x2],#16 - aesimc v2.16b,v2.16b - subs w3,w3,#2 - aesd v2.16b,v1.16b - ld1 {v1.4s},[x2],#16 - aesimc v2.16b,v2.16b - b.gt .Loop_dec - - aesd v2.16b,v0.16b - ld1 {v0.4s},[x2] - aesimc v2.16b,v2.16b - aesd v2.16b,v1.16b - eor v2.16b,v2.16b,v0.16b - - st1 {v2.16b},[x1] - ret -.size aes_v8_decrypt,.-aes_v8_decrypt -.globl aes_v8_cbc_encrypt -.type aes_v8_cbc_encrypt,%function -.align 5 -aes_v8_cbc_encrypt: - stp x29,x30,[sp,#-16]! - add x29,sp,#0 - subs x2,x2,#16 - mov x8,#16 - b.lo .Lcbc_abort - csel x8,xzr,x8,eq - - cmp w5,#0 // en- or decrypting? - ldr w5,[x3,#240] - and x2,x2,#-16 - ld1 {v6.16b},[x4] - ld1 {v0.16b},[x0],x8 - - ld1 {v16.4s-v17.4s},[x3] // load key schedule... - sub w5,w5,#6 - add x7,x3,x5,lsl#4 // pointer to last 7 round keys - sub w5,w5,#2 - ld1 {v18.4s-v19.4s},[x7],#32 - ld1 {v20.4s-v21.4s},[x7],#32 - ld1 {v22.4s-v23.4s},[x7],#32 - ld1 {v7.4s},[x7] - - add x7,x3,#32 - mov w6,w5 - b.eq .Lcbc_dec - - cmp w5,#2 - eor v0.16b,v0.16b,v6.16b - eor v5.16b,v16.16b,v7.16b - b.eq .Lcbc_enc128 - -.Loop_cbc_enc: - aese v0.16b,v16.16b - ld1 {v16.4s},[x7],#16 - aesmc v0.16b,v0.16b - subs w6,w6,#2 - aese v0.16b,v17.16b - ld1 {v17.4s},[x7],#16 - aesmc v0.16b,v0.16b - b.gt .Loop_cbc_enc - - aese v0.16b,v16.16b - aesmc v0.16b,v0.16b - subs x2,x2,#16 - aese v0.16b,v17.16b - aesmc v0.16b,v0.16b - csel x8,xzr,x8,eq - aese v0.16b,v18.16b - aesmc v0.16b,v0.16b - add x7,x3,#16 - aese v0.16b,v19.16b - aesmc v0.16b,v0.16b - ld1 {v16.16b},[x0],x8 - aese v0.16b,v20.16b - aesmc v0.16b,v0.16b - eor v16.16b,v16.16b,v5.16b - aese v0.16b,v21.16b - aesmc v0.16b,v0.16b - ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] - aese v0.16b,v22.16b - aesmc v0.16b,v0.16b - aese v0.16b,v23.16b - - mov w6,w5 - eor v6.16b,v0.16b,v7.16b - st1 {v6.16b},[x1],#16 - b.hs .Loop_cbc_enc - - b .Lcbc_done - -.align 5 -.Lcbc_enc128: - ld1 {v2.4s-v3.4s},[x7] - aese v0.16b,v16.16b - aesmc v0.16b,v0.16b - b .Lenter_cbc_enc128 -.Loop_cbc_enc128: - aese v0.16b,v16.16b - aesmc v0.16b,v0.16b - st1 {v6.16b},[x1],#16 -.Lenter_cbc_enc128: - aese v0.16b,v17.16b - aesmc v0.16b,v0.16b - subs x2,x2,#16 - aese v0.16b,v2.16b - aesmc v0.16b,v0.16b - csel x8,xzr,x8,eq - aese v0.16b,v3.16b - aesmc v0.16b,v0.16b - aese v0.16b,v18.16b - aesmc v0.16b,v0.16b - aese v0.16b,v19.16b - aesmc v0.16b,v0.16b - ld1 {v16.16b},[x0],x8 - aese v0.16b,v20.16b - aesmc v0.16b,v0.16b - aese v0.16b,v21.16b - aesmc v0.16b,v0.16b - aese v0.16b,v22.16b - aesmc v0.16b,v0.16b - eor v16.16b,v16.16b,v5.16b - aese v0.16b,v23.16b - eor v6.16b,v0.16b,v7.16b - b.hs .Loop_cbc_enc128 - - st1 {v6.16b},[x1],#16 - b .Lcbc_done - -.align 5 -.Lcbc_dec128: - ld1 {v4.4s-v5.4s},[x7] - eor v6.16b,v6.16b,v7.16b - eor v2.16b,v0.16b,v7.16b - mov x12,x8 - -.Loop2x_cbc_dec128: - aesd v0.16b,v16.16b - aesd v1.16b,v16.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - subs x2,x2,#32 - aesd v0.16b,v17.16b - aesd v1.16b,v17.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - csel x8,xzr,x8,lo - aesd v0.16b,v4.16b - aesd v1.16b,v4.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - csel x12,xzr,x12,ls - aesd v0.16b,v5.16b - aesd v1.16b,v5.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - aesd v0.16b,v18.16b - aesd v1.16b,v18.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - aesd v0.16b,v19.16b - aesd v1.16b,v19.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - aesd v0.16b,v20.16b - aesd v1.16b,v20.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - aesd v0.16b,v21.16b - aesd v1.16b,v21.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - aesd v0.16b,v22.16b - aesd v1.16b,v22.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - aesd v0.16b,v23.16b - aesd v1.16b,v23.16b - - eor v6.16b,v6.16b,v0.16b - ld1 {v0.16b},[x0],x8 - eor v2.16b,v2.16b,v1.16b - ld1 {v1.16b},[x0],x12 - st1 {v6.16b},[x1],#16 - eor v6.16b,v3.16b,v7.16b - st1 {v2.16b},[x1],#16 - eor v2.16b,v0.16b,v7.16b - orr v3.16b,v1.16b,v1.16b - b.hs .Loop2x_cbc_dec128 - - adds x2,x2,#32 - eor v6.16b,v6.16b,v7.16b - b.eq .Lcbc_done - eor v2.16b,v2.16b,v7.16b - b .Lcbc_dec_tail - -.align 5 -.Lcbc_dec: - subs x2,x2,#16 - orr v2.16b,v0.16b,v0.16b - b.lo .Lcbc_dec_tail - - csel x8,xzr,x8,eq - cmp w5,#2 - ld1 {v1.16b},[x0],x8 - orr v3.16b,v1.16b,v1.16b - b.eq .Lcbc_dec128 - -.Loop2x_cbc_dec: - aesd v0.16b,v16.16b - aesd v1.16b,v16.16b - ld1 {v16.4s},[x7],#16 - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - subs w6,w6,#2 - aesd v0.16b,v17.16b - aesd v1.16b,v17.16b - ld1 {v17.4s},[x7],#16 - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - b.gt .Loop2x_cbc_dec - - aesd v0.16b,v16.16b - aesd v1.16b,v16.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - eor v4.16b,v6.16b,v7.16b - eor v5.16b,v2.16b,v7.16b - aesd v0.16b,v17.16b - aesd v1.16b,v17.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - orr v6.16b,v3.16b,v3.16b - subs x2,x2,#32 - aesd v0.16b,v18.16b - aesd v1.16b,v18.16b - aesimc v0.16b,v0.16b - csel x8,xzr,x8,lo - aesimc v1.16b,v1.16b - mov x7,x3 - aesd v0.16b,v19.16b - aesd v1.16b,v19.16b - aesimc v0.16b,v0.16b - ld1 {v2.16b},[x0],x8 - aesimc v1.16b,v1.16b - csel x8,xzr,x8,ls - aesd v0.16b,v20.16b - aesd v1.16b,v20.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - ld1 {v3.16b},[x0],x8 - aesd v0.16b,v21.16b - aesd v1.16b,v21.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] - aesd v0.16b,v22.16b - aesd v1.16b,v22.16b - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] - aesd v0.16b,v23.16b - aesd v1.16b,v23.16b - - mov w6,w5 - eor v4.16b,v4.16b,v0.16b - eor v5.16b,v5.16b,v1.16b - orr v0.16b,v2.16b,v2.16b - st1 {v4.16b},[x1],#16 - orr v1.16b,v3.16b,v3.16b - st1 {v5.16b},[x1],#16 - b.hs .Loop2x_cbc_dec - - adds x2,x2,#32 - b.eq .Lcbc_done - -.Lcbc_dec_tail: - aesd v0.16b,v16.16b - ld1 {v16.4s},[x7],#16 - aesimc v0.16b,v0.16b - subs w6,w6,#2 - aesd v0.16b,v17.16b - ld1 {v17.4s},[x7],#16 - aesimc v0.16b,v0.16b - b.gt .Lcbc_dec_tail - - aesd v0.16b,v16.16b - aesimc v0.16b,v0.16b - aesd v0.16b,v17.16b - aesimc v0.16b,v0.16b - eor v4.16b,v6.16b,v7.16b - aesd v0.16b,v18.16b - aesimc v0.16b,v0.16b - orr v6.16b,v2.16b,v2.16b - aesd v0.16b,v19.16b - aesimc v0.16b,v0.16b - aesd v0.16b,v20.16b - aesimc v0.16b,v0.16b - aesd v0.16b,v21.16b - aesimc v0.16b,v0.16b - aesd v0.16b,v22.16b - aesimc v0.16b,v0.16b - aesd v0.16b,v23.16b - - eor v4.16b,v4.16b,v0.16b - st1 {v4.16b},[x1],#16 - -.Lcbc_done: - st1 {v6.16b},[x4] -.Lcbc_abort: - ldr x29,[sp],#16 - ret -.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt -.globl aes_v8_ctr32_encrypt_blocks -.type aes_v8_ctr32_encrypt_blocks,%function -.align 5 -aes_v8_ctr32_encrypt_blocks: - stp x29,x30,[sp,#-16]! - add x29,sp,#0 - ldr w5,[x3,#240] - - ldr w8, [x4, #12] - ld1 {v0.4s},[x4] - - ld1 {v16.4s-v17.4s},[x3] // load key schedule... - sub w5,w5,#6 - add x7,x3,x5,lsl#4 // pointer to last 7 round keys - sub w5,w5,#2 - ld1 {v18.4s-v19.4s},[x7],#32 - ld1 {v20.4s-v21.4s},[x7],#32 - ld1 {v22.4s-v23.4s},[x7],#32 - ld1 {v7.4s},[x7] - - add x7,x3,#32 - mov w6,w5 - - subs x2,x2,#2 - b.lo .Lctr32_tail - -#ifndef __ARMEB__ - rev w8, w8 -#endif - orr v1.16b,v0.16b,v0.16b - add w8, w8, #1 - orr v6.16b,v0.16b,v0.16b - rev w10, w8 - cmp w5,#2 - mov v1.s[3],w10 - b.eq .Lctr32_128 - -.Loop2x_ctr32: - aese v0.16b,v16.16b - aese v1.16b,v16.16b - ld1 {v16.4s},[x7],#16 - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - subs w6,w6,#2 - aese v0.16b,v17.16b - aese v1.16b,v17.16b - ld1 {v17.4s},[x7],#16 - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - b.gt .Loop2x_ctr32 - - aese v0.16b,v16.16b - aese v1.16b,v16.16b - aesmc v4.16b,v0.16b - orr v0.16b,v6.16b,v6.16b - aesmc v5.16b,v1.16b - orr v1.16b,v6.16b,v6.16b - aese v4.16b,v17.16b - aese v5.16b,v17.16b - ld1 {v2.16b},[x0],#16 - aesmc v4.16b,v4.16b - ld1 {v3.16b},[x0],#16 - aesmc v5.16b,v5.16b - add w8,w8,#1 - aese v4.16b,v18.16b - aese v5.16b,v18.16b - rev w9,w8 - aesmc v4.16b,v4.16b - aesmc v5.16b,v5.16b - add w8,w8,#1 - aese v4.16b,v19.16b - aese v5.16b,v19.16b - eor v2.16b,v2.16b,v7.16b - rev w10,w8 - aesmc v4.16b,v4.16b - aesmc v5.16b,v5.16b - eor v3.16b,v3.16b,v7.16b - mov x7,x3 - aese v4.16b,v20.16b - aese v5.16b,v20.16b - subs x2,x2,#2 - aesmc v4.16b,v4.16b - aesmc v5.16b,v5.16b - ld1 {v16.4s-v17.4s},[x7],#32 // re-pre-load rndkey[0-1] - aese v4.16b,v21.16b - aese v5.16b,v21.16b - aesmc v4.16b,v4.16b - aesmc v5.16b,v5.16b - aese v4.16b,v22.16b - aese v5.16b,v22.16b - mov v0.s[3], w9 - aesmc v4.16b,v4.16b - mov v1.s[3], w10 - aesmc v5.16b,v5.16b - aese v4.16b,v23.16b - aese v5.16b,v23.16b - - mov w6,w5 - eor v2.16b,v2.16b,v4.16b - eor v3.16b,v3.16b,v5.16b - st1 {v2.16b},[x1],#16 - st1 {v3.16b},[x1],#16 - b.hs .Loop2x_ctr32 - - adds x2,x2,#2 - b.eq .Lctr32_done - b .Lctr32_tail - -.Lctr32_128: - ld1 {v4.4s-v5.4s},[x7] - -.Loop2x_ctr32_128: - aese v0.16b,v16.16b - aese v1.16b,v16.16b - aesmc v0.16b,v0.16b - ld1 {v2.16b},[x0],#16 - aesmc v1.16b,v1.16b - ld1 {v3.16b},[x0],#16 - aese v0.16b,v17.16b - aese v1.16b,v17.16b - add w8,w8,#1 - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - rev w9,w8 - aese v0.16b,v4.16b - aese v1.16b,v4.16b - add w8,w8,#1 - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - rev w10,w8 - aese v0.16b,v5.16b - aese v1.16b,v5.16b - subs x2,x2,#2 - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - aese v0.16b,v18.16b - aese v1.16b,v18.16b - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - aese v0.16b,v19.16b - aese v1.16b,v19.16b - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - aese v0.16b,v20.16b - aese v1.16b,v20.16b - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - aese v0.16b,v21.16b - aese v1.16b,v21.16b - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - aese v0.16b,v22.16b - aese v1.16b,v22.16b - aesmc v0.16b,v0.16b - aesmc v1.16b,v1.16b - eor v2.16b,v2.16b,v7.16b - aese v0.16b,v23.16b - eor v3.16b,v3.16b,v7.16b - aese v1.16b,v23.16b - - eor v2.16b,v2.16b,v0.16b - orr v0.16b,v6.16b,v6.16b - eor v3.16b,v3.16b,v1.16b - orr v1.16b,v6.16b,v6.16b - st1 {v2.16b},[x1],#16 - mov v0.s[3], w9 - st1 {v3.16b},[x1],#16 - mov v1.s[3], w10 - b.hs .Loop2x_ctr32_128 - - adds x2,x2,#2 - b.eq .Lctr32_done - -.Lctr32_tail: - aese v0.16b,v16.16b - ld1 {v16.4s},[x7],#16 - aesmc v0.16b,v0.16b - subs w6,w6,#2 - aese v0.16b,v17.16b - ld1 {v17.4s},[x7],#16 - aesmc v0.16b,v0.16b - b.gt .Lctr32_tail - - aese v0.16b,v16.16b - aesmc v0.16b,v0.16b - aese v0.16b,v17.16b - aesmc v0.16b,v0.16b - ld1 {v2.16b},[x0] - aese v0.16b,v18.16b - aesmc v0.16b,v0.16b - aese v0.16b,v19.16b - aesmc v0.16b,v0.16b - aese v0.16b,v20.16b - aesmc v0.16b,v0.16b - aese v0.16b,v21.16b - aesmc v0.16b,v0.16b - aese v0.16b,v22.16b - aesmc v0.16b,v0.16b - eor v2.16b,v2.16b,v7.16b - aese v0.16b,v23.16b - - eor v2.16b,v2.16b,v0.16b - st1 {v2.16b},[x1] - -.Lctr32_done: - ldr x29,[sp],#16 - ret -.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks -#endif diff --git a/app/openssl/crypto/aes/asm/aesv8-armx.S b/app/openssl/crypto/aes/asm/aesv8-armx.S deleted file mode 100644 index 1637e4d4..00000000 --- a/app/openssl/crypto/aes/asm/aesv8-armx.S +++ /dev/null @@ -1,767 +0,0 @@ -#include "arm_arch.h" - -#if __ARM_ARCH__>=7 -.text -.fpu neon -.code 32 -.align 5 -rcon: -.long 0x01,0x01,0x01,0x01 -.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat -.long 0x1b,0x1b,0x1b,0x1b - -.globl aes_v8_set_encrypt_key -.type aes_v8_set_encrypt_key,%function -.align 5 -aes_v8_set_encrypt_key: -.Lenc_key: - adr r3,rcon - cmp r1,#192 - - veor q0,q0,q0 - vld1.8 {q3},[r0]! - mov r1,#8 @ reuse r1 - vld1.32 {q1,q2},[r3]! - - blt .Loop128 - beq .L192 - b .L256 - -.align 4 -.Loop128: - vtbl.8 d20,{q3},d4 - vtbl.8 d21,{q3},d5 - vext.8 q9,q0,q3,#12 - vst1.32 {q3},[r2]! - .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 - subs r1,r1,#1 - - veor q3,q3,q9 - vext.8 q9,q0,q9,#12 - veor q3,q3,q9 - vext.8 q9,q0,q9,#12 - veor q10,q10,q1 - veor q3,q3,q9 - vshl.u8 q1,q1,#1 - veor q3,q3,q10 - bne .Loop128 - - vld1.32 {q1},[r3] - - vtbl.8 d20,{q3},d4 - vtbl.8 d21,{q3},d5 - vext.8 q9,q0,q3,#12 - vst1.32 {q3},[r2]! - .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 - - veor q3,q3,q9 - vext.8 q9,q0,q9,#12 - veor q3,q3,q9 - vext.8 q9,q0,q9,#12 - veor q10,q10,q1 - veor q3,q3,q9 - vshl.u8 q1,q1,#1 - veor q3,q3,q10 - - vtbl.8 d20,{q3},d4 - vtbl.8 d21,{q3},d5 - vext.8 q9,q0,q3,#12 - vst1.32 {q3},[r2]! - .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 - - veor q3,q3,q9 - vext.8 q9,q0,q9,#12 - veor q3,q3,q9 - vext.8 q9,q0,q9,#12 - veor q10,q10,q1 - veor q3,q3,q9 - veor q3,q3,q10 - vst1.32 {q3},[r2] - add r2,r2,#0x50 - - mov r12,#10 - b .Ldone - -.align 4 -.L192: - vld1.8 {d16},[r0]! - vmov.i8 q10,#8 @ borrow q10 - vst1.32 {q3},[r2]! - vsub.i8 q2,q2,q10 @ adjust the mask - -.Loop192: - vtbl.8 d20,{q8},d4 - vtbl.8 d21,{q8},d5 - vext.8 q9,q0,q3,#12 - vst1.32 {d16},[r2]! - .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 - subs r1,r1,#1 - - veor q3,q3,q9 - vext.8 q9,q0,q9,#12 - veor q3,q3,q9 - vext.8 q9,q0,q9,#12 - veor q3,q3,q9 - - vdup.32 q9,d7[1] - veor q9,q9,q8 - veor q10,q10,q1 - vext.8 q8,q0,q8,#12 - vshl.u8 q1,q1,#1 - veor q8,q8,q9 - veor q3,q3,q10 - veor q8,q8,q10 - vst1.32 {q3},[r2]! - bne .Loop192 - - mov r12,#12 - add r2,r2,#0x20 - b .Ldone - -.align 4 -.L256: - vld1.8 {q8},[r0] - mov r1,#7 - mov r12,#14 - vst1.32 {q3},[r2]! - -.Loop256: - vtbl.8 d20,{q8},d4 - vtbl.8 d21,{q8},d5 - vext.8 q9,q0,q3,#12 - vst1.32 {q8},[r2]! - .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 - subs r1,r1,#1 - - veor q3,q3,q9 - vext.8 q9,q0,q9,#12 - veor q3,q3,q9 - vext.8 q9,q0,q9,#12 - veor q10,q10,q1 - veor q3,q3,q9 - vshl.u8 q1,q1,#1 - veor q3,q3,q10 - vst1.32 {q3},[r2]! - beq .Ldone - - vdup.32 q10,d7[1] - vext.8 q9,q0,q8,#12 - .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 - - veor q8,q8,q9 - vext.8 q9,q0,q9,#12 - veor q8,q8,q9 - vext.8 q9,q0,q9,#12 - veor q8,q8,q9 - - veor q8,q8,q10 - b .Loop256 - -.Ldone: - str r12,[r2] - - eor r0,r0,r0 @ return value - - bx lr -.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key - -.globl aes_v8_set_decrypt_key -.type aes_v8_set_decrypt_key,%function -.align 5 -aes_v8_set_decrypt_key: - stmdb sp!,{r4,lr} - bl .Lenc_key - - sub r2,r2,#240 @ restore original r2 - mov r4,#-16 - add r0,r2,r12,lsl#4 @ end of key schedule - - vld1.32 {q0},[r2] - vld1.32 {q1},[r0] - vst1.32 {q0},[r0],r4 - vst1.32 {q1},[r2]! - -.Loop_imc: - vld1.32 {q0},[r2] - vld1.32 {q1},[r0] - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - vst1.32 {q0},[r0],r4 - vst1.32 {q1},[r2]! - cmp r0,r2 - bhi .Loop_imc - - vld1.32 {q0},[r2] - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - vst1.32 {q0},[r0] - - eor r0,r0,r0 @ return value - ldmia sp!,{r4,pc} -.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key -.globl aes_v8_encrypt -.type aes_v8_encrypt,%function -.align 5 -aes_v8_encrypt: - ldr r3,[r2,#240] - vld1.32 {q0},[r2]! - vld1.8 {q2},[r0] - sub r3,r3,#2 - vld1.32 {q1},[r2]! - -.Loop_enc: - .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 - vld1.32 {q0},[r2]! - .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 - subs r3,r3,#2 - .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 - vld1.32 {q1},[r2]! - .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 - bgt .Loop_enc - - .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 - vld1.32 {q0},[r2] - .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 - .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 - veor q2,q2,q0 - - vst1.8 {q2},[r1] - bx lr -.size aes_v8_encrypt,.-aes_v8_encrypt -.globl aes_v8_decrypt -.type aes_v8_decrypt,%function -.align 5 -aes_v8_decrypt: - ldr r3,[r2,#240] - vld1.32 {q0},[r2]! - vld1.8 {q2},[r0] - sub r3,r3,#2 - vld1.32 {q1},[r2]! - -.Loop_dec: - .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 - vld1.32 {q0},[r2]! - .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 - subs r3,r3,#2 - .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 - vld1.32 {q1},[r2]! - .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 - bgt .Loop_dec - - .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 - vld1.32 {q0},[r2] - .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 - .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 - veor q2,q2,q0 - - vst1.8 {q2},[r1] - bx lr -.size aes_v8_decrypt,.-aes_v8_decrypt -.globl aes_v8_cbc_encrypt -.type aes_v8_cbc_encrypt,%function -.align 5 -aes_v8_cbc_encrypt: - mov ip,sp - stmdb sp!,{r4-r8,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so - ldmia ip,{r4-r5} @ load remaining args - subs r2,r2,#16 - mov r8,#16 - blo .Lcbc_abort - moveq r8,#0 - - cmp r5,#0 @ en- or decrypting? - ldr r5,[r3,#240] - and r2,r2,#-16 - vld1.8 {q6},[r4] - vld1.8 {q0},[r0],r8 - - vld1.32 {q8-q9},[r3] @ load key schedule... - sub r5,r5,#6 - add r7,r3,r5,lsl#4 @ pointer to last 7 round keys - sub r5,r5,#2 - vld1.32 {q10-q11},[r7]! - vld1.32 {q12-q13},[r7]! - vld1.32 {q14-q15},[r7]! - vld1.32 {q7},[r7] - - add r7,r3,#32 - mov r6,r5 - beq .Lcbc_dec - - cmp r5,#2 - veor q0,q0,q6 - veor q5,q8,q7 - beq .Lcbc_enc128 - -.Loop_cbc_enc: - .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - vld1.32 {q8},[r7]! - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - subs r6,r6,#2 - .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - vld1.32 {q9},[r7]! - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - bgt .Loop_cbc_enc - - .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - subs r2,r2,#16 - .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - moveq r8,#0 - .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - add r7,r3,#16 - .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - vld1.8 {q8},[r0],r8 - .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - veor q8,q8,q5 - .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] - .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 - - mov r6,r5 - veor q6,q0,q7 - vst1.8 {q6},[r1]! - bhs .Loop_cbc_enc - - b .Lcbc_done - -.align 5 -.Lcbc_enc128: - vld1.32 {q2-q3},[r7] - .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - b .Lenter_cbc_enc128 -.Loop_cbc_enc128: - .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - vst1.8 {q6},[r1]! -.Lenter_cbc_enc128: - .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - subs r2,r2,#16 - .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - moveq r8,#0 - .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - vld1.8 {q8},[r0],r8 - .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - veor q8,q8,q5 - .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 - veor q6,q0,q7 - bhs .Loop_cbc_enc128 - - vst1.8 {q6},[r1]! - b .Lcbc_done - -.align 5 -.Lcbc_dec128: - vld1.32 {q4-q5},[r7] - veor q6,q6,q7 - veor q2,q0,q7 - mov r12,r8 - -.Loop2x_cbc_dec128: - .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 - .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - subs r2,r2,#32 - .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 - .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - movlo r8,#0 - .byte 0x48,0x03,0xb0,0xf3 @ aesd q0,q4 - .byte 0x48,0x23,0xb0,0xf3 @ aesd q1,q4 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - movls r12,#0 - .byte 0x4a,0x03,0xb0,0xf3 @ aesd q0,q5 - .byte 0x4a,0x23,0xb0,0xf3 @ aesd q1,q5 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - .byte 0x64,0x03,0xb0,0xf3 @ aesd q0,q10 - .byte 0x64,0x23,0xb0,0xf3 @ aesd q1,q10 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - .byte 0x66,0x03,0xb0,0xf3 @ aesd q0,q11 - .byte 0x66,0x23,0xb0,0xf3 @ aesd q1,q11 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 - .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 - .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 - .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 - .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 - - veor q6,q6,q0 - vld1.8 {q0},[r0],r8 - veor q2,q2,q1 - vld1.8 {q1},[r0],r12 - vst1.8 {q6},[r1]! - veor q6,q3,q7 - vst1.8 {q2},[r1]! - veor q2,q0,q7 - vorr q3,q1,q1 - bhs .Loop2x_cbc_dec128 - - adds r2,r2,#32 - veor q6,q6,q7 - beq .Lcbc_done - veor q2,q2,q7 - b .Lcbc_dec_tail - -.align 5 -.Lcbc_dec: - subs r2,r2,#16 - vorr q2,q0,q0 - blo .Lcbc_dec_tail - - moveq r8,#0 - cmp r5,#2 - vld1.8 {q1},[r0],r8 - vorr q3,q1,q1 - beq .Lcbc_dec128 - -.Loop2x_cbc_dec: - .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 - .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - vld1.32 {q8},[r7]! - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - subs r6,r6,#2 - .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 - .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - vld1.32 {q9},[r7]! - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - bgt .Loop2x_cbc_dec - - .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 - .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - veor q4,q6,q7 - veor q5,q2,q7 - .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 - .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - vorr q6,q3,q3 - subs r2,r2,#32 - .byte 0x64,0x03,0xb0,0xf3 @ aesd q0,q10 - .byte 0x64,0x23,0xb0,0xf3 @ aesd q1,q10 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - movlo r8,#0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - mov r7,r3 - .byte 0x66,0x03,0xb0,0xf3 @ aesd q0,q11 - .byte 0x66,0x23,0xb0,0xf3 @ aesd q1,q11 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - vld1.8 {q2},[r0],r8 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - movls r8,#0 - .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 - .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - vld1.8 {q3},[r0],r8 - .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 - .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] - .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 - .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 - vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] - .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 - .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 - - mov r6,r5 - veor q4,q4,q0 - veor q5,q5,q1 - vorr q0,q2,q2 - vst1.8 {q4},[r1]! - vorr q1,q3,q3 - vst1.8 {q5},[r1]! - bhs .Loop2x_cbc_dec - - adds r2,r2,#32 - beq .Lcbc_done - -.Lcbc_dec_tail: - .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 - vld1.32 {q8},[r7]! - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - subs r6,r6,#2 - .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 - vld1.32 {q9},[r7]! - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - bgt .Lcbc_dec_tail - - .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - veor q4,q6,q7 - .byte 0x64,0x03,0xb0,0xf3 @ aesd q0,q10 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - vorr q6,q2,q2 - .byte 0x66,0x03,0xb0,0xf3 @ aesd q0,q11 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 - .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 - .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 - - veor q4,q4,q0 - vst1.8 {q4},[r1]! - -.Lcbc_done: - vst1.8 {q6},[r4] -.Lcbc_abort: - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r8,pc} -.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt -.globl aes_v8_ctr32_encrypt_blocks -.type aes_v8_ctr32_encrypt_blocks,%function -.align 5 -aes_v8_ctr32_encrypt_blocks: - mov ip,sp - stmdb sp!,{r4-r10,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so - ldr r4, [ip] @ load remaining arg - ldr r5,[r3,#240] - - ldr r8, [r4, #12] - vld1.32 {q0},[r4] - - vld1.32 {q8-q9},[r3] @ load key schedule... - sub r5,r5,#6 - add r7,r3,r5,lsl#4 @ pointer to last 7 round keys - sub r5,r5,#2 - vld1.32 {q10-q11},[r7]! - vld1.32 {q12-q13},[r7]! - vld1.32 {q14-q15},[r7]! - vld1.32 {q7},[r7] - - add r7,r3,#32 - mov r6,r5 - - subs r2,r2,#2 - blo .Lctr32_tail - -#ifndef __ARMEB__ - rev r8, r8 -#endif - vorr q1,q0,q0 - add r8, r8, #1 - vorr q6,q0,q0 - rev r10, r8 - cmp r5,#2 - vmov.32 d3[1],r10 - beq .Lctr32_128 - -.Loop2x_ctr32: - .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 - vld1.32 {q8},[r7]! - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - subs r6,r6,#2 - .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 - vld1.32 {q9},[r7]! - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - bgt .Loop2x_ctr32 - - .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 - .byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 - vorr q0,q6,q6 - .byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 - vorr q1,q6,q6 - .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 - .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 - vld1.8 {q2},[r0]! - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - vld1.8 {q3},[r0]! - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 - add r8,r8,#1 - .byte 0x24,0x83,0xb0,0xf3 @ aese q4,q10 - .byte 0x24,0xa3,0xb0,0xf3 @ aese q5,q10 - rev r9,r8 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 - add r8,r8,#1 - .byte 0x26,0x83,0xb0,0xf3 @ aese q4,q11 - .byte 0x26,0xa3,0xb0,0xf3 @ aese q5,q11 - veor q2,q2,q7 - rev r10,r8 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 - veor q3,q3,q7 - mov r7,r3 - .byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 - .byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 - subs r2,r2,#2 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 - vld1.32 {q8-q9},[r7]! @ re-pre-load rndkey[0-1] - .byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 - .byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 - .byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 - .byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 - vmov.32 d1[1], r9 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - vmov.32 d3[1], r10 - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 - .byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15 - .byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 - - mov r6,r5 - veor q2,q2,q4 - veor q3,q3,q5 - vst1.8 {q2},[r1]! - vst1.8 {q3},[r1]! - bhs .Loop2x_ctr32 - - adds r2,r2,#2 - beq .Lctr32_done - b .Lctr32_tail - -.Lctr32_128: - vld1.32 {q4-q5},[r7] - -.Loop2x_ctr32_128: - .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - vld1.8 {q2},[r0]! - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - vld1.8 {q3},[r0]! - .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 - add r8,r8,#1 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - rev r9,r8 - .byte 0x08,0x03,0xb0,0xf3 @ aese q0,q4 - .byte 0x08,0x23,0xb0,0xf3 @ aese q1,q4 - add r8,r8,#1 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - rev r10,r8 - .byte 0x0a,0x03,0xb0,0xf3 @ aese q0,q5 - .byte 0x0a,0x23,0xb0,0xf3 @ aese q1,q5 - subs r2,r2,#2 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 - .byte 0x24,0x23,0xb0,0xf3 @ aese q1,q10 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 - .byte 0x26,0x23,0xb0,0xf3 @ aese q1,q11 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 - .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 - .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 - .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - veor q2,q2,q7 - .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 - veor q3,q3,q7 - .byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15 - - veor q2,q2,q0 - vorr q0,q6,q6 - veor q3,q3,q1 - vorr q1,q6,q6 - vst1.8 {q2},[r1]! - vmov.32 d1[1], r9 - vst1.8 {q3},[r1]! - vmov.32 d3[1], r10 - bhs .Loop2x_ctr32_128 - - adds r2,r2,#2 - beq .Lctr32_done - -.Lctr32_tail: - .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - vld1.32 {q8},[r7]! - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - subs r6,r6,#2 - .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - vld1.32 {q9},[r7]! - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - bgt .Lctr32_tail - - .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - vld1.8 {q2},[r0] - .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 - .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - veor q2,q2,q7 - .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 - - veor q2,q2,q0 - vst1.8 {q2},[r1] - -.Lctr32_done: - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r10,pc} -.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks -#endif diff --git a/app/openssl/crypto/aes/asm/aesv8-armx.pl b/app/openssl/crypto/aes/asm/aesv8-armx.pl deleted file mode 100644 index 415dc04a..00000000 --- a/app/openssl/crypto/aes/asm/aesv8-armx.pl +++ /dev/null @@ -1,980 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# This module implements support for ARMv8 AES instructions. The -# module is endian-agnostic in sense that it supports both big- and -# little-endian cases. As does it support both 32- and 64-bit modes -# of operation. Latter is achieved by limiting amount of utilized -# registers to 16, which implies additional instructions. This has -# no effect on mighty Apple A7, as results are literally equal to -# the theoretical estimates based on instruction latencies and issue -# rate. It remains to be seen how does it affect other platforms... -# -# Performance in cycles per byte processed with 128-bit key: -# -# CBC enc CBC dec CTR -# Apple A7 2.39 1.20 1.20 -# Cortex-A5x n/a n/a n/a - -$flavour = shift; -open STDOUT,">".shift; - -$prefix="aes_v8"; - -$code=<<___; -#include "arm_arch.h" - -#if __ARM_ARCH__>=7 -.text -___ -$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); -$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); - -# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax, -# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to -# maintain both 32- and 64-bit codes within single module and -# transliterate common code to either flavour with regex vodoo. -# -{{{ -my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12"); -my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)= - $flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10)); - - -$code.=<<___; -.align 5 -rcon: -.long 0x01,0x01,0x01,0x01 -.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat -.long 0x1b,0x1b,0x1b,0x1b - -.globl ${prefix}_set_encrypt_key -.type ${prefix}_set_encrypt_key,%function -.align 5 -${prefix}_set_encrypt_key: -.Lenc_key: -___ -$code.=<<___ if ($flavour =~ /64/); - stp x29,x30,[sp,#-16]! - add x29,sp,#0 -___ -$code.=<<___; - adr $ptr,rcon - cmp $bits,#192 - - veor $zero,$zero,$zero - vld1.8 {$in0},[$inp],#16 - mov $bits,#8 // reuse $bits - vld1.32 {$rcon,$mask},[$ptr],#32 - - b.lt .Loop128 - b.eq .L192 - b .L256 - -.align 4 -.Loop128: - vtbl.8 $key,{$in0},$mask - vext.8 $tmp,$zero,$in0,#12 - vst1.32 {$in0},[$out],#16 - aese $key,$zero - subs $bits,$bits,#1 - - veor $in0,$in0,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $in0,$in0,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $key,$key,$rcon - veor $in0,$in0,$tmp - vshl.u8 $rcon,$rcon,#1 - veor $in0,$in0,$key - b.ne .Loop128 - - vld1.32 {$rcon},[$ptr] - - vtbl.8 $key,{$in0},$mask - vext.8 $tmp,$zero,$in0,#12 - vst1.32 {$in0},[$out],#16 - aese $key,$zero - - veor $in0,$in0,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $in0,$in0,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $key,$key,$rcon - veor $in0,$in0,$tmp - vshl.u8 $rcon,$rcon,#1 - veor $in0,$in0,$key - - vtbl.8 $key,{$in0},$mask - vext.8 $tmp,$zero,$in0,#12 - vst1.32 {$in0},[$out],#16 - aese $key,$zero - - veor $in0,$in0,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $in0,$in0,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $key,$key,$rcon - veor $in0,$in0,$tmp - veor $in0,$in0,$key - vst1.32 {$in0},[$out] - add $out,$out,#0x50 - - mov $rounds,#10 - b .Ldone - -.align 4 -.L192: - vld1.8 {$in1},[$inp],#8 - vmov.i8 $key,#8 // borrow $key - vst1.32 {$in0},[$out],#16 - vsub.i8 $mask,$mask,$key // adjust the mask - -.Loop192: - vtbl.8 $key,{$in1},$mask - vext.8 $tmp,$zero,$in0,#12 - vst1.32 {$in1},[$out],#8 - aese $key,$zero - subs $bits,$bits,#1 - - veor $in0,$in0,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $in0,$in0,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $in0,$in0,$tmp - - vdup.32 $tmp,${in0}[3] - veor $tmp,$tmp,$in1 - veor $key,$key,$rcon - vext.8 $in1,$zero,$in1,#12 - vshl.u8 $rcon,$rcon,#1 - veor $in1,$in1,$tmp - veor $in0,$in0,$key - veor $in1,$in1,$key - vst1.32 {$in0},[$out],#16 - b.ne .Loop192 - - mov $rounds,#12 - add $out,$out,#0x20 - b .Ldone - -.align 4 -.L256: - vld1.8 {$in1},[$inp] - mov $bits,#7 - mov $rounds,#14 - vst1.32 {$in0},[$out],#16 - -.Loop256: - vtbl.8 $key,{$in1},$mask - vext.8 $tmp,$zero,$in0,#12 - vst1.32 {$in1},[$out],#16 - aese $key,$zero - subs $bits,$bits,#1 - - veor $in0,$in0,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $in0,$in0,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $key,$key,$rcon - veor $in0,$in0,$tmp - vshl.u8 $rcon,$rcon,#1 - veor $in0,$in0,$key - vst1.32 {$in0},[$out],#16 - b.eq .Ldone - - vdup.32 $key,${in0}[3] // just splat - vext.8 $tmp,$zero,$in1,#12 - aese $key,$zero - - veor $in1,$in1,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $in1,$in1,$tmp - vext.8 $tmp,$zero,$tmp,#12 - veor $in1,$in1,$tmp - - veor $in1,$in1,$key - b .Loop256 - -.Ldone: - str $rounds,[$out] - - eor x0,x0,x0 // return value - `"ldr x29,[sp],#16" if ($flavour =~ /64/)` - ret -.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key - -.globl ${prefix}_set_decrypt_key -.type ${prefix}_set_decrypt_key,%function -.align 5 -${prefix}_set_decrypt_key: -___ -$code.=<<___ if ($flavour =~ /64/); - stp x29,x30,[sp,#-16]! - add x29,sp,#0 -___ -$code.=<<___ if ($flavour !~ /64/); - stmdb sp!,{r4,lr} -___ -$code.=<<___; - bl .Lenc_key - - sub $out,$out,#240 // restore original $out - mov x4,#-16 - add $inp,$out,x12,lsl#4 // end of key schedule - - vld1.32 {v0.16b},[$out] - vld1.32 {v1.16b},[$inp] - vst1.32 {v0.16b},[$inp],x4 - vst1.32 {v1.16b},[$out],#16 - -.Loop_imc: - vld1.32 {v0.16b},[$out] - vld1.32 {v1.16b},[$inp] - aesimc v0.16b,v0.16b - aesimc v1.16b,v1.16b - vst1.32 {v0.16b},[$inp],x4 - vst1.32 {v1.16b},[$out],#16 - cmp $inp,$out - b.hi .Loop_imc - - vld1.32 {v0.16b},[$out] - aesimc v0.16b,v0.16b - vst1.32 {v0.16b},[$inp] - - eor x0,x0,x0 // return value -___ -$code.=<<___ if ($flavour !~ /64/); - ldmia sp!,{r4,pc} -___ -$code.=<<___ if ($flavour =~ /64/); - ldp x29,x30,[sp],#16 - ret -___ -$code.=<<___; -.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key -___ -}}} -{{{ -sub gen_block () { -my $dir = shift; -my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc"); -my ($inp,$out,$key)=map("x$_",(0..2)); -my $rounds="w3"; -my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3)); - -$code.=<<___; -.globl ${prefix}_${dir}crypt -.type ${prefix}_${dir}crypt,%function -.align 5 -${prefix}_${dir}crypt: - ldr $rounds,[$key,#240] - vld1.32 {$rndkey0},[$key],#16 - vld1.8 {$inout},[$inp] - sub $rounds,$rounds,#2 - vld1.32 {$rndkey1},[$key],#16 - -.Loop_${dir}c: - aes$e $inout,$rndkey0 - vld1.32 {$rndkey0},[$key],#16 - aes$mc $inout,$inout - subs $rounds,$rounds,#2 - aes$e $inout,$rndkey1 - vld1.32 {$rndkey1},[$key],#16 - aes$mc $inout,$inout - b.gt .Loop_${dir}c - - aes$e $inout,$rndkey0 - vld1.32 {$rndkey0},[$key] - aes$mc $inout,$inout - aes$e $inout,$rndkey1 - veor $inout,$inout,$rndkey0 - - vst1.8 {$inout},[$out] - ret -.size ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt -___ -} -&gen_block("en"); -&gen_block("de"); -}}} -{{{ -my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5"; -my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12"); -my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); - -my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); - -### q8-q15 preloaded key schedule - -$code.=<<___; -.globl ${prefix}_cbc_encrypt -.type ${prefix}_cbc_encrypt,%function -.align 5 -${prefix}_cbc_encrypt: -___ -$code.=<<___ if ($flavour =~ /64/); - stp x29,x30,[sp,#-16]! - add x29,sp,#0 -___ -$code.=<<___ if ($flavour !~ /64/); - mov ip,sp - stmdb sp!,{r4-r8,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so - ldmia ip,{r4-r5} @ load remaining args -___ -$code.=<<___; - subs $len,$len,#16 - mov $step,#16 - b.lo .Lcbc_abort - cclr $step,eq - - cmp $enc,#0 // en- or decrypting? - ldr $rounds,[$key,#240] - and $len,$len,#-16 - vld1.8 {$ivec},[$ivp] - vld1.8 {$dat},[$inp],$step - - vld1.32 {q8-q9},[$key] // load key schedule... - sub $rounds,$rounds,#6 - add $key_,$key,x5,lsl#4 // pointer to last 7 round keys - sub $rounds,$rounds,#2 - vld1.32 {q10-q11},[$key_],#32 - vld1.32 {q12-q13},[$key_],#32 - vld1.32 {q14-q15},[$key_],#32 - vld1.32 {$rndlast},[$key_] - - add $key_,$key,#32 - mov $cnt,$rounds - b.eq .Lcbc_dec - - cmp $rounds,#2 - veor $dat,$dat,$ivec - veor $rndzero_n_last,q8,$rndlast - b.eq .Lcbc_enc128 - -.Loop_cbc_enc: - aese $dat,q8 - vld1.32 {q8},[$key_],#16 - aesmc $dat,$dat - subs $cnt,$cnt,#2 - aese $dat,q9 - vld1.32 {q9},[$key_],#16 - aesmc $dat,$dat - b.gt .Loop_cbc_enc - - aese $dat,q8 - aesmc $dat,$dat - subs $len,$len,#16 - aese $dat,q9 - aesmc $dat,$dat - cclr $step,eq - aese $dat,q10 - aesmc $dat,$dat - add $key_,$key,#16 - aese $dat,q11 - aesmc $dat,$dat - vld1.8 {q8},[$inp],$step - aese $dat,q12 - aesmc $dat,$dat - veor q8,q8,$rndzero_n_last - aese $dat,q13 - aesmc $dat,$dat - vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] - aese $dat,q14 - aesmc $dat,$dat - aese $dat,q15 - - mov $cnt,$rounds - veor $ivec,$dat,$rndlast - vst1.8 {$ivec},[$out],#16 - b.hs .Loop_cbc_enc - - b .Lcbc_done - -.align 5 -.Lcbc_enc128: - vld1.32 {$in0-$in1},[$key_] - aese $dat,q8 - aesmc $dat,$dat - b .Lenter_cbc_enc128 -.Loop_cbc_enc128: - aese $dat,q8 - aesmc $dat,$dat - vst1.8 {$ivec},[$out],#16 -.Lenter_cbc_enc128: - aese $dat,q9 - aesmc $dat,$dat - subs $len,$len,#16 - aese $dat,$in0 - aesmc $dat,$dat - cclr $step,eq - aese $dat,$in1 - aesmc $dat,$dat - aese $dat,q10 - aesmc $dat,$dat - aese $dat,q11 - aesmc $dat,$dat - vld1.8 {q8},[$inp],$step - aese $dat,q12 - aesmc $dat,$dat - aese $dat,q13 - aesmc $dat,$dat - aese $dat,q14 - aesmc $dat,$dat - veor q8,q8,$rndzero_n_last - aese $dat,q15 - veor $ivec,$dat,$rndlast - b.hs .Loop_cbc_enc128 - - vst1.8 {$ivec},[$out],#16 - b .Lcbc_done - -.align 5 -.Lcbc_dec128: - vld1.32 {$tmp0-$tmp1},[$key_] - veor $ivec,$ivec,$rndlast - veor $in0,$dat0,$rndlast - mov $step1,$step - -.Loop2x_cbc_dec128: - aesd $dat0,q8 - aesd $dat1,q8 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - subs $len,$len,#32 - aesd $dat0,q9 - aesd $dat1,q9 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - cclr $step,lo - aesd $dat0,$tmp0 - aesd $dat1,$tmp0 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - cclr $step1,ls - aesd $dat0,$tmp1 - aesd $dat1,$tmp1 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - aesd $dat0,q10 - aesd $dat1,q10 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - aesd $dat0,q11 - aesd $dat1,q11 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - aesd $dat0,q12 - aesd $dat1,q12 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - aesd $dat0,q13 - aesd $dat1,q13 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - aesd $dat0,q14 - aesd $dat1,q14 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - aesd $dat0,q15 - aesd $dat1,q15 - - veor $ivec,$ivec,$dat0 - vld1.8 {$dat0},[$inp],$step - veor $in0,$in0,$dat1 - vld1.8 {$dat1},[$inp],$step1 - vst1.8 {$ivec},[$out],#16 - veor $ivec,$in1,$rndlast - vst1.8 {$in0},[$out],#16 - veor $in0,$dat0,$rndlast - vorr $in1,$dat1,$dat1 - b.hs .Loop2x_cbc_dec128 - - adds $len,$len,#32 - veor $ivec,$ivec,$rndlast - b.eq .Lcbc_done - veor $in0,$in0,$rndlast - b .Lcbc_dec_tail - -.align 5 -.Lcbc_dec: - subs $len,$len,#16 - vorr $in0,$dat,$dat - b.lo .Lcbc_dec_tail - - cclr $step,eq - cmp $rounds,#2 - vld1.8 {$dat1},[$inp],$step - vorr $in1,$dat1,$dat1 - b.eq .Lcbc_dec128 - -.Loop2x_cbc_dec: - aesd $dat0,q8 - aesd $dat1,q8 - vld1.32 {q8},[$key_],#16 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - subs $cnt,$cnt,#2 - aesd $dat0,q9 - aesd $dat1,q9 - vld1.32 {q9},[$key_],#16 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - b.gt .Loop2x_cbc_dec - - aesd $dat0,q8 - aesd $dat1,q8 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - veor $tmp0,$ivec,$rndlast - veor $tmp1,$in0,$rndlast - aesd $dat0,q9 - aesd $dat1,q9 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - vorr $ivec,$in1,$in1 - subs $len,$len,#32 - aesd $dat0,q10 - aesd $dat1,q10 - aesimc $dat0,$dat0 - cclr $step,lo - aesimc $dat1,$dat1 - mov $key_,$key - aesd $dat0,q11 - aesd $dat1,q11 - aesimc $dat0,$dat0 - vld1.8 {$in0},[$inp],$step - aesimc $dat1,$dat1 - cclr $step,ls - aesd $dat0,q12 - aesd $dat1,q12 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - vld1.8 {$in1},[$inp],$step - aesd $dat0,q13 - aesd $dat1,q13 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] - aesd $dat0,q14 - aesd $dat1,q14 - aesimc $dat0,$dat0 - aesimc $dat1,$dat1 - vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] - aesd $dat0,q15 - aesd $dat1,q15 - - mov $cnt,$rounds - veor $tmp0,$tmp0,$dat0 - veor $tmp1,$tmp1,$dat1 - vorr $dat0,$in0,$in0 - vst1.8 {$tmp0},[$out],#16 - vorr $dat1,$in1,$in1 - vst1.8 {$tmp1},[$out],#16 - b.hs .Loop2x_cbc_dec - - adds $len,$len,#32 - b.eq .Lcbc_done - -.Lcbc_dec_tail: - aesd $dat,q8 - vld1.32 {q8},[$key_],#16 - aesimc $dat,$dat - subs $cnt,$cnt,#2 - aesd $dat,q9 - vld1.32 {q9},[$key_],#16 - aesimc $dat,$dat - b.gt .Lcbc_dec_tail - - aesd $dat,q8 - aesimc $dat,$dat - aesd $dat,q9 - aesimc $dat,$dat - veor $tmp,$ivec,$rndlast - aesd $dat,q10 - aesimc $dat,$dat - vorr $ivec,$in0,$in0 - aesd $dat,q11 - aesimc $dat,$dat - aesd $dat,q12 - aesimc $dat,$dat - aesd $dat,q13 - aesimc $dat,$dat - aesd $dat,q14 - aesimc $dat,$dat - aesd $dat,q15 - - veor $tmp,$tmp,$dat - vst1.8 {$tmp},[$out],#16 - -.Lcbc_done: - vst1.8 {$ivec},[$ivp] -.Lcbc_abort: -___ -$code.=<<___ if ($flavour !~ /64/); - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r8,pc} -___ -$code.=<<___ if ($flavour =~ /64/); - ldr x29,[sp],#16 - ret -___ -$code.=<<___; -.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt -___ -}}} -{{{ -my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); -my ($rounds,$cnt,$key_,$ctr,$tctr,$tctr1)=("w5","w6","x7","w8","w9","w10"); -my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); - -my ($dat,$tmp)=($dat0,$tmp0); - -### q8-q15 preloaded key schedule - -$code.=<<___; -.globl ${prefix}_ctr32_encrypt_blocks -.type ${prefix}_ctr32_encrypt_blocks,%function -.align 5 -${prefix}_ctr32_encrypt_blocks: -___ -$code.=<<___ if ($flavour =~ /64/); - stp x29,x30,[sp,#-16]! - add x29,sp,#0 -___ -$code.=<<___ if ($flavour !~ /64/); - mov ip,sp - stmdb sp!,{r4-r10,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so - ldr r4, [ip] @ load remaining arg -___ -$code.=<<___; - ldr $rounds,[$key,#240] - - ldr $ctr, [$ivp, #12] - vld1.32 {$dat0},[$ivp] - - vld1.32 {q8-q9},[$key] // load key schedule... - sub $rounds,$rounds,#6 - add $key_,$key,x5,lsl#4 // pointer to last 7 round keys - sub $rounds,$rounds,#2 - vld1.32 {q10-q11},[$key_],#32 - vld1.32 {q12-q13},[$key_],#32 - vld1.32 {q14-q15},[$key_],#32 - vld1.32 {$rndlast},[$key_] - - add $key_,$key,#32 - mov $cnt,$rounds - - subs $len,$len,#2 - b.lo .Lctr32_tail - -#ifndef __ARMEB__ - rev $ctr, $ctr -#endif - vorr $dat1,$dat0,$dat0 - add $ctr, $ctr, #1 - vorr $ivec,$dat0,$dat0 - rev $tctr1, $ctr - cmp $rounds,#2 - vmov.32 ${dat1}[3],$tctr1 - b.eq .Lctr32_128 - -.Loop2x_ctr32: - aese $dat0,q8 - aese $dat1,q8 - vld1.32 {q8},[$key_],#16 - aesmc $dat0,$dat0 - aesmc $dat1,$dat1 - subs $cnt,$cnt,#2 - aese $dat0,q9 - aese $dat1,q9 - vld1.32 {q9},[$key_],#16 - aesmc $dat0,$dat0 - aesmc $dat1,$dat1 - b.gt .Loop2x_ctr32 - - aese $dat0,q8 - aese $dat1,q8 - aesmc $tmp0,$dat0 - vorr $dat0,$ivec,$ivec - aesmc $tmp1,$dat1 - vorr $dat1,$ivec,$ivec - aese $tmp0,q9 - aese $tmp1,q9 - vld1.8 {$in0},[$inp],#16 - aesmc $tmp0,$tmp0 - vld1.8 {$in1},[$inp],#16 - aesmc $tmp1,$tmp1 - add $ctr,$ctr,#1 - aese $tmp0,q10 - aese $tmp1,q10 - rev $tctr,$ctr - aesmc $tmp0,$tmp0 - aesmc $tmp1,$tmp1 - add $ctr,$ctr,#1 - aese $tmp0,q11 - aese $tmp1,q11 - veor $in0,$in0,$rndlast - rev $tctr1,$ctr - aesmc $tmp0,$tmp0 - aesmc $tmp1,$tmp1 - veor $in1,$in1,$rndlast - mov $key_,$key - aese $tmp0,q12 - aese $tmp1,q12 - subs $len,$len,#2 - aesmc $tmp0,$tmp0 - aesmc $tmp1,$tmp1 - vld1.32 {q8-q9},[$key_],#32 // re-pre-load rndkey[0-1] - aese $tmp0,q13 - aese $tmp1,q13 - aesmc $tmp0,$tmp0 - aesmc $tmp1,$tmp1 - aese $tmp0,q14 - aese $tmp1,q14 - vmov.32 ${dat0}[3], $tctr - aesmc $tmp0,$tmp0 - vmov.32 ${dat1}[3], $tctr1 - aesmc $tmp1,$tmp1 - aese $tmp0,q15 - aese $tmp1,q15 - - mov $cnt,$rounds - veor $in0,$in0,$tmp0 - veor $in1,$in1,$tmp1 - vst1.8 {$in0},[$out],#16 - vst1.8 {$in1},[$out],#16 - b.hs .Loop2x_ctr32 - - adds $len,$len,#2 - b.eq .Lctr32_done - b .Lctr32_tail - -.Lctr32_128: - vld1.32 {$tmp0-$tmp1},[$key_] - -.Loop2x_ctr32_128: - aese $dat0,q8 - aese $dat1,q8 - aesmc $dat0,$dat0 - vld1.8 {$in0},[$inp],#16 - aesmc $dat1,$dat1 - vld1.8 {$in1},[$inp],#16 - aese $dat0,q9 - aese $dat1,q9 - add $ctr,$ctr,#1 - aesmc $dat0,$dat0 - aesmc $dat1,$dat1 - rev $tctr,$ctr - aese $dat0,$tmp0 - aese $dat1,$tmp0 - add $ctr,$ctr,#1 - aesmc $dat0,$dat0 - aesmc $dat1,$dat1 - rev $tctr1,$ctr - aese $dat0,$tmp1 - aese $dat1,$tmp1 - subs $len,$len,#2 - aesmc $dat0,$dat0 - aesmc $dat1,$dat1 - aese $dat0,q10 - aese $dat1,q10 - aesmc $dat0,$dat0 - aesmc $dat1,$dat1 - aese $dat0,q11 - aese $dat1,q11 - aesmc $dat0,$dat0 - aesmc $dat1,$dat1 - aese $dat0,q12 - aese $dat1,q12 - aesmc $dat0,$dat0 - aesmc $dat1,$dat1 - aese $dat0,q13 - aese $dat1,q13 - aesmc $dat0,$dat0 - aesmc $dat1,$dat1 - aese $dat0,q14 - aese $dat1,q14 - aesmc $dat0,$dat0 - aesmc $dat1,$dat1 - veor $in0,$in0,$rndlast - aese $dat0,q15 - veor $in1,$in1,$rndlast - aese $dat1,q15 - - veor $in0,$in0,$dat0 - vorr $dat0,$ivec,$ivec - veor $in1,$in1,$dat1 - vorr $dat1,$ivec,$ivec - vst1.8 {$in0},[$out],#16 - vmov.32 ${dat0}[3], $tctr - vst1.8 {$in1},[$out],#16 - vmov.32 ${dat1}[3], $tctr1 - b.hs .Loop2x_ctr32_128 - - adds $len,$len,#2 - b.eq .Lctr32_done - -.Lctr32_tail: - aese $dat,q8 - vld1.32 {q8},[$key_],#16 - aesmc $dat,$dat - subs $cnt,$cnt,#2 - aese $dat,q9 - vld1.32 {q9},[$key_],#16 - aesmc $dat,$dat - b.gt .Lctr32_tail - - aese $dat,q8 - aesmc $dat,$dat - aese $dat,q9 - aesmc $dat,$dat - vld1.8 {$in0},[$inp] - aese $dat,q10 - aesmc $dat,$dat - aese $dat,q11 - aesmc $dat,$dat - aese $dat,q12 - aesmc $dat,$dat - aese $dat,q13 - aesmc $dat,$dat - aese $dat,q14 - aesmc $dat,$dat - veor $in0,$in0,$rndlast - aese $dat,q15 - - veor $in0,$in0,$dat - vst1.8 {$in0},[$out] - -.Lctr32_done: -___ -$code.=<<___ if ($flavour !~ /64/); - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r10,pc} -___ -$code.=<<___ if ($flavour =~ /64/); - ldr x29,[sp],#16 - ret -___ -$code.=<<___; -.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks -___ -}}} -$code.=<<___; -#endif -___ -######################################## -if ($flavour =~ /64/) { ######## 64-bit code - my %opcode = ( - "aesd" => 0x4e285800, "aese" => 0x4e284800, - "aesimc"=> 0x4e287800, "aesmc" => 0x4e286800 ); - - local *unaes = sub { - my ($mnemonic,$arg)=@_; - - $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o && - sprintf ".inst\t0x%08x\t//%s %s", - $opcode{$mnemonic}|$1|($2<<5), - $mnemonic,$arg; - }; - - foreach(split("\n",$code)) { - s/\`([^\`]*)\`/eval($1)/geo; - - s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers - s/@\s/\/\//o; # old->new style commentary - - #s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or - s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or - s/vmov\.i8/movi/o or # fix up legacy mnemonics - s/vext\.8/ext/o or - s/vrev32\.8/rev32/o or - s/vtst\.8/cmtst/o or - s/vshr/ushr/o or - s/^(\s+)v/$1/o or # strip off v prefix - s/\bbx\s+lr\b/ret/o; - - # fix up remainig legacy suffixes - s/\.[ui]?8//o; - m/\],#8/o and s/\.16b/\.8b/go; - s/\.[ui]?32//o and s/\.16b/\.4s/go; - s/\.[ui]?64//o and s/\.16b/\.2d/go; - s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; - - print $_,"\n"; - } -} else { ######## 32-bit code - my %opcode = ( - "aesd" => 0xf3b00340, "aese" => 0xf3b00300, - "aesimc"=> 0xf3b003c0, "aesmc" => 0xf3b00380 ); - - local *unaes = sub { - my ($mnemonic,$arg)=@_; - - if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) { - my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) - |(($2&7)<<1) |(($2&8)<<2); - # since ARMv7 instructions are always encoded little-endian. - # correct solution is to use .inst directive, but older - # assemblers don't implement it:-( - sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", - $word&0xff,($word>>8)&0xff, - ($word>>16)&0xff,($word>>24)&0xff, - $mnemonic,$arg; - } - }; - - sub unvtbl { - my $arg=shift; - - $arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o && - sprintf "vtbl.8 d%d,{q%d},d%d\n\t". - "vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1; - } - - sub unvdup32 { - my $arg=shift; - - $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && - sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; - } - - sub unvmov32 { - my $arg=shift; - - $arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o && - sprintf "vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3; - } - - foreach(split("\n",$code)) { - s/\`([^\`]*)\`/eval($1)/geo; - - s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers - s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers - s/\/\/\s?/@ /o; # new->old style commentary - - # fix up remainig new-style suffixes - s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo or - s/\],#[0-9]+/]!/o; - - s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or - s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or - s/vtbl\.8\s+(.*)/unvtbl($1)/geo or - s/vdup\.32\s+(.*)/unvdup32($1)/geo or - s/vmov\.32\s+(.*)/unvmov32($1)/geo or - s/^(\s+)b\./$1b/o or - s/^(\s+)ret/$1bx\tlr/o; - - print $_,"\n"; - } -} - -close STDOUT; diff --git a/app/openssl/crypto/arm64cpuid.S b/app/openssl/crypto/arm64cpuid.S deleted file mode 100644 index 4778ac1d..00000000 --- a/app/openssl/crypto/arm64cpuid.S +++ /dev/null @@ -1,46 +0,0 @@ -#include "arm_arch.h" - -.text -.arch armv8-a+crypto - -.align 5 -.global _armv7_neon_probe -.type _armv7_neon_probe,%function -_armv7_neon_probe: - orr v15.16b, v15.16b, v15.16b - ret -.size _armv7_neon_probe,.-_armv7_neon_probe - -.global _armv7_tick -.type _armv7_tick,%function -_armv7_tick: - mrs x0, CNTVCT_EL0 - ret -.size _armv7_tick,.-_armv7_tick - -.global _armv8_aes_probe -.type _armv8_aes_probe,%function -_armv8_aes_probe: - aese v0.16b, v0.16b - ret -.size _armv8_aes_probe,.-_armv8_aes_probe - -.global _armv8_sha1_probe -.type _armv8_sha1_probe,%function -_armv8_sha1_probe: - sha1h s0, s0 - ret -.size _armv8_sha1_probe,.-_armv8_sha1_probe - -.global _armv8_sha256_probe -.type _armv8_sha256_probe,%function -_armv8_sha256_probe: - sha256su0 v0.4s, v0.4s - ret -.size _armv8_sha256_probe,.-_armv8_sha256_probe -.global _armv8_pmull_probe -.type _armv8_pmull_probe,%function -_armv8_pmull_probe: - pmull v0.1q, v0.1d, v0.1d - ret -.size _armv8_pmull_probe,.-_armv8_pmull_probe diff --git a/app/openssl/crypto/arm_arch.h b/app/openssl/crypto/arm_arch.h index 6fa87244..5a831076 100644 --- a/app/openssl/crypto/arm_arch.h +++ b/app/openssl/crypto/arm_arch.h @@ -10,24 +10,13 @@ # define __ARMEL__ # endif # elif defined(__GNUC__) -# if defined(__aarch64__) -# define __ARM_ARCH__ 8 -# if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ -# define __ARMEB__ -# else -# define __ARMEL__ -# endif /* * Why doesn't gcc define __ARM_ARCH__? Instead it defines * bunch of below macros. See all_architectires[] table in * gcc/config/arm/arm.c. On a side note it defines * __ARMEL__/__ARMEB__ for little-/big-endian. */ -# elif defined(__ARM_ARCH) -# define __ARM_ARCH__ __ARM_ARCH -# elif defined(__ARM_ARCH_8A__) -# define __ARM_ARCH__ 8 -# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ +# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \ defined(__ARM_ARCH_7EM__) # define __ARM_ARCH__ 7 @@ -54,13 +43,9 @@ #if !__ASSEMBLER__ extern unsigned int OPENSSL_armcap_P; -#endif #define ARMV7_NEON (1<<0) #define ARMV7_TICK (1<<1) -#define ARMV8_AES (1<<2) -#define ARMV8_SHA1 (1<<3) -#define ARMV8_SHA256 (1<<4) -#define ARMV8_PMULL (1<<5) +#endif #endif diff --git a/app/openssl/crypto/armcap.c b/app/openssl/crypto/armcap.c index 7e46d07a..9abaf396 100644 --- a/app/openssl/crypto/armcap.c +++ b/app/openssl/crypto/armcap.c @@ -19,13 +19,9 @@ static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } * ARM compilers support inline assembler... */ void _armv7_neon_probe(void); -void _armv8_aes_probe(void); -void _armv8_sha1_probe(void); -void _armv8_sha256_probe(void); -void _armv8_pmull_probe(void); -unsigned long _armv7_tick(void); +unsigned int _armv7_tick(void); -unsigned long OPENSSL_rdtsc(void) +unsigned int OPENSSL_rdtsc(void) { if (OPENSSL_armcap_P & ARMV7_TICK) return _armv7_tick(); @@ -33,41 +29,9 @@ unsigned long OPENSSL_rdtsc(void) return 0; } -/* - * Use a weak reference to getauxval() so we can use it if it is available but - * don't break the build if it is not. - */ #if defined(__GNUC__) && __GNUC__>=2 void OPENSSL_cpuid_setup(void) __attribute__((constructor)); -extern unsigned long getauxval(unsigned long type) __attribute__((weak)); -#else -static unsigned long (*getauxval)(unsigned long) = NULL; #endif - -/* - * ARM puts the the feature bits for Crypto Extensions in AT_HWCAP2, whereas - * AArch64 used AT_HWCAP. - */ -#if defined(__arm__) || defined (__arm) -# define HWCAP 16 /* AT_HWCAP */ -# define HWCAP_NEON (1 << 12) - -# define HWCAP_CE 26 /* AT_HWCAP2 */ -# define HWCAP_CE_AES (1 << 0) -# define HWCAP_CE_PMULL (1 << 1) -# define HWCAP_CE_SHA1 (1 << 2) -# define HWCAP_CE_SHA256 (1 << 3) -#elif defined(__aarch64__) -# define HWCAP 16 /* AT_HWCAP */ -# define HWCAP_NEON (1 << 1) - -# define HWCAP_CE HWCAP -# define HWCAP_CE_AES (1 << 3) -# define HWCAP_CE_PMULL (1 << 4) -# define HWCAP_CE_SHA1 (1 << 5) -# define HWCAP_CE_SHA256 (1 << 6) -#endif - void OPENSSL_cpuid_setup(void) { char *e; @@ -80,7 +44,7 @@ void OPENSSL_cpuid_setup(void) if ((e=getenv("OPENSSL_armcap"))) { - OPENSSL_armcap_P=(unsigned int)strtoul(e,NULL,0); + OPENSSL_armcap_P=strtoul(e,NULL,0); return; } @@ -100,51 +64,10 @@ void OPENSSL_cpuid_setup(void) sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset); sigaction(SIGILL,&ill_act,&ill_oact); - if (getauxval != NULL) - { - if (getauxval(HWCAP) & HWCAP_NEON) - { - unsigned long hwcap = getauxval(HWCAP_CE); - - OPENSSL_armcap_P |= ARMV7_NEON; - - if (hwcap & HWCAP_CE_AES) - OPENSSL_armcap_P |= ARMV8_AES; - - if (hwcap & HWCAP_CE_PMULL) - OPENSSL_armcap_P |= ARMV8_PMULL; - - if (hwcap & HWCAP_CE_SHA1) - OPENSSL_armcap_P |= ARMV8_SHA1; - - if (hwcap & HWCAP_CE_SHA256) - OPENSSL_armcap_P |= ARMV8_SHA256; - } - } - else if (sigsetjmp(ill_jmp,1) == 0) + if (sigsetjmp(ill_jmp,1) == 0) { _armv7_neon_probe(); OPENSSL_armcap_P |= ARMV7_NEON; - if (sigsetjmp(ill_jmp,1) == 0) - { - _armv8_pmull_probe(); - OPENSSL_armcap_P |= ARMV8_PMULL|ARMV8_AES; - } - else if (sigsetjmp(ill_jmp,1) == 0) - { - _armv8_aes_probe(); - OPENSSL_armcap_P |= ARMV8_AES; - } - if (sigsetjmp(ill_jmp,1) == 0) - { - _armv8_sha1_probe(); - OPENSSL_armcap_P |= ARMV8_SHA1; - } - if (sigsetjmp(ill_jmp,1) == 0) - { - _armv8_sha256_probe(); - OPENSSL_armcap_P |= ARMV8_SHA256; - } } if (sigsetjmp(ill_jmp,1) == 0) { diff --git a/app/openssl/crypto/armv4cpuid.S b/app/openssl/crypto/armv4cpuid.S index add11d40..2d618dea 100644 --- a/app/openssl/crypto/armv4cpuid.S +++ b/app/openssl/crypto/armv4cpuid.S @@ -7,49 +7,17 @@ .global _armv7_neon_probe .type _armv7_neon_probe,%function _armv7_neon_probe: - .byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8 - .byte 0x1e,0xff,0x2f,0xe1 @ bx lr + .word 0xf26ee1fe @ vorr q15,q15,q15 + .word 0xe12fff1e @ bx lr .size _armv7_neon_probe,.-_armv7_neon_probe .global _armv7_tick .type _armv7_tick,%function _armv7_tick: - mrrc p15,1,r0,r1,c14 @ CNTVCT -#if __ARM_ARCH__>=5 - bx lr -#else - .word 0xe12fff1e @ bx lr -#endif + mrc p15,0,r0,c9,c13,0 + .word 0xe12fff1e @ bx lr .size _armv7_tick,.-_armv7_tick -.global _armv8_aes_probe -.type _armv8_aes_probe,%function -_armv8_aes_probe: - .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0 - .byte 0x1e,0xff,0x2f,0xe1 @ bx lr -.size _armv8_aes_probe,.-_armv8_aes_probe - -.global _armv8_sha1_probe -.type _armv8_sha1_probe,%function -_armv8_sha1_probe: - .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0 - .byte 0x1e,0xff,0x2f,0xe1 @ bx lr -.size _armv8_sha1_probe,.-_armv8_sha1_probe - -.global _armv8_sha256_probe -.type _armv8_sha256_probe,%function -_armv8_sha256_probe: - .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0 - .byte 0x1e,0xff,0x2f,0xe1 @ bx lr -.size _armv8_sha256_probe,.-_armv8_sha256_probe -.global _armv8_pmull_probe -.type _armv8_pmull_probe,%function -_armv8_pmull_probe: - .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0 - .byte 0x1e,0xff,0x2f,0xe1 @ bx lr -.size _armv8_pmull_probe,.-_armv8_pmull_probe - -.align 5 .global OPENSSL_atomic_add .type OPENSSL_atomic_add,%function OPENSSL_atomic_add: @@ -60,7 +28,7 @@ OPENSSL_atomic_add: cmp r2,#0 bne .Ladd mov r0,r3 - bx lr + .word 0xe12fff1e @ bx lr #else stmdb sp!,{r4-r6,lr} ldr r2,.Lspinlock @@ -113,13 +81,9 @@ OPENSSL_cleanse: adds r1,r1,#4 bne .Little .Lcleanse_done: -#if __ARM_ARCH__>=5 - bx lr -#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr -#endif .size OPENSSL_cleanse,.-OPENSSL_cleanse .global OPENSSL_wipe_cpu @@ -133,53 +97,41 @@ OPENSSL_wipe_cpu: eor ip,ip,ip tst r0,#1 beq .Lwipe_done - .byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0 - .byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1 - .byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2 - .byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3 - .byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8 - .byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9 - .byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10 - .byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11 - .byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12 - .byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13 - .byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14 - .byte 0xfe,0xe1,0x4e,0xf3 @ veor q14, q14, q14 + .word 0xf3000150 @ veor q0, q0, q0 + .word 0xf3022152 @ veor q1, q1, q1 + .word 0xf3044154 @ veor q2, q2, q2 + .word 0xf3066156 @ veor q3, q3, q3 + .word 0xf34001f0 @ veor q8, q8, q8 + .word 0xf34221f2 @ veor q9, q9, q9 + .word 0xf34441f4 @ veor q10, q10, q10 + .word 0xf34661f6 @ veor q11, q11, q11 + .word 0xf34881f8 @ veor q12, q12, q12 + .word 0xf34aa1fa @ veor q13, q13, q13 + .word 0xf34cc1fc @ veor q14, q14, q14 + .word 0xf34ee1fe @ veor q15, q15, q15 .Lwipe_done: mov r0,sp -#if __ARM_ARCH__>=5 - bx lr -#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr -#endif .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu .global OPENSSL_instrument_bus .type OPENSSL_instrument_bus,%function OPENSSL_instrument_bus: eor r0,r0,r0 -#if __ARM_ARCH__>=5 - bx lr -#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr -#endif .size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus .global OPENSSL_instrument_bus2 .type OPENSSL_instrument_bus2,%function OPENSSL_instrument_bus2: eor r0,r0,r0 -#if __ARM_ARCH__>=5 - bx lr -#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr -#endif .size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 .align 5 diff --git a/app/openssl/crypto/asn1/a_strnid.c b/app/openssl/crypto/asn1/a_strnid.c index 2afd5a41..2fc48c15 100644 --- a/app/openssl/crypto/asn1/a_strnid.c +++ b/app/openssl/crypto/asn1/a_strnid.c @@ -74,7 +74,7 @@ static int sk_table_cmp(const ASN1_STRING_TABLE * const *a, * certain software (e.g. Netscape) has problems with them. */ -static unsigned long global_mask = B_ASN1_UTF8STRING; +static unsigned long global_mask = 0xFFFFFFFFL; void ASN1_STRING_set_default_mask(unsigned long mask) { diff --git a/app/openssl/crypto/bio/bio.h b/app/openssl/crypto/bio/bio.h index d05fa22a..05699ab2 100644 --- a/app/openssl/crypto/bio/bio.h +++ b/app/openssl/crypto/bio/bio.h @@ -266,9 +266,6 @@ void BIO_clear_flags(BIO *b, int flags); #define BIO_RR_CONNECT 0x02 /* Returned from the accept BIO when an accept would have blocked */ #define BIO_RR_ACCEPT 0x03 -/* Returned from the SSL bio when the channel id retrieval code cannot find the - * private key. */ -#define BIO_RR_SSL_CHANNEL_ID_LOOKUP 0x04 /* These are passed by the BIO callback */ #define BIO_CB_FREE 0x01 diff --git a/app/openssl/crypto/bio/bss_dgram.c b/app/openssl/crypto/bio/bss_dgram.c index d9967e72..54c012c4 100644 --- a/app/openssl/crypto/bio/bss_dgram.c +++ b/app/openssl/crypto/bio/bss_dgram.c @@ -1333,7 +1333,7 @@ static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr) bio_dgram_sctp_data *data = NULL; socklen_t sockopt_len = 0; struct sctp_authkeyid authkeyid; - struct sctp_authkey *authkey = NULL; + struct sctp_authkey *authkey; data = (bio_dgram_sctp_data *)b->ptr; @@ -1388,11 +1388,6 @@ static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr) /* Add new key */ sockopt_len = sizeof(struct sctp_authkey) + 64 * sizeof(uint8_t); authkey = OPENSSL_malloc(sockopt_len); - if (authkey == NULL) - { - ret = -1; - break; - } memset(authkey, 0x00, sockopt_len); authkey->sca_keynumber = authkeyid.scact_keynumber + 1; #ifndef __FreeBSD__ @@ -1404,8 +1399,6 @@ static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr) memcpy(&authkey->sca_key[0], ptr, 64 * sizeof(uint8_t)); ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_KEY, authkey, sockopt_len); - OPENSSL_free(authkey); - authkey = NULL; if (ret < 0) break; /* Reset active key */ diff --git a/app/openssl/crypto/bn/asm/armv4-gf2m.S b/app/openssl/crypto/bn/asm/armv4-gf2m.S index 0fa25b26..038f0864 100644 --- a/app/openssl/crypto/bn/asm/armv4-gf2m.S +++ b/app/openssl/crypto/bn/asm/armv4-gf2m.S @@ -5,6 +5,31 @@ #if __ARM_ARCH__>=7 .fpu neon + +.type mul_1x1_neon,%function +.align 5 +mul_1x1_neon: + vshl.u64 d2,d16,#8 @ q1-q3 are slided + vmull.p8 q0,d16,d17 @ a·bb + vshl.u64 d4,d16,#16 + vmull.p8 q1,d2,d17 @ a<<8·bb + vshl.u64 d6,d16,#24 + vmull.p8 q2,d4,d17 @ a<<16·bb + vshr.u64 d2,#8 + vmull.p8 q3,d6,d17 @ a<<24·bb + vshl.u64 d3,#24 + veor d0,d2 + vshr.u64 d4,#16 + veor d0,d3 + vshl.u64 d5,#16 + veor d0,d4 + vshr.u64 d6,#24 + veor d0,d5 + vshl.u64 d7,#8 + veor d0,d6 + veor d0,d7 + .word 0xe12fff1e +.size mul_1x1_neon,.-mul_1x1_neon #endif .type mul_1x1_ialu,%function .align 5 @@ -95,53 +120,40 @@ bn_GF2m_mul_2x2: tst r12,#1 beq .Lialu - ldr r12, [sp] @ 5th argument - vmov.32 d26, r2, r1 - vmov.32 d27, r12, r3 - vmov.i64 d28, #0x0000ffffffffffff - vmov.i64 d29, #0x00000000ffffffff - vmov.i64 d30, #0x000000000000ffff - - vext.8 d2, d26, d26, #1 @ A1 - vmull.p8 q1, d2, d27 @ F = A1*B - vext.8 d0, d27, d27, #1 @ B1 - vmull.p8 q0, d26, d0 @ E = A*B1 - vext.8 d4, d26, d26, #2 @ A2 - vmull.p8 q2, d4, d27 @ H = A2*B - vext.8 d16, d27, d27, #2 @ B2 - vmull.p8 q8, d26, d16 @ G = A*B2 - vext.8 d6, d26, d26, #3 @ A3 - veor q1, q1, q0 @ L = E + F - vmull.p8 q3, d6, d27 @ J = A3*B - vext.8 d0, d27, d27, #3 @ B3 - veor q2, q2, q8 @ M = G + H - vmull.p8 q0, d26, d0 @ I = A*B3 - veor d2, d2, d3 @ t0 = (L) (P0 + P1) << 8 - vand d3, d3, d28 - vext.8 d16, d27, d27, #4 @ B4 - veor d4, d4, d5 @ t1 = (M) (P2 + P3) << 16 - vand d5, d5, d29 - vmull.p8 q8, d26, d16 @ K = A*B4 - veor q3, q3, q0 @ N = I + J - veor d2, d2, d3 - veor d4, d4, d5 - veor d6, d6, d7 @ t2 = (N) (P4 + P5) << 24 - vand d7, d7, d30 - vext.8 q1, q1, q1, #15 - veor d16, d16, d17 @ t3 = (K) (P6 + P7) << 32 - vmov.i64 d17, #0 - vext.8 q2, q2, q2, #14 - veor d6, d6, d7 - vmull.p8 q0, d26, d27 @ D = A*B - vext.8 q8, q8, q8, #12 - vext.8 q3, q3, q3, #13 - veor q1, q1, q2 - veor q3, q3, q8 - veor q0, q0, q1 - veor q0, q0, q3 - - vst1.32 {q0}, [r0] - bx lr @ bx lr + veor d18,d18 + vmov.32 d19,r3,r3 @ two copies of b1 + vmov.32 d18[0],r1 @ a1 + + veor d20,d20 + vld1.32 d21[],[sp,:32] @ two copies of b0 + vmov.32 d20[0],r2 @ a0 + mov r12,lr + + vmov d16,d18 + vmov d17,d19 + bl mul_1x1_neon @ a1·b1 + vmov d22,d0 + + vmov d16,d20 + vmov d17,d21 + bl mul_1x1_neon @ a0·b0 + vmov d23,d0 + + veor d16,d20,d18 + veor d17,d21,d19 + veor d20,d23,d22 + bl mul_1x1_neon @ (a0+a1)·(b0+b1) + + veor d0,d20 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1 + vshl.u64 d1,d0,#32 + vshr.u64 d0,d0,#32 + veor d23,d1 + veor d22,d0 + vst1.32 {d23[0]},[r0,:32]! + vst1.32 {d23[1]},[r0,:32]! + vst1.32 {d22[0]},[r0,:32]! + vst1.32 {d22[1]},[r0,:32] + bx r12 .align 4 .Lialu: #endif diff --git a/app/openssl/crypto/bn/asm/armv4-gf2m.pl b/app/openssl/crypto/bn/asm/armv4-gf2m.pl index 3f1f4f67..22ad1f85 100644 --- a/app/openssl/crypto/bn/asm/armv4-gf2m.pl +++ b/app/openssl/crypto/bn/asm/armv4-gf2m.pl @@ -20,21 +20,14 @@ # length, more for longer keys. Even though NEON 1x1 multiplication # runs in even less cycles, ~30, improvement is measurable only on # longer keys. One has to optimize code elsewhere to get NEON glow... -# -# April 2014 -# -# Double bn_GF2m_mul_2x2 performance by using algorithm from paper -# referred below, which improves ECDH and ECDSA verify benchmarks -# by 18-40%. -# -# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software -# Polynomial Multiplication on ARM Processors using the NEON Engine. -# -# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } +sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } + $code=<<___; #include "arm_arch.h" @@ -43,6 +36,31 @@ $code=<<___; #if __ARM_ARCH__>=7 .fpu neon + +.type mul_1x1_neon,%function +.align 5 +mul_1x1_neon: + vshl.u64 `&Dlo("q1")`,d16,#8 @ q1-q3 are slided $a + vmull.p8 `&Q("d0")`,d16,d17 @ a·bb + vshl.u64 `&Dlo("q2")`,d16,#16 + vmull.p8 q1,`&Dlo("q1")`,d17 @ a<<8·bb + vshl.u64 `&Dlo("q3")`,d16,#24 + vmull.p8 q2,`&Dlo("q2")`,d17 @ a<<16·bb + vshr.u64 `&Dlo("q1")`,#8 + vmull.p8 q3,`&Dlo("q3")`,d17 @ a<<24·bb + vshl.u64 `&Dhi("q1")`,#24 + veor d0,`&Dlo("q1")` + vshr.u64 `&Dlo("q2")`,#16 + veor d0,`&Dhi("q1")` + vshl.u64 `&Dhi("q2")`,#16 + veor d0,`&Dlo("q2")` + vshr.u64 `&Dlo("q3")`,#24 + veor d0,`&Dhi("q2")` + vshl.u64 `&Dhi("q3")`,#8 + veor d0,`&Dlo("q3")` + veor d0,`&Dhi("q3")` + bx lr +.size mul_1x1_neon,.-mul_1x1_neon #endif ___ ################ @@ -141,9 +159,8 @@ ___ # void bn_GF2m_mul_2x2(BN_ULONG *r, # BN_ULONG a1,BN_ULONG a0, # BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0 -{ -my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12)); -my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31)); + +($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23)); $code.=<<___; .global bn_GF2m_mul_2x2 @@ -156,58 +173,44 @@ bn_GF2m_mul_2x2: tst r12,#1 beq .Lialu - ldr r12, [sp] @ 5th argument - vmov.32 $a, r2, r1 - vmov.32 $b, r12, r3 - vmov.i64 $k48, #0x0000ffffffffffff - vmov.i64 $k32, #0x00000000ffffffff - vmov.i64 $k16, #0x000000000000ffff - - vext.8 $t0#lo, $a, $a, #1 @ A1 - vmull.p8 $t0, $t0#lo, $b @ F = A1*B - vext.8 $r#lo, $b, $b, #1 @ B1 - vmull.p8 $r, $a, $r#lo @ E = A*B1 - vext.8 $t1#lo, $a, $a, #2 @ A2 - vmull.p8 $t1, $t1#lo, $b @ H = A2*B - vext.8 $t3#lo, $b, $b, #2 @ B2 - vmull.p8 $t3, $a, $t3#lo @ G = A*B2 - vext.8 $t2#lo, $a, $a, #3 @ A3 - veor $t0, $t0, $r @ L = E + F - vmull.p8 $t2, $t2#lo, $b @ J = A3*B - vext.8 $r#lo, $b, $b, #3 @ B3 - veor $t1, $t1, $t3 @ M = G + H - vmull.p8 $r, $a, $r#lo @ I = A*B3 - veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8 - vand $t0#hi, $t0#hi, $k48 - vext.8 $t3#lo, $b, $b, #4 @ B4 - veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16 - vand $t1#hi, $t1#hi, $k32 - vmull.p8 $t3, $a, $t3#lo @ K = A*B4 - veor $t2, $t2, $r @ N = I + J - veor $t0#lo, $t0#lo, $t0#hi - veor $t1#lo, $t1#lo, $t1#hi - veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24 - vand $t2#hi, $t2#hi, $k16 - vext.8 $t0, $t0, $t0, #15 - veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32 - vmov.i64 $t3#hi, #0 - vext.8 $t1, $t1, $t1, #14 - veor $t2#lo, $t2#lo, $t2#hi - vmull.p8 $r, $a, $b @ D = A*B - vext.8 $t3, $t3, $t3, #12 - vext.8 $t2, $t2, $t2, #13 - veor $t0, $t0, $t1 - veor $t2, $t2, $t3 - veor $r, $r, $t0 - veor $r, $r, $t2 - - vst1.32 {$r}, [r0] - ret @ bx lr + veor $A1,$A1 + vmov.32 $B1,r3,r3 @ two copies of b1 + vmov.32 ${A1}[0],r1 @ a1 + + veor $A0,$A0 + vld1.32 ${B0}[],[sp,:32] @ two copies of b0 + vmov.32 ${A0}[0],r2 @ a0 + mov r12,lr + + vmov d16,$A1 + vmov d17,$B1 + bl mul_1x1_neon @ a1·b1 + vmov $A1B1,d0 + + vmov d16,$A0 + vmov d17,$B0 + bl mul_1x1_neon @ a0·b0 + vmov $A0B0,d0 + + veor d16,$A0,$A1 + veor d17,$B0,$B1 + veor $A0,$A0B0,$A1B1 + bl mul_1x1_neon @ (a0+a1)·(b0+b1) + + veor d0,$A0 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1 + vshl.u64 d1,d0,#32 + vshr.u64 d0,d0,#32 + veor $A0B0,d1 + veor $A1B1,d0 + vst1.32 {${A0B0}[0]},[r0,:32]! + vst1.32 {${A0B0}[1]},[r0,:32]! + vst1.32 {${A1B1}[0]},[r0,:32]! + vst1.32 {${A1B1}[1]},[r0,:32] + bx r12 .align 4 .Lialu: #endif ___ -} $ret="r10"; # reassigned 1st argument $code.=<<___; stmdb sp!,{r4-r10,lr} @@ -269,13 +272,7 @@ $code.=<<___; .comm OPENSSL_armcap_P,4,4 ___ -foreach (split("\n",$code)) { - s/\`([^\`]*)\`/eval $1/geo; - - s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or - s/\bret\b/bx lr/go or - s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 - - print $_,"\n"; -} +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +print $code; close STDOUT; # enforce flush diff --git a/app/openssl/crypto/bn/asm/armv4-mont.pl b/app/openssl/crypto/bn/asm/armv4-mont.pl index 72bad8e3..f78a8b5f 100644 --- a/app/openssl/crypto/bn/asm/armv4-mont.pl +++ b/app/openssl/crypto/bn/asm/armv4-mont.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -23,21 +23,6 @@ # than 1/2KB. Windows CE port would be trivial, as it's exclusively # about decorations, ABI and instruction syntax are identical. -# November 2013 -# -# Add NEON code path, which handles lengths divisible by 8. RSA/DSA -# performance improvement on Cortex-A8 is ~45-100% depending on key -# length, more for longer keys. On Cortex-A15 the span is ~10-105%. -# On Snapdragon S4 improvement was measured to vary from ~70% to -# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is -# rather because original integer-only code seems to perform -# suboptimally on S4. Situation on Cortex-A9 is unfortunately -# different. It's being looked into, but the trouble is that -# performance for vectors longer than 256 bits is actually couple -# of percent worse than for integer-only code. The code is chosen -# for execution on all NEON-capable processors, because gain on -# others outweighs the marginal loss on Cortex-A9. - while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; @@ -67,40 +52,16 @@ $_n0="$num,#14*4"; $_num="$num,#15*4"; $_bpend=$_num; $code=<<___; -#include "arm_arch.h" - .text -.code 32 - -#if __ARM_ARCH__>=7 -.align 5 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-bn_mul_mont -#endif .global bn_mul_mont .type bn_mul_mont,%function -.align 5 +.align 2 bn_mul_mont: - ldr ip,[sp,#4] @ load num stmdb sp!,{r0,r2} @ sp points at argument block -#if __ARM_ARCH__>=7 - tst ip,#7 - bne .Lialu - adr r0,bn_mul_mont - ldr r2,.LOPENSSL_armcap - ldr r0,[r0,r2] - tst r0,#1 @ NEON available? - ldmia sp, {r0,r2} - beq .Lialu - add sp,sp,#8 - b bn_mul8x_mont_neon -.align 4 -.Lialu: -#endif - cmp ip,#2 - mov $num,ip @ load num + ldr $num,[sp,#3*4] @ load num + cmp $num,#2 movlt r0,#0 addlt sp,sp,#2*4 blt .Labrt @@ -230,446 +191,14 @@ bn_mul_mont: ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 -.Labrt: -#if __ARM_ARCH__>=5 - ret @ bx lr -#else - tst lr,#1 +.Labrt: tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) -#endif .size bn_mul_mont,.-bn_mul_mont -___ -{ -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } - -my ($A0,$A1,$A2,$A3)=map("d$_",(0..3)); -my ($N0,$N1,$N2,$N3)=map("d$_",(4..7)); -my ($Z,$Temp)=("q4","q5"); -my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13)); -my ($Bi,$Ni,$M0)=map("d$_",(28..31)); -my $zero=&Dlo($Z); -my $temp=&Dlo($Temp); - -my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5)); -my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9)); - -$code.=<<___; -#if __ARM_ARCH__>=7 -.fpu neon - -.type bn_mul8x_mont_neon,%function -.align 5 -bn_mul8x_mont_neon: - mov ip,sp - stmdb sp!,{r4-r11} - vstmdb sp!,{d8-d15} @ ABI specification says so - ldmia ip,{r4-r5} @ load rest of parameter block - - sub $toutptr,sp,#16 - vld1.32 {${Bi}[0]}, [$bptr,:32]! - sub $toutptr,$toutptr,$num,lsl#4 - vld1.32 {$A0-$A3}, [$aptr]! @ can't specify :32 :-( - and $toutptr,$toutptr,#-64 - vld1.32 {${M0}[0]}, [$n0,:32] - mov sp,$toutptr @ alloca - veor $zero,$zero,$zero - subs $inner,$num,#8 - vzip.16 $Bi,$zero - - vmull.u32 $A0xB,$Bi,${A0}[0] - vmull.u32 $A1xB,$Bi,${A0}[1] - vmull.u32 $A2xB,$Bi,${A1}[0] - vshl.i64 $temp,`&Dhi("$A0xB")`,#16 - vmull.u32 $A3xB,$Bi,${A1}[1] - - vadd.u64 $temp,$temp,`&Dlo("$A0xB")` - veor $zero,$zero,$zero - vmul.u32 $Ni,$temp,$M0 - - vmull.u32 $A4xB,$Bi,${A2}[0] - vld1.32 {$N0-$N3}, [$nptr]! - vmull.u32 $A5xB,$Bi,${A2}[1] - vmull.u32 $A6xB,$Bi,${A3}[0] - vzip.16 $Ni,$zero - vmull.u32 $A7xB,$Bi,${A3}[1] - - bne .LNEON_1st - - @ special case for num=8, everything is in register bank... - - vmlal.u32 $A0xB,$Ni,${N0}[0] - sub $outer,$num,#1 - vmlal.u32 $A1xB,$Ni,${N0}[1] - vmlal.u32 $A2xB,$Ni,${N1}[0] - vmlal.u32 $A3xB,$Ni,${N1}[1] - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vmov $Temp,$A0xB - vmlal.u32 $A5xB,$Ni,${N2}[1] - vmov $A0xB,$A1xB - vmlal.u32 $A6xB,$Ni,${N3}[0] - vmov $A1xB,$A2xB - vmlal.u32 $A7xB,$Ni,${N3}[1] - vmov $A2xB,$A3xB - vmov $A3xB,$A4xB - vshr.u64 $temp,$temp,#16 - vmov $A4xB,$A5xB - vmov $A5xB,$A6xB - vadd.u64 $temp,$temp,`&Dhi("$Temp")` - vmov $A6xB,$A7xB - veor $A7xB,$A7xB - vshr.u64 $temp,$temp,#16 - - b .LNEON_outer8 - -.align 4 -.LNEON_outer8: - vld1.32 {${Bi}[0]}, [$bptr,:32]! - veor $zero,$zero,$zero - vzip.16 $Bi,$zero - vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp - - vmlal.u32 $A0xB,$Bi,${A0}[0] - vmlal.u32 $A1xB,$Bi,${A0}[1] - vmlal.u32 $A2xB,$Bi,${A1}[0] - vshl.i64 $temp,`&Dhi("$A0xB")`,#16 - vmlal.u32 $A3xB,$Bi,${A1}[1] - - vadd.u64 $temp,$temp,`&Dlo("$A0xB")` - veor $zero,$zero,$zero - subs $outer,$outer,#1 - vmul.u32 $Ni,$temp,$M0 - - vmlal.u32 $A4xB,$Bi,${A2}[0] - vmlal.u32 $A5xB,$Bi,${A2}[1] - vmlal.u32 $A6xB,$Bi,${A3}[0] - vzip.16 $Ni,$zero - vmlal.u32 $A7xB,$Bi,${A3}[1] - - vmlal.u32 $A0xB,$Ni,${N0}[0] - vmlal.u32 $A1xB,$Ni,${N0}[1] - vmlal.u32 $A2xB,$Ni,${N1}[0] - vmlal.u32 $A3xB,$Ni,${N1}[1] - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vmov $Temp,$A0xB - vmlal.u32 $A5xB,$Ni,${N2}[1] - vmov $A0xB,$A1xB - vmlal.u32 $A6xB,$Ni,${N3}[0] - vmov $A1xB,$A2xB - vmlal.u32 $A7xB,$Ni,${N3}[1] - vmov $A2xB,$A3xB - vmov $A3xB,$A4xB - vshr.u64 $temp,$temp,#16 - vmov $A4xB,$A5xB - vmov $A5xB,$A6xB - vadd.u64 $temp,$temp,`&Dhi("$Temp")` - vmov $A6xB,$A7xB - veor $A7xB,$A7xB - vshr.u64 $temp,$temp,#16 - - bne .LNEON_outer8 - - vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp - mov $toutptr,sp - vshr.u64 $temp,`&Dlo("$A0xB")`,#16 - mov $inner,$num - vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp - add $tinptr,sp,#16 - vshr.u64 $temp,`&Dhi("$A0xB")`,#16 - vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")` - - b .LNEON_tail2 - -.align 4 -.LNEON_1st: - vmlal.u32 $A0xB,$Ni,${N0}[0] - vld1.32 {$A0-$A3}, [$aptr]! - vmlal.u32 $A1xB,$Ni,${N0}[1] - subs $inner,$inner,#8 - vmlal.u32 $A2xB,$Ni,${N1}[0] - vmlal.u32 $A3xB,$Ni,${N1}[1] - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vld1.32 {$N0-$N1}, [$nptr]! - vmlal.u32 $A5xB,$Ni,${N2}[1] - vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! - vmlal.u32 $A6xB,$Ni,${N3}[0] - vmlal.u32 $A7xB,$Ni,${N3}[1] - vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! - - vmull.u32 $A0xB,$Bi,${A0}[0] - vld1.32 {$N2-$N3}, [$nptr]! - vmull.u32 $A1xB,$Bi,${A0}[1] - vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! - vmull.u32 $A2xB,$Bi,${A1}[0] - vmull.u32 $A3xB,$Bi,${A1}[1] - vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! - - vmull.u32 $A4xB,$Bi,${A2}[0] - vmull.u32 $A5xB,$Bi,${A2}[1] - vmull.u32 $A6xB,$Bi,${A3}[0] - vmull.u32 $A7xB,$Bi,${A3}[1] - - bne .LNEON_1st - - vmlal.u32 $A0xB,$Ni,${N0}[0] - add $tinptr,sp,#16 - vmlal.u32 $A1xB,$Ni,${N0}[1] - sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr - vmlal.u32 $A2xB,$Ni,${N1}[0] - vld1.64 {$Temp}, [sp,:128] - vmlal.u32 $A3xB,$Ni,${N1}[1] - sub $outer,$num,#1 - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! - vmlal.u32 $A5xB,$Ni,${N2}[1] - vshr.u64 $temp,$temp,#16 - vld1.64 {$A0xB}, [$tinptr, :128]! - vmlal.u32 $A6xB,$Ni,${N3}[0] - vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! - vmlal.u32 $A7xB,$Ni,${N3}[1] - - vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! - vadd.u64 $temp,$temp,`&Dhi("$Temp")` - veor $Z,$Z,$Z - vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! - vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! - vst1.64 {$Z}, [$toutptr,:128] - vshr.u64 $temp,$temp,#16 - - b .LNEON_outer - -.align 4 -.LNEON_outer: - vld1.32 {${Bi}[0]}, [$bptr,:32]! - sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr - vld1.32 {$A0-$A3}, [$aptr]! - veor $zero,$zero,$zero - mov $toutptr,sp - vzip.16 $Bi,$zero - sub $inner,$num,#8 - vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp - - vmlal.u32 $A0xB,$Bi,${A0}[0] - vld1.64 {$A3xB-$A4xB},[$tinptr,:256]! - vmlal.u32 $A1xB,$Bi,${A0}[1] - vmlal.u32 $A2xB,$Bi,${A1}[0] - vld1.64 {$A5xB-$A6xB},[$tinptr,:256]! - vmlal.u32 $A3xB,$Bi,${A1}[1] - - vshl.i64 $temp,`&Dhi("$A0xB")`,#16 - veor $zero,$zero,$zero - vadd.u64 $temp,$temp,`&Dlo("$A0xB")` - vld1.64 {$A7xB},[$tinptr,:128]! - vmul.u32 $Ni,$temp,$M0 - - vmlal.u32 $A4xB,$Bi,${A2}[0] - vld1.32 {$N0-$N3}, [$nptr]! - vmlal.u32 $A5xB,$Bi,${A2}[1] - vmlal.u32 $A6xB,$Bi,${A3}[0] - vzip.16 $Ni,$zero - vmlal.u32 $A7xB,$Bi,${A3}[1] - -.LNEON_inner: - vmlal.u32 $A0xB,$Ni,${N0}[0] - vld1.32 {$A0-$A3}, [$aptr]! - vmlal.u32 $A1xB,$Ni,${N0}[1] - subs $inner,$inner,#8 - vmlal.u32 $A2xB,$Ni,${N1}[0] - vmlal.u32 $A3xB,$Ni,${N1}[1] - vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vld1.64 {$A0xB}, [$tinptr, :128]! - vmlal.u32 $A5xB,$Ni,${N2}[1] - vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! - vmlal.u32 $A6xB,$Ni,${N3}[0] - vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! - vmlal.u32 $A7xB,$Ni,${N3}[1] - vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! - - vmlal.u32 $A0xB,$Bi,${A0}[0] - vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]! - vmlal.u32 $A1xB,$Bi,${A0}[1] - vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! - vmlal.u32 $A2xB,$Bi,${A1}[0] - vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]! - vmlal.u32 $A3xB,$Bi,${A1}[1] - vld1.32 {$N0-$N3}, [$nptr]! - - vmlal.u32 $A4xB,$Bi,${A2}[0] - vld1.64 {$A7xB}, [$tinptr, :128]! - vmlal.u32 $A5xB,$Bi,${A2}[1] - vmlal.u32 $A6xB,$Bi,${A3}[0] - vmlal.u32 $A7xB,$Bi,${A3}[1] - - bne .LNEON_inner - - vmlal.u32 $A0xB,$Ni,${N0}[0] - add $tinptr,sp,#16 - vmlal.u32 $A1xB,$Ni,${N0}[1] - sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr - vmlal.u32 $A2xB,$Ni,${N1}[0] - vld1.64 {$Temp}, [sp,:128] - vmlal.u32 $A3xB,$Ni,${N1}[1] - subs $outer,$outer,#1 - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! - vmlal.u32 $A5xB,$Ni,${N2}[1] - vld1.64 {$A0xB}, [$tinptr, :128]! - vshr.u64 $temp,$temp,#16 - vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! - vmlal.u32 $A6xB,$Ni,${N3}[0] - vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! - vmlal.u32 $A7xB,$Ni,${N3}[1] - - vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! - vadd.u64 $temp,$temp,`&Dhi("$Temp")` - vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! - vshr.u64 $temp,$temp,#16 - - bne .LNEON_outer - - mov $toutptr,sp - mov $inner,$num - -.LNEON_tail: - vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp - vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]! - vshr.u64 $temp,`&Dlo("$A0xB")`,#16 - vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp - vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]! - vshr.u64 $temp,`&Dhi("$A0xB")`,#16 - vld1.64 {$A7xB}, [$tinptr, :128]! - vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")` - -.LNEON_tail2: - vadd.u64 `&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp - vst1.32 {`&Dlo("$A0xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A1xB")`,#16 - vadd.u64 `&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp - vshr.u64 $temp,`&Dhi("$A1xB")`,#16 - vzip.16 `&Dlo("$A1xB")`,`&Dhi("$A1xB")` - - vadd.u64 `&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp - vst1.32 {`&Dlo("$A1xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A2xB")`,#16 - vadd.u64 `&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp - vshr.u64 $temp,`&Dhi("$A2xB")`,#16 - vzip.16 `&Dlo("$A2xB")`,`&Dhi("$A2xB")` - - vadd.u64 `&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp - vst1.32 {`&Dlo("$A2xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A3xB")`,#16 - vadd.u64 `&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp - vshr.u64 $temp,`&Dhi("$A3xB")`,#16 - vzip.16 `&Dlo("$A3xB")`,`&Dhi("$A3xB")` - - vadd.u64 `&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp - vst1.32 {`&Dlo("$A3xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A4xB")`,#16 - vadd.u64 `&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp - vshr.u64 $temp,`&Dhi("$A4xB")`,#16 - vzip.16 `&Dlo("$A4xB")`,`&Dhi("$A4xB")` - - vadd.u64 `&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp - vst1.32 {`&Dlo("$A4xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A5xB")`,#16 - vadd.u64 `&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp - vshr.u64 $temp,`&Dhi("$A5xB")`,#16 - vzip.16 `&Dlo("$A5xB")`,`&Dhi("$A5xB")` - - vadd.u64 `&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp - vst1.32 {`&Dlo("$A5xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A6xB")`,#16 - vadd.u64 `&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp - vld1.64 {$A0xB}, [$tinptr, :128]! - vshr.u64 $temp,`&Dhi("$A6xB")`,#16 - vzip.16 `&Dlo("$A6xB")`,`&Dhi("$A6xB")` - - vadd.u64 `&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp - vst1.32 {`&Dlo("$A6xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A7xB")`,#16 - vadd.u64 `&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp - vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! - vshr.u64 $temp,`&Dhi("$A7xB")`,#16 - vzip.16 `&Dlo("$A7xB")`,`&Dhi("$A7xB")` - subs $inner,$inner,#8 - vst1.32 {`&Dlo("$A7xB")`[0]}, [$toutptr, :32]! - - bne .LNEON_tail - - vst1.32 {${temp}[0]}, [$toutptr, :32] @ top-most bit - sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr - subs $aptr,sp,#0 @ clear carry flag - add $bptr,sp,$num,lsl#2 - -.LNEON_sub: - ldmia $aptr!, {r4-r7} - ldmia $nptr!, {r8-r11} - sbcs r8, r4,r8 - sbcs r9, r5,r9 - sbcs r10,r6,r10 - sbcs r11,r7,r11 - teq $aptr,$bptr @ preserves carry - stmia $rptr!, {r8-r11} - bne .LNEON_sub - - ldr r10, [$aptr] @ load top-most bit - veor q0,q0,q0 - sub r11,$bptr,sp @ this is num*4 - veor q1,q1,q1 - mov $aptr,sp - sub $rptr,$rptr,r11 @ rewind $rptr - mov $nptr,$bptr @ second 3/4th of frame - sbcs r10,r10,#0 @ result is carry flag - -.LNEON_copy_n_zap: - ldmia $aptr!, {r4-r7} - ldmia $rptr, {r8-r11} - movcc r8, r4 - vst1.64 {q0-q1}, [$nptr,:256]! @ wipe - movcc r9, r5 - movcc r10,r6 - vst1.64 {q0-q1}, [$nptr,:256]! @ wipe - movcc r11,r7 - ldmia $aptr, {r4-r7} - stmia $rptr!, {r8-r11} - sub $aptr,$aptr,#16 - ldmia $rptr, {r8-r11} - movcc r8, r4 - vst1.64 {q0-q1}, [$aptr,:256]! @ wipe - movcc r9, r5 - movcc r10,r6 - vst1.64 {q0-q1}, [$nptr,:256]! @ wipe - movcc r11,r7 - teq $aptr,$bptr @ preserves carry - stmia $rptr!, {r8-r11} - bne .LNEON_copy_n_zap - - sub sp,ip,#96 - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r11} - ret @ bx lr -.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon -#endif -___ -} -$code.=<<___; -.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by " +.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by " .align 2 -#if __ARM_ARCH__>=7 -.comm OPENSSL_armcap_P,4,4 -#endif ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -$code =~ s/\bret\b/bx lr/gm; print $code; close STDOUT; diff --git a/app/openssl/crypto/bn/asm/armv4-mont.s b/app/openssl/crypto/bn/asm/armv4-mont.s index fecae15e..64c220b5 100644 --- a/app/openssl/crypto/bn/asm/armv4-mont.s +++ b/app/openssl/crypto/bn/asm/armv4-mont.s @@ -1,37 +1,13 @@ -#include "arm_arch.h" - .text -.code 32 - -#if __ARM_ARCH__>=7 -.align 5 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-bn_mul_mont -#endif .global bn_mul_mont .type bn_mul_mont,%function -.align 5 +.align 2 bn_mul_mont: - ldr ip,[sp,#4] @ load num stmdb sp!,{r0,r2} @ sp points at argument block -#if __ARM_ARCH__>=7 - tst ip,#7 - bne .Lialu - adr r0,bn_mul_mont - ldr r2,.LOPENSSL_armcap - ldr r0,[r0,r2] - tst r0,#1 @ NEON available? - ldmia sp, {r0,r2} - beq .Lialu - add sp,sp,#8 - b bn_mul8x_mont_neon -.align 4 -.Lialu: -#endif - cmp ip,#2 - mov r0,ip @ load num + ldr r0,[sp,#3*4] @ load num + cmp r0,#2 movlt r0,#0 addlt sp,sp,#2*4 blt .Labrt @@ -161,419 +137,9 @@ bn_mul_mont: ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 -.Labrt: -#if __ARM_ARCH__>=5 - bx lr @ .word 0xe12fff1e -#else - tst lr,#1 +.Labrt: tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif .size bn_mul_mont,.-bn_mul_mont -#if __ARM_ARCH__>=7 -.fpu neon - -.type bn_mul8x_mont_neon,%function -.align 5 -bn_mul8x_mont_neon: - mov ip,sp - stmdb sp!,{r4-r11} - vstmdb sp!,{d8-d15} @ ABI specification says so - ldmia ip,{r4-r5} @ load rest of parameter block - - sub r7,sp,#16 - vld1.32 {d28[0]}, [r2,:32]! - sub r7,r7,r5,lsl#4 - vld1.32 {d0-d3}, [r1]! @ can't specify :32 :-( - and r7,r7,#-64 - vld1.32 {d30[0]}, [r4,:32] - mov sp,r7 @ alloca - veor d8,d8,d8 - subs r8,r5,#8 - vzip.16 d28,d8 - - vmull.u32 q6,d28,d0[0] - vmull.u32 q7,d28,d0[1] - vmull.u32 q8,d28,d1[0] - vshl.i64 d10,d13,#16 - vmull.u32 q9,d28,d1[1] - - vadd.u64 d10,d10,d12 - veor d8,d8,d8 - vmul.u32 d29,d10,d30 - - vmull.u32 q10,d28,d2[0] - vld1.32 {d4-d7}, [r3]! - vmull.u32 q11,d28,d2[1] - vmull.u32 q12,d28,d3[0] - vzip.16 d29,d8 - vmull.u32 q13,d28,d3[1] - - bne .LNEON_1st - - @ special case for num=8, everything is in register bank... - - vmlal.u32 q6,d29,d4[0] - sub r9,r5,#1 - vmlal.u32 q7,d29,d4[1] - vmlal.u32 q8,d29,d5[0] - vmlal.u32 q9,d29,d5[1] - - vmlal.u32 q10,d29,d6[0] - vmov q5,q6 - vmlal.u32 q11,d29,d6[1] - vmov q6,q7 - vmlal.u32 q12,d29,d7[0] - vmov q7,q8 - vmlal.u32 q13,d29,d7[1] - vmov q8,q9 - vmov q9,q10 - vshr.u64 d10,d10,#16 - vmov q10,q11 - vmov q11,q12 - vadd.u64 d10,d10,d11 - vmov q12,q13 - veor q13,q13 - vshr.u64 d10,d10,#16 - - b .LNEON_outer8 - -.align 4 -.LNEON_outer8: - vld1.32 {d28[0]}, [r2,:32]! - veor d8,d8,d8 - vzip.16 d28,d8 - vadd.u64 d12,d12,d10 - - vmlal.u32 q6,d28,d0[0] - vmlal.u32 q7,d28,d0[1] - vmlal.u32 q8,d28,d1[0] - vshl.i64 d10,d13,#16 - vmlal.u32 q9,d28,d1[1] - - vadd.u64 d10,d10,d12 - veor d8,d8,d8 - subs r9,r9,#1 - vmul.u32 d29,d10,d30 - - vmlal.u32 q10,d28,d2[0] - vmlal.u32 q11,d28,d2[1] - vmlal.u32 q12,d28,d3[0] - vzip.16 d29,d8 - vmlal.u32 q13,d28,d3[1] - - vmlal.u32 q6,d29,d4[0] - vmlal.u32 q7,d29,d4[1] - vmlal.u32 q8,d29,d5[0] - vmlal.u32 q9,d29,d5[1] - - vmlal.u32 q10,d29,d6[0] - vmov q5,q6 - vmlal.u32 q11,d29,d6[1] - vmov q6,q7 - vmlal.u32 q12,d29,d7[0] - vmov q7,q8 - vmlal.u32 q13,d29,d7[1] - vmov q8,q9 - vmov q9,q10 - vshr.u64 d10,d10,#16 - vmov q10,q11 - vmov q11,q12 - vadd.u64 d10,d10,d11 - vmov q12,q13 - veor q13,q13 - vshr.u64 d10,d10,#16 - - bne .LNEON_outer8 - - vadd.u64 d12,d12,d10 - mov r7,sp - vshr.u64 d10,d12,#16 - mov r8,r5 - vadd.u64 d13,d13,d10 - add r6,sp,#16 - vshr.u64 d10,d13,#16 - vzip.16 d12,d13 - - b .LNEON_tail2 - -.align 4 -.LNEON_1st: - vmlal.u32 q6,d29,d4[0] - vld1.32 {d0-d3}, [r1]! - vmlal.u32 q7,d29,d4[1] - subs r8,r8,#8 - vmlal.u32 q8,d29,d5[0] - vmlal.u32 q9,d29,d5[1] - - vmlal.u32 q10,d29,d6[0] - vld1.32 {d4-d5}, [r3]! - vmlal.u32 q11,d29,d6[1] - vst1.64 {q6-q7}, [r7,:256]! - vmlal.u32 q12,d29,d7[0] - vmlal.u32 q13,d29,d7[1] - vst1.64 {q8-q9}, [r7,:256]! - - vmull.u32 q6,d28,d0[0] - vld1.32 {d6-d7}, [r3]! - vmull.u32 q7,d28,d0[1] - vst1.64 {q10-q11}, [r7,:256]! - vmull.u32 q8,d28,d1[0] - vmull.u32 q9,d28,d1[1] - vst1.64 {q12-q13}, [r7,:256]! - - vmull.u32 q10,d28,d2[0] - vmull.u32 q11,d28,d2[1] - vmull.u32 q12,d28,d3[0] - vmull.u32 q13,d28,d3[1] - - bne .LNEON_1st - - vmlal.u32 q6,d29,d4[0] - add r6,sp,#16 - vmlal.u32 q7,d29,d4[1] - sub r1,r1,r5,lsl#2 @ rewind r1 - vmlal.u32 q8,d29,d5[0] - vld1.64 {q5}, [sp,:128] - vmlal.u32 q9,d29,d5[1] - sub r9,r5,#1 - - vmlal.u32 q10,d29,d6[0] - vst1.64 {q6-q7}, [r7,:256]! - vmlal.u32 q11,d29,d6[1] - vshr.u64 d10,d10,#16 - vld1.64 {q6}, [r6, :128]! - vmlal.u32 q12,d29,d7[0] - vst1.64 {q8-q9}, [r7,:256]! - vmlal.u32 q13,d29,d7[1] - - vst1.64 {q10-q11}, [r7,:256]! - vadd.u64 d10,d10,d11 - veor q4,q4,q4 - vst1.64 {q12-q13}, [r7,:256]! - vld1.64 {q7-q8}, [r6, :256]! - vst1.64 {q4}, [r7,:128] - vshr.u64 d10,d10,#16 - - b .LNEON_outer - -.align 4 -.LNEON_outer: - vld1.32 {d28[0]}, [r2,:32]! - sub r3,r3,r5,lsl#2 @ rewind r3 - vld1.32 {d0-d3}, [r1]! - veor d8,d8,d8 - mov r7,sp - vzip.16 d28,d8 - sub r8,r5,#8 - vadd.u64 d12,d12,d10 - - vmlal.u32 q6,d28,d0[0] - vld1.64 {q9-q10},[r6,:256]! - vmlal.u32 q7,d28,d0[1] - vmlal.u32 q8,d28,d1[0] - vld1.64 {q11-q12},[r6,:256]! - vmlal.u32 q9,d28,d1[1] - - vshl.i64 d10,d13,#16 - veor d8,d8,d8 - vadd.u64 d10,d10,d12 - vld1.64 {q13},[r6,:128]! - vmul.u32 d29,d10,d30 - - vmlal.u32 q10,d28,d2[0] - vld1.32 {d4-d7}, [r3]! - vmlal.u32 q11,d28,d2[1] - vmlal.u32 q12,d28,d3[0] - vzip.16 d29,d8 - vmlal.u32 q13,d28,d3[1] - -.LNEON_inner: - vmlal.u32 q6,d29,d4[0] - vld1.32 {d0-d3}, [r1]! - vmlal.u32 q7,d29,d4[1] - subs r8,r8,#8 - vmlal.u32 q8,d29,d5[0] - vmlal.u32 q9,d29,d5[1] - vst1.64 {q6-q7}, [r7,:256]! - - vmlal.u32 q10,d29,d6[0] - vld1.64 {q6}, [r6, :128]! - vmlal.u32 q11,d29,d6[1] - vst1.64 {q8-q9}, [r7,:256]! - vmlal.u32 q12,d29,d7[0] - vld1.64 {q7-q8}, [r6, :256]! - vmlal.u32 q13,d29,d7[1] - vst1.64 {q10-q11}, [r7,:256]! - - vmlal.u32 q6,d28,d0[0] - vld1.64 {q9-q10}, [r6, :256]! - vmlal.u32 q7,d28,d0[1] - vst1.64 {q12-q13}, [r7,:256]! - vmlal.u32 q8,d28,d1[0] - vld1.64 {q11-q12}, [r6, :256]! - vmlal.u32 q9,d28,d1[1] - vld1.32 {d4-d7}, [r3]! - - vmlal.u32 q10,d28,d2[0] - vld1.64 {q13}, [r6, :128]! - vmlal.u32 q11,d28,d2[1] - vmlal.u32 q12,d28,d3[0] - vmlal.u32 q13,d28,d3[1] - - bne .LNEON_inner - - vmlal.u32 q6,d29,d4[0] - add r6,sp,#16 - vmlal.u32 q7,d29,d4[1] - sub r1,r1,r5,lsl#2 @ rewind r1 - vmlal.u32 q8,d29,d5[0] - vld1.64 {q5}, [sp,:128] - vmlal.u32 q9,d29,d5[1] - subs r9,r9,#1 - - vmlal.u32 q10,d29,d6[0] - vst1.64 {q6-q7}, [r7,:256]! - vmlal.u32 q11,d29,d6[1] - vld1.64 {q6}, [r6, :128]! - vshr.u64 d10,d10,#16 - vst1.64 {q8-q9}, [r7,:256]! - vmlal.u32 q12,d29,d7[0] - vld1.64 {q7-q8}, [r6, :256]! - vmlal.u32 q13,d29,d7[1] - - vst1.64 {q10-q11}, [r7,:256]! - vadd.u64 d10,d10,d11 - vst1.64 {q12-q13}, [r7,:256]! - vshr.u64 d10,d10,#16 - - bne .LNEON_outer - - mov r7,sp - mov r8,r5 - -.LNEON_tail: - vadd.u64 d12,d12,d10 - vld1.64 {q9-q10}, [r6, :256]! - vshr.u64 d10,d12,#16 - vadd.u64 d13,d13,d10 - vld1.64 {q11-q12}, [r6, :256]! - vshr.u64 d10,d13,#16 - vld1.64 {q13}, [r6, :128]! - vzip.16 d12,d13 - -.LNEON_tail2: - vadd.u64 d14,d14,d10 - vst1.32 {d12[0]}, [r7, :32]! - vshr.u64 d10,d14,#16 - vadd.u64 d15,d15,d10 - vshr.u64 d10,d15,#16 - vzip.16 d14,d15 - - vadd.u64 d16,d16,d10 - vst1.32 {d14[0]}, [r7, :32]! - vshr.u64 d10,d16,#16 - vadd.u64 d17,d17,d10 - vshr.u64 d10,d17,#16 - vzip.16 d16,d17 - - vadd.u64 d18,d18,d10 - vst1.32 {d16[0]}, [r7, :32]! - vshr.u64 d10,d18,#16 - vadd.u64 d19,d19,d10 - vshr.u64 d10,d19,#16 - vzip.16 d18,d19 - - vadd.u64 d20,d20,d10 - vst1.32 {d18[0]}, [r7, :32]! - vshr.u64 d10,d20,#16 - vadd.u64 d21,d21,d10 - vshr.u64 d10,d21,#16 - vzip.16 d20,d21 - - vadd.u64 d22,d22,d10 - vst1.32 {d20[0]}, [r7, :32]! - vshr.u64 d10,d22,#16 - vadd.u64 d23,d23,d10 - vshr.u64 d10,d23,#16 - vzip.16 d22,d23 - - vadd.u64 d24,d24,d10 - vst1.32 {d22[0]}, [r7, :32]! - vshr.u64 d10,d24,#16 - vadd.u64 d25,d25,d10 - vld1.64 {q6}, [r6, :128]! - vshr.u64 d10,d25,#16 - vzip.16 d24,d25 - - vadd.u64 d26,d26,d10 - vst1.32 {d24[0]}, [r7, :32]! - vshr.u64 d10,d26,#16 - vadd.u64 d27,d27,d10 - vld1.64 {q7-q8}, [r6, :256]! - vshr.u64 d10,d27,#16 - vzip.16 d26,d27 - subs r8,r8,#8 - vst1.32 {d26[0]}, [r7, :32]! - - bne .LNEON_tail - - vst1.32 {d10[0]}, [r7, :32] @ top-most bit - sub r3,r3,r5,lsl#2 @ rewind r3 - subs r1,sp,#0 @ clear carry flag - add r2,sp,r5,lsl#2 - -.LNEON_sub: - ldmia r1!, {r4-r7} - ldmia r3!, {r8-r11} - sbcs r8, r4,r8 - sbcs r9, r5,r9 - sbcs r10,r6,r10 - sbcs r11,r7,r11 - teq r1,r2 @ preserves carry - stmia r0!, {r8-r11} - bne .LNEON_sub - - ldr r10, [r1] @ load top-most bit - veor q0,q0,q0 - sub r11,r2,sp @ this is num*4 - veor q1,q1,q1 - mov r1,sp - sub r0,r0,r11 @ rewind r0 - mov r3,r2 @ second 3/4th of frame - sbcs r10,r10,#0 @ result is carry flag - -.LNEON_copy_n_zap: - ldmia r1!, {r4-r7} - ldmia r0, {r8-r11} - movcc r8, r4 - vst1.64 {q0-q1}, [r3,:256]! @ wipe - movcc r9, r5 - movcc r10,r6 - vst1.64 {q0-q1}, [r3,:256]! @ wipe - movcc r11,r7 - ldmia r1, {r4-r7} - stmia r0!, {r8-r11} - sub r1,r1,#16 - ldmia r0, {r8-r11} - movcc r8, r4 - vst1.64 {q0-q1}, [r1,:256]! @ wipe - movcc r9, r5 - movcc r10,r6 - vst1.64 {q0-q1}, [r3,:256]! @ wipe - movcc r11,r7 - teq r1,r2 @ preserves carry - stmia r0!, {r8-r11} - bne .LNEON_copy_n_zap - - sub sp,ip,#96 - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r11} - bx lr @ .word 0xe12fff1e -.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon -#endif -.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by " +.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by " .align 2 -#if __ARM_ARCH__>=7 -.comm OPENSSL_armcap_P,4,4 -#endif diff --git a/app/openssl/crypto/bn/asm/mips3.S b/app/openssl/crypto/bn/asm/mips3.S deleted file mode 100644 index dca4105c..00000000 --- a/app/openssl/crypto/bn/asm/mips3.S +++ /dev/null @@ -1,2201 +0,0 @@ -.rdata -.asciiz "mips3.s, Version 1.1" -.asciiz "MIPS III/IV ISA artwork by Andy Polyakov " - -/* - * ==================================================================== - * Written by Andy Polyakov for the OpenSSL - * project. - * - * Rights for redistribution and usage in source and binary forms are - * granted according to the OpenSSL license. Warranty of any kind is - * disclaimed. - * ==================================================================== - */ - -/* - * This is my modest contributon to the OpenSSL project (see - * http://www.openssl.org/ for more information about it) and is - * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c - * module. For updates see http://fy.chalmers.se/~appro/hpe/. - * - * The module is designed to work with either of the "new" MIPS ABI(5), - * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under - * IRIX 5.x not only because it doesn't support new ABIs but also - * because 5.x kernels put R4x00 CPU into 32-bit mode and all those - * 64-bit instructions (daddu, dmultu, etc.) found below gonna only - * cause illegal instruction exception:-( - * - * In addition the code depends on preprocessor flags set up by MIPSpro - * compiler driver (either as or cc) and therefore (probably?) can't be - * compiled by the GNU assembler. GNU C driver manages fine though... - * I mean as long as -mmips-as is specified or is the default option, - * because then it simply invokes /usr/bin/as which in turn takes - * perfect care of the preprocessor definitions. Another neat feature - * offered by the MIPSpro assembler is an optimization pass. This gave - * me the opportunity to have the code looking more regular as all those - * architecture dependent instruction rescheduling details were left to - * the assembler. Cool, huh? - * - * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' - * goes way over 3 times faster! - * - * - */ -#include -#include - -#if _MIPS_ISA>=4 -#define MOVNZ(cond,dst,src) \ - movn dst,src,cond -#else -#define MOVNZ(cond,dst,src) \ - .set noreorder; \ - bnezl cond,.+8; \ - move dst,src; \ - .set reorder -#endif - -.text - -.set noat -.set reorder - -#define MINUS4 v1 - -.align 5 -LEAF(bn_mul_add_words) - .set noreorder - bgtzl a2,.L_bn_mul_add_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_mul_add_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_mul_add_words_tail - -.L_bn_mul_add_words_loop: - dmultu t0,a3 - ld t1,0(a0) - ld t2,8(a1) - ld t3,8(a0) - ld ta0,16(a1) - ld ta1,16(a0) - daddu t1,v0 - sltu v0,t1,v0 /* All manuals say it "compares 32-bit - * values", but it seems to work fine - * even on 64-bit registers. */ - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,0(a0) - daddu v0,AT - - dmultu t2,a3 - ld ta2,24(a1) - ld ta3,24(a0) - daddu t3,v0 - sltu v0,t3,v0 - mflo AT - mfhi t2 - daddu t3,AT - daddu v0,t2 - sltu AT,t3,AT - sd t3,8(a0) - daddu v0,AT - - dmultu ta0,a3 - subu a2,4 - PTR_ADD a0,32 - PTR_ADD a1,32 - daddu ta1,v0 - sltu v0,ta1,v0 - mflo AT - mfhi ta0 - daddu ta1,AT - daddu v0,ta0 - sltu AT,ta1,AT - sd ta1,-16(a0) - daddu v0,AT - - - dmultu ta2,a3 - and ta0,a2,MINUS4 - daddu ta3,v0 - sltu v0,ta3,v0 - mflo AT - mfhi ta2 - daddu ta3,AT - daddu v0,ta2 - sltu AT,ta3,AT - sd ta3,-8(a0) - daddu v0,AT - .set noreorder - bgtzl ta0,.L_bn_mul_add_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_mul_add_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_mul_add_words_return: - jr ra - -.L_bn_mul_add_words_tail: - dmultu t0,a3 - ld t1,0(a0) - subu a2,1 - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,0(a0) - daddu v0,AT - beqz a2,.L_bn_mul_add_words_return - - ld t0,8(a1) - dmultu t0,a3 - ld t1,8(a0) - subu a2,1 - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,8(a0) - daddu v0,AT - beqz a2,.L_bn_mul_add_words_return - - ld t0,16(a1) - dmultu t0,a3 - ld t1,16(a0) - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,16(a0) - daddu v0,AT - jr ra -END(bn_mul_add_words) - -.align 5 -LEAF(bn_mul_words) - .set noreorder - bgtzl a2,.L_bn_mul_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_mul_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_mul_words_tail - -.L_bn_mul_words_loop: - dmultu t0,a3 - ld t2,8(a1) - ld ta0,16(a1) - ld ta2,24(a1) - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,0(a0) - daddu v0,t1,t0 - - dmultu t2,a3 - subu a2,4 - PTR_ADD a0,32 - PTR_ADD a1,32 - mflo AT - mfhi t2 - daddu v0,AT - sltu t3,v0,AT - sd v0,-24(a0) - daddu v0,t3,t2 - - dmultu ta0,a3 - mflo AT - mfhi ta0 - daddu v0,AT - sltu ta1,v0,AT - sd v0,-16(a0) - daddu v0,ta1,ta0 - - - dmultu ta2,a3 - and ta0,a2,MINUS4 - mflo AT - mfhi ta2 - daddu v0,AT - sltu ta3,v0,AT - sd v0,-8(a0) - daddu v0,ta3,ta2 - .set noreorder - bgtzl ta0,.L_bn_mul_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_mul_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_mul_words_return: - jr ra - -.L_bn_mul_words_tail: - dmultu t0,a3 - subu a2,1 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,0(a0) - daddu v0,t1,t0 - beqz a2,.L_bn_mul_words_return - - ld t0,8(a1) - dmultu t0,a3 - subu a2,1 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,8(a0) - daddu v0,t1,t0 - beqz a2,.L_bn_mul_words_return - - ld t0,16(a1) - dmultu t0,a3 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,16(a0) - daddu v0,t1,t0 - jr ra -END(bn_mul_words) - -.align 5 -LEAF(bn_sqr_words) - .set noreorder - bgtzl a2,.L_bn_sqr_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_sqr_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_sqr_words_tail - -.L_bn_sqr_words_loop: - dmultu t0,t0 - ld t2,8(a1) - ld ta0,16(a1) - ld ta2,24(a1) - mflo t1 - mfhi t0 - sd t1,0(a0) - sd t0,8(a0) - - dmultu t2,t2 - subu a2,4 - PTR_ADD a0,64 - PTR_ADD a1,32 - mflo t3 - mfhi t2 - sd t3,-48(a0) - sd t2,-40(a0) - - dmultu ta0,ta0 - mflo ta1 - mfhi ta0 - sd ta1,-32(a0) - sd ta0,-24(a0) - - - dmultu ta2,ta2 - and ta0,a2,MINUS4 - mflo ta3 - mfhi ta2 - sd ta3,-16(a0) - sd ta2,-8(a0) - - .set noreorder - bgtzl ta0,.L_bn_sqr_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_sqr_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_sqr_words_return: - move v0,zero - jr ra - -.L_bn_sqr_words_tail: - dmultu t0,t0 - subu a2,1 - mflo t1 - mfhi t0 - sd t1,0(a0) - sd t0,8(a0) - beqz a2,.L_bn_sqr_words_return - - ld t0,8(a1) - dmultu t0,t0 - subu a2,1 - mflo t1 - mfhi t0 - sd t1,16(a0) - sd t0,24(a0) - beqz a2,.L_bn_sqr_words_return - - ld t0,16(a1) - dmultu t0,t0 - mflo t1 - mfhi t0 - sd t1,32(a0) - sd t0,40(a0) - jr ra -END(bn_sqr_words) - -.align 5 -LEAF(bn_add_words) - .set noreorder - bgtzl a3,.L_bn_add_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_add_words_proceed: - li MINUS4,-4 - and AT,a3,MINUS4 - move v0,zero - beqz AT,.L_bn_add_words_tail - -.L_bn_add_words_loop: - ld ta0,0(a2) - subu a3,4 - ld t1,8(a1) - and AT,a3,MINUS4 - ld t2,16(a1) - PTR_ADD a2,32 - ld t3,24(a1) - PTR_ADD a0,32 - ld ta1,-24(a2) - PTR_ADD a1,32 - ld ta2,-16(a2) - ld ta3,-8(a2) - daddu ta0,t0 - sltu t8,ta0,t0 - daddu t0,ta0,v0 - sltu v0,t0,ta0 - sd t0,-32(a0) - daddu v0,t8 - - daddu ta1,t1 - sltu t9,ta1,t1 - daddu t1,ta1,v0 - sltu v0,t1,ta1 - sd t1,-24(a0) - daddu v0,t9 - - daddu ta2,t2 - sltu t8,ta2,t2 - daddu t2,ta2,v0 - sltu v0,t2,ta2 - sd t2,-16(a0) - daddu v0,t8 - - daddu ta3,t3 - sltu t9,ta3,t3 - daddu t3,ta3,v0 - sltu v0,t3,ta3 - sd t3,-8(a0) - daddu v0,t9 - - .set noreorder - bgtzl AT,.L_bn_add_words_loop - ld t0,0(a1) - - bnezl a3,.L_bn_add_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_add_words_return: - jr ra - -.L_bn_add_words_tail: - ld ta0,0(a2) - daddu ta0,t0 - subu a3,1 - sltu t8,ta0,t0 - daddu t0,ta0,v0 - sltu v0,t0,ta0 - sd t0,0(a0) - daddu v0,t8 - beqz a3,.L_bn_add_words_return - - ld t1,8(a1) - ld ta1,8(a2) - daddu ta1,t1 - subu a3,1 - sltu t9,ta1,t1 - daddu t1,ta1,v0 - sltu v0,t1,ta1 - sd t1,8(a0) - daddu v0,t9 - beqz a3,.L_bn_add_words_return - - ld t2,16(a1) - ld ta2,16(a2) - daddu ta2,t2 - sltu t8,ta2,t2 - daddu t2,ta2,v0 - sltu v0,t2,ta2 - sd t2,16(a0) - daddu v0,t8 - jr ra -END(bn_add_words) - -.align 5 -LEAF(bn_sub_words) - .set noreorder - bgtzl a3,.L_bn_sub_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_sub_words_proceed: - li MINUS4,-4 - and AT,a3,MINUS4 - move v0,zero - beqz AT,.L_bn_sub_words_tail - -.L_bn_sub_words_loop: - ld ta0,0(a2) - subu a3,4 - ld t1,8(a1) - and AT,a3,MINUS4 - ld t2,16(a1) - PTR_ADD a2,32 - ld t3,24(a1) - PTR_ADD a0,32 - ld ta1,-24(a2) - PTR_ADD a1,32 - ld ta2,-16(a2) - ld ta3,-8(a2) - sltu t8,t0,ta0 - dsubu t0,ta0 - dsubu ta0,t0,v0 - sd ta0,-32(a0) - MOVNZ (t0,v0,t8) - - sltu t9,t1,ta1 - dsubu t1,ta1 - dsubu ta1,t1,v0 - sd ta1,-24(a0) - MOVNZ (t1,v0,t9) - - - sltu t8,t2,ta2 - dsubu t2,ta2 - dsubu ta2,t2,v0 - sd ta2,-16(a0) - MOVNZ (t2,v0,t8) - - sltu t9,t3,ta3 - dsubu t3,ta3 - dsubu ta3,t3,v0 - sd ta3,-8(a0) - MOVNZ (t3,v0,t9) - - .set noreorder - bgtzl AT,.L_bn_sub_words_loop - ld t0,0(a1) - - bnezl a3,.L_bn_sub_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_sub_words_return: - jr ra - -.L_bn_sub_words_tail: - ld ta0,0(a2) - subu a3,1 - sltu t8,t0,ta0 - dsubu t0,ta0 - dsubu ta0,t0,v0 - MOVNZ (t0,v0,t8) - sd ta0,0(a0) - beqz a3,.L_bn_sub_words_return - - ld t1,8(a1) - subu a3,1 - ld ta1,8(a2) - sltu t9,t1,ta1 - dsubu t1,ta1 - dsubu ta1,t1,v0 - MOVNZ (t1,v0,t9) - sd ta1,8(a0) - beqz a3,.L_bn_sub_words_return - - ld t2,16(a1) - ld ta2,16(a2) - sltu t8,t2,ta2 - dsubu t2,ta2 - dsubu ta2,t2,v0 - MOVNZ (t2,v0,t8) - sd ta2,16(a0) - jr ra -END(bn_sub_words) - -#undef MINUS4 - -.align 5 -LEAF(bn_div_3_words) - .set reorder - move a3,a0 /* we know that bn_div_words doesn't - * touch a3, ta2, ta3 and preserves a2 - * so that we can save two arguments - * and return address in registers - * instead of stack:-) - */ - ld a0,(a3) - move ta2,a1 - ld a1,-8(a3) - bne a0,a2,.L_bn_div_3_words_proceed - li v0,-1 - jr ra -.L_bn_div_3_words_proceed: - move ta3,ra - bal bn_div_words - move ra,ta3 - dmultu ta2,v0 - ld t2,-16(a3) - move ta0,zero - mfhi t1 - mflo t0 - sltu t8,t1,v1 -.L_bn_div_3_words_inner_loop: - bnez t8,.L_bn_div_3_words_inner_loop_done - sgeu AT,t2,t0 - seq t9,t1,v1 - and AT,t9 - sltu t3,t0,ta2 - daddu v1,a2 - dsubu t1,t3 - dsubu t0,ta2 - sltu t8,t1,v1 - sltu ta0,v1,a2 - or t8,ta0 - .set noreorder - beqzl AT,.L_bn_div_3_words_inner_loop - dsubu v0,1 - .set reorder -.L_bn_div_3_words_inner_loop_done: - jr ra -END(bn_div_3_words) - -.align 5 -LEAF(bn_div_words) - .set noreorder - bnezl a2,.L_bn_div_words_proceed - move v1,zero - jr ra - li v0,-1 /* I'd rather signal div-by-zero - * which can be done with 'break 7' */ - -.L_bn_div_words_proceed: - bltz a2,.L_bn_div_words_body - move t9,v1 - dsll a2,1 - bgtz a2,.-4 - addu t9,1 - - .set reorder - negu t1,t9 - li t2,-1 - dsll t2,t1 - and t2,a0 - dsrl AT,a1,t1 - .set noreorder - bnezl t2,.+8 - break 6 /* signal overflow */ - .set reorder - dsll a0,t9 - dsll a1,t9 - or a0,AT - -#define QT ta0 -#define HH ta1 -#define DH v1 -.L_bn_div_words_body: - dsrl DH,a2,32 - sgeu AT,a0,a2 - .set noreorder - bnezl AT,.+8 - dsubu a0,a2 - .set reorder - - li QT,-1 - dsrl HH,a0,32 - dsrl QT,32 /* q=0xffffffff */ - beq DH,HH,.L_bn_div_words_skip_div1 - ddivu zero,a0,DH - mflo QT -.L_bn_div_words_skip_div1: - dmultu a2,QT - dsll t3,a0,32 - dsrl AT,a1,32 - or t3,AT - mflo t0 - mfhi t1 -.L_bn_div_words_inner_loop1: - sltu t2,t3,t0 - seq t8,HH,t1 - sltu AT,HH,t1 - and t2,t8 - sltu v0,t0,a2 - or AT,t2 - .set noreorder - beqz AT,.L_bn_div_words_inner_loop1_done - dsubu t1,v0 - dsubu t0,a2 - b .L_bn_div_words_inner_loop1 - dsubu QT,1 - .set reorder -.L_bn_div_words_inner_loop1_done: - - dsll a1,32 - dsubu a0,t3,t0 - dsll v0,QT,32 - - li QT,-1 - dsrl HH,a0,32 - dsrl QT,32 /* q=0xffffffff */ - beq DH,HH,.L_bn_div_words_skip_div2 - ddivu zero,a0,DH - mflo QT -.L_bn_div_words_skip_div2: -#undef DH - dmultu a2,QT - dsll t3,a0,32 - dsrl AT,a1,32 - or t3,AT - mflo t0 - mfhi t1 -.L_bn_div_words_inner_loop2: - sltu t2,t3,t0 - seq t8,HH,t1 - sltu AT,HH,t1 - and t2,t8 - sltu v1,t0,a2 - or AT,t2 - .set noreorder - beqz AT,.L_bn_div_words_inner_loop2_done - dsubu t1,v1 - dsubu t0,a2 - b .L_bn_div_words_inner_loop2 - dsubu QT,1 - .set reorder -.L_bn_div_words_inner_loop2_done: -#undef HH - - dsubu a0,t3,t0 - or v0,QT - dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ - dsrl a2,t9 /* restore a2 */ - jr ra -#undef QT -END(bn_div_words) - -#define a_0 t0 -#define a_1 t1 -#define a_2 t2 -#define a_3 t3 -#define b_0 ta0 -#define b_1 ta1 -#define b_2 ta2 -#define b_3 ta3 - -#define a_4 s0 -#define a_5 s2 -#define a_6 s4 -#define a_7 a1 /* once we load a[7] we don't need a anymore */ -#define b_4 s1 -#define b_5 s3 -#define b_6 s5 -#define b_7 a2 /* once we load b[7] we don't need b anymore */ - -#define t_1 t8 -#define t_2 t9 - -#define c_1 v0 -#define c_2 v1 -#define c_3 a3 - -#define FRAME_SIZE 48 - -.align 5 -LEAF(bn_mul_comba8) - .set noreorder - PTR_SUB sp,FRAME_SIZE - .frame sp,64,ra - .set reorder - ld a_0,0(a1) /* If compiled with -mips3 option on - * R5000 box assembler barks on this - * line with "shouldn't have mult/div - * as last instruction in bb (R10K - * bug)" warning. If anybody out there - * has a clue about how to circumvent - * this do send me a note. - * - */ - ld b_0,0(a2) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - ld b_1,8(a2) - ld b_2,16(a2) - ld b_3,24(a2) - dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - sd s0,0(sp) - sd s1,8(sp) - sd s2,16(sp) - sd s3,24(sp) - sd s4,32(sp) - sd s5,40(sp) - mflo c_1 - mfhi c_2 - - dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ - ld a_4,32(a1) - ld a_5,40(a1) - ld a_6,48(a1) - ld a_7,56(a1) - ld b_4,32(a2) - ld b_5,40(a2) - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ - ld b_6,48(a2) - ld b_7,56(a2) - sd c_1,0(a0) /* r[0]=c1; */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - sd c_2,8(a0) /* r[1]=c2; */ - - dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) /* r[2]=c3; */ - - dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) /* r[3]=c1; */ - - dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) /* r[4]=c2; */ - - dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) /* r[5]=c3; */ - - dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,48(a0) /* r[6]=c1; */ - - dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,56(a0) /* r[7]=c2; */ - - dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,64(a0) /* r[8]=c3; */ - - dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,72(a0) /* r[9]=c1; */ - - dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,80(a0) /* r[10]=c2; */ - - dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,88(a0) /* r[11]=c3; */ - - dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,96(a0) /* r[12]=c1; */ - - dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,104(a0) /* r[13]=c2; */ - - dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ - ld s0,0(sp) - ld s1,8(sp) - ld s2,16(sp) - ld s3,24(sp) - ld s4,32(sp) - ld s5,40(sp) - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sd c_3,112(a0) /* r[14]=c3; */ - sd c_1,120(a0) /* r[15]=c1; */ - - PTR_ADD sp,FRAME_SIZE - - jr ra -END(bn_mul_comba8) - -.align 5 -LEAF(bn_mul_comba4) - .set reorder - ld a_0,0(a1) - ld b_0,0(a2) - ld a_1,8(a1) - ld a_2,16(a1) - dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - ld a_3,24(a1) - ld b_1,8(a2) - ld b_2,16(a2) - ld b_3,24(a2) - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - sd c_2,8(a0) - - dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sd c_1,48(a0) - sd c_2,56(a0) - - jr ra -END(bn_mul_comba4) - -#undef a_4 -#undef a_5 -#undef a_6 -#undef a_7 -#define a_4 b_0 -#define a_5 b_1 -#define a_6 b_2 -#define a_7 b_3 - -.align 5 -LEAF(bn_sqr_comba8) - .set reorder - ld a_0,0(a1) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - - dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - ld a_4,32(a1) - ld a_5,40(a1) - ld a_6,48(a1) - ld a_7,56(a1) - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - sd c_2,8(a0) - - dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,48(a0) - - dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,56(a0) - - dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,64(a0) - - dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,72(a0) - - dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,80(a0) - - dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,88(a0) - - dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,96(a0) - - dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,104(a0) - - dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sd c_3,112(a0) - sd c_1,120(a0) - - jr ra -END(bn_sqr_comba8) - -.align 5 -LEAF(bn_sqr_comba4) - .set reorder - ld a_0,0(a1) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - sd c_2,8(a0) - - dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sd c_1,48(a0) - sd c_2,56(a0) - - jr ra -END(bn_sqr_comba4) diff --git a/app/openssl/crypto/bn/asm/pa-risc2.S b/app/openssl/crypto/bn/asm/pa-risc2.S deleted file mode 100644 index f3b16290..00000000 --- a/app/openssl/crypto/bn/asm/pa-risc2.S +++ /dev/null @@ -1,1618 +0,0 @@ -; -; PA-RISC 2.0 implementation of bn_asm code, based on the -; 64-bit version of the code. This code is effectively the -; same as the 64-bit version except the register model is -; slightly different given all values must be 32-bit between -; function calls. Thus the 64-bit return values are returned -; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit -; -; -; This code is approximately 2x faster than the C version -; for RSA/DSA. -; -; See http://devresource.hp.com/ for more details on the PA-RISC -; architecture. Also see the book "PA-RISC 2.0 Architecture" -; by Gerry Kane for information on the instruction set architecture. -; -; Code written by Chris Ruemmler (with some help from the HP C -; compiler). -; -; The code compiles with HP's assembler -; - - .level 2.0N - .space $TEXT$ - .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY - -; -; Global Register definitions used for the routines. -; -; Some information about HP's runtime architecture for 32-bits. -; -; "Caller save" means the calling function must save the register -; if it wants the register to be preserved. -; "Callee save" means if a function uses the register, it must save -; the value before using it. -; -; For the floating point registers -; -; "caller save" registers: fr4-fr11, fr22-fr31 -; "callee save" registers: fr12-fr21 -; "special" registers: fr0-fr3 (status and exception registers) -; -; For the integer registers -; value zero : r0 -; "caller save" registers: r1,r19-r26 -; "callee save" registers: r3-r18 -; return register : r2 (rp) -; return values ; r28,r29 (ret0,ret1) -; Stack pointer ; r30 (sp) -; millicode return ptr ; r31 (also a caller save register) - - -; -; Arguments to the routines -; -r_ptr .reg %r26 -a_ptr .reg %r25 -b_ptr .reg %r24 -num .reg %r24 -n .reg %r23 - -; -; Note that the "w" argument for bn_mul_add_words and bn_mul_words -; is passed on the stack at a delta of -56 from the top of stack -; as the routine is entered. -; - -; -; Globals used in some routines -; - -top_overflow .reg %r23 -high_mask .reg %r22 ; value 0xffffffff80000000L - - -;------------------------------------------------------------------------------ -; -; bn_mul_add_words -; -;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, -; int num, BN_ULONG w) -; -; arg0 = r_ptr -; arg1 = a_ptr -; arg3 = num -; -56(sp) = w -; -; Local register definitions -; - -fm1 .reg %fr22 -fm .reg %fr23 -ht_temp .reg %fr24 -ht_temp_1 .reg %fr25 -lt_temp .reg %fr26 -lt_temp_1 .reg %fr27 -fm1_1 .reg %fr28 -fm_1 .reg %fr29 - -fw_h .reg %fr7L -fw_l .reg %fr7R -fw .reg %fr7 - -fht_0 .reg %fr8L -flt_0 .reg %fr8R -t_float_0 .reg %fr8 - -fht_1 .reg %fr9L -flt_1 .reg %fr9R -t_float_1 .reg %fr9 - -tmp_0 .reg %r31 -tmp_1 .reg %r21 -m_0 .reg %r20 -m_1 .reg %r19 -ht_0 .reg %r1 -ht_1 .reg %r3 -lt_0 .reg %r4 -lt_1 .reg %r5 -m1_0 .reg %r6 -m1_1 .reg %r7 -rp_val .reg %r8 -rp_val_1 .reg %r9 - -bn_mul_add_words - .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN - .proc - .callinfo frame=128 - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP ; Needed to make the loop 16-byte aligned - NOP ; needed to make the loop 16-byte aligned - - STD %r5,16(%sp) ; save r5 - NOP - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - - STD %r8,40(%sp) ; save r8 - STD %r9,48(%sp) ; save r9 - COPY %r0,%ret1 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - - CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit - LDO 128(%sp),%sp ; bump stack - - ; - ; The loop is unrolled twice, so if there is only 1 number - ; then go straight to the cleanup code. - ; - CMPIB,= 1,num,bn_mul_add_words_single_top - FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_add_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDD 8(r_ptr),rp_val_1 ; rp[1] - - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1[0] - FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] - - XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h - XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m[0] - FSTD fm_1,-40(%sp) ; -40(sp) = m[1] - - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 - - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 - - LDD -8(%sp),m_0 ; m[0] - LDD -40(%sp),m_1 ; m[1] - LDD -16(%sp),m1_0 ; m1[0] - LDD -48(%sp),m1_1 ; m1[1] - - LDD -24(%sp),ht_0 ; ht[0] - LDD -56(%sp),ht_1 ; ht[1] - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; - - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) - ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) - - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) - ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) - EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 - - EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 - ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) - ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) - - ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - - ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - - LDO -2(num),num ; num = num - 2; - ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - - ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] - ADD,DC ht_1,%r0,%ret1 ; ht[1]++ - LDO 16(a_ptr),a_ptr ; a_ptr += 2 - - STD lt_1,8(r_ptr) ; rp[1] = lt[1] - CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do - LDO 16(r_ptr),r_ptr ; r_ptr += 2 - - CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_add_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDO 8(a_ptr),a_ptr ; a_ptr++ - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 ; m1 = temp1 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD %ret1,tmp_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] - ADD,DC ht_0,%r0,%ret1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_add_words_exit - .EXIT - - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - LDD -80(%sp),%r9 ; restore r9 - LDD -88(%sp),%r8 ; restore r8 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -; -; arg0 = rp -; arg1 = ap -; arg3 = num -; w on stack at -56(sp) - -bn_mul_words - .proc - .callinfo frame=128 - .entry - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - COPY %r0,%ret1 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - - CMPIB,>= 0,num,bn_mul_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; See if only 1 word to do, thus just do cleanup - ; - CMPIB,= 1,num,bn_mul_words_single_top - FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l - - FSTD fm1,-16(%sp) ; -16(sp) = m1 - FSTD fm1_1,-48(%sp) ; -48(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h - - FSTD fm,-8(%sp) ; -8(sp) = m - FSTD fm_1,-40(%sp) ; -40(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h - - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt - LDD -8(%sp),m_0 - LDD -40(%sp),m_1 - - LDD -16(%sp),m1_0 - LDD -48(%sp),m1_1 - LDD -24(%sp),ht_0 - LDD -56(%sp),ht_1 - - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) - ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - EXTRD,U tmp_1,31,32,m_1 ; m>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD lt_1,m1_1,lt_1 ; lt = lt+m1; - ADD,DC ht_1,%r0,ht_1 ; ht++ - ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1); - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) - ADD,DC ht_1,%r0,ht_1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - STD lt_1,8(r_ptr) ; rp[1] = lt - - COPY ht_1,%ret1 ; carry = ht - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_mul_words_unroll2 - LDO 16(r_ptr),r_ptr ; rp++ - - CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt= lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD %ret1,lt_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - COPY ht_0,%ret1 ; copy carry - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_words_exit - .EXIT - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND - -;---------------------------------------------------------------------------- -; -;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; - -bn_sqr_words - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - CMPIB,>= 0,num,bn_sqr_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; If only 1, the goto straight to cleanup - ; - CMPIB,= 1,num,bn_sqr_words_single_top - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - -bn_sqr_words_unroll2 - FLDD 0(a_ptr),t_float_0 ; a[0] - FLDD 8(a_ptr),t_float_1 ; a[1] - XMPYU fht_0,flt_0,fm ; m[0] - XMPYU fht_1,flt_1,fm_1 ; m[1] - - FSTD fm,-24(%sp) ; store m[0] - FSTD fm_1,-56(%sp) ; store m[1] - XMPYU flt_0,flt_0,lt_temp ; lt[0] - XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] - - FSTD lt_temp,-16(%sp) ; store lt[0] - FSTD lt_temp_1,-48(%sp) ; store lt[1] - XMPYU fht_0,fht_0,ht_temp ; ht[0] - XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] - - FSTD ht_temp,-8(%sp) ; store ht[0] - FSTD ht_temp_1,-40(%sp) ; store ht[1] - LDD -24(%sp),m_0 - LDD -56(%sp),m_1 - - AND m_0,high_mask,tmp_0 ; m[0] & Mask - AND m_1,high_mask,tmp_1 ; m[1] & Mask - DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 - DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 - - LDD -16(%sp),lt_0 - LDD -48(%sp),lt_1 - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 - EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 - - LDD -8(%sp),ht_0 - LDD -40(%sp),ht_1 - ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 - ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 - - ADD lt_0,m_0,lt_0 ; lt = lt+m - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - STD ht_0,8(r_ptr) ; rp[1] = ht[1] - - ADD lt_1,m_1,lt_1 ; lt = lt+m - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_1,16(r_ptr) ; rp[2] = lt[1] - STD ht_1,24(r_ptr) ; rp[3] = ht[1] - - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_sqr_words_unroll2 - LDO 32(r_ptr),r_ptr ; rp += 4 - - CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_sqr_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,flt_0,fm ; m - FSTD fm,-24(%sp) ; store m - - XMPYU flt_0,flt_0,lt_temp ; lt - FSTD lt_temp,-16(%sp) ; store lt - - XMPYU fht_0,fht_0,ht_temp ; ht - FSTD ht_temp,-8(%sp) ; store ht - - LDD -24(%sp),m_0 ; load m - AND m_0,high_mask,tmp_0 ; m & Mask - DEPD,Z m_0,30,31,m_0 ; m << 32+1 - LDD -16(%sp),lt_0 ; lt - - LDD -8(%sp),ht_0 ; ht - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 - ADD m_0,lt_0,lt_0 ; lt = lt+m - ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 - ADD,DC ht_0,%r0,ht_0 ; ht++ - - STD lt_0,0(r_ptr) ; rp[0] = lt - STD ht_0,8(r_ptr) ; rp[1] = ht - -bn_sqr_words_exit - .EXIT - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - .PROCEND ;in=23,24,25,26,29;out=28; - - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t .reg %r22 -b .reg %r21 -l .reg %r20 - -bn_add_words - .proc - .entry - .callinfo - .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - CMPIB,>= 0,n,bn_add_words_exit - COPY %r0,%ret1 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_add_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_add_words_unroll2 - LDD 0(a_ptr),t - LDD 0(b_ptr),b - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,0(r_ptr) - - LDD 8(a_ptr),t - LDD 8(b_ptr),b - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_add_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_add_words_exit ; are we done? - -bn_add_words_single_top - LDD 0(a_ptr),t - LDD 0(b_ptr),b - - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??) - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,0(r_ptr) - -bn_add_words_exit - .EXIT - BVE (%rp) - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t1 .reg %r22 -t2 .reg %r21 -sub_tmp1 .reg %r20 -sub_tmp2 .reg %r19 - - -bn_sub_words - .proc - .callinfo - .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - CMPIB,>= 0,n,bn_sub_words_exit - COPY %r0,%ret1 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_sub_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_sub_words_unroll2 - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - STD sub_tmp1,0(r_ptr) - - LDD 8(a_ptr),t1 - LDD 8(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - STD sub_tmp1,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_sub_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? - -bn_sub_words_single_top - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - - STD sub_tmp1,0(r_ptr) - -bn_sub_words_exit - .EXIT - BVE (%rp) - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - .PROCEND ;in=23,24,25,26,29;out=28; - -;------------------------------------------------------------------------------ -; -; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) -; -; arg0 = h -; arg1 = l -; arg2 = d -; -; This is mainly just output from the HP C compiler. -; -;------------------------------------------------------------------------------ -bn_div_words - .PROC - .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN - .IMPORT BN_num_bits_word,CODE - ;--- not PIC .IMPORT __iob,DATA - ;--- not PIC .IMPORT fprintf,CODE - .IMPORT abort,CODE - .IMPORT $$div2U,MILLICODE - .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE - .ENTRY - STW %r2,-20(%r30) ;offset 0x8ec - STW,MA %r3,192(%r30) ;offset 0x8f0 - STW %r4,-188(%r30) ;offset 0x8f4 - DEPD %r5,31,32,%r6 ;offset 0x8f8 - STD %r6,-184(%r30) ;offset 0x8fc - DEPD %r7,31,32,%r8 ;offset 0x900 - STD %r8,-176(%r30) ;offset 0x904 - STW %r9,-168(%r30) ;offset 0x908 - LDD -248(%r30),%r3 ;offset 0x90c - COPY %r26,%r4 ;offset 0x910 - COPY %r24,%r5 ;offset 0x914 - DEPD %r25,31,32,%r4 ;offset 0x918 - CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c - DEPD %r23,31,32,%r5 ;offset 0x920 - MOVIB,TR -1,%r29,$00060002 ;offset 0x924 - EXTRD,U %r29,31,32,%r28 ;offset 0x928 -$0006002A - LDO -1(%r29),%r29 ;offset 0x92c - SUB %r23,%r7,%r23 ;offset 0x930 -$00060024 - SUB %r4,%r31,%r25 ;offset 0x934 - AND %r25,%r19,%r26 ;offset 0x938 - CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c - DEPD,Z %r25,31,32,%r20 ;offset 0x940 - OR %r20,%r24,%r21 ;offset 0x944 - CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948 - SUB %r31,%r2,%r31 ;offset 0x94c -$00060046 -$0006002E - DEPD,Z %r23,31,32,%r25 ;offset 0x950 - EXTRD,U %r23,31,32,%r26 ;offset 0x954 - AND %r25,%r19,%r24 ;offset 0x958 - ADD,L %r31,%r26,%r31 ;offset 0x95c - CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960 - LDO 1(%r31),%r31 ;offset 0x964 -$00060032 - CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968 - LDO -1(%r29),%r29 ;offset 0x96c - ADD,L %r4,%r3,%r4 ;offset 0x970 -$00060036 - ADDIB,=,N -1,%r8,$D0 ;offset 0x974 - SUB %r5,%r24,%r28 ;offset 0x978 -$0006003A - SUB %r4,%r31,%r24 ;offset 0x97c - SHRPD %r24,%r28,32,%r4 ;offset 0x980 - DEPD,Z %r29,31,32,%r9 ;offset 0x984 - DEPD,Z %r28,31,32,%r5 ;offset 0x988 -$0006001C - EXTRD,U %r4,31,32,%r31 ;offset 0x98c - CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990 - MOVB,TR %r6,%r29,$D1 ;offset 0x994 - STD %r29,-152(%r30) ;offset 0x998 -$0006000C - EXTRD,U %r3,31,32,%r25 ;offset 0x99c - COPY %r3,%r26 ;offset 0x9a0 - EXTRD,U %r3,31,32,%r9 ;offset 0x9a4 - EXTRD,U %r4,31,32,%r8 ;offset 0x9a8 - .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28; - B,L BN_num_bits_word,%r2 ;offset 0x9ac - EXTRD,U %r5,31,32,%r7 ;offset 0x9b0 - LDI 64,%r20 ;offset 0x9b4 - DEPD %r7,31,32,%r5 ;offset 0x9b8 - DEPD %r8,31,32,%r4 ;offset 0x9bc - DEPD %r9,31,32,%r3 ;offset 0x9c0 - CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 - COPY %r28,%r24 ;offset 0x9c8 - MTSARCM %r24 ;offset 0x9cc - DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0 - CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 -$00060012 - SUBI 64,%r24,%r31 ;offset 0x9d8 - CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc - SUB %r4,%r3,%r4 ;offset 0x9e0 -$00060016 - CMPB,= %r31,%r0,$0006001A ;offset 0x9e4 - COPY %r0,%r9 ;offset 0x9e8 - MTSARCM %r31 ;offset 0x9ec - DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0 - SUBI 64,%r31,%r26 ;offset 0x9f4 - MTSAR %r26 ;offset 0x9f8 - SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc - MTSARCM %r31 ;offset 0xa00 - DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04 -$0006001A - DEPDI,Z -1,31,32,%r19 ;offset 0xa08 - AND %r3,%r19,%r29 ;offset 0xa0c - EXTRD,U %r29,31,32,%r2 ;offset 0xa10 - DEPDI,Z -1,63,32,%r6 ;offset 0xa14 - MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 - EXTRD,U %r3,63,32,%r7 ;offset 0xa1c -$D2 - ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 - ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24 - ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 - ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; - ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c - ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30 - .CALL ; - B,L abort,%r2 ;offset 0xa34 - NOP ;offset 0xa38 - B $D3 ;offset 0xa3c - LDW -212(%r30),%r2 ;offset 0xa40 -$00060020 - COPY %r4,%r26 ;offset 0xa44 - EXTRD,U %r4,31,32,%r25 ;offset 0xa48 - COPY %r2,%r24 ;offset 0xa4c - .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) - B,L $$div2U,%r31 ;offset 0xa50 - EXTRD,U %r2,31,32,%r23 ;offset 0xa54 - DEPD %r28,31,32,%r29 ;offset 0xa58 -$00060022 - STD %r29,-152(%r30) ;offset 0xa5c -$D1 - AND %r5,%r19,%r24 ;offset 0xa60 - EXTRD,U %r24,31,32,%r24 ;offset 0xa64 - STW %r2,-160(%r30) ;offset 0xa68 - STW %r7,-128(%r30) ;offset 0xa6c - FLDD -152(%r30),%fr4 ;offset 0xa70 - FLDD -152(%r30),%fr7 ;offset 0xa74 - FLDW -160(%r30),%fr8L ;offset 0xa78 - FLDW -128(%r30),%fr5L ;offset 0xa7c - XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80 - FSTD %fr10,-136(%r30) ;offset 0xa84 - XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88 - FSTD %fr22,-144(%r30) ;offset 0xa8c - XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90 - XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94 - FSTD %fr11,-112(%r30) ;offset 0xa98 - FSTD %fr23,-120(%r30) ;offset 0xa9c - LDD -136(%r30),%r28 ;offset 0xaa0 - DEPD,Z %r28,31,32,%r31 ;offset 0xaa4 - LDD -144(%r30),%r20 ;offset 0xaa8 - ADD,L %r20,%r31,%r31 ;offset 0xaac - LDD -112(%r30),%r22 ;offset 0xab0 - DEPD,Z %r22,31,32,%r22 ;offset 0xab4 - LDD -120(%r30),%r21 ;offset 0xab8 - B $00060024 ;offset 0xabc - ADD,L %r21,%r22,%r23 ;offset 0xac0 -$D0 - OR %r9,%r29,%r29 ;offset 0xac4 -$00060040 - EXTRD,U %r29,31,32,%r28 ;offset 0xac8 -$00060002 -$L2 - LDW -212(%r30),%r2 ;offset 0xacc -$D3 - LDW -168(%r30),%r9 ;offset 0xad0 - LDD -176(%r30),%r8 ;offset 0xad4 - EXTRD,U %r8,31,32,%r7 ;offset 0xad8 - LDD -184(%r30),%r6 ;offset 0xadc - EXTRD,U %r6,31,32,%r5 ;offset 0xae0 - LDW -188(%r30),%r4 ;offset 0xae4 - BVE (%r2) ;offset 0xae8 - .EXIT - LDW,MB -192(%r30),%r3 ;offset 0xaec - .PROCEND ;in=23,25;out=28,29;fpin=105,107; - - - - -;---------------------------------------------------------------------------- -; -; Registers to hold 64-bit values to manipulate. The "L" part -; of the register corresponds to the upper 32-bits, while the "R" -; part corresponds to the lower 32-bits -; -; Note, that when using b6 and b7, the code must save these before -; using them because they are callee save registers -; -; -; Floating point registers to use to save values that -; are manipulated. These don't collide with ftemp1-6 and -; are all caller save registers -; -a0 .reg %fr22 -a0L .reg %fr22L -a0R .reg %fr22R - -a1 .reg %fr23 -a1L .reg %fr23L -a1R .reg %fr23R - -a2 .reg %fr24 -a2L .reg %fr24L -a2R .reg %fr24R - -a3 .reg %fr25 -a3L .reg %fr25L -a3R .reg %fr25R - -a4 .reg %fr26 -a4L .reg %fr26L -a4R .reg %fr26R - -a5 .reg %fr27 -a5L .reg %fr27L -a5R .reg %fr27R - -a6 .reg %fr28 -a6L .reg %fr28L -a6R .reg %fr28R - -a7 .reg %fr29 -a7L .reg %fr29L -a7R .reg %fr29R - -b0 .reg %fr30 -b0L .reg %fr30L -b0R .reg %fr30R - -b1 .reg %fr31 -b1L .reg %fr31L -b1R .reg %fr31R - -; -; Temporary floating point variables, these are all caller save -; registers -; -ftemp1 .reg %fr4 -ftemp2 .reg %fr5 -ftemp3 .reg %fr6 -ftemp4 .reg %fr7 - -; -; The B set of registers when used. -; - -b2 .reg %fr8 -b2L .reg %fr8L -b2R .reg %fr8R - -b3 .reg %fr9 -b3L .reg %fr9L -b3R .reg %fr9R - -b4 .reg %fr10 -b4L .reg %fr10L -b4R .reg %fr10R - -b5 .reg %fr11 -b5L .reg %fr11L -b5R .reg %fr11R - -b6 .reg %fr12 -b6L .reg %fr12L -b6R .reg %fr12R - -b7 .reg %fr13 -b7L .reg %fr13L -b7R .reg %fr13R - -c1 .reg %r21 ; only reg -temp1 .reg %r20 ; only reg -temp2 .reg %r19 ; only reg -temp3 .reg %r31 ; only reg - -m1 .reg %r28 -c2 .reg %r23 -high_one .reg %r1 -ht .reg %r6 -lt .reg %r5 -m .reg %r4 -c3 .reg %r3 - -SQR_ADD_C .macro A0L,A0R,C1,C2,C3 - XMPYU A0L,A0R,ftemp1 ; m - FSTD ftemp1,-24(%sp) ; store m - - XMPYU A0R,A0R,ftemp2 ; lt - FSTD ftemp2,-16(%sp) ; store lt - - XMPYU A0L,A0L,ftemp3 ; ht - FSTD ftemp3,-8(%sp) ; store ht - - LDD -24(%sp),m ; load m - AND m,high_mask,temp2 ; m & Mask - DEPD,Z m,30,31,temp3 ; m << 32+1 - LDD -16(%sp),lt ; lt - - LDD -8(%sp),ht ; ht - EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 - ADD temp3,lt,lt ; lt = lt+m - ADD,L ht,temp1,ht ; ht += temp1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; ht++ - - ADD C2,ht,C2 ; c2=c2+ht - ADD,DC C3,%r0,C3 ; c3++ -.endm - -SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 - XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,A1L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,A1R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,A1L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD ht,ht,ht ; ht=ht+ht; - ADD,DC C3,%r0,C3 ; add in carry (c3++) - - ADD lt,lt,lt ; lt=lt+lt; - ADD,DC ht,%r0,ht ; add in carry (ht++) - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) - LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - -; -;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba8 - .PROC - .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .ENTRY - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 - SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 - STD c2,56(r_ptr) ; r[7] = c2; - COPY %r0,c2 - - SQR_ADD_C a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 - SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 - STD c3,64(r_ptr) ; r[8] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 - STD c1,72(r_ptr) ; r[9] = c1; - COPY %r0,c1 - - SQR_ADD_C a5L,a5R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 - SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 - STD c2,80(r_ptr) ; r[10] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 - STD c3,88(r_ptr) ; r[11] = c3; - COPY %r0,c3 - - SQR_ADD_C a6L,a6R,c1,c2,c3 - SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 - STD c1,96(r_ptr) ; r[12] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 - STD c2,104(r_ptr) ; r[13] = c2; - COPY %r0,c2 - - SQR_ADD_C a7L,a7R,c3,c1,c2 - STD c3, 112(r_ptr) ; r[14] = c3 - STD c1, 120(r_ptr) ; r[15] = c1 - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - STD c2,56(r_ptr) ; r[7] = c2; - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--------------------------------------------------------------------------- - -MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 - XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,B0L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,B0R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,B0L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - - -; -;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba8 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - FLDD 32(b_ptr),b4 - FLDD 40(b_ptr),b5 - FLDD 48(b_ptr),b6 - FLDD 56(b_ptr),b7 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 - STD c1,48(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 - STD c2,56(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 - STD c3,64(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 - STD c1,72(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 - STD c2,80(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 - STD c3,88(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 - STD c1,96(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 - STD c2,104(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 - STD c3,112(r_ptr) - STD c1,120(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - STD c1,48(r_ptr) - STD c2,56(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--- not PIC .SPACE $TEXT$ -;--- not PIC .SUBSPA $CODE$ -;--- not PIC .SPACE $PRIVATE$,SORT=16 -;--- not PIC .IMPORT $global$,DATA -;--- not PIC .SPACE $TEXT$ -;--- not PIC .SUBSPA $CODE$ -;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c -;--- not PIC C$7 -;--- not PIC .ALIGN 8 -;--- not PIC .STRINGZ "Division would overflow (%d)\n" - .END diff --git a/app/openssl/crypto/bn/asm/pa-risc2W.S b/app/openssl/crypto/bn/asm/pa-risc2W.S deleted file mode 100644 index a9954575..00000000 --- a/app/openssl/crypto/bn/asm/pa-risc2W.S +++ /dev/null @@ -1,1605 +0,0 @@ -; -; PA-RISC 64-bit implementation of bn_asm code -; -; This code is approximately 2x faster than the C version -; for RSA/DSA. -; -; See http://devresource.hp.com/ for more details on the PA-RISC -; architecture. Also see the book "PA-RISC 2.0 Architecture" -; by Gerry Kane for information on the instruction set architecture. -; -; Code written by Chris Ruemmler (with some help from the HP C -; compiler). -; -; The code compiles with HP's assembler -; - - .level 2.0W - .space $TEXT$ - .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY - -; -; Global Register definitions used for the routines. -; -; Some information about HP's runtime architecture for 64-bits. -; -; "Caller save" means the calling function must save the register -; if it wants the register to be preserved. -; "Callee save" means if a function uses the register, it must save -; the value before using it. -; -; For the floating point registers -; -; "caller save" registers: fr4-fr11, fr22-fr31 -; "callee save" registers: fr12-fr21 -; "special" registers: fr0-fr3 (status and exception registers) -; -; For the integer registers -; value zero : r0 -; "caller save" registers: r1,r19-r26 -; "callee save" registers: r3-r18 -; return register : r2 (rp) -; return values ; r28 (ret0,ret1) -; Stack pointer ; r30 (sp) -; global data pointer ; r27 (dp) -; argument pointer ; r29 (ap) -; millicode return ptr ; r31 (also a caller save register) - - -; -; Arguments to the routines -; -r_ptr .reg %r26 -a_ptr .reg %r25 -b_ptr .reg %r24 -num .reg %r24 -w .reg %r23 -n .reg %r23 - - -; -; Globals used in some routines -; - -top_overflow .reg %r29 -high_mask .reg %r22 ; value 0xffffffff80000000L - - -;------------------------------------------------------------------------------ -; -; bn_mul_add_words -; -;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, -; int num, BN_ULONG w) -; -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = num -; arg3 = w -; -; Local register definitions -; - -fm1 .reg %fr22 -fm .reg %fr23 -ht_temp .reg %fr24 -ht_temp_1 .reg %fr25 -lt_temp .reg %fr26 -lt_temp_1 .reg %fr27 -fm1_1 .reg %fr28 -fm_1 .reg %fr29 - -fw_h .reg %fr7L -fw_l .reg %fr7R -fw .reg %fr7 - -fht_0 .reg %fr8L -flt_0 .reg %fr8R -t_float_0 .reg %fr8 - -fht_1 .reg %fr9L -flt_1 .reg %fr9R -t_float_1 .reg %fr9 - -tmp_0 .reg %r31 -tmp_1 .reg %r21 -m_0 .reg %r20 -m_1 .reg %r19 -ht_0 .reg %r1 -ht_1 .reg %r3 -lt_0 .reg %r4 -lt_1 .reg %r5 -m1_0 .reg %r6 -m1_1 .reg %r7 -rp_val .reg %r8 -rp_val_1 .reg %r9 - -bn_mul_add_words - .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN - .proc - .callinfo frame=128 - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP ; Needed to make the loop 16-byte aligned - NOP ; Needed to make the loop 16-byte aligned - - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - STD %r8,40(%sp) ; save r8 - - STD %r9,48(%sp) ; save r9 - COPY %r0,%ret0 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - STD w,56(%sp) ; store w on stack - - CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit - LDO 128(%sp),%sp ; bump stack - - ; - ; The loop is unrolled twice, so if there is only 1 number - ; then go straight to the cleanup code. - ; - CMPIB,= 1,num,bn_mul_add_words_single_top - FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_add_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDD 8(r_ptr),rp_val_1 ; rp[1] - - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1[0] - FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] - - XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h - XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m[0] - FSTD fm_1,-40(%sp) ; -40(sp) = m[1] - - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 - - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 - - LDD -8(%sp),m_0 ; m[0] - LDD -40(%sp),m_1 ; m[1] - LDD -16(%sp),m1_0 ; m1[0] - LDD -48(%sp),m1_1 ; m1[1] - - LDD -24(%sp),ht_0 ; ht[0] - LDD -56(%sp),ht_1 ; ht[1] - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; - - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) - ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) - - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) - ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) - EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 - - EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 - ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) - ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) - - ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - - ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - - LDO -2(num),num ; num = num - 2; - ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - - ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] - ADD,DC ht_1,%r0,%ret0 ; ht[1]++ - LDO 16(a_ptr),a_ptr ; a_ptr += 2 - - STD lt_1,8(r_ptr) ; rp[1] = lt[1] - CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do - LDO 16(r_ptr),r_ptr ; r_ptr += 2 - - CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_add_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDO 8(a_ptr),a_ptr ; a_ptr++ - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 ; m1 = temp1 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD %ret0,tmp_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] - ADD,DC ht_0,%r0,%ret0 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_add_words_exit - .EXIT - LDD -80(%sp),%r9 ; restore r9 - LDD -88(%sp),%r8 ; restore r8 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; arg3 = w - -bn_mul_words - .proc - .callinfo frame=128 - .entry - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - STD %r7,32(%sp) ; save r7 - COPY %r0,%ret0 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - STD w,56(%sp) ; w on stack - - CMPIB,>= 0,num,bn_mul_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; See if only 1 word to do, thus just do cleanup - ; - CMPIB,= 1,num,bn_mul_words_single_top - FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l - - FSTD fm1,-16(%sp) ; -16(sp) = m1 - FSTD fm1_1,-48(%sp) ; -48(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h - - FSTD fm,-8(%sp) ; -8(sp) = m - FSTD fm_1,-40(%sp) ; -40(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h - - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt - LDD -8(%sp),m_0 - LDD -40(%sp),m_1 - - LDD -16(%sp),m1_0 - LDD -48(%sp),m1_1 - LDD -24(%sp),ht_0 - LDD -56(%sp),ht_1 - - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) - ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - EXTRD,U tmp_1,31,32,m_1 ; m>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD lt_1,m1_1,lt_1 ; lt = lt+m1; - ADD,DC ht_1,%r0,ht_1 ; ht++ - ADD %ret0,lt_0,lt_0 ; lt = lt + c (ret0); - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) - ADD,DC ht_1,%r0,ht_1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - STD lt_1,8(r_ptr) ; rp[1] = lt - - COPY ht_1,%ret0 ; carry = ht - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_mul_words_unroll2 - LDO 16(r_ptr),r_ptr ; rp++ - - CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt= lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD %ret0,lt_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - COPY ht_0,%ret0 ; copy carry - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_words_exit - .EXIT - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; - -bn_sqr_words - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - CMPIB,>= 0,num,bn_sqr_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; If only 1, the goto straight to cleanup - ; - CMPIB,= 1,num,bn_sqr_words_single_top - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - -bn_sqr_words_unroll2 - FLDD 0(a_ptr),t_float_0 ; a[0] - FLDD 8(a_ptr),t_float_1 ; a[1] - XMPYU fht_0,flt_0,fm ; m[0] - XMPYU fht_1,flt_1,fm_1 ; m[1] - - FSTD fm,-24(%sp) ; store m[0] - FSTD fm_1,-56(%sp) ; store m[1] - XMPYU flt_0,flt_0,lt_temp ; lt[0] - XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] - - FSTD lt_temp,-16(%sp) ; store lt[0] - FSTD lt_temp_1,-48(%sp) ; store lt[1] - XMPYU fht_0,fht_0,ht_temp ; ht[0] - XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] - - FSTD ht_temp,-8(%sp) ; store ht[0] - FSTD ht_temp_1,-40(%sp) ; store ht[1] - LDD -24(%sp),m_0 - LDD -56(%sp),m_1 - - AND m_0,high_mask,tmp_0 ; m[0] & Mask - AND m_1,high_mask,tmp_1 ; m[1] & Mask - DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 - DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 - - LDD -16(%sp),lt_0 - LDD -48(%sp),lt_1 - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 - EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 - - LDD -8(%sp),ht_0 - LDD -40(%sp),ht_1 - ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 - ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 - - ADD lt_0,m_0,lt_0 ; lt = lt+m - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - STD ht_0,8(r_ptr) ; rp[1] = ht[1] - - ADD lt_1,m_1,lt_1 ; lt = lt+m - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_1,16(r_ptr) ; rp[2] = lt[1] - STD ht_1,24(r_ptr) ; rp[3] = ht[1] - - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_sqr_words_unroll2 - LDO 32(r_ptr),r_ptr ; rp += 4 - - CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_sqr_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,flt_0,fm ; m - FSTD fm,-24(%sp) ; store m - - XMPYU flt_0,flt_0,lt_temp ; lt - FSTD lt_temp,-16(%sp) ; store lt - - XMPYU fht_0,fht_0,ht_temp ; ht - FSTD ht_temp,-8(%sp) ; store ht - - LDD -24(%sp),m_0 ; load m - AND m_0,high_mask,tmp_0 ; m & Mask - DEPD,Z m_0,30,31,m_0 ; m << 32+1 - LDD -16(%sp),lt_0 ; lt - - LDD -8(%sp),ht_0 ; ht - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 - ADD m_0,lt_0,lt_0 ; lt = lt+m - ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 - ADD,DC ht_0,%r0,ht_0 ; ht++ - - STD lt_0,0(r_ptr) ; rp[0] = lt - STD ht_0,8(r_ptr) ; rp[1] = ht - -bn_sqr_words_exit - .EXIT - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - .PROCEND ;in=23,24,25,26,29;out=28; - - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t .reg %r22 -b .reg %r21 -l .reg %r20 - -bn_add_words - .proc - .entry - .callinfo - .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - CMPIB,>= 0,n,bn_add_words_exit - COPY %r0,%ret0 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_add_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_add_words_unroll2 - LDD 0(a_ptr),t - LDD 0(b_ptr),b - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,0(r_ptr) - - LDD 8(a_ptr),t - LDD 8(b_ptr),b - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_add_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_add_words_exit ; are we done? - -bn_add_words_single_top - LDD 0(a_ptr),t - LDD 0(b_ptr),b - - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??) - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,0(r_ptr) - -bn_add_words_exit - .EXIT - BVE (%rp) - NOP - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t1 .reg %r22 -t2 .reg %r21 -sub_tmp1 .reg %r20 -sub_tmp2 .reg %r19 - - -bn_sub_words - .proc - .callinfo - .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - CMPIB,>= 0,n,bn_sub_words_exit - COPY %r0,%ret0 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_sub_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_sub_words_unroll2 - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - STD sub_tmp1,0(r_ptr) - - LDD 8(a_ptr),t1 - LDD 8(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - STD sub_tmp1,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_sub_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? - -bn_sub_words_single_top - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - - STD sub_tmp1,0(r_ptr) - -bn_sub_words_exit - .EXIT - BVE (%rp) - NOP - .PROCEND ;in=23,24,25,26,29;out=28; - -;------------------------------------------------------------------------------ -; -; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) -; -; arg0 = h -; arg1 = l -; arg2 = d -; -; This is mainly just modified assembly from the compiler, thus the -; lack of variable names. -; -;------------------------------------------------------------------------------ -bn_div_words - .proc - .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .IMPORT BN_num_bits_word,CODE,NO_RELOCATION - .IMPORT __iob,DATA - .IMPORT fprintf,CODE,NO_RELOCATION - .IMPORT abort,CODE,NO_RELOCATION - .IMPORT $$div2U,MILLICODE - .entry - STD %r2,-16(%r30) - STD,MA %r3,352(%r30) - STD %r4,-344(%r30) - STD %r5,-336(%r30) - STD %r6,-328(%r30) - STD %r7,-320(%r30) - STD %r8,-312(%r30) - STD %r9,-304(%r30) - STD %r10,-296(%r30) - - STD %r27,-288(%r30) ; save gp - - COPY %r24,%r3 ; save d - COPY %r26,%r4 ; save h (high 64-bits) - LDO -1(%r0),%ret0 ; return -1 by default - - CMPB,*= %r0,%arg2,$D3 ; if (d == 0) - COPY %r25,%r5 ; save l (low 64-bits) - - LDO -48(%r30),%r29 ; create ap - .CALL ;in=26,29;out=28; - B,L BN_num_bits_word,%r2 - COPY %r3,%r26 - LDD -288(%r30),%r27 ; restore gp - LDI 64,%r21 - - CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward) - COPY %ret0,%r24 ; i - MTSARCM %r24 - DEPDI,Z -1,%sar,1,%r29 - CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<= d) - SUB %r4,%r3,%r4 ; h -= d - CMPB,= %r31,%r0,$0000001A ; if (i) - COPY %r0,%r10 ; ret = 0 - MTSARCM %r31 ; i to shift - DEPD,Z %r3,%sar,64,%r3 ; d <<= i; - SUBI 64,%r31,%r19 ; 64 - i; redundent - MTSAR %r19 ; (64 -i) to shift - SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i) - MTSARCM %r31 ; i to shift - DEPD,Z %r5,%sar,64,%r5 ; l <<= i; - -$0000001A - DEPDI,Z -1,31,32,%r19 - EXTRD,U %r3,31,32,%r6 ; dh=(d&0xfff)>>32 - EXTRD,U %r3,63,32,%r8 ; dl = d&0xffffff - LDO 2(%r0),%r9 - STD %r3,-280(%r30) ; "d" to stack - -$0000001C - DEPDI,Z -1,63,32,%r29 ; - EXTRD,U %r4,31,32,%r31 ; h >> 32 - CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div - COPY %r4,%r26 - EXTRD,U %r4,31,32,%r25 - COPY %r6,%r24 - .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) - B,L $$div2U,%r2 - EXTRD,U %r6,31,32,%r23 - DEPD %r28,31,32,%r29 -$D2 - STD %r29,-272(%r30) ; q - AND %r5,%r19,%r24 ; t & 0xffffffff00000000; - EXTRD,U %r24,31,32,%r24 ; ??? - FLDD -272(%r30),%fr7 ; q - FLDD -280(%r30),%fr8 ; d - XMPYU %fr8L,%fr7L,%fr10 - FSTD %fr10,-256(%r30) - XMPYU %fr8L,%fr7R,%fr22 - FSTD %fr22,-264(%r30) - XMPYU %fr8R,%fr7L,%fr11 - XMPYU %fr8R,%fr7R,%fr23 - FSTD %fr11,-232(%r30) - FSTD %fr23,-240(%r30) - LDD -256(%r30),%r28 - DEPD,Z %r28,31,32,%r2 - LDD -264(%r30),%r20 - ADD,L %r20,%r2,%r31 - LDD -232(%r30),%r22 - DEPD,Z %r22,31,32,%r22 - LDD -240(%r30),%r21 - B $00000024 ; enter loop - ADD,L %r21,%r22,%r23 - -$0000002A - LDO -1(%r29),%r29 - SUB %r23,%r8,%r23 -$00000024 - SUB %r4,%r31,%r25 - AND %r25,%r19,%r26 - CMPB,*<>,N %r0,%r26,$00000046 ; (forward) - DEPD,Z %r25,31,32,%r20 - OR %r20,%r24,%r21 - CMPB,*<<,N %r21,%r23,$0000002A ;(backward) - SUB %r31,%r6,%r31 -;-------------Break path--------------------- - -$00000046 - DEPD,Z %r23,31,32,%r25 ;tl - EXTRD,U %r23,31,32,%r26 ;t - AND %r25,%r19,%r24 ;tl = (tl<<32)&0xfffffff0000000L - ADD,L %r31,%r26,%r31 ;th += t; - CMPCLR,*>>= %r5,%r24,%r0 ;if (l>32)); - DEPD,Z %r29,31,32,%r10 ; ret = q<<32 - b $0000001C - DEPD,Z %r28,31,32,%r5 ; l = l << 32 - -$D1 - OR %r10,%r29,%r28 ; ret |= q -$D3 - LDD -368(%r30),%r2 -$D0 - LDD -296(%r30),%r10 - LDD -304(%r30),%r9 - LDD -312(%r30),%r8 - LDD -320(%r30),%r7 - LDD -328(%r30),%r6 - LDD -336(%r30),%r5 - LDD -344(%r30),%r4 - BVE (%r2) - .EXIT - LDD,MB -352(%r30),%r3 - -bn_div_err_case - MFIA %r6 - ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1 - LDO R'bn_div_words-bn_div_err_case(%r1),%r6 - ADDIL LT'__iob,%r27,%r1 - LDD RT'__iob(%r1),%r26 - ADDIL L'C$4-bn_div_words,%r6,%r1 - LDO R'C$4-bn_div_words(%r1),%r25 - LDO 64(%r26),%r26 - .CALL ;in=24,25,26,29;out=28; - B,L fprintf,%r2 - LDO -48(%r30),%r29 - LDD -288(%r30),%r27 - .CALL ;in=29; - B,L abort,%r2 - LDO -48(%r30),%r29 - LDD -288(%r30),%r27 - B $D0 - LDD -368(%r30),%r2 - .PROCEND ;in=24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -; Registers to hold 64-bit values to manipulate. The "L" part -; of the register corresponds to the upper 32-bits, while the "R" -; part corresponds to the lower 32-bits -; -; Note, that when using b6 and b7, the code must save these before -; using them because they are callee save registers -; -; -; Floating point registers to use to save values that -; are manipulated. These don't collide with ftemp1-6 and -; are all caller save registers -; -a0 .reg %fr22 -a0L .reg %fr22L -a0R .reg %fr22R - -a1 .reg %fr23 -a1L .reg %fr23L -a1R .reg %fr23R - -a2 .reg %fr24 -a2L .reg %fr24L -a2R .reg %fr24R - -a3 .reg %fr25 -a3L .reg %fr25L -a3R .reg %fr25R - -a4 .reg %fr26 -a4L .reg %fr26L -a4R .reg %fr26R - -a5 .reg %fr27 -a5L .reg %fr27L -a5R .reg %fr27R - -a6 .reg %fr28 -a6L .reg %fr28L -a6R .reg %fr28R - -a7 .reg %fr29 -a7L .reg %fr29L -a7R .reg %fr29R - -b0 .reg %fr30 -b0L .reg %fr30L -b0R .reg %fr30R - -b1 .reg %fr31 -b1L .reg %fr31L -b1R .reg %fr31R - -; -; Temporary floating point variables, these are all caller save -; registers -; -ftemp1 .reg %fr4 -ftemp2 .reg %fr5 -ftemp3 .reg %fr6 -ftemp4 .reg %fr7 - -; -; The B set of registers when used. -; - -b2 .reg %fr8 -b2L .reg %fr8L -b2R .reg %fr8R - -b3 .reg %fr9 -b3L .reg %fr9L -b3R .reg %fr9R - -b4 .reg %fr10 -b4L .reg %fr10L -b4R .reg %fr10R - -b5 .reg %fr11 -b5L .reg %fr11L -b5R .reg %fr11R - -b6 .reg %fr12 -b6L .reg %fr12L -b6R .reg %fr12R - -b7 .reg %fr13 -b7L .reg %fr13L -b7R .reg %fr13R - -c1 .reg %r21 ; only reg -temp1 .reg %r20 ; only reg -temp2 .reg %r19 ; only reg -temp3 .reg %r31 ; only reg - -m1 .reg %r28 -c2 .reg %r23 -high_one .reg %r1 -ht .reg %r6 -lt .reg %r5 -m .reg %r4 -c3 .reg %r3 - -SQR_ADD_C .macro A0L,A0R,C1,C2,C3 - XMPYU A0L,A0R,ftemp1 ; m - FSTD ftemp1,-24(%sp) ; store m - - XMPYU A0R,A0R,ftemp2 ; lt - FSTD ftemp2,-16(%sp) ; store lt - - XMPYU A0L,A0L,ftemp3 ; ht - FSTD ftemp3,-8(%sp) ; store ht - - LDD -24(%sp),m ; load m - AND m,high_mask,temp2 ; m & Mask - DEPD,Z m,30,31,temp3 ; m << 32+1 - LDD -16(%sp),lt ; lt - - LDD -8(%sp),ht ; ht - EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 - ADD temp3,lt,lt ; lt = lt+m - ADD,L ht,temp1,ht ; ht += temp1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; ht++ - - ADD C2,ht,C2 ; c2=c2+ht - ADD,DC C3,%r0,C3 ; c3++ -.endm - -SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 - XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,A1L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,A1R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,A1L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD ht,ht,ht ; ht=ht+ht; - ADD,DC C3,%r0,C3 ; add in carry (c3++) - - ADD lt,lt,lt ; lt=lt+lt; - ADD,DC ht,%r0,ht ; add in carry (ht++) - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) - LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - -; -;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba8 - .PROC - .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .ENTRY - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 - SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 - STD c2,56(r_ptr) ; r[7] = c2; - COPY %r0,c2 - - SQR_ADD_C a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 - SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 - STD c3,64(r_ptr) ; r[8] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 - STD c1,72(r_ptr) ; r[9] = c1; - COPY %r0,c1 - - SQR_ADD_C a5L,a5R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 - SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 - STD c2,80(r_ptr) ; r[10] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 - STD c3,88(r_ptr) ; r[11] = c3; - COPY %r0,c3 - - SQR_ADD_C a6L,a6R,c1,c2,c3 - SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 - STD c1,96(r_ptr) ; r[12] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 - STD c2,104(r_ptr) ; r[13] = c2; - COPY %r0,c2 - - SQR_ADD_C a7L,a7R,c3,c1,c2 - STD c3, 112(r_ptr) ; r[14] = c3 - STD c1, 120(r_ptr) ; r[15] = c1 - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - STD c2,56(r_ptr) ; r[7] = c2; - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--------------------------------------------------------------------------- - -MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 - XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,B0L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,B0R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,B0L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - - -; -;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba8 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - FLDD 32(b_ptr),b4 - FLDD 40(b_ptr),b5 - FLDD 48(b_ptr),b6 - FLDD 56(b_ptr),b7 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 - STD c1,48(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 - STD c2,56(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 - STD c3,64(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 - STD c1,72(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 - STD c2,80(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 - STD c3,88(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 - STD c1,96(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 - STD c2,104(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 - STD c3,112(r_ptr) - STD c1,120(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - STD c1,48(r_ptr) - STD c2,56(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - - .SPACE $TEXT$ - .SUBSPA $CODE$ - .SPACE $PRIVATE$,SORT=16 - .IMPORT $global$,DATA - .SPACE $TEXT$ - .SUBSPA $CODE$ - .SUBSPA $LIT$,ACCESS=0x2c -C$4 - .ALIGN 8 - .STRINGZ "Division would overflow (%d)\n" - .END diff --git a/app/openssl/crypto/bn/bn_mont.c b/app/openssl/crypto/bn/bn_mont.c index ee8532c7..427b5cf4 100644 --- a/app/openssl/crypto/bn/bn_mont.c +++ b/app/openssl/crypto/bn/bn_mont.c @@ -478,38 +478,32 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock, const BIGNUM *mod, BN_CTX *ctx) { + int got_write_lock = 0; BN_MONT_CTX *ret; CRYPTO_r_lock(lock); - ret = *pmont; - CRYPTO_r_unlock(lock); - if (ret) - return ret; - - /* We don't want to serialise globally while doing our lazy-init math in - * BN_MONT_CTX_set. That punishes threads that are doing independent - * things. Instead, punish the case where more than one thread tries to - * lazy-init the same 'pmont', by having each do the lazy-init math work - * independently and only use the one from the thread that wins the race - * (the losers throw away the work they've done). */ - ret = BN_MONT_CTX_new(); - if (!ret) - return NULL; - if (!BN_MONT_CTX_set(ret, mod, ctx)) + if (!*pmont) { - BN_MONT_CTX_free(ret); - return NULL; - } + CRYPTO_r_unlock(lock); + CRYPTO_w_lock(lock); + got_write_lock = 1; - /* The locked compare-and-set, after the local work is done. */ - CRYPTO_w_lock(lock); - if (*pmont) - { - BN_MONT_CTX_free(ret); - ret = *pmont; + if (!*pmont) + { + ret = BN_MONT_CTX_new(); + if (ret && !BN_MONT_CTX_set(ret, mod, ctx)) + BN_MONT_CTX_free(ret); + else + *pmont = ret; + } } + + ret = *pmont; + + if (got_write_lock) + CRYPTO_w_unlock(lock); else - *pmont = ret; - CRYPTO_w_unlock(lock); + CRYPTO_r_unlock(lock); + return ret; } diff --git a/app/openssl/crypto/cms/cms_env.c b/app/openssl/crypto/cms/cms_env.c index add00bf9..be20b1c0 100644 --- a/app/openssl/crypto/cms/cms_env.c +++ b/app/openssl/crypto/cms/cms_env.c @@ -185,8 +185,6 @@ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, if (flags & CMS_USE_KEYID) { ktri->version = 2; - if (env->version < 2) - env->version = 2; type = CMS_RECIPINFO_KEYIDENTIFIER; } else diff --git a/app/openssl/crypto/cms/cms_sd.c b/app/openssl/crypto/cms/cms_sd.c index 51dd33a1..77fbd135 100644 --- a/app/openssl/crypto/cms/cms_sd.c +++ b/app/openssl/crypto/cms/cms_sd.c @@ -158,8 +158,8 @@ static void cms_sd_set_version(CMS_SignedData *sd) if (sd->version < 3) sd->version = 3; } - else if (si->version < 1) - si->version = 1; + else + sd->version = 1; } if (sd->version < 1) diff --git a/app/openssl/crypto/cms/cms_smime.c b/app/openssl/crypto/cms/cms_smime.c index 1af9f3a6..8c56e3a8 100644 --- a/app/openssl/crypto/cms/cms_smime.c +++ b/app/openssl/crypto/cms/cms_smime.c @@ -611,7 +611,7 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) STACK_OF(CMS_RecipientInfo) *ris; CMS_RecipientInfo *ri; int i, r; - int debug = 0, ri_match = 0; + int debug = 0; ris = CMS_get0_RecipientInfos(cms); if (ris) debug = cms->d.envelopedData->encryptedContentInfo->debug; @@ -620,7 +620,6 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) ri = sk_CMS_RecipientInfo_value(ris, i); if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_TRANS) continue; - ri_match = 1; /* If we have a cert try matching RecipientInfo * otherwise try them all. */ @@ -656,7 +655,7 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) } } /* If no cert and not debugging always return success */ - if (ri_match && !cert && !debug) + if (!cert && !debug) { ERR_clear_error(); return 1; diff --git a/app/openssl/crypto/dso/dso_dlfcn.c b/app/openssl/crypto/dso/dso_dlfcn.c index 4a56aace..5f225480 100644 --- a/app/openssl/crypto/dso/dso_dlfcn.c +++ b/app/openssl/crypto/dso/dso_dlfcn.c @@ -464,7 +464,7 @@ static int dlfcn_pathbyaddr(void *addr,char *path,int sz) return len; } - ERR_add_error_data(2, "dlfcn_pathbyaddr(): ", dlerror()); + ERR_add_error_data(4, "dlfcn_pathbyaddr(): ", dlerror()); #endif return -1; } diff --git a/app/openssl/crypto/ec/ec_ameth.c b/app/openssl/crypto/ec/ec_ameth.c index f715a238..0ce45240 100644 --- a/app/openssl/crypto/ec/ec_ameth.c +++ b/app/openssl/crypto/ec/ec_ameth.c @@ -352,7 +352,6 @@ static int eckey_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey) EC_KEY_set_enc_flags(ec_key, old_flags); OPENSSL_free(ep); ECerr(EC_F_ECKEY_PRIV_ENCODE, ERR_R_EC_LIB); - return 0; } /* restore old encoding flags */ EC_KEY_set_enc_flags(ec_key, old_flags); diff --git a/app/openssl/crypto/ec/ec_asn1.c b/app/openssl/crypto/ec/ec_asn1.c index e94f34e1..145807b6 100644 --- a/app/openssl/crypto/ec/ec_asn1.c +++ b/app/openssl/crypto/ec/ec_asn1.c @@ -1435,11 +1435,8 @@ int i2o_ECPublicKey(EC_KEY *a, unsigned char **out) *out, buf_len, NULL)) { ECerr(EC_F_I2O_ECPUBLICKEY, ERR_R_EC_LIB); - if (new_buffer) - { - OPENSSL_free(*out); - *out = NULL; - } + OPENSSL_free(*out); + *out = NULL; return 0; } if (!new_buffer) diff --git a/app/openssl/crypto/ec/ec_lcl.h b/app/openssl/crypto/ec/ec_lcl.h index dae91483..6f714c75 100644 --- a/app/openssl/crypto/ec/ec_lcl.h +++ b/app/openssl/crypto/ec/ec_lcl.h @@ -405,7 +405,7 @@ int ec_GF2m_simple_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, int ec_GF2m_precompute_mult(EC_GROUP *group, BN_CTX *ctx); int ec_GF2m_have_precompute_mult(const EC_GROUP *group); -#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +#ifndef OPENSSL_EC_NISTP_64_GCC_128 /* method functions in ecp_nistp224.c */ int ec_GFp_nistp224_group_init(EC_GROUP *group); int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *n, BN_CTX *); diff --git a/app/openssl/crypto/evp/bio_b64.c b/app/openssl/crypto/evp/bio_b64.c index 16863fe2..ac6d441a 100644 --- a/app/openssl/crypto/evp/bio_b64.c +++ b/app/openssl/crypto/evp/bio_b64.c @@ -226,7 +226,6 @@ static int b64_read(BIO *b, char *out, int outl) else if (ctx->start) { q=p=(unsigned char *)ctx->tmp; - num = 0; for (j=0; jkey_len * 8, &gctx->ks.ks); + aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)aesni_encrypt); gctx->ctr = (ctr128_f)aesni_ctr32_encrypt_blocks; @@ -355,19 +355,19 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, /* key_len is two AES keys */ if (enc) { - aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); + aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); xctx->xts.block1 = (block128_f)aesni_encrypt; xctx->stream = aesni_xts_encrypt; } else { - aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); + aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); xctx->xts.block1 = (block128_f)aesni_decrypt; xctx->stream = aesni_xts_decrypt; } aesni_set_encrypt_key(key + ctx->key_len/2, - ctx->key_len * 4, &xctx->ks2.ks); + ctx->key_len * 4, &xctx->ks2); xctx->xts.block2 = (block128_f)aesni_encrypt; xctx->xts.key1 = &xctx->ks1; @@ -394,7 +394,7 @@ static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) { - aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); + aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f)aesni_encrypt); cctx->str = enc?(ccm128_f)aesni_ccm64_encrypt_blocks : @@ -482,38 +482,14 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \ NULL,NULL,aes_##mode##_ctrl,NULL }; \ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return &aes_##keylen##_##mode; } + #endif -#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__)) +#if defined(AES_ASM) && defined(BSAES_ASM) && (defined(__arm__) || defined(__arm)) #include "arm_arch.h" #if __ARM_ARCH__>=7 -# if defined(BSAES_ASM) -# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) -# endif -# define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES) -# define HWAES_set_encrypt_key aes_v8_set_encrypt_key -# define HWAES_set_decrypt_key aes_v8_set_decrypt_key -# define HWAES_encrypt aes_v8_encrypt -# define HWAES_decrypt aes_v8_decrypt -# define HWAES_cbc_encrypt aes_v8_cbc_encrypt -# define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks -#endif +#define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) #endif - -#if defined(HWAES_CAPABLE) -int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key); -int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key); -void HWAES_encrypt(const unsigned char *in, unsigned char *out, - const AES_KEY *key); -void HWAES_decrypt(const unsigned char *in, unsigned char *out, - const AES_KEY *key); -void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const AES_KEY *key, - unsigned char *ivec, const int enc); -void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, - size_t len, const AES_KEY *key, const unsigned char ivec[16]); #endif #define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ @@ -534,23 +510,10 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, mode = ctx->cipher->flags & EVP_CIPH_MODE; if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) -#ifdef HWAES_CAPABLE - if (HWAES_CAPABLE) - { - ret = HWAES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); - dat->block = (block128_f)HWAES_decrypt; - dat->stream.cbc = NULL; -#ifdef HWAES_cbc_encrypt - if (mode==EVP_CIPH_CBC_MODE) - dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt; -#endif - } - else -#endif #ifdef BSAES_CAPABLE if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE) { - ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); + ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks); dat->block = (block128_f)AES_decrypt; dat->stream.cbc = (cbc128_f)bsaes_cbc_encrypt; } @@ -559,7 +522,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); + ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks); dat->block = (block128_f)vpaes_decrypt; dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? (cbc128_f)vpaes_cbc_encrypt : @@ -568,37 +531,17 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, else #endif { - ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); + ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks); dat->block = (block128_f)AES_decrypt; dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? (cbc128_f)AES_cbc_encrypt : NULL; } else -#ifdef HWAES_CAPABLE - if (HWAES_CAPABLE) - { - ret = HWAES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); - dat->block = (block128_f)HWAES_encrypt; - dat->stream.cbc = NULL; -#ifdef HWAES_cbc_encrypt - if (mode==EVP_CIPH_CBC_MODE) - dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt; - else -#endif -#ifdef HWAES_ctr32_encrypt_blocks - if (mode==EVP_CIPH_CTR_MODE) - dat->stream.ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks; - else -#endif - (void)0; /* terminate potentially open 'else' */ - } - else -#endif #ifdef BSAES_CAPABLE if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE) { - ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); + ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks); dat->block = (block128_f)AES_encrypt; dat->stream.ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; } @@ -607,7 +550,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); + ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks); dat->block = (block128_f)vpaes_encrypt; dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? (cbc128_f)vpaes_cbc_encrypt : @@ -616,7 +559,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, else #endif { - ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); + ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks); dat->block = (block128_f)AES_encrypt; dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? (cbc128_f)AES_cbc_encrypt : @@ -887,25 +830,10 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) { do { -#ifdef HWAES_CAPABLE - if (HWAES_CAPABLE) - { - HWAES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); - CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, - (block128_f)HWAES_encrypt); -#ifdef HWAES_ctr32_encrypt_blocks - gctx->ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks; -#else - gctx->ctr = NULL; -#endif - break; - } - else -#endif #ifdef BSAES_CAPABLE if (BSAES_CAPABLE) { - AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); + AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, (block128_f)AES_encrypt); gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; @@ -916,7 +844,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); + vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, (block128_f)vpaes_encrypt); gctx->ctr = NULL; @@ -926,7 +854,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, #endif (void)0; /* terminate potentially open 'else' */ - AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encrypt); #ifdef AES_CTR_ASM gctx->ctr = (ctr128_f)AES_ctr32_encrypt; @@ -1147,50 +1075,29 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, xctx->stream = NULL; #endif /* key_len is two AES keys */ -#ifdef HWAES_CAPABLE - if (HWAES_CAPABLE) - { - if (enc) - { - HWAES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); - xctx->xts.block1 = (block128_f)HWAES_encrypt; - } - else - { - HWAES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); - xctx->xts.block1 = (block128_f)HWAES_decrypt; - } - - HWAES_set_encrypt_key(key + ctx->key_len/2, - ctx->key_len * 4, &xctx->ks2.ks); - xctx->xts.block2 = (block128_f)HWAES_encrypt; - - xctx->xts.key1 = &xctx->ks1; - break; - } - else -#endif +#if !(defined(__arm__) || defined(__arm)) /* not yet? */ #ifdef BSAES_CAPABLE if (BSAES_CAPABLE) xctx->stream = enc ? bsaes_xts_encrypt : bsaes_xts_decrypt; else #endif +#endif #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { if (enc) { - vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); + vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); xctx->xts.block1 = (block128_f)vpaes_encrypt; } else { - vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); + vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); xctx->xts.block1 = (block128_f)vpaes_decrypt; } vpaes_set_encrypt_key(key + ctx->key_len/2, - ctx->key_len * 4, &xctx->ks2.ks); + ctx->key_len * 4, &xctx->ks2); xctx->xts.block2 = (block128_f)vpaes_encrypt; xctx->xts.key1 = &xctx->ks1; @@ -1202,17 +1109,17 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (enc) { - AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); + AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); xctx->xts.block1 = (block128_f)AES_encrypt; } else { - AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); + AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); xctx->xts.block1 = (block128_f)AES_decrypt; } AES_set_encrypt_key(key + ctx->key_len/2, - ctx->key_len * 4, &xctx->ks2.ks); + ctx->key_len * 4, &xctx->ks2); xctx->xts.block2 = (block128_f)AES_encrypt; xctx->xts.key1 = &xctx->ks1; @@ -1320,23 +1227,10 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) do { -#ifdef HWAES_CAPABLE - if (HWAES_CAPABLE) - { - HWAES_set_encrypt_key(key,ctx->key_len*8,&cctx->ks.ks); - - CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, - &cctx->ks, (block128_f)HWAES_encrypt); - cctx->str = NULL; - cctx->key_set = 1; - break; - } - else -#endif #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks.ks); + vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f)vpaes_encrypt); cctx->str = NULL; @@ -1344,7 +1238,7 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, break; } #endif - AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); + AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f)AES_encrypt); cctx->str = NULL; diff --git a/app/openssl/crypto/evp/encode.c b/app/openssl/crypto/evp/encode.c index 4654bdc6..28546a84 100644 --- a/app/openssl/crypto/evp/encode.c +++ b/app/openssl/crypto/evp/encode.c @@ -324,7 +324,6 @@ int EVP_DecodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl, v=EVP_DecodeBlock(out,d,n); n=0; if (v < 0) { rv=0; goto end; } - if (eof > v) { rv=-1; goto end; } ret+=(v-eof); } else diff --git a/app/openssl/crypto/evp/p_lib.c b/app/openssl/crypto/evp/p_lib.c index 8ee53c1d..bd1977d7 100644 --- a/app/openssl/crypto/evp/p_lib.c +++ b/app/openssl/crypto/evp/p_lib.c @@ -202,7 +202,7 @@ EVP_PKEY *EVP_PKEY_new(void) EVP_PKEY *EVP_PKEY_dup(EVP_PKEY *pkey) { - CRYPTO_add(&pkey->references,1,CRYPTO_LOCK_EVP_PKEY); + CRYPTO_add(&pkey->references, 1, CRYPTO_LOCK_EVP_PKEY); return pkey; } diff --git a/app/openssl/crypto/modes/asm/ghash-armv4.S b/app/openssl/crypto/modes/asm/ghash-armv4.S index 6c453774..d66c4cbf 100644 --- a/app/openssl/crypto/modes/asm/ghash-armv4.S +++ b/app/openssl/crypto/modes/asm/ghash-armv4.S @@ -309,213 +309,99 @@ gcm_gmult_4bit: #if __ARM_ARCH__>=7 .fpu neon -.global gcm_init_neon -.type gcm_init_neon,%function -.align 4 -gcm_init_neon: - vld1.64 d7,[r1,:64]! @ load H - vmov.i8 q8,#0xe1 - vld1.64 d6,[r1,:64] - vshl.i64 d17,#57 - vshr.u64 d16,#63 @ t0=0xc2....01 - vdup.8 q9,d7[7] - vshr.u64 d26,d6,#63 - vshr.s8 q9,#7 @ broadcast carry bit - vshl.i64 q3,q3,#1 - vand q8,q8,q9 - vorr d7,d26 @ H<<<=1 - veor q3,q3,q8 @ twisted H - vstmia r0,{q3} - - bx lr @ bx lr -.size gcm_init_neon,.-gcm_init_neon - .global gcm_gmult_neon .type gcm_gmult_neon,%function .align 4 gcm_gmult_neon: - vld1.64 d7,[r0,:64]! @ load Xi - vld1.64 d6,[r0,:64]! - vmov.i64 d29,#0x0000ffffffffffff - vldmia r1,{d26-d27} @ load twisted H - vmov.i64 d30,#0x00000000ffffffff + sub r1,#16 @ point at H in GCM128_CTX + vld1.64 d29,[r0,:64]!@ load Xi + vmov.i32 d5,#0xe1 @ our irreducible polynomial + vld1.64 d28,[r0,:64]! + vshr.u64 d5,#32 + vldmia r1,{d0-d1} @ load H + veor q12,q12 #ifdef __ARMEL__ - vrev64.8 q3,q3 + vrev64.8 q14,q14 #endif - vmov.i64 d31,#0x000000000000ffff - veor d28,d26,d27 @ Karatsuba pre-processing + veor q13,q13 + veor q11,q11 + mov r1,#16 + veor q10,q10 mov r3,#16 - b .Lgmult_neon + veor d2,d2 + vdup.8 d4,d28[0] @ broadcast lowest byte + b .Linner_neon .size gcm_gmult_neon,.-gcm_gmult_neon .global gcm_ghash_neon .type gcm_ghash_neon,%function .align 4 gcm_ghash_neon: - vld1.64 d1,[r0,:64]! @ load Xi - vld1.64 d0,[r0,:64]! - vmov.i64 d29,#0x0000ffffffffffff - vldmia r1,{d26-d27} @ load twisted H - vmov.i64 d30,#0x00000000ffffffff + vld1.64 d21,[r0,:64]! @ load Xi + vmov.i32 d5,#0xe1 @ our irreducible polynomial + vld1.64 d20,[r0,:64]! + vshr.u64 d5,#32 + vldmia r0,{d0-d1} @ load H + veor q12,q12 + nop #ifdef __ARMEL__ - vrev64.8 q0,q0 + vrev64.8 q10,q10 #endif - vmov.i64 d31,#0x000000000000ffff - veor d28,d26,d27 @ Karatsuba pre-processing - -.Loop_neon: - vld1.64 d7,[r2]! @ load inp - vld1.64 d6,[r2]! +.Louter_neon: + vld1.64 d29,[r2]! @ load inp + veor q13,q13 + vld1.64 d28,[r2]! + veor q11,q11 + mov r1,#16 #ifdef __ARMEL__ - vrev64.8 q3,q3 + vrev64.8 q14,q14 #endif - veor q3,q0 @ inp^=Xi -.Lgmult_neon: - vext.8 d16, d26, d26, #1 @ A1 - vmull.p8 q8, d16, d6 @ F = A1*B - vext.8 d0, d6, d6, #1 @ B1 - vmull.p8 q0, d26, d0 @ E = A*B1 - vext.8 d18, d26, d26, #2 @ A2 - vmull.p8 q9, d18, d6 @ H = A2*B - vext.8 d22, d6, d6, #2 @ B2 - vmull.p8 q11, d26, d22 @ G = A*B2 - vext.8 d20, d26, d26, #3 @ A3 - veor q8, q8, q0 @ L = E + F - vmull.p8 q10, d20, d6 @ J = A3*B - vext.8 d0, d6, d6, #3 @ B3 - veor q9, q9, q11 @ M = G + H - vmull.p8 q0, d26, d0 @ I = A*B3 - veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 - vand d17, d17, d29 - vext.8 d22, d6, d6, #4 @ B4 - veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 - vand d19, d19, d30 - vmull.p8 q11, d26, d22 @ K = A*B4 - veor q10, q10, q0 @ N = I + J - veor d16, d16, d17 - veor d18, d18, d19 - veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 - vand d21, d21, d31 - vext.8 q8, q8, q8, #15 - veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 - vmov.i64 d23, #0 - vext.8 q9, q9, q9, #14 - veor d20, d20, d21 - vmull.p8 q0, d26, d6 @ D = A*B - vext.8 q11, q11, q11, #12 - vext.8 q10, q10, q10, #13 - veor q8, q8, q9 - veor q10, q10, q11 - veor q0, q0, q8 - veor q0, q0, q10 - veor d6,d6,d7 @ Karatsuba pre-processing - vext.8 d16, d28, d28, #1 @ A1 - vmull.p8 q8, d16, d6 @ F = A1*B - vext.8 d2, d6, d6, #1 @ B1 - vmull.p8 q1, d28, d2 @ E = A*B1 - vext.8 d18, d28, d28, #2 @ A2 - vmull.p8 q9, d18, d6 @ H = A2*B - vext.8 d22, d6, d6, #2 @ B2 - vmull.p8 q11, d28, d22 @ G = A*B2 - vext.8 d20, d28, d28, #3 @ A3 - veor q8, q8, q1 @ L = E + F - vmull.p8 q10, d20, d6 @ J = A3*B - vext.8 d2, d6, d6, #3 @ B3 - veor q9, q9, q11 @ M = G + H - vmull.p8 q1, d28, d2 @ I = A*B3 - veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 - vand d17, d17, d29 - vext.8 d22, d6, d6, #4 @ B4 - veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 - vand d19, d19, d30 - vmull.p8 q11, d28, d22 @ K = A*B4 - veor q10, q10, q1 @ N = I + J - veor d16, d16, d17 - veor d18, d18, d19 - veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 - vand d21, d21, d31 - vext.8 q8, q8, q8, #15 - veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 - vmov.i64 d23, #0 - vext.8 q9, q9, q9, #14 - veor d20, d20, d21 - vmull.p8 q1, d28, d6 @ D = A*B - vext.8 q11, q11, q11, #12 - vext.8 q10, q10, q10, #13 - veor q8, q8, q9 - veor q10, q10, q11 - veor q1, q1, q8 - veor q1, q1, q10 - vext.8 d16, d27, d27, #1 @ A1 - vmull.p8 q8, d16, d7 @ F = A1*B - vext.8 d4, d7, d7, #1 @ B1 - vmull.p8 q2, d27, d4 @ E = A*B1 - vext.8 d18, d27, d27, #2 @ A2 - vmull.p8 q9, d18, d7 @ H = A2*B - vext.8 d22, d7, d7, #2 @ B2 - vmull.p8 q11, d27, d22 @ G = A*B2 - vext.8 d20, d27, d27, #3 @ A3 - veor q8, q8, q2 @ L = E + F - vmull.p8 q10, d20, d7 @ J = A3*B - vext.8 d4, d7, d7, #3 @ B3 - veor q9, q9, q11 @ M = G + H - vmull.p8 q2, d27, d4 @ I = A*B3 - veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 - vand d17, d17, d29 - vext.8 d22, d7, d7, #4 @ B4 - veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 - vand d19, d19, d30 - vmull.p8 q11, d27, d22 @ K = A*B4 - veor q10, q10, q2 @ N = I + J - veor d16, d16, d17 - veor d18, d18, d19 - veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 - vand d21, d21, d31 - vext.8 q8, q8, q8, #15 - veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 - vmov.i64 d23, #0 - vext.8 q9, q9, q9, #14 - veor d20, d20, d21 - vmull.p8 q2, d27, d7 @ D = A*B - vext.8 q11, q11, q11, #12 - vext.8 q10, q10, q10, #13 - veor q8, q8, q9 - veor q10, q10, q11 - veor q2, q2, q8 - veor q2, q2, q10 - veor q1,q1,q0 @ Karatsuba post-processing - veor q1,q1,q2 - veor d1,d1,d2 - veor d4,d4,d3 @ Xh|Xl - 256-bit result + veor d2,d2 + veor q14,q10 @ inp^=Xi + veor q10,q10 + vdup.8 d4,d28[0] @ broadcast lowest byte +.Linner_neon: + subs r1,r1,#1 + vmull.p8 q9,d1,d4 @ H.lo·Xi[i] + vmull.p8 q8,d0,d4 @ H.hi·Xi[i] + vext.8 q14,q12,#1 @ IN>>=8 + + veor q10,q13 @ modulo-scheduled part + vshl.i64 d22,#48 + vdup.8 d4,d28[0] @ broadcast lowest byte + veor d3,d18,d20 + + veor d21,d22 + vuzp.8 q9,q8 + vsli.8 d2,d3,#1 @ compose the "carry" byte + vext.8 q10,q12,#1 @ Z>>=8 - @ equivalent of reduction_avx from ghash-x86_64.pl - vshl.i64 q9,q0,#57 @ 1st phase - vshl.i64 q10,q0,#62 - veor q10,q10,q9 @ - vshl.i64 q9,q0,#63 - veor q10, q10, q9 @ - veor d1,d1,d20 @ - veor d4,d4,d21 + vmull.p8 q11,d2,d5 @ "carry"·0xe1 + vshr.u8 d2,d3,#7 @ save Z's bottom bit + vext.8 q13,q9,q12,#1 @ Qlo>>=8 + veor q10,q8 + bne .Linner_neon - vshr.u64 q10,q0,#1 @ 2nd phase - veor q2,q2,q0 - veor q0,q0,q10 @ - vshr.u64 q10,q10,#6 - vshr.u64 q0,q0,#1 @ - veor q0,q0,q2 @ - veor q0,q0,q10 @ + veor q10,q13 @ modulo-scheduled artefact + vshl.i64 d22,#48 + veor d21,d22 + @ finalization, normalize Z:Zo + vand d2,d5 @ suffices to mask the bit + vshr.u64 d3,d20,#63 + vshl.i64 q10,#1 subs r3,#16 - bne .Loop_neon + vorr q10,q1 @ Z=Z:Zo<<1 + bne .Louter_neon #ifdef __ARMEL__ - vrev64.8 q0,q0 + vrev64.8 q10,q10 #endif sub r0,#16 - vst1.64 d1,[r0,:64]! @ write out Xi - vst1.64 d0,[r0,:64] + vst1.64 d21,[r0,:64]! @ write out Xi + vst1.64 d20,[r0,:64] - bx lr @ bx lr + .word 0xe12fff1e .size gcm_ghash_neon,.-gcm_ghash_neon #endif .asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by " diff --git a/app/openssl/crypto/modes/asm/ghash-armv4.pl b/app/openssl/crypto/modes/asm/ghash-armv4.pl index b79ecbcc..e46f8e34 100644 --- a/app/openssl/crypto/modes/asm/ghash-armv4.pl +++ b/app/openssl/crypto/modes/asm/ghash-armv4.pl @@ -35,20 +35,6 @@ # Add NEON implementation featuring polynomial multiplication, i.e. no # lookup tables involved. On Cortex A8 it was measured to process one # byte in 15 cycles or 55% faster than integer-only code. -# -# April 2014 -# -# Switch to multiplication algorithm suggested in paper referred -# below and combine it with reduction algorithm from x86 module. -# Performance improvement over previous version varies from 65% on -# Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8 -# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 - -# in 9.33. -# -# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software -# Polynomial Multiplication on ARM Processors using the NEON Engine. -# -# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf # ==================================================================== # Note about "528B" variant. In ARM case it makes lesser sense to @@ -317,160 +303,117 @@ $code.=<<___; .size gcm_gmult_4bit,.-gcm_gmult_4bit ___ { -my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3)); -my ($t0,$t1,$t2,$t3)=map("q$_",(8..12)); -my ($Hlo,$Hhi,$Hhl,$k48,$k32,$k16)=map("d$_",(26..31)); +my $cnt=$Htbl; # $Htbl is used once in the very beginning -sub clmul64x64 { -my ($r,$a,$b)=@_; -$code.=<<___; - vext.8 $t0#lo, $a, $a, #1 @ A1 - vmull.p8 $t0, $t0#lo, $b @ F = A1*B - vext.8 $r#lo, $b, $b, #1 @ B1 - vmull.p8 $r, $a, $r#lo @ E = A*B1 - vext.8 $t1#lo, $a, $a, #2 @ A2 - vmull.p8 $t1, $t1#lo, $b @ H = A2*B - vext.8 $t3#lo, $b, $b, #2 @ B2 - vmull.p8 $t3, $a, $t3#lo @ G = A*B2 - vext.8 $t2#lo, $a, $a, #3 @ A3 - veor $t0, $t0, $r @ L = E + F - vmull.p8 $t2, $t2#lo, $b @ J = A3*B - vext.8 $r#lo, $b, $b, #3 @ B3 - veor $t1, $t1, $t3 @ M = G + H - vmull.p8 $r, $a, $r#lo @ I = A*B3 - veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8 - vand $t0#hi, $t0#hi, $k48 - vext.8 $t3#lo, $b, $b, #4 @ B4 - veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16 - vand $t1#hi, $t1#hi, $k32 - vmull.p8 $t3, $a, $t3#lo @ K = A*B4 - veor $t2, $t2, $r @ N = I + J - veor $t0#lo, $t0#lo, $t0#hi - veor $t1#lo, $t1#lo, $t1#hi - veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24 - vand $t2#hi, $t2#hi, $k16 - vext.8 $t0, $t0, $t0, #15 - veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32 - vmov.i64 $t3#hi, #0 - vext.8 $t1, $t1, $t1, #14 - veor $t2#lo, $t2#lo, $t2#hi - vmull.p8 $r, $a, $b @ D = A*B - vext.8 $t3, $t3, $t3, #12 - vext.8 $t2, $t2, $t2, #13 - veor $t0, $t0, $t1 - veor $t2, $t2, $t3 - veor $r, $r, $t0 - veor $r, $r, $t2 -___ -} +my ($Hhi, $Hlo, $Zo, $T, $xi, $mod) = map("d$_",(0..7)); +my ($Qhi, $Qlo, $Z, $R, $zero, $Qpost, $IN) = map("q$_",(8..15)); + +# Z:Zo keeps 128-bit result shifted by 1 to the right, with bottom bit +# in Zo. Or should I say "top bit", because GHASH is specified in +# reverse bit order? Otherwise straightforward 128-bt H by one input +# byte multiplication and modulo-reduction, times 16. + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } +sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } $code.=<<___; #if __ARM_ARCH__>=7 .fpu neon -.global gcm_init_neon -.type gcm_init_neon,%function -.align 4 -gcm_init_neon: - vld1.64 $IN#hi,[r1,:64]! @ load H - vmov.i8 $t0,#0xe1 - vld1.64 $IN#lo,[r1,:64] - vshl.i64 $t0#hi,#57 - vshr.u64 $t0#lo,#63 @ t0=0xc2....01 - vdup.8 $t1,$IN#hi[7] - vshr.u64 $Hlo,$IN#lo,#63 - vshr.s8 $t1,#7 @ broadcast carry bit - vshl.i64 $IN,$IN,#1 - vand $t0,$t0,$t1 - vorr $IN#hi,$Hlo @ H<<<=1 - veor $IN,$IN,$t0 @ twisted H - vstmia r0,{$IN} - - ret @ bx lr -.size gcm_init_neon,.-gcm_init_neon - .global gcm_gmult_neon .type gcm_gmult_neon,%function .align 4 gcm_gmult_neon: - vld1.64 $IN#hi,[$Xi,:64]! @ load Xi - vld1.64 $IN#lo,[$Xi,:64]! - vmov.i64 $k48,#0x0000ffffffffffff - vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H - vmov.i64 $k32,#0x00000000ffffffff + sub $Htbl,#16 @ point at H in GCM128_CTX + vld1.64 `&Dhi("$IN")`,[$Xi,:64]!@ load Xi + vmov.i32 $mod,#0xe1 @ our irreducible polynomial + vld1.64 `&Dlo("$IN")`,[$Xi,:64]! + vshr.u64 $mod,#32 + vldmia $Htbl,{$Hhi-$Hlo} @ load H + veor $zero,$zero #ifdef __ARMEL__ vrev64.8 $IN,$IN #endif - vmov.i64 $k16,#0x000000000000ffff - veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing + veor $Qpost,$Qpost + veor $R,$R + mov $cnt,#16 + veor $Z,$Z mov $len,#16 - b .Lgmult_neon + veor $Zo,$Zo + vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte + b .Linner_neon .size gcm_gmult_neon,.-gcm_gmult_neon .global gcm_ghash_neon .type gcm_ghash_neon,%function .align 4 gcm_ghash_neon: - vld1.64 $Xl#hi,[$Xi,:64]! @ load Xi - vld1.64 $Xl#lo,[$Xi,:64]! - vmov.i64 $k48,#0x0000ffffffffffff - vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H - vmov.i64 $k32,#0x00000000ffffffff + vld1.64 `&Dhi("$Z")`,[$Xi,:64]! @ load Xi + vmov.i32 $mod,#0xe1 @ our irreducible polynomial + vld1.64 `&Dlo("$Z")`,[$Xi,:64]! + vshr.u64 $mod,#32 + vldmia $Xi,{$Hhi-$Hlo} @ load H + veor $zero,$zero + nop #ifdef __ARMEL__ - vrev64.8 $Xl,$Xl + vrev64.8 $Z,$Z #endif - vmov.i64 $k16,#0x000000000000ffff - veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing - -.Loop_neon: - vld1.64 $IN#hi,[$inp]! @ load inp - vld1.64 $IN#lo,[$inp]! +.Louter_neon: + vld1.64 `&Dhi($IN)`,[$inp]! @ load inp + veor $Qpost,$Qpost + vld1.64 `&Dlo($IN)`,[$inp]! + veor $R,$R + mov $cnt,#16 #ifdef __ARMEL__ vrev64.8 $IN,$IN #endif - veor $IN,$Xl @ inp^=Xi -.Lgmult_neon: -___ - &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo -$code.=<<___; - veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing -___ - &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi) - &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi -$code.=<<___; - veor $Xm,$Xm,$Xl @ Karatsuba post-processing - veor $Xm,$Xm,$Xh - veor $Xl#hi,$Xl#hi,$Xm#lo - veor $Xh#lo,$Xh#lo,$Xm#hi @ Xh|Xl - 256-bit result - - @ equivalent of reduction_avx from ghash-x86_64.pl - vshl.i64 $t1,$Xl,#57 @ 1st phase - vshl.i64 $t2,$Xl,#62 - veor $t2,$t2,$t1 @ - vshl.i64 $t1,$Xl,#63 - veor $t2, $t2, $t1 @ - veor $Xl#hi,$Xl#hi,$t2#lo @ - veor $Xh#lo,$Xh#lo,$t2#hi - - vshr.u64 $t2,$Xl,#1 @ 2nd phase - veor $Xh,$Xh,$Xl - veor $Xl,$Xl,$t2 @ - vshr.u64 $t2,$t2,#6 - vshr.u64 $Xl,$Xl,#1 @ - veor $Xl,$Xl,$Xh @ - veor $Xl,$Xl,$t2 @ - + veor $Zo,$Zo + veor $IN,$Z @ inp^=Xi + veor $Z,$Z + vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte +.Linner_neon: + subs $cnt,$cnt,#1 + vmull.p8 $Qlo,$Hlo,$xi @ H.lo·Xi[i] + vmull.p8 $Qhi,$Hhi,$xi @ H.hi·Xi[i] + vext.8 $IN,$zero,#1 @ IN>>=8 + + veor $Z,$Qpost @ modulo-scheduled part + vshl.i64 `&Dlo("$R")`,#48 + vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte + veor $T,`&Dlo("$Qlo")`,`&Dlo("$Z")` + + veor `&Dhi("$Z")`,`&Dlo("$R")` + vuzp.8 $Qlo,$Qhi + vsli.8 $Zo,$T,#1 @ compose the "carry" byte + vext.8 $Z,$zero,#1 @ Z>>=8 + + vmull.p8 $R,$Zo,$mod @ "carry"·0xe1 + vshr.u8 $Zo,$T,#7 @ save Z's bottom bit + vext.8 $Qpost,$Qlo,$zero,#1 @ Qlo>>=8 + veor $Z,$Qhi + bne .Linner_neon + + veor $Z,$Qpost @ modulo-scheduled artefact + vshl.i64 `&Dlo("$R")`,#48 + veor `&Dhi("$Z")`,`&Dlo("$R")` + + @ finalization, normalize Z:Zo + vand $Zo,$mod @ suffices to mask the bit + vshr.u64 `&Dhi(&Q("$Zo"))`,`&Dlo("$Z")`,#63 + vshl.i64 $Z,#1 subs $len,#16 - bne .Loop_neon + vorr $Z,`&Q("$Zo")` @ Z=Z:Zo<<1 + bne .Louter_neon #ifdef __ARMEL__ - vrev64.8 $Xl,$Xl + vrev64.8 $Z,$Z #endif sub $Xi,#16 - vst1.64 $Xl#hi,[$Xi,:64]! @ write out Xi - vst1.64 $Xl#lo,[$Xi,:64] + vst1.64 `&Dhi("$Z")`,[$Xi,:64]! @ write out Xi + vst1.64 `&Dlo("$Z")`,[$Xi,:64] - ret @ bx lr + bx lr .size gcm_ghash_neon,.-gcm_ghash_neon #endif ___ @@ -480,13 +423,7 @@ $code.=<<___; .align 2 ___ -foreach (split("\n",$code)) { - s/\`([^\`]*)\`/eval $1/geo; - - s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or - s/\bret\b/bx lr/go or - s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 - - print $_,"\n"; -} +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +print $code; close STDOUT; # enforce flush diff --git a/app/openssl/crypto/modes/asm/ghashv8-armx-64.S b/app/openssl/crypto/modes/asm/ghashv8-armx-64.S deleted file mode 100644 index b77b6c40..00000000 --- a/app/openssl/crypto/modes/asm/ghashv8-armx-64.S +++ /dev/null @@ -1,115 +0,0 @@ -#include "arm_arch.h" - -.text -.arch armv8-a+crypto -.global gcm_init_v8 -.type gcm_init_v8,%function -.align 4 -gcm_init_v8: - ld1 {v17.2d},[x1] //load H - movi v16.16b,#0xe1 - ext v3.16b,v17.16b,v17.16b,#8 - shl v16.2d,v16.2d,#57 - ushr v18.2d,v16.2d,#63 - ext v16.16b,v18.16b,v16.16b,#8 //t0=0xc2....01 - dup v17.4s,v17.s[1] - ushr v19.2d,v3.2d,#63 - sshr v17.4s,v17.4s,#31 //broadcast carry bit - and v19.16b,v19.16b,v16.16b - shl v3.2d,v3.2d,#1 - ext v19.16b,v19.16b,v19.16b,#8 - and v16.16b,v16.16b,v17.16b - orr v3.16b,v3.16b,v19.16b //H<<<=1 - eor v3.16b,v3.16b,v16.16b //twisted H - st1 {v3.2d},[x0] - - ret -.size gcm_init_v8,.-gcm_init_v8 - -.global gcm_gmult_v8 -.type gcm_gmult_v8,%function -.align 4 -gcm_gmult_v8: - ld1 {v17.2d},[x0] //load Xi - movi v19.16b,#0xe1 - ld1 {v20.2d},[x1] //load twisted H - shl v19.2d,v19.2d,#57 -#ifndef __ARMEB__ - rev64 v17.16b,v17.16b -#endif - ext v21.16b,v20.16b,v20.16b,#8 - mov x3,#0 - ext v3.16b,v17.16b,v17.16b,#8 - mov x12,#0 - eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing - mov x2,x0 - b .Lgmult_v8 -.size gcm_gmult_v8,.-gcm_gmult_v8 - -.global gcm_ghash_v8 -.type gcm_ghash_v8,%function -.align 4 -gcm_ghash_v8: - ld1 {v0.2d},[x0] //load [rotated] Xi - subs x3,x3,#16 - movi v19.16b,#0xe1 - mov x12,#16 - ld1 {v20.2d},[x1] //load twisted H - csel x12,xzr,x12,eq - ext v0.16b,v0.16b,v0.16b,#8 - shl v19.2d,v19.2d,#57 - ld1 {v17.2d},[x2],x12 //load [rotated] inp - ext v21.16b,v20.16b,v20.16b,#8 -#ifndef __ARMEB__ - rev64 v0.16b,v0.16b - rev64 v17.16b,v17.16b -#endif - eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing - ext v3.16b,v17.16b,v17.16b,#8 - b .Loop_v8 - -.align 4 -.Loop_v8: - ext v18.16b,v0.16b,v0.16b,#8 - eor v3.16b,v3.16b,v0.16b //inp^=Xi - eor v17.16b,v17.16b,v18.16b //v17.16b is rotated inp^Xi - -.Lgmult_v8: - pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo - eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing - pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi - subs x3,x3,#16 - pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) - csel x12,xzr,x12,eq - - ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing - eor v18.16b,v0.16b,v2.16b - eor v1.16b,v1.16b,v17.16b - ld1 {v17.2d},[x2],x12 //load [rotated] inp - eor v1.16b,v1.16b,v18.16b - pmull v18.1q,v0.1d,v19.1d //1st phase - - ins v2.d[0],v1.d[1] - ins v1.d[1],v0.d[0] -#ifndef __ARMEB__ - rev64 v17.16b,v17.16b -#endif - eor v0.16b,v1.16b,v18.16b - ext v3.16b,v17.16b,v17.16b,#8 - - ext v18.16b,v0.16b,v0.16b,#8 //2nd phase - pmull v0.1q,v0.1d,v19.1d - eor v18.16b,v18.16b,v2.16b - eor v0.16b,v0.16b,v18.16b - b.hs .Loop_v8 - -#ifndef __ARMEB__ - rev64 v0.16b,v0.16b -#endif - ext v0.16b,v0.16b,v0.16b,#8 - st1 {v0.2d},[x0] //write out Xi - - ret -.size gcm_ghash_v8,.-gcm_ghash_v8 -.asciz "GHASH for ARMv8, CRYPTOGAMS by " -.align 2 diff --git a/app/openssl/crypto/modes/asm/ghashv8-armx.S b/app/openssl/crypto/modes/asm/ghashv8-armx.S deleted file mode 100644 index f388c54e..00000000 --- a/app/openssl/crypto/modes/asm/ghashv8-armx.S +++ /dev/null @@ -1,116 +0,0 @@ -#include "arm_arch.h" - -.text -.fpu neon -.code 32 -.global gcm_init_v8 -.type gcm_init_v8,%function -.align 4 -gcm_init_v8: - vld1.64 {q9},[r1] @ load H - vmov.i8 q8,#0xe1 - vext.8 q3,q9,q9,#8 - vshl.i64 q8,q8,#57 - vshr.u64 q10,q8,#63 - vext.8 q8,q10,q8,#8 @ t0=0xc2....01 - vdup.32 q9,d18[1] - vshr.u64 q11,q3,#63 - vshr.s32 q9,q9,#31 @ broadcast carry bit - vand q11,q11,q8 - vshl.i64 q3,q3,#1 - vext.8 q11,q11,q11,#8 - vand q8,q8,q9 - vorr q3,q3,q11 @ H<<<=1 - veor q3,q3,q8 @ twisted H - vst1.64 {q3},[r0] - - bx lr -.size gcm_init_v8,.-gcm_init_v8 - -.global gcm_gmult_v8 -.type gcm_gmult_v8,%function -.align 4 -gcm_gmult_v8: - vld1.64 {q9},[r0] @ load Xi - vmov.i8 q11,#0xe1 - vld1.64 {q12},[r1] @ load twisted H - vshl.u64 q11,q11,#57 -#ifndef __ARMEB__ - vrev64.8 q9,q9 -#endif - vext.8 q13,q12,q12,#8 - mov r3,#0 - vext.8 q3,q9,q9,#8 - mov r12,#0 - veor q13,q13,q12 @ Karatsuba pre-processing - mov r2,r0 - b .Lgmult_v8 -.size gcm_gmult_v8,.-gcm_gmult_v8 - -.global gcm_ghash_v8 -.type gcm_ghash_v8,%function -.align 4 -gcm_ghash_v8: - vld1.64 {q0},[r0] @ load [rotated] Xi - subs r3,r3,#16 - vmov.i8 q11,#0xe1 - mov r12,#16 - vld1.64 {q12},[r1] @ load twisted H - moveq r12,#0 - vext.8 q0,q0,q0,#8 - vshl.u64 q11,q11,#57 - vld1.64 {q9},[r2],r12 @ load [rotated] inp - vext.8 q13,q12,q12,#8 -#ifndef __ARMEB__ - vrev64.8 q0,q0 - vrev64.8 q9,q9 -#endif - veor q13,q13,q12 @ Karatsuba pre-processing - vext.8 q3,q9,q9,#8 - b .Loop_v8 - -.align 4 -.Loop_v8: - vext.8 q10,q0,q0,#8 - veor q3,q3,q0 @ inp^=Xi - veor q9,q9,q10 @ q9 is rotated inp^Xi - -.Lgmult_v8: - .byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo - veor q9,q9,q3 @ Karatsuba pre-processing - .byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi - subs r3,r3,#16 - .byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) - moveq r12,#0 - - vext.8 q9,q0,q2,#8 @ Karatsuba post-processing - veor q10,q0,q2 - veor q1,q1,q9 - vld1.64 {q9},[r2],r12 @ load [rotated] inp - veor q1,q1,q10 - .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase - - vmov d4,d3 @ Xh|Xm - 256-bit result - vmov d3,d0 @ Xm is rotated Xl -#ifndef __ARMEB__ - vrev64.8 q9,q9 -#endif - veor q0,q1,q10 - vext.8 q3,q9,q9,#8 - - vext.8 q10,q0,q0,#8 @ 2nd phase - .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 - veor q10,q10,q2 - veor q0,q0,q10 - bhs .Loop_v8 - -#ifndef __ARMEB__ - vrev64.8 q0,q0 -#endif - vext.8 q0,q0,q0,#8 - vst1.64 {q0},[r0] @ write out Xi - - bx lr -.size gcm_ghash_v8,.-gcm_ghash_v8 -.asciz "GHASH for ARMv8, CRYPTOGAMS by " -.align 2 diff --git a/app/openssl/crypto/modes/asm/ghashv8-armx.pl b/app/openssl/crypto/modes/asm/ghashv8-armx.pl deleted file mode 100644 index 69e863e7..00000000 --- a/app/openssl/crypto/modes/asm/ghashv8-armx.pl +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication. -# -# June 2014 -# -# Initial version was developed in tight cooperation with Ard -# Biesheuvel from bits-n-pieces from -# other assembly modules. Just like aesv8-armx.pl this module -# supports both AArch32 and AArch64 execution modes. -# -# Current performance in cycles per processed byte: -# -# PMULL[2] 32-bit NEON(*) -# Apple A7 1.76 5.62 -# Cortex-A5x n/a n/a -# -# (*) presented for reference/comparison purposes; - -$flavour = shift; -open STDOUT,">".shift; - -$Xi="x0"; # argument block -$Htbl="x1"; -$inp="x2"; -$len="x3"; - -$inc="x12"; - -{ -my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3)); -my ($t0,$t1,$t2,$t3,$H,$Hhl)=map("q$_",(8..14)); - -$code=<<___; -#include "arm_arch.h" - -.text -___ -$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); -$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); - -$code.=<<___; -.global gcm_init_v8 -.type gcm_init_v8,%function -.align 4 -gcm_init_v8: - vld1.64 {$t1},[x1] @ load H - vmov.i8 $t0,#0xe1 - vext.8 $IN,$t1,$t1,#8 - vshl.i64 $t0,$t0,#57 - vshr.u64 $t2,$t0,#63 - vext.8 $t0,$t2,$t0,#8 @ t0=0xc2....01 - vdup.32 $t1,${t1}[1] - vshr.u64 $t3,$IN,#63 - vshr.s32 $t1,$t1,#31 @ broadcast carry bit - vand $t3,$t3,$t0 - vshl.i64 $IN,$IN,#1 - vext.8 $t3,$t3,$t3,#8 - vand $t0,$t0,$t1 - vorr $IN,$IN,$t3 @ H<<<=1 - veor $IN,$IN,$t0 @ twisted H - vst1.64 {$IN},[x0] - - ret -.size gcm_init_v8,.-gcm_init_v8 - -.global gcm_gmult_v8 -.type gcm_gmult_v8,%function -.align 4 -gcm_gmult_v8: - vld1.64 {$t1},[$Xi] @ load Xi - vmov.i8 $t3,#0xe1 - vld1.64 {$H},[$Htbl] @ load twisted H - vshl.u64 $t3,$t3,#57 -#ifndef __ARMEB__ - vrev64.8 $t1,$t1 -#endif - vext.8 $Hhl,$H,$H,#8 - mov $len,#0 - vext.8 $IN,$t1,$t1,#8 - mov $inc,#0 - veor $Hhl,$Hhl,$H @ Karatsuba pre-processing - mov $inp,$Xi - b .Lgmult_v8 -.size gcm_gmult_v8,.-gcm_gmult_v8 - -.global gcm_ghash_v8 -.type gcm_ghash_v8,%function -.align 4 -gcm_ghash_v8: - vld1.64 {$Xl},[$Xi] @ load [rotated] Xi - subs $len,$len,#16 - vmov.i8 $t3,#0xe1 - mov $inc,#16 - vld1.64 {$H},[$Htbl] @ load twisted H - cclr $inc,eq - vext.8 $Xl,$Xl,$Xl,#8 - vshl.u64 $t3,$t3,#57 - vld1.64 {$t1},[$inp],$inc @ load [rotated] inp - vext.8 $Hhl,$H,$H,#8 -#ifndef __ARMEB__ - vrev64.8 $Xl,$Xl - vrev64.8 $t1,$t1 -#endif - veor $Hhl,$Hhl,$H @ Karatsuba pre-processing - vext.8 $IN,$t1,$t1,#8 - b .Loop_v8 - -.align 4 -.Loop_v8: - vext.8 $t2,$Xl,$Xl,#8 - veor $IN,$IN,$Xl @ inp^=Xi - veor $t1,$t1,$t2 @ $t1 is rotated inp^Xi - -.Lgmult_v8: - vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo - veor $t1,$t1,$IN @ Karatsuba pre-processing - vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi - subs $len,$len,#16 - vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) - cclr $inc,eq - - vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing - veor $t2,$Xl,$Xh - veor $Xm,$Xm,$t1 - vld1.64 {$t1},[$inp],$inc @ load [rotated] inp - veor $Xm,$Xm,$t2 - vpmull.p64 $t2,$Xl,$t3 @ 1st phase - - vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result - vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl -#ifndef __ARMEB__ - vrev64.8 $t1,$t1 -#endif - veor $Xl,$Xm,$t2 - vext.8 $IN,$t1,$t1,#8 - - vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase - vpmull.p64 $Xl,$Xl,$t3 - veor $t2,$t2,$Xh - veor $Xl,$Xl,$t2 - b.hs .Loop_v8 - -#ifndef __ARMEB__ - vrev64.8 $Xl,$Xl -#endif - vext.8 $Xl,$Xl,$Xl,#8 - vst1.64 {$Xl},[$Xi] @ write out Xi - - ret -.size gcm_ghash_v8,.-gcm_ghash_v8 -___ -} -$code.=<<___; -.asciz "GHASH for ARMv8, CRYPTOGAMS by " -.align 2 -___ - -if ($flavour =~ /64/) { ######## 64-bit code - sub unvmov { - my $arg=shift; - - $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o && - sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1; - } - foreach(split("\n",$code)) { - s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or - s/vmov\.i8/movi/o or # fix up legacy mnemonics - s/vmov\s+(.*)/unvmov($1)/geo or - s/vext\.8/ext/o or - s/vshr\.s/sshr\.s/o or - s/vshr/ushr/o or - s/^(\s+)v/$1/o or # strip off v prefix - s/\bbx\s+lr\b/ret/o; - - s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers - s/@\s/\/\//o; # old->new style commentary - - # fix up remainig legacy suffixes - s/\.[ui]?8(\s)/$1/o; - s/\.[uis]?32//o and s/\.16b/\.4s/go; - m/\.p64/o and s/\.16b/\.1q/o; # 1st pmull argument - m/l\.p64/o and s/\.16b/\.1d/go; # 2nd and 3rd pmull arguments - s/\.[uisp]?64//o and s/\.16b/\.2d/go; - s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; - - print $_,"\n"; - } -} else { ######## 32-bit code - sub unvdup32 { - my $arg=shift; - - $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && - sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; - } - sub unvpmullp64 { - my ($mnemonic,$arg)=@_; - - if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) { - my $word = 0xf2a00e00|(($1&7)<<13)|(($1&8)<<19) - |(($2&7)<<17)|(($2&8)<<4) - |(($3&7)<<1) |(($3&8)<<2); - $word |= 0x00010001 if ($mnemonic =~ "2"); - # since ARMv7 instructions are always encoded little-endian. - # correct solution is to use .inst directive, but older - # assemblers don't implement it:-( - sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", - $word&0xff,($word>>8)&0xff, - ($word>>16)&0xff,($word>>24)&0xff, - $mnemonic,$arg; - } - } - - foreach(split("\n",$code)) { - s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers - s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers - s/\/\/\s?/@ /o; # new->old style commentary - - # fix up remainig new-style suffixes - s/\],#[0-9]+/]!/o; - - s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or - s/vdup\.32\s+(.*)/unvdup32($1)/geo or - s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo or - s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or - s/^(\s+)b\./$1b/o or - s/^(\s+)ret/$1bx\tlr/o; - - print $_,"\n"; - } -} - -close STDOUT; # enforce flush diff --git a/app/openssl/crypto/modes/gcm128.c b/app/openssl/crypto/modes/gcm128.c index 79ebb66e..e1dc2b0f 100644 --- a/app/openssl/crypto/modes/gcm128.c +++ b/app/openssl/crypto/modes/gcm128.c @@ -642,7 +642,7 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) #endif -#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) +#if TABLE_BITS==4 && defined(GHASH_ASM) # if !defined(I386_ONLY) && \ (defined(__i386) || defined(__i386__) || \ defined(__x86_64) || defined(__x86_64__) || \ @@ -663,21 +663,13 @@ void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]); void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); # endif -# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) +# elif defined(__arm__) || defined(__arm) # include "arm_arch.h" # if __ARM_ARCH__>=7 # define GHASH_ASM_ARM # define GCM_FUNCREF_4BIT -# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) -# if defined(__arm__) || defined(__arm) -# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) -# endif -void gcm_init_neon(u128 Htable[16],const u64 Xi[2]); void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); -void gcm_init_v8(u128 Htable[16],const u64 Xi[2]); -void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]); -void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); # endif # endif #endif @@ -747,21 +739,10 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) ctx->ghash = gcm_ghash_4bit; # endif # elif defined(GHASH_ASM_ARM) -# ifdef PMULL_CAPABLE - if (PMULL_CAPABLE) { - gcm_init_v8(ctx->Htable,ctx->H.u); - ctx->gmult = gcm_gmult_v8; - ctx->ghash = gcm_ghash_v8; - } else -# endif -# ifdef NEON_CAPABLE - if (NEON_CAPABLE) { - gcm_init_neon(ctx->Htable,ctx->H.u); + if (OPENSSL_armcap_P & ARMV7_NEON) { ctx->gmult = gcm_gmult_neon; ctx->ghash = gcm_ghash_neon; - } else -# endif - { + } else { gcm_init_4bit(ctx->Htable,ctx->H.u); ctx->gmult = gcm_gmult_4bit; ctx->ghash = gcm_ghash_4bit; diff --git a/app/openssl/crypto/opensslconf-32.h b/app/openssl/crypto/opensslconf-32.h index caf6f1b8..d6625489 100644 --- a/app/openssl/crypto/opensslconf-32.h +++ b/app/openssl/crypto/opensslconf-32.h @@ -53,9 +53,6 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -140,9 +137,6 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -# define NO_RIPEMD -# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/crypto/opensslconf-64.h b/app/openssl/crypto/opensslconf-64.h index 88fb0419..70c5a2cb 100644 --- a/app/openssl/crypto/opensslconf-64.h +++ b/app/openssl/crypto/opensslconf-64.h @@ -53,9 +53,6 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -140,9 +137,6 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -# define NO_RIPEMD -# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/crypto/opensslconf-static-32.h b/app/openssl/crypto/opensslconf-static-32.h index caf6f1b8..d6625489 100644 --- a/app/openssl/crypto/opensslconf-static-32.h +++ b/app/openssl/crypto/opensslconf-static-32.h @@ -53,9 +53,6 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -140,9 +137,6 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -# define NO_RIPEMD -# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/crypto/opensslconf-static-64.h b/app/openssl/crypto/opensslconf-static-64.h index 88fb0419..70c5a2cb 100644 --- a/app/openssl/crypto/opensslconf-static-64.h +++ b/app/openssl/crypto/opensslconf-static-64.h @@ -53,9 +53,6 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -140,9 +137,6 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -# define NO_RIPEMD -# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/crypto/opensslv.h b/app/openssl/crypto/opensslv.h index c3b6acec..ebe71807 100644 --- a/app/openssl/crypto/opensslv.h +++ b/app/openssl/crypto/opensslv.h @@ -25,11 +25,11 @@ * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -#define OPENSSL_VERSION_NUMBER 0x1000108fL +#define OPENSSL_VERSION_NUMBER 0x1000107fL #ifdef OPENSSL_FIPS -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h-fips 5 Jun 2014" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g-fips 7 Apr 2014" #else -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h 5 Jun 2014" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g 7 Apr 2014" #endif #define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/app/openssl/crypto/pkcs12/p12_crt.c b/app/openssl/crypto/pkcs12/p12_crt.c index 35e8a4a8..a34915d0 100644 --- a/app/openssl/crypto/pkcs12/p12_crt.c +++ b/app/openssl/crypto/pkcs12/p12_crt.c @@ -96,11 +96,7 @@ PKCS12 *PKCS12_create(char *pass, char *name, EVP_PKEY *pkey, X509 *cert, nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; else #endif -#ifdef OPENSSL_NO_RC2 - nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; -#else nid_cert = NID_pbe_WithSHA1And40BitRC2_CBC; -#endif } if (!nid_key) nid_key = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; @@ -290,11 +286,7 @@ int PKCS12_add_safe(STACK_OF(PKCS7) **psafes, STACK_OF(PKCS12_SAFEBAG) *bags, free_safes = 0; if (nid_safe == 0) -#ifdef OPENSSL_NO_RC2 - nid_safe = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; -#else nid_safe = NID_pbe_WithSHA1And40BitRC2_CBC; -#endif if (nid_safe == -1) p7 = PKCS12_pack_p7data(bags); diff --git a/app/openssl/crypto/pkcs12/p12_kiss.c b/app/openssl/crypto/pkcs12/p12_kiss.c index c9b7ab61..206b1b0b 100644 --- a/app/openssl/crypto/pkcs12/p12_kiss.c +++ b/app/openssl/crypto/pkcs12/p12_kiss.c @@ -269,7 +269,7 @@ static int parse_bag(PKCS12_SAFEBAG *bag, const char *pass, int passlen, int len, r; unsigned char *data; len = ASN1_STRING_to_UTF8(&data, fname); - if(len >= 0) { + if(len > 0) { r = X509_alias_set1(x509, data, len); OPENSSL_free(data); if (!r) diff --git a/app/openssl/crypto/pkcs7/pk7_doit.c b/app/openssl/crypto/pkcs7/pk7_doit.c index d91aa116..77fda3b8 100644 --- a/app/openssl/crypto/pkcs7/pk7_doit.c +++ b/app/openssl/crypto/pkcs7/pk7_doit.c @@ -440,11 +440,6 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) { case NID_pkcs7_signed: data_body=PKCS7_get_octet_string(p7->d.sign->contents); - if (!PKCS7_is_detached(p7) && data_body == NULL) - { - PKCS7err(PKCS7_F_PKCS7_DATADECODE,PKCS7_R_INVALID_SIGNED_DATA_TYPE); - goto err; - } md_sk=p7->d.sign->md_algs; break; case NID_pkcs7_signedAndEnveloped: @@ -933,7 +928,6 @@ int PKCS7_SIGNER_INFO_sign(PKCS7_SIGNER_INFO *si) if (EVP_DigestSignUpdate(&mctx,abuf,alen) <= 0) goto err; OPENSSL_free(abuf); - abuf = NULL; if (EVP_DigestSignFinal(&mctx, NULL, &siglen) <= 0) goto err; abuf = OPENSSL_malloc(siglen); diff --git a/app/openssl/crypto/pkcs7/pkcs7.h b/app/openssl/crypto/pkcs7/pkcs7.h index 04f60379..e4d44319 100644 --- a/app/openssl/crypto/pkcs7/pkcs7.h +++ b/app/openssl/crypto/pkcs7/pkcs7.h @@ -453,7 +453,6 @@ void ERR_load_PKCS7_strings(void); #define PKCS7_R_ERROR_SETTING_CIPHER 121 #define PKCS7_R_INVALID_MIME_TYPE 131 #define PKCS7_R_INVALID_NULL_POINTER 143 -#define PKCS7_R_INVALID_SIGNED_DATA_TYPE 155 #define PKCS7_R_MIME_NO_CONTENT_TYPE 132 #define PKCS7_R_MIME_PARSE_ERROR 133 #define PKCS7_R_MIME_SIG_PARSE_ERROR 134 diff --git a/app/openssl/crypto/pkcs7/pkcs7err.c b/app/openssl/crypto/pkcs7/pkcs7err.c index f3db08e0..d0af32a2 100644 --- a/app/openssl/crypto/pkcs7/pkcs7err.c +++ b/app/openssl/crypto/pkcs7/pkcs7err.c @@ -1,6 +1,6 @@ /* crypto/pkcs7/pkcs7err.c */ /* ==================================================================== - * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -130,7 +130,6 @@ static ERR_STRING_DATA PKCS7_str_reasons[]= {ERR_REASON(PKCS7_R_ERROR_SETTING_CIPHER),"error setting cipher"}, {ERR_REASON(PKCS7_R_INVALID_MIME_TYPE) ,"invalid mime type"}, {ERR_REASON(PKCS7_R_INVALID_NULL_POINTER),"invalid null pointer"}, -{ERR_REASON(PKCS7_R_INVALID_SIGNED_DATA_TYPE),"invalid signed data type"}, {ERR_REASON(PKCS7_R_MIME_NO_CONTENT_TYPE),"mime no content type"}, {ERR_REASON(PKCS7_R_MIME_PARSE_ERROR) ,"mime parse error"}, {ERR_REASON(PKCS7_R_MIME_SIG_PARSE_ERROR),"mime sig parse error"}, diff --git a/app/openssl/crypto/rsa/rsa_ameth.c b/app/openssl/crypto/rsa/rsa_ameth.c index 4c8ecd92..5a2062f9 100644 --- a/app/openssl/crypto/rsa/rsa_ameth.c +++ b/app/openssl/crypto/rsa/rsa_ameth.c @@ -358,7 +358,7 @@ static int rsa_pss_param_print(BIO *bp, RSA_PSS_PARAMS *pss, if (i2a_ASN1_INTEGER(bp, pss->saltLength) <= 0) goto err; } - else if (BIO_puts(bp, "14 (default)") <= 0) + else if (BIO_puts(bp, "0x14 (default)") <= 0) goto err; BIO_puts(bp, "\n"); diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.pl b/app/openssl/crypto/sha/asm/sha1-armv4-large.pl index 50bd07b3..33da3e0e 100644 --- a/app/openssl/crypto/sha/asm/sha1-armv4-large.pl +++ b/app/openssl/crypto/sha/asm/sha1-armv4-large.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -52,20 +52,6 @@ # Profiler-assisted and platform-specific optimization resulted in 10% # improvement on Cortex A8 core and 12.2 cycles per byte. -# September 2013. -# -# Add NEON implementation (see sha1-586.pl for background info). On -# Cortex A8 it was measured to process one byte in 6.7 cycles or >80% -# faster than integer-only code. Because [fully unrolled] NEON code -# is ~2.5x larger and there are some redundant instructions executed -# when processing last block, improvement is not as big for smallest -# blocks, only ~30%. Snapdragon S4 is a tad faster, 6.4 cycles per -# byte, which is also >80% faster than integer-only code. - -# May 2014. -# -# Add ARMv8 code path performing at 2.35 cpb on Apple A7. - while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; @@ -167,22 +153,12 @@ $code=<<___; #include "arm_arch.h" .text -.code 32 .global sha1_block_data_order .type sha1_block_data_order,%function -.align 5 +.align 2 sha1_block_data_order: -#if __ARM_ARCH__>=7 - sub r3,pc,#8 @ sha1_block_data_order - ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap_P - tst r12,#ARMV8_SHA1 - bne .LARMv8 - tst r12,#ARMV7_NEON - bne .LNEON -#endif stmdb sp!,{r4-r12,lr} add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp ldmia $ctx,{$a,$b,$c,$d,$e} @@ -257,422 +233,16 @@ $code.=<<___; moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.size sha1_block_data_order,.-sha1_block_data_order - -.align 5 +.align 2 .LK_00_19: .word 0x5a827999 .LK_20_39: .word 0x6ed9eba1 .LK_40_59: .word 0x8f1bbcdc .LK_60_79: .word 0xca62c1d6 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha1_block_data_order -.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " -.align 5 -___ -##################################################################### -# NEON stuff -# -{{{ -my @V=($a,$b,$c,$d,$e); -my ($K_XX_XX,$Ki,$t0,$t1,$Xfer,$saved_sp)=map("r$_",(8..12,14)); -my $Xi=4; -my @X=map("q$_",(8..11,0..3)); -my @Tx=("q12","q13"); -my ($K,$zero)=("q14","q15"); -my $j=0; - -sub AUTOLOAD() # thunk [simplified] x86-style perlasm -{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; - my $arg = pop; - $arg = "#$arg" if ($arg*1 eq $arg); - $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; -} - -sub body_00_19 () { - ( - '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'. - '&bic ($t0,$d,$b)', - '&add ($e,$e,$Ki)', # e+=X[i]+K - '&and ($t1,$c,$b)', - '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))', - '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27) - '&eor ($t1,$t1,$t0)', # F_00_19 - '&mov ($b,$b,"ror#2")', # b=ROR(b,2) - '&add ($e,$e,$t1);'. # e+=F_00_19 - '$j++; unshift(@V,pop(@V));' - ) -} -sub body_20_39 () { - ( - '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'. - '&eor ($t0,$b,$d)', - '&add ($e,$e,$Ki)', # e+=X[i]+K - '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15)) if ($j<79)', - '&eor ($t1,$t0,$c)', # F_20_39 - '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27) - '&mov ($b,$b,"ror#2")', # b=ROR(b,2) - '&add ($e,$e,$t1);'. # e+=F_20_39 - '$j++; unshift(@V,pop(@V));' - ) -} -sub body_40_59 () { - ( - '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'. - '&add ($e,$e,$Ki)', # e+=X[i]+K - '&and ($t0,$c,$d)', - '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))', - '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27) - '&eor ($t1,$c,$d)', - '&add ($e,$e,$t0)', - '&and ($t1,$t1,$b)', - '&mov ($b,$b,"ror#2")', # b=ROR(b,2) - '&add ($e,$e,$t1);'. # e+=F_40_59 - '$j++; unshift(@V,pop(@V));' - ) -} - -sub Xupdate_16_31 () -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); - my ($a,$b,$c,$d,$e); - - &vext_8 (@X[0],@X[-4&7],@X[-3&7],8); # compose "X[-14]" in "X[0]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &vadd_i32 (@Tx[1],@X[-1&7],$K); - eval(shift(@insns)); - &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0); - eval(shift(@insns)); - &vext_8 (@Tx[0],@X[-1&7],$zero,4); # "X[-3]", 3 words - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" - eval(shift(@insns)); - eval(shift(@insns)); - &veor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - &veor (@Tx[0],@Tx[0],@X[0]); # "X[0]"^="X[-3]"^"X[-8] - eval(shift(@insns)); - eval(shift(@insns)); - &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer - &sub ($Xfer,$Xfer,64) if ($Xi%4==0); - eval(shift(@insns)); - eval(shift(@insns)); - &vext_8 (@Tx[1],$zero,@Tx[0],4); # "X[0]"<<96, extract one dword - eval(shift(@insns)); - eval(shift(@insns)); - &vadd_i32 (@X[0],@Tx[0],@Tx[0]); - eval(shift(@insns)); - eval(shift(@insns)); - &vsri_32 (@X[0],@Tx[0],31); # "X[0]"<<<=1 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 (@Tx[0],@Tx[1],30); - eval(shift(@insns)); - eval(shift(@insns)); - &vshl_u32 (@Tx[1],@Tx[1],2); - eval(shift(@insns)); - eval(shift(@insns)); - &veor (@X[0],@X[0],@Tx[0]); - eval(shift(@insns)); - eval(shift(@insns)); - &veor (@X[0],@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2 - - foreach (@insns) { eval; } # remaining instructions [if any] - - $Xi++; push(@X,shift(@X)); # "rotate" X[] -} - -sub Xupdate_32_79 () -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); - my ($a,$b,$c,$d,$e); - - &vext_8 (@Tx[0],@X[-2&7],@X[-1&7],8); # compose "X[-6]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" - eval(shift(@insns)); - eval(shift(@insns)); - &veor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" - eval(shift(@insns)); - eval(shift(@insns)); - &vadd_i32 (@Tx[1],@X[-1&7],$K); - eval(shift(@insns)); - &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0); - eval(shift(@insns)); - &veor (@Tx[0],@Tx[0],@X[0]); # "X[-6]"^="X[0]" - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 (@X[0],@Tx[0],30); - eval(shift(@insns)); - eval(shift(@insns)); - &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer - &sub ($Xfer,$Xfer,64) if ($Xi%4==0); - eval(shift(@insns)); - eval(shift(@insns)); - &vsli_32 (@X[0],@Tx[0],2); # "X[0]"="X[-6]"<<<2 - - foreach (@insns) { eval; } # remaining instructions [if any] - - $Xi++; push(@X,shift(@X)); # "rotate" X[] -} - -sub Xuplast_80 () -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); - my ($a,$b,$c,$d,$e); - - &vadd_i32 (@Tx[1],@X[-1&7],$K); - eval(shift(@insns)); - eval(shift(@insns)); - &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); - &sub ($Xfer,$Xfer,64); - - &teq ($inp,$len); - &sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX - &subeq ($inp,$inp,64); # reload last block to avoid SEGV - &vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!"); - eval(shift(@insns)); - eval(shift(@insns)); - &vld1_8 ("{@X[-2&7]-@X[-1&7]}","[$inp]!"); - eval(shift(@insns)); - eval(shift(@insns)); - &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!"); # load K_00_19 - eval(shift(@insns)); - eval(shift(@insns)); - &vrev32_8 (@X[-4&7],@X[-4&7]); - - foreach (@insns) { eval; } # remaining instructions - - $Xi=0; -} - -sub Xloop() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); - my ($a,$b,$c,$d,$e); - - &vrev32_8 (@X[($Xi-3)&7],@X[($Xi-3)&7]); - eval(shift(@insns)); - eval(shift(@insns)); - &vadd_i32 (@X[$Xi&7],@X[($Xi-4)&7],$K); - eval(shift(@insns)); - eval(shift(@insns)); - &vst1_32 ("{@X[$Xi&7]}","[$Xfer,:128]!");# X[]+K xfer to IALU - - foreach (@insns) { eval; } - - $Xi++; -} - -$code.=<<___; -#if __ARM_ARCH__>=7 -.fpu neon - -.type sha1_block_data_order_neon,%function -.align 4 -sha1_block_data_order_neon: -.LNEON: - stmdb sp!,{r4-r12,lr} - add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp - @ dmb @ errata #451034 on early Cortex A8 - @ vstmdb sp!,{d8-d15} @ ABI specification says so - mov $saved_sp,sp - sub sp,sp,#64 @ alloca - adr $K_XX_XX,.LK_00_19 - bic sp,sp,#15 @ align for 128-bit stores - - ldmia $ctx,{$a,$b,$c,$d,$e} @ load context - mov $Xfer,sp - - vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned - veor $zero,$zero,$zero - vld1.8 {@X[-2&7]-@X[-1&7]},[$inp]! - vld1.32 {${K}\[]},[$K_XX_XX,:32]! @ load K_00_19 - vrev32.8 @X[-4&7],@X[-4&7] @ yes, even on - vrev32.8 @X[-3&7],@X[-3&7] @ big-endian... - vrev32.8 @X[-2&7],@X[-2&7] - vadd.i32 @X[0],@X[-4&7],$K - vrev32.8 @X[-1&7],@X[-1&7] - vadd.i32 @X[1],@X[-3&7],$K - vst1.32 {@X[0]},[$Xfer,:128]! - vadd.i32 @X[2],@X[-2&7],$K - vst1.32 {@X[1]},[$Xfer,:128]! - vst1.32 {@X[2]},[$Xfer,:128]! - ldr $Ki,[sp] @ big RAW stall - -.Loop_neon: -___ - &Xupdate_16_31(\&body_00_19); - &Xupdate_16_31(\&body_00_19); - &Xupdate_16_31(\&body_00_19); - &Xupdate_16_31(\&body_00_19); - &Xupdate_32_79(\&body_00_19); - &Xupdate_32_79(\&body_20_39); - &Xupdate_32_79(\&body_20_39); - &Xupdate_32_79(\&body_20_39); - &Xupdate_32_79(\&body_20_39); - &Xupdate_32_79(\&body_20_39); - &Xupdate_32_79(\&body_40_59); - &Xupdate_32_79(\&body_40_59); - &Xupdate_32_79(\&body_40_59); - &Xupdate_32_79(\&body_40_59); - &Xupdate_32_79(\&body_40_59); - &Xupdate_32_79(\&body_20_39); - &Xuplast_80(\&body_20_39); - &Xloop(\&body_20_39); - &Xloop(\&body_20_39); - &Xloop(\&body_20_39); -$code.=<<___; - ldmia $ctx,{$Ki,$t0,$t1,$Xfer} @ accumulate context - add $a,$a,$Ki - ldr $Ki,[$ctx,#16] - add $b,$b,$t0 - add $c,$c,$t1 - add $d,$d,$Xfer - moveq sp,$saved_sp - add $e,$e,$Ki - ldrne $Ki,[sp] - stmia $ctx,{$a,$b,$c,$d,$e} - addne $Xfer,sp,#3*16 - bne .Loop_neon - - @ vldmia sp!,{d8-d15} - ldmia sp!,{r4-r12,pc} -.size sha1_block_data_order_neon,.-sha1_block_data_order_neon -#endif -___ -}}} -##################################################################### -# ARMv8 stuff -# -{{{ -my ($ABCD,$E,$E0,$E1)=map("q$_",(0..3)); -my @MSG=map("q$_",(4..7)); -my @Kxx=map("q$_",(8..11)); -my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14)); - -$code.=<<___; -#if __ARM_ARCH__>=7 -.type sha1_block_data_order_armv8,%function -.align 5 -sha1_block_data_order_armv8: -.LARMv8: - vstmdb sp!,{d8-d15} @ ABI specification says so - - veor $E,$E,$E - adr r3,.LK_00_19 - vld1.32 {$ABCD},[$ctx]! - vld1.32 {$E\[0]},[$ctx] - sub $ctx,$ctx,#16 - vld1.32 {@Kxx[0]\[]},[r3,:32]! - vld1.32 {@Kxx[1]\[]},[r3,:32]! - vld1.32 {@Kxx[2]\[]},[r3,:32]! - vld1.32 {@Kxx[3]\[]},[r3,:32] - -.Loop_v8: - vld1.8 {@MSG[0]-@MSG[1]},[$inp]! - vld1.8 {@MSG[2]-@MSG[3]},[$inp]! - vrev32.8 @MSG[0],@MSG[0] - vrev32.8 @MSG[1],@MSG[1] - - vadd.i32 $W0,@Kxx[0],@MSG[0] - vrev32.8 @MSG[2],@MSG[2] - vmov $ABCD_SAVE,$ABCD @ offload - subs $len,$len,#1 - - vadd.i32 $W1,@Kxx[0],@MSG[1] - vrev32.8 @MSG[3],@MSG[3] - sha1h $E1,$ABCD @ 0 - sha1c $ABCD,$E,$W0 - vadd.i32 $W0,@Kxx[$j],@MSG[2] - sha1su0 @MSG[0],@MSG[1],@MSG[2] -___ -for ($j=0,$i=1;$i<20-3;$i++) { -my $f=("c","p","m","p")[$i/5]; -$code.=<<___; - sha1h $E0,$ABCD @ $i - sha1$f $ABCD,$E1,$W1 - vadd.i32 $W1,@Kxx[$j],@MSG[3] - sha1su1 @MSG[0],@MSG[3] -___ -$code.=<<___ if ($i<20-4); - sha1su0 @MSG[1],@MSG[2],@MSG[3] -___ - ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0); - push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0); -} -$code.=<<___; - sha1h $E0,$ABCD @ $i - sha1p $ABCD,$E1,$W1 - vadd.i32 $W1,@Kxx[$j],@MSG[3] - - sha1h $E1,$ABCD @ 18 - sha1p $ABCD,$E0,$W0 - - sha1h $E0,$ABCD @ 19 - sha1p $ABCD,$E1,$W1 - - vadd.i32 $E,$E,$E0 - vadd.i32 $ABCD,$ABCD,$ABCD_SAVE - bne .Loop_v8 - - vst1.32 {$ABCD},[$ctx]! - vst1.32 {$E\[0]},[$ctx] - - vldmia sp!,{d8-d15} - ret @ bx lr -.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 -#endif -___ -}}} -$code.=<<___; -.comm OPENSSL_armcap_P,4,4 +.size sha1_block_data_order,.-sha1_block_data_order +.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " +.align 2 ___ -{ my %opcode = ( - "sha1c" => 0xf2000c40, "sha1p" => 0xf2100c40, - "sha1m" => 0xf2200c40, "sha1su0" => 0xf2300c40, - "sha1h" => 0xf3b902c0, "sha1su1" => 0xf3ba0380 ); - - sub unsha1 { - my ($mnemonic,$arg)=@_; - - if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { - my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) - |(($2&7)<<17)|(($2&8)<<4) - |(($3&7)<<1) |(($3&8)<<2); - # since ARMv7 instructions are always encoded little-endian. - # correct solution is to use .inst directive, but older - # assemblers don't implement it:-( - sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", - $word&0xff,($word>>8)&0xff, - ($word>>16)&0xff,($word>>24)&0xff, - $mnemonic,$arg; - } - } -} - -foreach (split($/,$code)) { - s/{q([0-9]+)\[\]}/sprintf "{d%d[],d%d[]}",2*$1,2*$1+1/eo or - s/{q([0-9]+)\[0\]}/sprintf "{d%d[0]}",2*$1/eo; - - s/\b(sha1\w+)\s+(q.*)/unsha1($1,$2)/geo; - - s/\bret\b/bx lr/o or - s/\bbx\s+lr\b/.word\t0xe12fff1e/o; # make it possible to compile with -march=armv4 - - print $_,$/; -} - +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +print $code; close STDOUT; # enforce flush diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.s b/app/openssl/crypto/sha/asm/sha1-armv4-large.s index a1562883..639ae78a 100644 --- a/app/openssl/crypto/sha/asm/sha1-armv4-large.s +++ b/app/openssl/crypto/sha/asm/sha1-armv4-large.s @@ -1,22 +1,12 @@ #include "arm_arch.h" .text -.code 32 .global sha1_block_data_order .type sha1_block_data_order,%function -.align 5 +.align 2 sha1_block_data_order: -#if __ARM_ARCH__>=7 - sub r3,pc,#8 @ sha1_block_data_order - ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap_P - tst r12,#ARMV8_SHA1 - bne .LARMv8 - tst r12,#ARMV7_NEON - bne .LNEON -#endif stmdb sp!,{r4-r12,lr} add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 ldmia r0,{r3,r4,r5,r6,r7} @@ -452,999 +442,11 @@ sha1_block_data_order: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.size sha1_block_data_order,.-sha1_block_data_order - -.align 5 +.align 2 .LK_00_19: .word 0x5a827999 .LK_20_39: .word 0x6ed9eba1 .LK_40_59: .word 0x8f1bbcdc .LK_60_79: .word 0xca62c1d6 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha1_block_data_order -.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " -.align 5 -#if __ARM_ARCH__>=7 -.fpu neon - -.type sha1_block_data_order_neon,%function -.align 4 -sha1_block_data_order_neon: -.LNEON: - stmdb sp!,{r4-r12,lr} - add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 - @ dmb @ errata #451034 on early Cortex A8 - @ vstmdb sp!,{d8-d15} @ ABI specification says so - mov r14,sp - sub sp,sp,#64 @ alloca - adr r8,.LK_00_19 - bic sp,sp,#15 @ align for 128-bit stores - - ldmia r0,{r3,r4,r5,r6,r7} @ load context - mov r12,sp - - vld1.8 {q0-q1},[r1]! @ handles unaligned - veor q15,q15,q15 - vld1.8 {q2-q3},[r1]! - vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19 - vrev32.8 q0,q0 @ yes, even on - vrev32.8 q1,q1 @ big-endian... - vrev32.8 q2,q2 - vadd.i32 q8,q0,q14 - vrev32.8 q3,q3 - vadd.i32 q9,q1,q14 - vst1.32 {q8},[r12,:128]! - vadd.i32 q10,q2,q14 - vst1.32 {q9},[r12,:128]! - vst1.32 {q10},[r12,:128]! - ldr r9,[sp] @ big RAW stall - -.Loop_neon: - vext.8 q8,q0,q1,#8 - bic r10,r6,r4 - add r7,r7,r9 - and r11,r5,r4 - vadd.i32 q13,q3,q14 - ldr r9,[sp,#4] - add r7,r7,r3,ror#27 - vext.8 q12,q3,q15,#4 - eor r11,r11,r10 - mov r4,r4,ror#2 - add r7,r7,r11 - veor q8,q8,q0 - bic r10,r5,r3 - add r6,r6,r9 - veor q12,q12,q2 - and r11,r4,r3 - ldr r9,[sp,#8] - veor q12,q12,q8 - add r6,r6,r7,ror#27 - eor r11,r11,r10 - vst1.32 {q13},[r12,:128]! - sub r12,r12,#64 - mov r3,r3,ror#2 - add r6,r6,r11 - vext.8 q13,q15,q12,#4 - bic r10,r4,r7 - add r5,r5,r9 - vadd.i32 q8,q12,q12 - and r11,r3,r7 - ldr r9,[sp,#12] - vsri.32 q8,q12,#31 - add r5,r5,r6,ror#27 - eor r11,r11,r10 - mov r7,r7,ror#2 - vshr.u32 q12,q13,#30 - add r5,r5,r11 - bic r10,r3,r6 - vshl.u32 q13,q13,#2 - add r4,r4,r9 - and r11,r7,r6 - veor q8,q8,q12 - ldr r9,[sp,#16] - add r4,r4,r5,ror#27 - veor q8,q8,q13 - eor r11,r11,r10 - mov r6,r6,ror#2 - add r4,r4,r11 - vext.8 q9,q1,q2,#8 - bic r10,r7,r5 - add r3,r3,r9 - and r11,r6,r5 - vadd.i32 q13,q8,q14 - ldr r9,[sp,#20] - vld1.32 {d28[],d29[]},[r8,:32]! - add r3,r3,r4,ror#27 - vext.8 q12,q8,q15,#4 - eor r11,r11,r10 - mov r5,r5,ror#2 - add r3,r3,r11 - veor q9,q9,q1 - bic r10,r6,r4 - add r7,r7,r9 - veor q12,q12,q3 - and r11,r5,r4 - ldr r9,[sp,#24] - veor q12,q12,q9 - add r7,r7,r3,ror#27 - eor r11,r11,r10 - vst1.32 {q13},[r12,:128]! - mov r4,r4,ror#2 - add r7,r7,r11 - vext.8 q13,q15,q12,#4 - bic r10,r5,r3 - add r6,r6,r9 - vadd.i32 q9,q12,q12 - and r11,r4,r3 - ldr r9,[sp,#28] - vsri.32 q9,q12,#31 - add r6,r6,r7,ror#27 - eor r11,r11,r10 - mov r3,r3,ror#2 - vshr.u32 q12,q13,#30 - add r6,r6,r11 - bic r10,r4,r7 - vshl.u32 q13,q13,#2 - add r5,r5,r9 - and r11,r3,r7 - veor q9,q9,q12 - ldr r9,[sp,#32] - add r5,r5,r6,ror#27 - veor q9,q9,q13 - eor r11,r11,r10 - mov r7,r7,ror#2 - add r5,r5,r11 - vext.8 q10,q2,q3,#8 - bic r10,r3,r6 - add r4,r4,r9 - and r11,r7,r6 - vadd.i32 q13,q9,q14 - ldr r9,[sp,#36] - add r4,r4,r5,ror#27 - vext.8 q12,q9,q15,#4 - eor r11,r11,r10 - mov r6,r6,ror#2 - add r4,r4,r11 - veor q10,q10,q2 - bic r10,r7,r5 - add r3,r3,r9 - veor q12,q12,q8 - and r11,r6,r5 - ldr r9,[sp,#40] - veor q12,q12,q10 - add r3,r3,r4,ror#27 - eor r11,r11,r10 - vst1.32 {q13},[r12,:128]! - mov r5,r5,ror#2 - add r3,r3,r11 - vext.8 q13,q15,q12,#4 - bic r10,r6,r4 - add r7,r7,r9 - vadd.i32 q10,q12,q12 - and r11,r5,r4 - ldr r9,[sp,#44] - vsri.32 q10,q12,#31 - add r7,r7,r3,ror#27 - eor r11,r11,r10 - mov r4,r4,ror#2 - vshr.u32 q12,q13,#30 - add r7,r7,r11 - bic r10,r5,r3 - vshl.u32 q13,q13,#2 - add r6,r6,r9 - and r11,r4,r3 - veor q10,q10,q12 - ldr r9,[sp,#48] - add r6,r6,r7,ror#27 - veor q10,q10,q13 - eor r11,r11,r10 - mov r3,r3,ror#2 - add r6,r6,r11 - vext.8 q11,q3,q8,#8 - bic r10,r4,r7 - add r5,r5,r9 - and r11,r3,r7 - vadd.i32 q13,q10,q14 - ldr r9,[sp,#52] - add r5,r5,r6,ror#27 - vext.8 q12,q10,q15,#4 - eor r11,r11,r10 - mov r7,r7,ror#2 - add r5,r5,r11 - veor q11,q11,q3 - bic r10,r3,r6 - add r4,r4,r9 - veor q12,q12,q9 - and r11,r7,r6 - ldr r9,[sp,#56] - veor q12,q12,q11 - add r4,r4,r5,ror#27 - eor r11,r11,r10 - vst1.32 {q13},[r12,:128]! - mov r6,r6,ror#2 - add r4,r4,r11 - vext.8 q13,q15,q12,#4 - bic r10,r7,r5 - add r3,r3,r9 - vadd.i32 q11,q12,q12 - and r11,r6,r5 - ldr r9,[sp,#60] - vsri.32 q11,q12,#31 - add r3,r3,r4,ror#27 - eor r11,r11,r10 - mov r5,r5,ror#2 - vshr.u32 q12,q13,#30 - add r3,r3,r11 - bic r10,r6,r4 - vshl.u32 q13,q13,#2 - add r7,r7,r9 - and r11,r5,r4 - veor q11,q11,q12 - ldr r9,[sp,#0] - add r7,r7,r3,ror#27 - veor q11,q11,q13 - eor r11,r11,r10 - mov r4,r4,ror#2 - add r7,r7,r11 - vext.8 q12,q10,q11,#8 - bic r10,r5,r3 - add r6,r6,r9 - and r11,r4,r3 - veor q0,q0,q8 - ldr r9,[sp,#4] - add r6,r6,r7,ror#27 - veor q0,q0,q1 - eor r11,r11,r10 - mov r3,r3,ror#2 - vadd.i32 q13,q11,q14 - add r6,r6,r11 - bic r10,r4,r7 - veor q12,q12,q0 - add r5,r5,r9 - and r11,r3,r7 - vshr.u32 q0,q12,#30 - ldr r9,[sp,#8] - add r5,r5,r6,ror#27 - vst1.32 {q13},[r12,:128]! - sub r12,r12,#64 - eor r11,r11,r10 - mov r7,r7,ror#2 - vsli.32 q0,q12,#2 - add r5,r5,r11 - bic r10,r3,r6 - add r4,r4,r9 - and r11,r7,r6 - ldr r9,[sp,#12] - add r4,r4,r5,ror#27 - eor r11,r11,r10 - mov r6,r6,ror#2 - add r4,r4,r11 - bic r10,r7,r5 - add r3,r3,r9 - and r11,r6,r5 - ldr r9,[sp,#16] - add r3,r3,r4,ror#27 - eor r11,r11,r10 - mov r5,r5,ror#2 - add r3,r3,r11 - vext.8 q12,q11,q0,#8 - eor r10,r4,r6 - add r7,r7,r9 - ldr r9,[sp,#20] - veor q1,q1,q9 - eor r11,r10,r5 - add r7,r7,r3,ror#27 - veor q1,q1,q2 - mov r4,r4,ror#2 - add r7,r7,r11 - vadd.i32 q13,q0,q14 - eor r10,r3,r5 - add r6,r6,r9 - veor q12,q12,q1 - ldr r9,[sp,#24] - eor r11,r10,r4 - vshr.u32 q1,q12,#30 - add r6,r6,r7,ror#27 - mov r3,r3,ror#2 - vst1.32 {q13},[r12,:128]! - add r6,r6,r11 - eor r10,r7,r4 - vsli.32 q1,q12,#2 - add r5,r5,r9 - ldr r9,[sp,#28] - eor r11,r10,r3 - add r5,r5,r6,ror#27 - mov r7,r7,ror#2 - add r5,r5,r11 - eor r10,r6,r3 - add r4,r4,r9 - ldr r9,[sp,#32] - eor r11,r10,r7 - add r4,r4,r5,ror#27 - mov r6,r6,ror#2 - add r4,r4,r11 - vext.8 q12,q0,q1,#8 - eor r10,r5,r7 - add r3,r3,r9 - ldr r9,[sp,#36] - veor q2,q2,q10 - eor r11,r10,r6 - add r3,r3,r4,ror#27 - veor q2,q2,q3 - mov r5,r5,ror#2 - add r3,r3,r11 - vadd.i32 q13,q1,q14 - eor r10,r4,r6 - vld1.32 {d28[],d29[]},[r8,:32]! - add r7,r7,r9 - veor q12,q12,q2 - ldr r9,[sp,#40] - eor r11,r10,r5 - vshr.u32 q2,q12,#30 - add r7,r7,r3,ror#27 - mov r4,r4,ror#2 - vst1.32 {q13},[r12,:128]! - add r7,r7,r11 - eor r10,r3,r5 - vsli.32 q2,q12,#2 - add r6,r6,r9 - ldr r9,[sp,#44] - eor r11,r10,r4 - add r6,r6,r7,ror#27 - mov r3,r3,ror#2 - add r6,r6,r11 - eor r10,r7,r4 - add r5,r5,r9 - ldr r9,[sp,#48] - eor r11,r10,r3 - add r5,r5,r6,ror#27 - mov r7,r7,ror#2 - add r5,r5,r11 - vext.8 q12,q1,q2,#8 - eor r10,r6,r3 - add r4,r4,r9 - ldr r9,[sp,#52] - veor q3,q3,q11 - eor r11,r10,r7 - add r4,r4,r5,ror#27 - veor q3,q3,q8 - mov r6,r6,ror#2 - add r4,r4,r11 - vadd.i32 q13,q2,q14 - eor r10,r5,r7 - add r3,r3,r9 - veor q12,q12,q3 - ldr r9,[sp,#56] - eor r11,r10,r6 - vshr.u32 q3,q12,#30 - add r3,r3,r4,ror#27 - mov r5,r5,ror#2 - vst1.32 {q13},[r12,:128]! - add r3,r3,r11 - eor r10,r4,r6 - vsli.32 q3,q12,#2 - add r7,r7,r9 - ldr r9,[sp,#60] - eor r11,r10,r5 - add r7,r7,r3,ror#27 - mov r4,r4,ror#2 - add r7,r7,r11 - eor r10,r3,r5 - add r6,r6,r9 - ldr r9,[sp,#0] - eor r11,r10,r4 - add r6,r6,r7,ror#27 - mov r3,r3,ror#2 - add r6,r6,r11 - vext.8 q12,q2,q3,#8 - eor r10,r7,r4 - add r5,r5,r9 - ldr r9,[sp,#4] - veor q8,q8,q0 - eor r11,r10,r3 - add r5,r5,r6,ror#27 - veor q8,q8,q9 - mov r7,r7,ror#2 - add r5,r5,r11 - vadd.i32 q13,q3,q14 - eor r10,r6,r3 - add r4,r4,r9 - veor q12,q12,q8 - ldr r9,[sp,#8] - eor r11,r10,r7 - vshr.u32 q8,q12,#30 - add r4,r4,r5,ror#27 - mov r6,r6,ror#2 - vst1.32 {q13},[r12,:128]! - sub r12,r12,#64 - add r4,r4,r11 - eor r10,r5,r7 - vsli.32 q8,q12,#2 - add r3,r3,r9 - ldr r9,[sp,#12] - eor r11,r10,r6 - add r3,r3,r4,ror#27 - mov r5,r5,ror#2 - add r3,r3,r11 - eor r10,r4,r6 - add r7,r7,r9 - ldr r9,[sp,#16] - eor r11,r10,r5 - add r7,r7,r3,ror#27 - mov r4,r4,ror#2 - add r7,r7,r11 - vext.8 q12,q3,q8,#8 - eor r10,r3,r5 - add r6,r6,r9 - ldr r9,[sp,#20] - veor q9,q9,q1 - eor r11,r10,r4 - add r6,r6,r7,ror#27 - veor q9,q9,q10 - mov r3,r3,ror#2 - add r6,r6,r11 - vadd.i32 q13,q8,q14 - eor r10,r7,r4 - add r5,r5,r9 - veor q12,q12,q9 - ldr r9,[sp,#24] - eor r11,r10,r3 - vshr.u32 q9,q12,#30 - add r5,r5,r6,ror#27 - mov r7,r7,ror#2 - vst1.32 {q13},[r12,:128]! - add r5,r5,r11 - eor r10,r6,r3 - vsli.32 q9,q12,#2 - add r4,r4,r9 - ldr r9,[sp,#28] - eor r11,r10,r7 - add r4,r4,r5,ror#27 - mov r6,r6,ror#2 - add r4,r4,r11 - eor r10,r5,r7 - add r3,r3,r9 - ldr r9,[sp,#32] - eor r11,r10,r6 - add r3,r3,r4,ror#27 - mov r5,r5,ror#2 - add r3,r3,r11 - vext.8 q12,q8,q9,#8 - add r7,r7,r9 - and r10,r5,r6 - ldr r9,[sp,#36] - veor q10,q10,q2 - add r7,r7,r3,ror#27 - eor r11,r5,r6 - veor q10,q10,q11 - add r7,r7,r10 - and r11,r11,r4 - vadd.i32 q13,q9,q14 - mov r4,r4,ror#2 - add r7,r7,r11 - veor q12,q12,q10 - add r6,r6,r9 - and r10,r4,r5 - vshr.u32 q10,q12,#30 - ldr r9,[sp,#40] - add r6,r6,r7,ror#27 - vst1.32 {q13},[r12,:128]! - eor r11,r4,r5 - add r6,r6,r10 - vsli.32 q10,q12,#2 - and r11,r11,r3 - mov r3,r3,ror#2 - add r6,r6,r11 - add r5,r5,r9 - and r10,r3,r4 - ldr r9,[sp,#44] - add r5,r5,r6,ror#27 - eor r11,r3,r4 - add r5,r5,r10 - and r11,r11,r7 - mov r7,r7,ror#2 - add r5,r5,r11 - add r4,r4,r9 - and r10,r7,r3 - ldr r9,[sp,#48] - add r4,r4,r5,ror#27 - eor r11,r7,r3 - add r4,r4,r10 - and r11,r11,r6 - mov r6,r6,ror#2 - add r4,r4,r11 - vext.8 q12,q9,q10,#8 - add r3,r3,r9 - and r10,r6,r7 - ldr r9,[sp,#52] - veor q11,q11,q3 - add r3,r3,r4,ror#27 - eor r11,r6,r7 - veor q11,q11,q0 - add r3,r3,r10 - and r11,r11,r5 - vadd.i32 q13,q10,q14 - mov r5,r5,ror#2 - vld1.32 {d28[],d29[]},[r8,:32]! - add r3,r3,r11 - veor q12,q12,q11 - add r7,r7,r9 - and r10,r5,r6 - vshr.u32 q11,q12,#30 - ldr r9,[sp,#56] - add r7,r7,r3,ror#27 - vst1.32 {q13},[r12,:128]! - eor r11,r5,r6 - add r7,r7,r10 - vsli.32 q11,q12,#2 - and r11,r11,r4 - mov r4,r4,ror#2 - add r7,r7,r11 - add r6,r6,r9 - and r10,r4,r5 - ldr r9,[sp,#60] - add r6,r6,r7,ror#27 - eor r11,r4,r5 - add r6,r6,r10 - and r11,r11,r3 - mov r3,r3,ror#2 - add r6,r6,r11 - add r5,r5,r9 - and r10,r3,r4 - ldr r9,[sp,#0] - add r5,r5,r6,ror#27 - eor r11,r3,r4 - add r5,r5,r10 - and r11,r11,r7 - mov r7,r7,ror#2 - add r5,r5,r11 - vext.8 q12,q10,q11,#8 - add r4,r4,r9 - and r10,r7,r3 - ldr r9,[sp,#4] - veor q0,q0,q8 - add r4,r4,r5,ror#27 - eor r11,r7,r3 - veor q0,q0,q1 - add r4,r4,r10 - and r11,r11,r6 - vadd.i32 q13,q11,q14 - mov r6,r6,ror#2 - add r4,r4,r11 - veor q12,q12,q0 - add r3,r3,r9 - and r10,r6,r7 - vshr.u32 q0,q12,#30 - ldr r9,[sp,#8] - add r3,r3,r4,ror#27 - vst1.32 {q13},[r12,:128]! - sub r12,r12,#64 - eor r11,r6,r7 - add r3,r3,r10 - vsli.32 q0,q12,#2 - and r11,r11,r5 - mov r5,r5,ror#2 - add r3,r3,r11 - add r7,r7,r9 - and r10,r5,r6 - ldr r9,[sp,#12] - add r7,r7,r3,ror#27 - eor r11,r5,r6 - add r7,r7,r10 - and r11,r11,r4 - mov r4,r4,ror#2 - add r7,r7,r11 - add r6,r6,r9 - and r10,r4,r5 - ldr r9,[sp,#16] - add r6,r6,r7,ror#27 - eor r11,r4,r5 - add r6,r6,r10 - and r11,r11,r3 - mov r3,r3,ror#2 - add r6,r6,r11 - vext.8 q12,q11,q0,#8 - add r5,r5,r9 - and r10,r3,r4 - ldr r9,[sp,#20] - veor q1,q1,q9 - add r5,r5,r6,ror#27 - eor r11,r3,r4 - veor q1,q1,q2 - add r5,r5,r10 - and r11,r11,r7 - vadd.i32 q13,q0,q14 - mov r7,r7,ror#2 - add r5,r5,r11 - veor q12,q12,q1 - add r4,r4,r9 - and r10,r7,r3 - vshr.u32 q1,q12,#30 - ldr r9,[sp,#24] - add r4,r4,r5,ror#27 - vst1.32 {q13},[r12,:128]! - eor r11,r7,r3 - add r4,r4,r10 - vsli.32 q1,q12,#2 - and r11,r11,r6 - mov r6,r6,ror#2 - add r4,r4,r11 - add r3,r3,r9 - and r10,r6,r7 - ldr r9,[sp,#28] - add r3,r3,r4,ror#27 - eor r11,r6,r7 - add r3,r3,r10 - and r11,r11,r5 - mov r5,r5,ror#2 - add r3,r3,r11 - add r7,r7,r9 - and r10,r5,r6 - ldr r9,[sp,#32] - add r7,r7,r3,ror#27 - eor r11,r5,r6 - add r7,r7,r10 - and r11,r11,r4 - mov r4,r4,ror#2 - add r7,r7,r11 - vext.8 q12,q0,q1,#8 - add r6,r6,r9 - and r10,r4,r5 - ldr r9,[sp,#36] - veor q2,q2,q10 - add r6,r6,r7,ror#27 - eor r11,r4,r5 - veor q2,q2,q3 - add r6,r6,r10 - and r11,r11,r3 - vadd.i32 q13,q1,q14 - mov r3,r3,ror#2 - add r6,r6,r11 - veor q12,q12,q2 - add r5,r5,r9 - and r10,r3,r4 - vshr.u32 q2,q12,#30 - ldr r9,[sp,#40] - add r5,r5,r6,ror#27 - vst1.32 {q13},[r12,:128]! - eor r11,r3,r4 - add r5,r5,r10 - vsli.32 q2,q12,#2 - and r11,r11,r7 - mov r7,r7,ror#2 - add r5,r5,r11 - add r4,r4,r9 - and r10,r7,r3 - ldr r9,[sp,#44] - add r4,r4,r5,ror#27 - eor r11,r7,r3 - add r4,r4,r10 - and r11,r11,r6 - mov r6,r6,ror#2 - add r4,r4,r11 - add r3,r3,r9 - and r10,r6,r7 - ldr r9,[sp,#48] - add r3,r3,r4,ror#27 - eor r11,r6,r7 - add r3,r3,r10 - and r11,r11,r5 - mov r5,r5,ror#2 - add r3,r3,r11 - vext.8 q12,q1,q2,#8 - eor r10,r4,r6 - add r7,r7,r9 - ldr r9,[sp,#52] - veor q3,q3,q11 - eor r11,r10,r5 - add r7,r7,r3,ror#27 - veor q3,q3,q8 - mov r4,r4,ror#2 - add r7,r7,r11 - vadd.i32 q13,q2,q14 - eor r10,r3,r5 - add r6,r6,r9 - veor q12,q12,q3 - ldr r9,[sp,#56] - eor r11,r10,r4 - vshr.u32 q3,q12,#30 - add r6,r6,r7,ror#27 - mov r3,r3,ror#2 - vst1.32 {q13},[r12,:128]! - add r6,r6,r11 - eor r10,r7,r4 - vsli.32 q3,q12,#2 - add r5,r5,r9 - ldr r9,[sp,#60] - eor r11,r10,r3 - add r5,r5,r6,ror#27 - mov r7,r7,ror#2 - add r5,r5,r11 - eor r10,r6,r3 - add r4,r4,r9 - ldr r9,[sp,#0] - eor r11,r10,r7 - add r4,r4,r5,ror#27 - mov r6,r6,ror#2 - add r4,r4,r11 - vadd.i32 q13,q3,q14 - eor r10,r5,r7 - add r3,r3,r9 - vst1.32 {q13},[r12,:128]! - sub r12,r12,#64 - teq r1,r2 - sub r8,r8,#16 - subeq r1,r1,#64 - vld1.8 {q0-q1},[r1]! - ldr r9,[sp,#4] - eor r11,r10,r6 - vld1.8 {q2-q3},[r1]! - add r3,r3,r4,ror#27 - mov r5,r5,ror#2 - vld1.32 {d28[],d29[]},[r8,:32]! - add r3,r3,r11 - eor r10,r4,r6 - vrev32.8 q0,q0 - add r7,r7,r9 - ldr r9,[sp,#8] - eor r11,r10,r5 - add r7,r7,r3,ror#27 - mov r4,r4,ror#2 - add r7,r7,r11 - eor r10,r3,r5 - add r6,r6,r9 - ldr r9,[sp,#12] - eor r11,r10,r4 - add r6,r6,r7,ror#27 - mov r3,r3,ror#2 - add r6,r6,r11 - eor r10,r7,r4 - add r5,r5,r9 - ldr r9,[sp,#16] - eor r11,r10,r3 - add r5,r5,r6,ror#27 - mov r7,r7,ror#2 - add r5,r5,r11 - vrev32.8 q1,q1 - eor r10,r6,r3 - add r4,r4,r9 - vadd.i32 q8,q0,q14 - ldr r9,[sp,#20] - eor r11,r10,r7 - vst1.32 {q8},[r12,:128]! - add r4,r4,r5,ror#27 - mov r6,r6,ror#2 - add r4,r4,r11 - eor r10,r5,r7 - add r3,r3,r9 - ldr r9,[sp,#24] - eor r11,r10,r6 - add r3,r3,r4,ror#27 - mov r5,r5,ror#2 - add r3,r3,r11 - eor r10,r4,r6 - add r7,r7,r9 - ldr r9,[sp,#28] - eor r11,r10,r5 - add r7,r7,r3,ror#27 - mov r4,r4,ror#2 - add r7,r7,r11 - eor r10,r3,r5 - add r6,r6,r9 - ldr r9,[sp,#32] - eor r11,r10,r4 - add r6,r6,r7,ror#27 - mov r3,r3,ror#2 - add r6,r6,r11 - vrev32.8 q2,q2 - eor r10,r7,r4 - add r5,r5,r9 - vadd.i32 q9,q1,q14 - ldr r9,[sp,#36] - eor r11,r10,r3 - vst1.32 {q9},[r12,:128]! - add r5,r5,r6,ror#27 - mov r7,r7,ror#2 - add r5,r5,r11 - eor r10,r6,r3 - add r4,r4,r9 - ldr r9,[sp,#40] - eor r11,r10,r7 - add r4,r4,r5,ror#27 - mov r6,r6,ror#2 - add r4,r4,r11 - eor r10,r5,r7 - add r3,r3,r9 - ldr r9,[sp,#44] - eor r11,r10,r6 - add r3,r3,r4,ror#27 - mov r5,r5,ror#2 - add r3,r3,r11 - eor r10,r4,r6 - add r7,r7,r9 - ldr r9,[sp,#48] - eor r11,r10,r5 - add r7,r7,r3,ror#27 - mov r4,r4,ror#2 - add r7,r7,r11 - vrev32.8 q3,q3 - eor r10,r3,r5 - add r6,r6,r9 - vadd.i32 q10,q2,q14 - ldr r9,[sp,#52] - eor r11,r10,r4 - vst1.32 {q10},[r12,:128]! - add r6,r6,r7,ror#27 - mov r3,r3,ror#2 - add r6,r6,r11 - eor r10,r7,r4 - add r5,r5,r9 - ldr r9,[sp,#56] - eor r11,r10,r3 - add r5,r5,r6,ror#27 - mov r7,r7,ror#2 - add r5,r5,r11 - eor r10,r6,r3 - add r4,r4,r9 - ldr r9,[sp,#60] - eor r11,r10,r7 - add r4,r4,r5,ror#27 - mov r6,r6,ror#2 - add r4,r4,r11 - eor r10,r5,r7 - add r3,r3,r9 - eor r11,r10,r6 - add r3,r3,r4,ror#27 - mov r5,r5,ror#2 - add r3,r3,r11 - ldmia r0,{r9,r10,r11,r12} @ accumulate context - add r3,r3,r9 - ldr r9,[r0,#16] - add r4,r4,r10 - add r5,r5,r11 - add r6,r6,r12 - moveq sp,r14 - add r7,r7,r9 - ldrne r9,[sp] - stmia r0,{r3,r4,r5,r6,r7} - addne r12,sp,#3*16 - bne .Loop_neon - - @ vldmia sp!,{d8-d15} - ldmia sp!,{r4-r12,pc} -.size sha1_block_data_order_neon,.-sha1_block_data_order_neon -#endif -#if __ARM_ARCH__>=7 -.type sha1_block_data_order_armv8,%function -.align 5 -sha1_block_data_order_armv8: -.LARMv8: - vstmdb sp!,{d8-d15} @ ABI specification says so - - veor q1,q1,q1 - adr r3,.LK_00_19 - vld1.32 {q0},[r0]! - vld1.32 {d2[0]},[r0] - sub r0,r0,#16 - vld1.32 {d16[],d17[]},[r3,:32]! - vld1.32 {d18[],d19[]},[r3,:32]! - vld1.32 {d20[],d21[]},[r3,:32]! - vld1.32 {d22[],d23[]},[r3,:32] - -.Loop_v8: - vld1.8 {q4-q5},[r1]! - vld1.8 {q6-q7},[r1]! - vrev32.8 q4,q4 - vrev32.8 q5,q5 - - vadd.i32 q12,q8,q4 - vrev32.8 q6,q6 - vmov q14,q0 @ offload - subs r2,r2,#1 - - vadd.i32 q13,q8,q5 - vrev32.8 q7,q7 - .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 0 - .byte 0x68,0x0c,0x02,0xf2 @ sha1c q0,q1,q12 - vadd.i32 q12,q8,q6 - .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 - .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 1 - .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 - vadd.i32 q13,q8,q7 - .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 - .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 - .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 2 - .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 - vadd.i32 q12,q8,q4 - .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 - .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 - .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 3 - .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 - vadd.i32 q13,q9,q5 - .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 - .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 - .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 4 - .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 - vadd.i32 q12,q9,q6 - .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 - .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 - .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 5 - .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 - vadd.i32 q13,q9,q7 - .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 - .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 - .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 6 - .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 - vadd.i32 q12,q9,q4 - .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 - .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 - .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 7 - .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 - vadd.i32 q13,q9,q5 - .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 - .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 - .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 8 - .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 - vadd.i32 q12,q10,q6 - .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 - .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 - .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 9 - .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 - vadd.i32 q13,q10,q7 - .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 - .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 - .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 10 - .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 - vadd.i32 q12,q10,q4 - .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 - .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 - .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 11 - .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 - vadd.i32 q13,q10,q5 - .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 - .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 - .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 12 - .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 - vadd.i32 q12,q10,q6 - .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 - .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 - .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 13 - .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 - vadd.i32 q13,q11,q7 - .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 - .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 - .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 14 - .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 - vadd.i32 q12,q11,q4 - .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 - .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 - .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 15 - .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 - vadd.i32 q13,q11,q5 - .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 - .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 - .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 16 - .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 - vadd.i32 q12,q11,q6 - .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 - .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 17 - .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 - vadd.i32 q13,q11,q7 - - .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 18 - .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 - - .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 19 - .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 - - vadd.i32 q1,q1,q2 - vadd.i32 q0,q0,q14 - bne .Loop_v8 - - vst1.32 {q0},[r0]! - vst1.32 {d2[0]},[r0] - - vldmia sp!,{d8-d15} - bx lr @ bx lr -.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 -#endif -.comm OPENSSL_armcap_P,4,4 +.size sha1_block_data_order,.-sha1_block_data_order +.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " +.align 2 diff --git a/app/openssl/crypto/sha/asm/sha1-armv8.S b/app/openssl/crypto/sha/asm/sha1-armv8.S deleted file mode 100644 index f9d12625..00000000 --- a/app/openssl/crypto/sha/asm/sha1-armv8.S +++ /dev/null @@ -1,1211 +0,0 @@ -#include "arm_arch.h" - -.text - -.globl sha1_block_data_order -.type sha1_block_data_order,%function -.align 6 -sha1_block_data_order: - ldr x16,.LOPENSSL_armcap_P - adr x17,.LOPENSSL_armcap_P - add x16,x16,x17 - ldr w16,[x16] - tst w16,#ARMV8_SHA1 - b.ne .Lv8_entry - - stp x29,x30,[sp,#-96]! - add x29,sp,#0 - stp x19,x20,[sp,#16] - stp x21,x22,[sp,#32] - stp x23,x24,[sp,#48] - stp x25,x26,[sp,#64] - stp x27,x28,[sp,#80] - - ldp w20,w21,[x0] - ldp w22,w23,[x0,#8] - ldr w24,[x0,#16] - -.Loop: - ldr x3,[x1],#64 - movz w28,#0x7999 - sub x2,x2,#1 - movk w28,#0x5a82,lsl#16 -#ifdef __ARMEB__ - ror x3,x3,#32 -#else - rev32 x3,x3 -#endif - add w24,w24,w28 // warm it up - add w24,w24,w3 - lsr x4,x3,#32 - ldr x5,[x1,#-56] - bic w25,w23,w21 - and w26,w22,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - orr w25,w25,w26 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - add w23,w23,w4 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) -#ifdef __ARMEB__ - ror x5,x5,#32 -#else - rev32 x5,x5 -#endif - bic w25,w22,w20 - and w26,w21,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - orr w25,w25,w26 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - add w22,w22,w5 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - lsr x6,x5,#32 - ldr x7,[x1,#-48] - bic w25,w21,w24 - and w26,w20,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - orr w25,w25,w26 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - add w21,w21,w6 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) -#ifdef __ARMEB__ - ror x7,x7,#32 -#else - rev32 x7,x7 -#endif - bic w25,w20,w23 - and w26,w24,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - orr w25,w25,w26 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - add w20,w20,w7 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - lsr x8,x7,#32 - ldr x9,[x1,#-40] - bic w25,w24,w22 - and w26,w23,w22 - ror w27,w21,#27 - add w24,w24,w28 // future e+=K - orr w25,w25,w26 - add w20,w20,w27 // e+=rot(a,5) - ror w22,w22,#2 - add w24,w24,w8 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) -#ifdef __ARMEB__ - ror x9,x9,#32 -#else - rev32 x9,x9 -#endif - bic w25,w23,w21 - and w26,w22,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - orr w25,w25,w26 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - add w23,w23,w9 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - lsr x10,x9,#32 - ldr x11,[x1,#-32] - bic w25,w22,w20 - and w26,w21,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - orr w25,w25,w26 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - add w22,w22,w10 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) -#ifdef __ARMEB__ - ror x11,x11,#32 -#else - rev32 x11,x11 -#endif - bic w25,w21,w24 - and w26,w20,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - orr w25,w25,w26 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - add w21,w21,w11 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - lsr x12,x11,#32 - ldr x13,[x1,#-24] - bic w25,w20,w23 - and w26,w24,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - orr w25,w25,w26 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - add w20,w20,w12 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) -#ifdef __ARMEB__ - ror x13,x13,#32 -#else - rev32 x13,x13 -#endif - bic w25,w24,w22 - and w26,w23,w22 - ror w27,w21,#27 - add w24,w24,w28 // future e+=K - orr w25,w25,w26 - add w20,w20,w27 // e+=rot(a,5) - ror w22,w22,#2 - add w24,w24,w13 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - lsr x14,x13,#32 - ldr x15,[x1,#-16] - bic w25,w23,w21 - and w26,w22,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - orr w25,w25,w26 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - add w23,w23,w14 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) -#ifdef __ARMEB__ - ror x15,x15,#32 -#else - rev32 x15,x15 -#endif - bic w25,w22,w20 - and w26,w21,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - orr w25,w25,w26 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - add w22,w22,w15 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - lsr x16,x15,#32 - ldr x17,[x1,#-8] - bic w25,w21,w24 - and w26,w20,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - orr w25,w25,w26 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - add w21,w21,w16 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) -#ifdef __ARMEB__ - ror x17,x17,#32 -#else - rev32 x17,x17 -#endif - bic w25,w20,w23 - and w26,w24,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - orr w25,w25,w26 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - add w20,w20,w17 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - lsr x19,x17,#32 - eor w3,w3,w5 - bic w25,w24,w22 - and w26,w23,w22 - ror w27,w21,#27 - eor w3,w3,w11 - add w24,w24,w28 // future e+=K - orr w25,w25,w26 - add w20,w20,w27 // e+=rot(a,5) - eor w3,w3,w16 - ror w22,w22,#2 - add w24,w24,w19 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w3,w3,#31 - eor w4,w4,w6 - bic w25,w23,w21 - and w26,w22,w21 - ror w27,w20,#27 - eor w4,w4,w12 - add w23,w23,w28 // future e+=K - orr w25,w25,w26 - add w24,w24,w27 // e+=rot(a,5) - eor w4,w4,w17 - ror w21,w21,#2 - add w23,w23,w3 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w4,w4,#31 - eor w5,w5,w7 - bic w25,w22,w20 - and w26,w21,w20 - ror w27,w24,#27 - eor w5,w5,w13 - add w22,w22,w28 // future e+=K - orr w25,w25,w26 - add w23,w23,w27 // e+=rot(a,5) - eor w5,w5,w19 - ror w20,w20,#2 - add w22,w22,w4 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w5,w5,#31 - eor w6,w6,w8 - bic w25,w21,w24 - and w26,w20,w24 - ror w27,w23,#27 - eor w6,w6,w14 - add w21,w21,w28 // future e+=K - orr w25,w25,w26 - add w22,w22,w27 // e+=rot(a,5) - eor w6,w6,w3 - ror w24,w24,#2 - add w21,w21,w5 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w6,w6,#31 - eor w7,w7,w9 - bic w25,w20,w23 - and w26,w24,w23 - ror w27,w22,#27 - eor w7,w7,w15 - add w20,w20,w28 // future e+=K - orr w25,w25,w26 - add w21,w21,w27 // e+=rot(a,5) - eor w7,w7,w4 - ror w23,w23,#2 - add w20,w20,w6 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w7,w7,#31 - movz w28,#0xeba1 - movk w28,#0x6ed9,lsl#16 - eor w8,w8,w10 - bic w25,w24,w22 - and w26,w23,w22 - ror w27,w21,#27 - eor w8,w8,w16 - add w24,w24,w28 // future e+=K - orr w25,w25,w26 - add w20,w20,w27 // e+=rot(a,5) - eor w8,w8,w5 - ror w22,w22,#2 - add w24,w24,w7 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w8,w8,#31 - eor w9,w9,w11 - eor w25,w23,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - eor w9,w9,w17 - eor w25,w25,w22 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - eor w9,w9,w6 - add w23,w23,w8 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w9,w9,#31 - eor w10,w10,w12 - eor w25,w22,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - eor w10,w10,w19 - eor w25,w25,w21 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - eor w10,w10,w7 - add w22,w22,w9 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w10,w10,#31 - eor w11,w11,w13 - eor w25,w21,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - eor w11,w11,w3 - eor w25,w25,w20 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - eor w11,w11,w8 - add w21,w21,w10 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w11,w11,#31 - eor w12,w12,w14 - eor w25,w20,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - eor w12,w12,w4 - eor w25,w25,w24 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - eor w12,w12,w9 - add w20,w20,w11 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w12,w12,#31 - eor w13,w13,w15 - eor w25,w24,w22 - ror w27,w21,#27 - add w24,w24,w28 // future e+=K - eor w13,w13,w5 - eor w25,w25,w23 - add w20,w20,w27 // e+=rot(a,5) - ror w22,w22,#2 - eor w13,w13,w10 - add w24,w24,w12 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w13,w13,#31 - eor w14,w14,w16 - eor w25,w23,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - eor w14,w14,w6 - eor w25,w25,w22 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - eor w14,w14,w11 - add w23,w23,w13 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w14,w14,#31 - eor w15,w15,w17 - eor w25,w22,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - eor w15,w15,w7 - eor w25,w25,w21 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - eor w15,w15,w12 - add w22,w22,w14 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w15,w15,#31 - eor w16,w16,w19 - eor w25,w21,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - eor w16,w16,w8 - eor w25,w25,w20 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - eor w16,w16,w13 - add w21,w21,w15 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w16,w16,#31 - eor w17,w17,w3 - eor w25,w20,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - eor w17,w17,w9 - eor w25,w25,w24 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - eor w17,w17,w14 - add w20,w20,w16 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w17,w17,#31 - eor w19,w19,w4 - eor w25,w24,w22 - ror w27,w21,#27 - add w24,w24,w28 // future e+=K - eor w19,w19,w10 - eor w25,w25,w23 - add w20,w20,w27 // e+=rot(a,5) - ror w22,w22,#2 - eor w19,w19,w15 - add w24,w24,w17 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w19,w19,#31 - eor w3,w3,w5 - eor w25,w23,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - eor w3,w3,w11 - eor w25,w25,w22 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - eor w3,w3,w16 - add w23,w23,w19 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w3,w3,#31 - eor w4,w4,w6 - eor w25,w22,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - eor w4,w4,w12 - eor w25,w25,w21 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - eor w4,w4,w17 - add w22,w22,w3 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w4,w4,#31 - eor w5,w5,w7 - eor w25,w21,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - eor w5,w5,w13 - eor w25,w25,w20 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - eor w5,w5,w19 - add w21,w21,w4 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w5,w5,#31 - eor w6,w6,w8 - eor w25,w20,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - eor w6,w6,w14 - eor w25,w25,w24 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - eor w6,w6,w3 - add w20,w20,w5 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w6,w6,#31 - eor w7,w7,w9 - eor w25,w24,w22 - ror w27,w21,#27 - add w24,w24,w28 // future e+=K - eor w7,w7,w15 - eor w25,w25,w23 - add w20,w20,w27 // e+=rot(a,5) - ror w22,w22,#2 - eor w7,w7,w4 - add w24,w24,w6 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w7,w7,#31 - eor w8,w8,w10 - eor w25,w23,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - eor w8,w8,w16 - eor w25,w25,w22 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - eor w8,w8,w5 - add w23,w23,w7 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w8,w8,#31 - eor w9,w9,w11 - eor w25,w22,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - eor w9,w9,w17 - eor w25,w25,w21 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - eor w9,w9,w6 - add w22,w22,w8 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w9,w9,#31 - eor w10,w10,w12 - eor w25,w21,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - eor w10,w10,w19 - eor w25,w25,w20 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - eor w10,w10,w7 - add w21,w21,w9 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w10,w10,#31 - eor w11,w11,w13 - eor w25,w20,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - eor w11,w11,w3 - eor w25,w25,w24 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - eor w11,w11,w8 - add w20,w20,w10 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w11,w11,#31 - movz w28,#0xbcdc - movk w28,#0x8f1b,lsl#16 - eor w12,w12,w14 - eor w25,w24,w22 - ror w27,w21,#27 - add w24,w24,w28 // future e+=K - eor w12,w12,w4 - eor w25,w25,w23 - add w20,w20,w27 // e+=rot(a,5) - ror w22,w22,#2 - eor w12,w12,w9 - add w24,w24,w11 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w12,w12,#31 - orr w25,w21,w22 - and w26,w21,w22 - eor w13,w13,w15 - ror w27,w20,#27 - and w25,w25,w23 - add w23,w23,w28 // future e+=K - eor w13,w13,w5 - add w24,w24,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w21,w21,#2 - eor w13,w13,w10 - add w23,w23,w12 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w13,w13,#31 - orr w25,w20,w21 - and w26,w20,w21 - eor w14,w14,w16 - ror w27,w24,#27 - and w25,w25,w22 - add w22,w22,w28 // future e+=K - eor w14,w14,w6 - add w23,w23,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w20,w20,#2 - eor w14,w14,w11 - add w22,w22,w13 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w14,w14,#31 - orr w25,w24,w20 - and w26,w24,w20 - eor w15,w15,w17 - ror w27,w23,#27 - and w25,w25,w21 - add w21,w21,w28 // future e+=K - eor w15,w15,w7 - add w22,w22,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w24,w24,#2 - eor w15,w15,w12 - add w21,w21,w14 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w15,w15,#31 - orr w25,w23,w24 - and w26,w23,w24 - eor w16,w16,w19 - ror w27,w22,#27 - and w25,w25,w20 - add w20,w20,w28 // future e+=K - eor w16,w16,w8 - add w21,w21,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w23,w23,#2 - eor w16,w16,w13 - add w20,w20,w15 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w16,w16,#31 - orr w25,w22,w23 - and w26,w22,w23 - eor w17,w17,w3 - ror w27,w21,#27 - and w25,w25,w24 - add w24,w24,w28 // future e+=K - eor w17,w17,w9 - add w20,w20,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w22,w22,#2 - eor w17,w17,w14 - add w24,w24,w16 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w17,w17,#31 - orr w25,w21,w22 - and w26,w21,w22 - eor w19,w19,w4 - ror w27,w20,#27 - and w25,w25,w23 - add w23,w23,w28 // future e+=K - eor w19,w19,w10 - add w24,w24,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w21,w21,#2 - eor w19,w19,w15 - add w23,w23,w17 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w19,w19,#31 - orr w25,w20,w21 - and w26,w20,w21 - eor w3,w3,w5 - ror w27,w24,#27 - and w25,w25,w22 - add w22,w22,w28 // future e+=K - eor w3,w3,w11 - add w23,w23,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w20,w20,#2 - eor w3,w3,w16 - add w22,w22,w19 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w3,w3,#31 - orr w25,w24,w20 - and w26,w24,w20 - eor w4,w4,w6 - ror w27,w23,#27 - and w25,w25,w21 - add w21,w21,w28 // future e+=K - eor w4,w4,w12 - add w22,w22,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w24,w24,#2 - eor w4,w4,w17 - add w21,w21,w3 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w4,w4,#31 - orr w25,w23,w24 - and w26,w23,w24 - eor w5,w5,w7 - ror w27,w22,#27 - and w25,w25,w20 - add w20,w20,w28 // future e+=K - eor w5,w5,w13 - add w21,w21,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w23,w23,#2 - eor w5,w5,w19 - add w20,w20,w4 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w5,w5,#31 - orr w25,w22,w23 - and w26,w22,w23 - eor w6,w6,w8 - ror w27,w21,#27 - and w25,w25,w24 - add w24,w24,w28 // future e+=K - eor w6,w6,w14 - add w20,w20,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w22,w22,#2 - eor w6,w6,w3 - add w24,w24,w5 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w6,w6,#31 - orr w25,w21,w22 - and w26,w21,w22 - eor w7,w7,w9 - ror w27,w20,#27 - and w25,w25,w23 - add w23,w23,w28 // future e+=K - eor w7,w7,w15 - add w24,w24,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w21,w21,#2 - eor w7,w7,w4 - add w23,w23,w6 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w7,w7,#31 - orr w25,w20,w21 - and w26,w20,w21 - eor w8,w8,w10 - ror w27,w24,#27 - and w25,w25,w22 - add w22,w22,w28 // future e+=K - eor w8,w8,w16 - add w23,w23,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w20,w20,#2 - eor w8,w8,w5 - add w22,w22,w7 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w8,w8,#31 - orr w25,w24,w20 - and w26,w24,w20 - eor w9,w9,w11 - ror w27,w23,#27 - and w25,w25,w21 - add w21,w21,w28 // future e+=K - eor w9,w9,w17 - add w22,w22,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w24,w24,#2 - eor w9,w9,w6 - add w21,w21,w8 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w9,w9,#31 - orr w25,w23,w24 - and w26,w23,w24 - eor w10,w10,w12 - ror w27,w22,#27 - and w25,w25,w20 - add w20,w20,w28 // future e+=K - eor w10,w10,w19 - add w21,w21,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w23,w23,#2 - eor w10,w10,w7 - add w20,w20,w9 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w10,w10,#31 - orr w25,w22,w23 - and w26,w22,w23 - eor w11,w11,w13 - ror w27,w21,#27 - and w25,w25,w24 - add w24,w24,w28 // future e+=K - eor w11,w11,w3 - add w20,w20,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w22,w22,#2 - eor w11,w11,w8 - add w24,w24,w10 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w11,w11,#31 - orr w25,w21,w22 - and w26,w21,w22 - eor w12,w12,w14 - ror w27,w20,#27 - and w25,w25,w23 - add w23,w23,w28 // future e+=K - eor w12,w12,w4 - add w24,w24,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w21,w21,#2 - eor w12,w12,w9 - add w23,w23,w11 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w12,w12,#31 - orr w25,w20,w21 - and w26,w20,w21 - eor w13,w13,w15 - ror w27,w24,#27 - and w25,w25,w22 - add w22,w22,w28 // future e+=K - eor w13,w13,w5 - add w23,w23,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w20,w20,#2 - eor w13,w13,w10 - add w22,w22,w12 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w13,w13,#31 - orr w25,w24,w20 - and w26,w24,w20 - eor w14,w14,w16 - ror w27,w23,#27 - and w25,w25,w21 - add w21,w21,w28 // future e+=K - eor w14,w14,w6 - add w22,w22,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w24,w24,#2 - eor w14,w14,w11 - add w21,w21,w13 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w14,w14,#31 - orr w25,w23,w24 - and w26,w23,w24 - eor w15,w15,w17 - ror w27,w22,#27 - and w25,w25,w20 - add w20,w20,w28 // future e+=K - eor w15,w15,w7 - add w21,w21,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w23,w23,#2 - eor w15,w15,w12 - add w20,w20,w14 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w15,w15,#31 - movz w28,#0xc1d6 - movk w28,#0xca62,lsl#16 - orr w25,w22,w23 - and w26,w22,w23 - eor w16,w16,w19 - ror w27,w21,#27 - and w25,w25,w24 - add w24,w24,w28 // future e+=K - eor w16,w16,w8 - add w20,w20,w27 // e+=rot(a,5) - orr w25,w25,w26 - ror w22,w22,#2 - eor w16,w16,w13 - add w24,w24,w15 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w16,w16,#31 - eor w17,w17,w3 - eor w25,w23,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - eor w17,w17,w9 - eor w25,w25,w22 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - eor w17,w17,w14 - add w23,w23,w16 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w17,w17,#31 - eor w19,w19,w4 - eor w25,w22,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - eor w19,w19,w10 - eor w25,w25,w21 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - eor w19,w19,w15 - add w22,w22,w17 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w19,w19,#31 - eor w3,w3,w5 - eor w25,w21,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - eor w3,w3,w11 - eor w25,w25,w20 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - eor w3,w3,w16 - add w21,w21,w19 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w3,w3,#31 - eor w4,w4,w6 - eor w25,w20,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - eor w4,w4,w12 - eor w25,w25,w24 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - eor w4,w4,w17 - add w20,w20,w3 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w4,w4,#31 - eor w5,w5,w7 - eor w25,w24,w22 - ror w27,w21,#27 - add w24,w24,w28 // future e+=K - eor w5,w5,w13 - eor w25,w25,w23 - add w20,w20,w27 // e+=rot(a,5) - ror w22,w22,#2 - eor w5,w5,w19 - add w24,w24,w4 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w5,w5,#31 - eor w6,w6,w8 - eor w25,w23,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - eor w6,w6,w14 - eor w25,w25,w22 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - eor w6,w6,w3 - add w23,w23,w5 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w6,w6,#31 - eor w7,w7,w9 - eor w25,w22,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - eor w7,w7,w15 - eor w25,w25,w21 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - eor w7,w7,w4 - add w22,w22,w6 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w7,w7,#31 - eor w8,w8,w10 - eor w25,w21,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - eor w8,w8,w16 - eor w25,w25,w20 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - eor w8,w8,w5 - add w21,w21,w7 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w8,w8,#31 - eor w9,w9,w11 - eor w25,w20,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - eor w9,w9,w17 - eor w25,w25,w24 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - eor w9,w9,w6 - add w20,w20,w8 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w9,w9,#31 - eor w10,w10,w12 - eor w25,w24,w22 - ror w27,w21,#27 - add w24,w24,w28 // future e+=K - eor w10,w10,w19 - eor w25,w25,w23 - add w20,w20,w27 // e+=rot(a,5) - ror w22,w22,#2 - eor w10,w10,w7 - add w24,w24,w9 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w10,w10,#31 - eor w11,w11,w13 - eor w25,w23,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - eor w11,w11,w3 - eor w25,w25,w22 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - eor w11,w11,w8 - add w23,w23,w10 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w11,w11,#31 - eor w12,w12,w14 - eor w25,w22,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - eor w12,w12,w4 - eor w25,w25,w21 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - eor w12,w12,w9 - add w22,w22,w11 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w12,w12,#31 - eor w13,w13,w15 - eor w25,w21,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - eor w13,w13,w5 - eor w25,w25,w20 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - eor w13,w13,w10 - add w21,w21,w12 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w13,w13,#31 - eor w14,w14,w16 - eor w25,w20,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - eor w14,w14,w6 - eor w25,w25,w24 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - eor w14,w14,w11 - add w20,w20,w13 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ror w14,w14,#31 - eor w15,w15,w17 - eor w25,w24,w22 - ror w27,w21,#27 - add w24,w24,w28 // future e+=K - eor w15,w15,w7 - eor w25,w25,w23 - add w20,w20,w27 // e+=rot(a,5) - ror w22,w22,#2 - eor w15,w15,w12 - add w24,w24,w14 // future e+=X[i] - add w20,w20,w25 // e+=F(b,c,d) - ror w15,w15,#31 - eor w16,w16,w19 - eor w25,w23,w21 - ror w27,w20,#27 - add w23,w23,w28 // future e+=K - eor w16,w16,w8 - eor w25,w25,w22 - add w24,w24,w27 // e+=rot(a,5) - ror w21,w21,#2 - eor w16,w16,w13 - add w23,w23,w15 // future e+=X[i] - add w24,w24,w25 // e+=F(b,c,d) - ror w16,w16,#31 - eor w17,w17,w3 - eor w25,w22,w20 - ror w27,w24,#27 - add w22,w22,w28 // future e+=K - eor w17,w17,w9 - eor w25,w25,w21 - add w23,w23,w27 // e+=rot(a,5) - ror w20,w20,#2 - eor w17,w17,w14 - add w22,w22,w16 // future e+=X[i] - add w23,w23,w25 // e+=F(b,c,d) - ror w17,w17,#31 - eor w19,w19,w4 - eor w25,w21,w24 - ror w27,w23,#27 - add w21,w21,w28 // future e+=K - eor w19,w19,w10 - eor w25,w25,w20 - add w22,w22,w27 // e+=rot(a,5) - ror w24,w24,#2 - eor w19,w19,w15 - add w21,w21,w17 // future e+=X[i] - add w22,w22,w25 // e+=F(b,c,d) - ror w19,w19,#31 - ldp w4,w5,[x0] - eor w25,w20,w23 - ror w27,w22,#27 - add w20,w20,w28 // future e+=K - eor w25,w25,w24 - add w21,w21,w27 // e+=rot(a,5) - ror w23,w23,#2 - add w20,w20,w19 // future e+=X[i] - add w21,w21,w25 // e+=F(b,c,d) - ldp w6,w7,[x0,#8] - eor w25,w24,w22 - ror w27,w21,#27 - eor w25,w25,w23 - add w20,w20,w27 // e+=rot(a,5) - ror w22,w22,#2 - ldr w8,[x0,#16] - add w20,w20,w25 // e+=F(b,c,d) - add w21,w21,w5 - add w22,w22,w6 - add w20,w20,w4 - add w23,w23,w7 - add w24,w24,w8 - stp w20,w21,[x0] - stp w22,w23,[x0,#8] - str w24,[x0,#16] - cbnz x2,.Loop - - ldp x19,x20,[sp,#16] - ldp x21,x22,[sp,#32] - ldp x23,x24,[sp,#48] - ldp x25,x26,[sp,#64] - ldp x27,x28,[sp,#80] - ldr x29,[sp],#96 - ret -.size sha1_block_data_order,.-sha1_block_data_order -.type sha1_block_armv8,%function -.align 6 -sha1_block_armv8: -.Lv8_entry: - stp x29,x30,[sp,#-16]! - add x29,sp,#0 - - adr x4,.Lconst - eor v1.16b,v1.16b,v1.16b - ld1 {v0.4s},[x0],#16 - ld1 {v1.s}[0],[x0] - sub x0,x0,#16 - ld1 {v16.4s-v19.4s},[x4] - -.Loop_hw: - ld1 {v4.16b-v7.16b},[x1],#64 - sub x2,x2,#1 - rev32 v4.16b,v4.16b - rev32 v5.16b,v5.16b - - add v20.4s,v16.4s,v4.4s - rev32 v6.16b,v6.16b - orr v22.16b,v0.16b,v0.16b // offload - - add v21.4s,v16.4s,v5.4s - rev32 v7.16b,v7.16b - .inst 0x5e280803 //sha1h v3.16b,v0.16b - .inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0 - add v20.4s,v16.4s,v6.4s - .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b - .inst 0x5e280802 //sha1h v2.16b,v0.16b // 1 - .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s - add v21.4s,v16.4s,v7.4s - .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b - .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b - .inst 0x5e280803 //sha1h v3.16b,v0.16b // 2 - .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s - add v20.4s,v16.4s,v4.4s - .inst 0x5e281885 //sha1su1 v5.16b,v4.16b - .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b - .inst 0x5e280802 //sha1h v2.16b,v0.16b // 3 - .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s - add v21.4s,v17.4s,v5.4s - .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b - .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b - .inst 0x5e280803 //sha1h v3.16b,v0.16b // 4 - .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s - add v20.4s,v17.4s,v6.4s - .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b - .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b - .inst 0x5e280802 //sha1h v2.16b,v0.16b // 5 - .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s - add v21.4s,v17.4s,v7.4s - .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b - .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b - .inst 0x5e280803 //sha1h v3.16b,v0.16b // 6 - .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s - add v20.4s,v17.4s,v4.4s - .inst 0x5e281885 //sha1su1 v5.16b,v4.16b - .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b - .inst 0x5e280802 //sha1h v2.16b,v0.16b // 7 - .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s - add v21.4s,v17.4s,v5.4s - .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b - .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b - .inst 0x5e280803 //sha1h v3.16b,v0.16b // 8 - .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s - add v20.4s,v18.4s,v6.4s - .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b - .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b - .inst 0x5e280802 //sha1h v2.16b,v0.16b // 9 - .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s - add v21.4s,v18.4s,v7.4s - .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b - .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b - .inst 0x5e280803 //sha1h v3.16b,v0.16b // 10 - .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s - add v20.4s,v18.4s,v4.4s - .inst 0x5e281885 //sha1su1 v5.16b,v4.16b - .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b - .inst 0x5e280802 //sha1h v2.16b,v0.16b // 11 - .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s - add v21.4s,v18.4s,v5.4s - .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b - .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b - .inst 0x5e280803 //sha1h v3.16b,v0.16b // 12 - .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s - add v20.4s,v18.4s,v6.4s - .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b - .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b - .inst 0x5e280802 //sha1h v2.16b,v0.16b // 13 - .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s - add v21.4s,v19.4s,v7.4s - .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b - .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b - .inst 0x5e280803 //sha1h v3.16b,v0.16b // 14 - .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s - add v20.4s,v19.4s,v4.4s - .inst 0x5e281885 //sha1su1 v5.16b,v4.16b - .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b - .inst 0x5e280802 //sha1h v2.16b,v0.16b // 15 - .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s - add v21.4s,v19.4s,v5.4s - .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b - .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b - .inst 0x5e280803 //sha1h v3.16b,v0.16b // 16 - .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s - add v20.4s,v19.4s,v6.4s - .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b - .inst 0x5e280802 //sha1h v2.16b,v0.16b // 17 - .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s - add v21.4s,v19.4s,v7.4s - - .inst 0x5e280803 //sha1h v3.16b,v0.16b // 18 - .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s - - .inst 0x5e280802 //sha1h v2.16b,v0.16b // 19 - .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s - - add v1.4s,v1.4s,v2.4s - add v0.4s,v0.4s,v22.4s - - cbnz x2,.Loop_hw - - st1 {v0.4s},[x0],#16 - st1 {v1.s}[0],[x0] - - ldr x29,[sp],#16 - ret -.size sha1_block_armv8,.-sha1_block_armv8 -.align 6 -.Lconst: -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 -.LOPENSSL_armcap_P: -.quad OPENSSL_armcap_P-. -.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by " -.align 2 -.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha1-armv8.pl b/app/openssl/crypto/sha/asm/sha1-armv8.pl deleted file mode 100644 index c1f552b6..00000000 --- a/app/openssl/crypto/sha/asm/sha1-armv8.pl +++ /dev/null @@ -1,333 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# SHA1 for ARMv8. -# -# Performance in cycles per processed byte and improvement coefficient -# over code generated with "default" compiler: -# -# hardware-assisted software(*) -# Apple A7 2.31 4.13 (+14%) -# Cortex-A5x n/a n/a -# -# (*) Software results are presented mostly for reference purposes. - -$flavour = shift; -open STDOUT,">".shift; - -($ctx,$inp,$num)=("x0","x1","x2"); -@Xw=map("w$_",(3..17,19)); -@Xx=map("x$_",(3..17,19)); -@V=($A,$B,$C,$D,$E)=map("w$_",(20..24)); -($t0,$t1,$t2,$K)=map("w$_",(25..28)); - - -sub BODY_00_19 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=($i+2)&15; - -$code.=<<___ if ($i<15 && !($i&1)); - lsr @Xx[$i+1],@Xx[$i],#32 -___ -$code.=<<___ if ($i<14 && !($i&1)); - ldr @Xx[$i+2],[$inp,#`($i+2)*4-64`] -___ -$code.=<<___ if ($i<14 && ($i&1)); -#ifdef __ARMEB__ - ror @Xx[$i+1],@Xx[$i+1],#32 -#else - rev32 @Xx[$i+1],@Xx[$i+1] -#endif -___ -$code.=<<___ if ($i<14); - bic $t0,$d,$b - and $t1,$c,$b - ror $t2,$a,#27 - add $d,$d,$K // future e+=K - orr $t0,$t0,$t1 - add $e,$e,$t2 // e+=rot(a,5) - ror $b,$b,#2 - add $d,$d,@Xw[($i+1)&15] // future e+=X[i] - add $e,$e,$t0 // e+=F(b,c,d) -___ -$code.=<<___ if ($i==19); - movz $K,#0xeba1 - movk $K,#0x6ed9,lsl#16 -___ -$code.=<<___ if ($i>=14); - eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] - bic $t0,$d,$b - and $t1,$c,$b - ror $t2,$a,#27 - eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] - add $d,$d,$K // future e+=K - orr $t0,$t0,$t1 - add $e,$e,$t2 // e+=rot(a,5) - eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] - ror $b,$b,#2 - add $d,$d,@Xw[($i+1)&15] // future e+=X[i] - add $e,$e,$t0 // e+=F(b,c,d) - ror @Xw[$j],@Xw[$j],#31 -___ -} - -sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=($i+2)&15; - -$code.=<<___ if ($i==59); - movz $K,#0xc1d6 - movk $K,#0xca62,lsl#16 -___ -$code.=<<___; - orr $t0,$b,$c - and $t1,$b,$c - eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] - ror $t2,$a,#27 - and $t0,$t0,$d - add $d,$d,$K // future e+=K - eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] - add $e,$e,$t2 // e+=rot(a,5) - orr $t0,$t0,$t1 - ror $b,$b,#2 - eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] - add $d,$d,@Xw[($i+1)&15] // future e+=X[i] - add $e,$e,$t0 // e+=F(b,c,d) - ror @Xw[$j],@Xw[$j],#31 -___ -} - -sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=($i+2)&15; - -$code.=<<___ if ($i==39); - movz $K,#0xbcdc - movk $K,#0x8f1b,lsl#16 -___ -$code.=<<___ if ($i<78); - eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] - eor $t0,$d,$b - ror $t2,$a,#27 - add $d,$d,$K // future e+=K - eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] - eor $t0,$t0,$c - add $e,$e,$t2 // e+=rot(a,5) - ror $b,$b,#2 - eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] - add $d,$d,@Xw[($i+1)&15] // future e+=X[i] - add $e,$e,$t0 // e+=F(b,c,d) - ror @Xw[$j],@Xw[$j],#31 -___ -$code.=<<___ if ($i==78); - ldp @Xw[1],@Xw[2],[$ctx] - eor $t0,$d,$b - ror $t2,$a,#27 - add $d,$d,$K // future e+=K - eor $t0,$t0,$c - add $e,$e,$t2 // e+=rot(a,5) - ror $b,$b,#2 - add $d,$d,@Xw[($i+1)&15] // future e+=X[i] - add $e,$e,$t0 // e+=F(b,c,d) -___ -$code.=<<___ if ($i==79); - ldp @Xw[3],@Xw[4],[$ctx,#8] - eor $t0,$d,$b - ror $t2,$a,#27 - eor $t0,$t0,$c - add $e,$e,$t2 // e+=rot(a,5) - ror $b,$b,#2 - ldr @Xw[5],[$ctx,#16] - add $e,$e,$t0 // e+=F(b,c,d) -___ -} - -$code.=<<___; -#include "arm_arch.h" - -.text - -.globl sha1_block_data_order -.type sha1_block_data_order,%function -.align 6 -sha1_block_data_order: - ldr x16,.LOPENSSL_armcap_P - adr x17,.LOPENSSL_armcap_P - add x16,x16,x17 - ldr w16,[x16] - tst w16,#ARMV8_SHA1 - b.ne .Lv8_entry - - stp x29,x30,[sp,#-96]! - add x29,sp,#0 - stp x19,x20,[sp,#16] - stp x21,x22,[sp,#32] - stp x23,x24,[sp,#48] - stp x25,x26,[sp,#64] - stp x27,x28,[sp,#80] - - ldp $A,$B,[$ctx] - ldp $C,$D,[$ctx,#8] - ldr $E,[$ctx,#16] - -.Loop: - ldr @Xx[0],[$inp],#64 - movz $K,#0x7999 - sub $num,$num,#1 - movk $K,#0x5a82,lsl#16 -#ifdef __ARMEB__ - ror $Xx[0],@Xx[0],#32 -#else - rev32 @Xx[0],@Xx[0] -#endif - add $E,$E,$K // warm it up - add $E,$E,@Xw[0] -___ -for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } -for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } -for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - add $B,$B,@Xw[2] - add $C,$C,@Xw[3] - add $A,$A,@Xw[1] - add $D,$D,@Xw[4] - add $E,$E,@Xw[5] - stp $A,$B,[$ctx] - stp $C,$D,[$ctx,#8] - str $E,[$ctx,#16] - cbnz $num,.Loop - - ldp x19,x20,[sp,#16] - ldp x21,x22,[sp,#32] - ldp x23,x24,[sp,#48] - ldp x25,x26,[sp,#64] - ldp x27,x28,[sp,#80] - ldr x29,[sp],#96 - ret -.size sha1_block_data_order,.-sha1_block_data_order -___ -{{{ -my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3)); -my @MSG=map("v$_.16b",(4..7)); -my @Kxx=map("v$_.4s",(16..19)); -my ($W0,$W1)=("v20.4s","v21.4s"); -my $ABCD_SAVE="v22.16b"; - -$code.=<<___; -.type sha1_block_armv8,%function -.align 6 -sha1_block_armv8: -.Lv8_entry: - stp x29,x30,[sp,#-16]! - add x29,sp,#0 - - adr x4,.Lconst - eor $E,$E,$E - ld1.32 {$ABCD},[$ctx],#16 - ld1.32 {$E}[0],[$ctx] - sub $ctx,$ctx,#16 - ld1.32 {@Kxx[0]-@Kxx[3]},[x4] - -.Loop_hw: - ld1 {@MSG[0]-@MSG[3]},[$inp],#64 - sub $num,$num,#1 - rev32 @MSG[0],@MSG[0] - rev32 @MSG[1],@MSG[1] - - add.i32 $W0,@Kxx[0],@MSG[0] - rev32 @MSG[2],@MSG[2] - orr $ABCD_SAVE,$ABCD,$ABCD // offload - - add.i32 $W1,@Kxx[0],@MSG[1] - rev32 @MSG[3],@MSG[3] - sha1h $E1,$ABCD - sha1c $ABCD,$E,$W0 // 0 - add.i32 $W0,@Kxx[$j],@MSG[2] - sha1su0 @MSG[0],@MSG[1],@MSG[2] -___ -for ($j=0,$i=1;$i<20-3;$i++) { -my $f=("c","p","m","p")[$i/5]; -$code.=<<___; - sha1h $E0,$ABCD // $i - sha1$f $ABCD,$E1,$W1 - add.i32 $W1,@Kxx[$j],@MSG[3] - sha1su1 @MSG[0],@MSG[3] -___ -$code.=<<___ if ($i<20-4); - sha1su0 @MSG[1],@MSG[2],@MSG[3] -___ - ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0); - push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0); -} -$code.=<<___; - sha1h $E0,$ABCD // $i - sha1p $ABCD,$E1,$W1 - add.i32 $W1,@Kxx[$j],@MSG[3] - - sha1h $E1,$ABCD // 18 - sha1p $ABCD,$E0,$W0 - - sha1h $E0,$ABCD // 19 - sha1p $ABCD,$E1,$W1 - - add.i32 $E,$E,$E0 - add.i32 $ABCD,$ABCD,$ABCD_SAVE - - cbnz $num,.Loop_hw - - st1.32 {$ABCD},[$ctx],#16 - st1.32 {$E}[0],[$ctx] - - ldr x29,[sp],#16 - ret -.size sha1_block_armv8,.-sha1_block_armv8 -.align 6 -.Lconst: -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 -.LOPENSSL_armcap_P: -.quad OPENSSL_armcap_P-. -.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by " -.align 2 -.comm OPENSSL_armcap_P,4,4 -___ -}}} - -{ my %opcode = ( - "sha1c" => 0x5e000000, "sha1p" => 0x5e001000, - "sha1m" => 0x5e002000, "sha1su0" => 0x5e003000, - "sha1h" => 0x5e280800, "sha1su1" => 0x5e281800 ); - - sub unsha1 { - my ($mnemonic,$arg)=@_; - - $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o - && - sprintf ".inst\t0x%08x\t//%s %s", - $opcode{$mnemonic}|$1|($2<<5)|($3<<16), - $mnemonic,$arg; - } -} - -foreach(split("\n",$code)) { - - s/\`([^\`]*)\`/eval($1)/geo; - - s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/geo; - - s/\.\w?32\b//o and s/\.16b/\.4s/go; - m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go; - - print $_,"\n"; -} - -close STDOUT; diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.pl b/app/openssl/crypto/sha/asm/sha256-armv4.pl index 505ca8f3..9c84e8d9 100644 --- a/app/openssl/crypto/sha/asm/sha256-armv4.pl +++ b/app/openssl/crypto/sha/asm/sha256-armv4.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -21,27 +21,15 @@ # February 2011. # # Profiler-assisted and platform-specific optimization resulted in 16% -# improvement on Cortex A8 core and ~15.4 cycles per processed byte. - -# September 2013. -# -# Add NEON implementation. On Cortex A8 it was measured to process one -# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon -# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only -# code (meaning that latter performs sub-optimally, nothing was done -# about it). - -# May 2014. -# -# Add ARMv8 code path performing at 2.0 cpb on Apple A7. +# improvement on Cortex A8 core and ~17 cycles per processed byte. while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $ctx="r0"; $t0="r0"; -$inp="r1"; $t4="r1"; +$inp="r1"; $t3="r1"; $len="r2"; $t1="r2"; -$T1="r3"; $t3="r3"; +$T1="r3"; $A="r4"; $B="r5"; $C="r6"; @@ -64,88 +52,71 @@ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___ if ($i<16); #if __ARM_ARCH__>=7 - @ ldr $t1,[$inp],#4 @ $i -# if $i==15 - str $inp,[sp,#17*4] @ make room for $t4 -# endif - eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` - add $a,$a,$t2 @ h+=Maj(a,b,c) from the past - eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) - rev $t1,$t1 + ldr $T1,[$inp],#4 #else - @ ldrb $t1,[$inp,#3] @ $i - add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + ldrb $T1,[$inp,#3] @ $i ldrb $t2,[$inp,#2] - ldrb $t0,[$inp,#1] - orr $t1,$t1,$t2,lsl#8 - ldrb $t2,[$inp],#4 - orr $t1,$t1,$t0,lsl#16 -# if $i==15 - str $inp,[sp,#17*4] @ make room for $t4 -# endif - eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` - orr $t1,$t1,$t2,lsl#24 - eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) + ldrb $t1,[$inp,#1] + ldrb $t0,[$inp],#4 + orr $T1,$T1,$t2,lsl#8 + orr $T1,$T1,$t1,lsl#16 + orr $T1,$T1,$t0,lsl#24 #endif ___ $code.=<<___; + mov $t0,$e,ror#$Sigma1[0] ldr $t2,[$Ktbl],#4 @ *K256++ - add $h,$h,$t1 @ h+=X[i] - str $t1,[sp,#`$i%16`*4] + eor $t0,$t0,$e,ror#$Sigma1[1] eor $t1,$f,$g - add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) +#if $i>=16 + add $T1,$T1,$t3 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev $T1,$T1 +#endif +#if $i==15 + str $inp,[sp,#17*4] @ leave room for $t3 +#endif + eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e) and $t1,$t1,$e - add $h,$h,$t2 @ h+=K256[i] + str $T1,[sp,#`$i%16`*4] + add $T1,$T1,$t0 eor $t1,$t1,$g @ Ch(e,f,g) - eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` - add $h,$h,$t1 @ h+=Ch(e,f,g) -#if $i==31 - and $t2,$t2,#0xff - cmp $t2,#0xf2 @ done? + add $T1,$T1,$h + mov $h,$a,ror#$Sigma0[0] + add $T1,$T1,$t1 + eor $h,$h,$a,ror#$Sigma0[1] + add $T1,$T1,$t2 + eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a) +#if $i>=15 + ldr $t3,[sp,#`($i+2)%16`*4] @ from BODY_16_xx #endif -#if $i<15 -# if __ARM_ARCH__>=7 - ldr $t1,[$inp],#4 @ prefetch -# else - ldrb $t1,[$inp,#3] -# endif - eor $t2,$a,$b @ a^b, b^c in next round -#else - ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx - eor $t2,$a,$b @ a^b, b^c in next round - ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx -#endif - eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) - and $t3,$t3,$t2 @ (b^c)&=(a^b) - add $d,$d,$h @ d+=h - eor $t3,$t3,$b @ Maj(a,b,c) - add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) - @ add $h,$h,$t3 @ h+=Maj(a,b,c) + orr $t0,$a,$b + and $t1,$a,$b + and $t0,$t0,$c + add $h,$h,$T1 + orr $t0,$t0,$t1 @ Maj(a,b,c) + add $d,$d,$T1 + add $h,$h,$t0 ___ - ($t2,$t3)=($t3,$t2); } sub BODY_16_XX { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___; - @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i - @ ldr $t4,[sp,#`($i+14)%16`*4] - mov $t0,$t1,ror#$sigma0[0] - add $a,$a,$t2 @ h+=Maj(a,b,c) from the past - mov $t2,$t4,ror#$sigma1[0] - eor $t0,$t0,$t1,ror#$sigma0[1] - eor $t2,$t2,$t4,ror#$sigma1[1] - eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) - ldr $t1,[sp,#`($i+0)%16`*4] - eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) - ldr $t4,[sp,#`($i+9)%16`*4] - - add $t2,$t2,$t0 - eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 - add $t1,$t1,$t2 - eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) - add $t1,$t1,$t4 @ X[i] + @ ldr $t3,[sp,#`($i+1)%16`*4] @ $i + ldr $t2,[sp,#`($i+14)%16`*4] + mov $t0,$t3,ror#$sigma0[0] + ldr $T1,[sp,#`($i+0)%16`*4] + eor $t0,$t0,$t3,ror#$sigma0[1] + ldr $t1,[sp,#`($i+9)%16`*4] + eor $t0,$t0,$t3,lsr#$sigma0[2] @ sigma0(X[i+1]) + mov $t3,$t2,ror#$sigma1[0] + add $T1,$T1,$t0 + eor $t3,$t3,$t2,ror#$sigma1[1] + add $T1,$T1,$t1 + eor $t3,$t3,$t2,lsr#$sigma1[2] @ sigma1(X[i+14]) + @ add $T1,$T1,$t3 ___ &BODY_00_15(@_); } @@ -176,64 +147,46 @@ K256: .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 -.word 0 @ terminator -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha256_block_data_order -.align 5 .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: sub r3,pc,#8 @ sha256_block_data_order add $len,$inp,$len,lsl#6 @ len to point at the end of inp -#if __ARM_ARCH__>=7 - ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap_P - tst r12,#ARMV8_SHA256 - bne .LARMv8 - tst r12,#ARMV7_NEON - bne .LNEON -#endif stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} - sub $Ktbl,r3,#256+32 @ K256 + sub $Ktbl,r3,#256 @ K256 sub sp,sp,#16*4 @ alloca(X[16]) .Loop: -# if __ARM_ARCH__>=7 - ldr $t1,[$inp],#4 -# else - ldrb $t1,[$inp,#3] -# endif - eor $t3,$B,$C @ magic - eor $t2,$t2,$t2 ___ for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } $code.=".Lrounds_16_xx:\n"; for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } $code.=<<___; - ldreq $t3,[sp,#16*4] @ pull ctx + and $t2,$t2,#0xff + cmp $t2,#0xf2 bne .Lrounds_16_xx - add $A,$A,$t2 @ h+=Maj(a,b,c) from the past - ldr $t0,[$t3,#0] - ldr $t1,[$t3,#4] - ldr $t2,[$t3,#8] + ldr $T1,[sp,#16*4] @ pull ctx + ldr $t0,[$T1,#0] + ldr $t1,[$T1,#4] + ldr $t2,[$T1,#8] add $A,$A,$t0 - ldr $t0,[$t3,#12] + ldr $t0,[$T1,#12] add $B,$B,$t1 - ldr $t1,[$t3,#16] + ldr $t1,[$T1,#16] add $C,$C,$t2 - ldr $t2,[$t3,#20] + ldr $t2,[$T1,#20] add $D,$D,$t0 - ldr $t0,[$t3,#24] + ldr $t0,[$T1,#24] add $E,$E,$t1 - ldr $t1,[$t3,#28] + ldr $t1,[$T1,#28] add $F,$F,$t2 ldr $inp,[sp,#17*4] @ pull inp ldr $t2,[sp,#18*4] @ pull inp+len add $G,$G,$t0 add $H,$H,$t1 - stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} + stmia $T1,{$A,$B,$C,$D,$E,$F,$G,$H} cmp $inp,$t2 sub $Ktbl,$Ktbl,#256 @ rewind Ktbl bne .Loop @@ -247,410 +200,12 @@ $code.=<<___; moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.size sha256_block_data_order,.-sha256_block_data_order -___ -###################################################################### -# NEON stuff -# -{{{ -my @X=map("q$_",(0..3)); -my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); -my $Xfer=$t4; -my $j=0; - -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } - -sub AUTOLOAD() # thunk [simplified] x86-style perlasm -{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; - my $arg = pop; - $arg = "#$arg" if ($arg*1 eq $arg); - $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; -} - -sub Xupdate() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); - my ($a,$b,$c,$d,$e,$f,$g,$h); - - &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 ($T2,$T0,$sigma0[0]); - eval(shift(@insns)); - eval(shift(@insns)); - &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 ($T1,$T0,$sigma0[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &vsli_32 ($T2,$T0,32-$sigma0[0]); - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 ($T3,$T0,$sigma0[1]); - eval(shift(@insns)); - eval(shift(@insns)); - &veor ($T1,$T1,$T2); - eval(shift(@insns)); - eval(shift(@insns)); - &vsli_32 ($T3,$T0,32-$sigma0[1]); - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); - eval(shift(@insns)); - eval(shift(@insns)); - &veor ($T1,$T1,$T3); # sigma0(X[1..4]) - eval(shift(@insns)); - eval(shift(@insns)); - &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) - eval(shift(@insns)); - eval(shift(@insns)); - &veor ($T5,$T5,$T4); - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); - eval(shift(@insns)); - eval(shift(@insns)); - &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); - eval(shift(@insns)); - eval(shift(@insns)); - &veor ($T5,$T5,$T4); # sigma1(X[14..15]) - eval(shift(@insns)); - eval(shift(@insns)); - &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); - eval(shift(@insns)); - eval(shift(@insns)); - &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &veor ($T5,$T5,$T4); - eval(shift(@insns)); - eval(shift(@insns)); - &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); - eval(shift(@insns)); - eval(shift(@insns)); - &vld1_32 ("{$T0}","[$Ktbl,:128]!"); - eval(shift(@insns)); - eval(shift(@insns)); - &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); - eval(shift(@insns)); - eval(shift(@insns)); - &veor ($T5,$T5,$T4); # sigma1(X[16..17]) - eval(shift(@insns)); - eval(shift(@insns)); - &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) - eval(shift(@insns)); - eval(shift(@insns)); - &vadd_i32 ($T0,$T0,@X[0]); - while($#insns>=2) { eval(shift(@insns)); } - &vst1_32 ("{$T0}","[$Xfer,:128]!"); - eval(shift(@insns)); - eval(shift(@insns)); - - push(@X,shift(@X)); # "rotate" X[] -} - -sub Xpreload() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); - my ($a,$b,$c,$d,$e,$f,$g,$h); - - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &vld1_32 ("{$T0}","[$Ktbl,:128]!"); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &vrev32_8 (@X[0],@X[0]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &vadd_i32 ($T0,$T0,@X[0]); - foreach (@insns) { eval; } # remaining instructions - &vst1_32 ("{$T0}","[$Xfer,:128]!"); - - push(@X,shift(@X)); # "rotate" X[] -} - -sub body_00_15 () { - ( - '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. - '&add ($h,$h,$t1)', # h+=X[i]+K[i] - '&eor ($t1,$f,$g)', - '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', - '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past - '&and ($t1,$t1,$e)', - '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) - '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', - '&eor ($t1,$t1,$g)', # Ch(e,f,g) - '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) - '&eor ($t2,$a,$b)', # a^b, b^c in next round - '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) - '&add ($h,$h,$t1)', # h+=Ch(e,f,g) - '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. - '&ldr ($t1,"[$Ktbl]") if ($j==15);'. - '&ldr ($t1,"[sp,#64]") if ($j==31)', - '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) - '&add ($d,$d,$h)', # d+=h - '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) - '&eor ($t3,$t3,$b)', # Maj(a,b,c) - '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' - ) -} - -$code.=<<___; -#if __ARM_ARCH__>=7 -.fpu neon - -.type sha256_block_data_order_neon,%function -.align 4 -sha256_block_data_order_neon: -.LNEON: - stmdb sp!,{r4-r12,lr} - - mov $t2,sp - sub sp,sp,#16*4+16 @ alloca - sub $Ktbl,r3,#256+32 @ K256 - bic sp,sp,#15 @ align for 128-bit stores - - vld1.8 {@X[0]},[$inp]! - vld1.8 {@X[1]},[$inp]! - vld1.8 {@X[2]},[$inp]! - vld1.8 {@X[3]},[$inp]! - vld1.32 {$T0},[$Ktbl,:128]! - vld1.32 {$T1},[$Ktbl,:128]! - vld1.32 {$T2},[$Ktbl,:128]! - vld1.32 {$T3},[$Ktbl,:128]! - vrev32.8 @X[0],@X[0] @ yes, even on - str $ctx,[sp,#64] - vrev32.8 @X[1],@X[1] @ big-endian - str $inp,[sp,#68] - mov $Xfer,sp - vrev32.8 @X[2],@X[2] - str $len,[sp,#72] - vrev32.8 @X[3],@X[3] - str $t2,[sp,#76] @ save original sp - vadd.i32 $T0,$T0,@X[0] - vadd.i32 $T1,$T1,@X[1] - vst1.32 {$T0},[$Xfer,:128]! - vadd.i32 $T2,$T2,@X[2] - vst1.32 {$T1},[$Xfer,:128]! - vadd.i32 $T3,$T3,@X[3] - vst1.32 {$T2},[$Xfer,:128]! - vst1.32 {$T3},[$Xfer,:128]! - - ldmia $ctx,{$A-$H} - sub $Xfer,$Xfer,#64 - ldr $t1,[sp,#0] - eor $t2,$t2,$t2 - eor $t3,$B,$C - b .L_00_48 - -.align 4 -.L_00_48: -___ - &Xupdate(\&body_00_15); - &Xupdate(\&body_00_15); - &Xupdate(\&body_00_15); - &Xupdate(\&body_00_15); -$code.=<<___; - teq $t1,#0 @ check for K256 terminator - ldr $t1,[sp,#0] - sub $Xfer,$Xfer,#64 - bne .L_00_48 - - ldr $inp,[sp,#68] - ldr $t0,[sp,#72] - sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl - teq $inp,$t0 - subeq $inp,$inp,#64 @ avoid SEGV - vld1.8 {@X[0]},[$inp]! @ load next input block - vld1.8 {@X[1]},[$inp]! - vld1.8 {@X[2]},[$inp]! - vld1.8 {@X[3]},[$inp]! - strne $inp,[sp,#68] - mov $Xfer,sp -___ - &Xpreload(\&body_00_15); - &Xpreload(\&body_00_15); - &Xpreload(\&body_00_15); - &Xpreload(\&body_00_15); -$code.=<<___; - ldr $t0,[$t1,#0] - add $A,$A,$t2 @ h+=Maj(a,b,c) from the past - ldr $t2,[$t1,#4] - ldr $t3,[$t1,#8] - ldr $t4,[$t1,#12] - add $A,$A,$t0 @ accumulate - ldr $t0,[$t1,#16] - add $B,$B,$t2 - ldr $t2,[$t1,#20] - add $C,$C,$t3 - ldr $t3,[$t1,#24] - add $D,$D,$t4 - ldr $t4,[$t1,#28] - add $E,$E,$t0 - str $A,[$t1],#4 - add $F,$F,$t2 - str $B,[$t1],#4 - add $G,$G,$t3 - str $C,[$t1],#4 - add $H,$H,$t4 - str $D,[$t1],#4 - stmia $t1,{$E-$H} - - movne $Xfer,sp - ldrne $t1,[sp,#0] - eorne $t2,$t2,$t2 - ldreq sp,[sp,#76] @ restore original sp - eorne $t3,$B,$C - bne .L_00_48 - - ldmia sp!,{r4-r12,pc} -.size sha256_block_data_order_neon,.-sha256_block_data_order_neon -#endif -___ -}}} -###################################################################### -# ARMv8 stuff -# -{{{ -my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); -my @MSG=map("q$_",(8..11)); -my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); -my $Ktbl="r3"; - -$code.=<<___; -#if __ARM_ARCH__>=7 -.type sha256_block_data_order_armv8,%function -.align 5 -sha256_block_data_order_armv8: -.LARMv8: - vld1.32 {$ABCD,$EFGH},[$ctx] - sub $Ktbl,r3,#sha256_block_data_order-K256 - -.Loop_v8: - vld1.8 {@MSG[0]-@MSG[1]},[$inp]! - vld1.8 {@MSG[2]-@MSG[3]},[$inp]! - vld1.32 {$W0},[$Ktbl]! - vrev32.8 @MSG[0],@MSG[0] - vrev32.8 @MSG[1],@MSG[1] - vrev32.8 @MSG[2],@MSG[2] - vrev32.8 @MSG[3],@MSG[3] - vmov $ABCD_SAVE,$ABCD @ offload - vmov $EFGH_SAVE,$EFGH - teq $inp,$len -___ -for($i=0;$i<12;$i++) { -$code.=<<___; - vld1.32 {$W1},[$Ktbl]! - vadd.i32 $W0,$W0,@MSG[0] - sha256su0 @MSG[0],@MSG[1] - vmov $abcd,$ABCD - sha256h $ABCD,$EFGH,$W0 - sha256h2 $EFGH,$abcd,$W0 - sha256su1 @MSG[0],@MSG[2],@MSG[3] -___ - ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); -} -$code.=<<___; - vld1.32 {$W1},[$Ktbl]! - vadd.i32 $W0,$W0,@MSG[0] - vmov $abcd,$ABCD - sha256h $ABCD,$EFGH,$W0 - sha256h2 $EFGH,$abcd,$W0 - - vld1.32 {$W0},[$Ktbl]! - vadd.i32 $W1,$W1,@MSG[1] - vmov $abcd,$ABCD - sha256h $ABCD,$EFGH,$W1 - sha256h2 $EFGH,$abcd,$W1 - - vld1.32 {$W1},[$Ktbl] - vadd.i32 $W0,$W0,@MSG[2] - sub $Ktbl,$Ktbl,#256-16 @ rewind - vmov $abcd,$ABCD - sha256h $ABCD,$EFGH,$W0 - sha256h2 $EFGH,$abcd,$W0 - - vadd.i32 $W1,$W1,@MSG[3] - vmov $abcd,$ABCD - sha256h $ABCD,$EFGH,$W1 - sha256h2 $EFGH,$abcd,$W1 - - vadd.i32 $ABCD,$ABCD,$ABCD_SAVE - vadd.i32 $EFGH,$EFGH,$EFGH_SAVE - bne .Loop_v8 - - vst1.32 {$ABCD,$EFGH},[$ctx] - - ret @ bx lr -.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 -#endif -___ -}}} -$code.=<<___; -.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " +.size sha256_block_data_order,.-sha256_block_data_order +.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by " .align 2 -.comm OPENSSL_armcap_P,4,4 ___ -{ my %opcode = ( - "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, - "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); - - sub unsha256 { - my ($mnemonic,$arg)=@_; - - if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { - my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) - |(($2&7)<<17)|(($2&8)<<4) - |(($3&7)<<1) |(($3&8)<<2); - # since ARMv7 instructions are always encoded little-endian. - # correct solution is to use .inst directive, but older - # assemblers don't implement it:-( - sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", - $word&0xff,($word>>8)&0xff, - ($word>>16)&0xff,($word>>24)&0xff, - $mnemonic,$arg; - } - } -} - -foreach (split($/,$code)) { - - s/\`([^\`]*)\`/eval $1/geo; - - s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; - - s/\bret\b/bx lr/go or - s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 - - print $_,"\n"; -} - +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +print $code; close STDOUT; # enforce flush diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.s b/app/openssl/crypto/sha/asm/sha256-armv4.s index 853d7da5..9c20a63c 100644 --- a/app/openssl/crypto/sha/asm/sha256-armv4.s +++ b/app/openssl/crypto/sha/asm/sha256-armv4.s @@ -23,1721 +23,1463 @@ K256: .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 -.word 0 @ terminator -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha256_block_data_order -.align 5 .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: sub r3,pc,#8 @ sha256_block_data_order add r2,r1,r2,lsl#6 @ len to point at the end of inp -#if __ARM_ARCH__>=7 - ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap_P - tst r12,#ARMV8_SHA256 - bne .LARMv8 - tst r12,#ARMV7_NEON - bne .LNEON -#endif stmdb sp!,{r0,r1,r2,r4-r11,lr} ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} - sub r14,r3,#256+32 @ K256 + sub r14,r3,#256 @ K256 sub sp,sp,#16*4 @ alloca(X[16]) .Loop: -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 -# else - ldrb r2,[r1,#3] -# endif - eor r3,r5,r6 @ magic - eor r12,r12,r12 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 0 -# if 0==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r8,r8,ror#5 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r8,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 0 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#3] @ 0 ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 0==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r8,r8,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r8,ror#19 @ Sigma1(e) + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 #endif + mov r0,r8,ror#6 ldr r12,[r14],#4 @ *K256++ - add r11,r11,r2 @ h+=X[i] - str r2,[sp,#0*4] + eor r0,r0,r8,ror#11 eor r2,r9,r10 - add r11,r11,r0,ror#6 @ h+=Sigma1(e) +#if 0>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 0==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r8,ror#25 @ Sigma1(e) and r2,r2,r8 - add r11,r11,r12 @ h+=K256[i] + str r3,[sp,#0*4] + add r3,r3,r0 eor r2,r2,r10 @ Ch(e,f,g) - eor r0,r4,r4,ror#11 - add r11,r11,r2 @ h+=Ch(e,f,g) -#if 0==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 0<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r4,r5 @ a^b, b^c in next round -#else - ldr r2,[sp,#2*4] @ from future BODY_16_xx - eor r12,r4,r5 @ a^b, b^c in next round - ldr r1,[sp,#15*4] @ from future BODY_16_xx -#endif - eor r0,r0,r4,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r7,r7,r11 @ d+=h - eor r3,r3,r5 @ Maj(a,b,c) - add r11,r11,r0,ror#2 @ h+=Sigma0(a) - @ add r11,r11,r3 @ h+=Maj(a,b,c) + add r3,r3,r11 + mov r11,r4,ror#2 + add r3,r3,r2 + eor r11,r11,r4,ror#13 + add r3,r3,r12 + eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 0>=15 + ldr r1,[sp,#2*4] @ from BODY_16_xx +#endif + orr r0,r4,r5 + and r2,r4,r5 + and r0,r0,r6 + add r11,r11,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r7,r7,r3 + add r11,r11,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 1 -# if 1==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r7,r7,ror#5 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r7,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 1 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 1==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r7,r7,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r7,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r10,r10,r2 @ h+=X[i] - str r2,[sp,#1*4] + ldrb r3,[r1,#3] @ 1 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r7,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r7,ror#11 eor r2,r8,r9 - add r10,r10,r0,ror#6 @ h+=Sigma1(e) +#if 1>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 1==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r7,ror#25 @ Sigma1(e) and r2,r2,r7 - add r10,r10,r3 @ h+=K256[i] + str r3,[sp,#1*4] + add r3,r3,r0 eor r2,r2,r9 @ Ch(e,f,g) - eor r0,r11,r11,ror#11 - add r10,r10,r2 @ h+=Ch(e,f,g) -#if 1==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 1<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r11,r4 @ a^b, b^c in next round -#else - ldr r2,[sp,#3*4] @ from future BODY_16_xx - eor r3,r11,r4 @ a^b, b^c in next round - ldr r1,[sp,#0*4] @ from future BODY_16_xx -#endif - eor r0,r0,r11,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r6,r6,r10 @ d+=h - eor r12,r12,r4 @ Maj(a,b,c) - add r10,r10,r0,ror#2 @ h+=Sigma0(a) - @ add r10,r10,r12 @ h+=Maj(a,b,c) + add r3,r3,r10 + mov r10,r11,ror#2 + add r3,r3,r2 + eor r10,r10,r11,ror#13 + add r3,r3,r12 + eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 1>=15 + ldr r1,[sp,#3*4] @ from BODY_16_xx +#endif + orr r0,r11,r4 + and r2,r11,r4 + and r0,r0,r5 + add r10,r10,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r6,r6,r3 + add r10,r10,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 2 -# if 2==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r6,r6,ror#5 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r6,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 2 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#3] @ 2 ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 2==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r6,r6,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r6,ror#19 @ Sigma1(e) + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 #endif + mov r0,r6,ror#6 ldr r12,[r14],#4 @ *K256++ - add r9,r9,r2 @ h+=X[i] - str r2,[sp,#2*4] + eor r0,r0,r6,ror#11 eor r2,r7,r8 - add r9,r9,r0,ror#6 @ h+=Sigma1(e) +#if 2>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 2==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r6,ror#25 @ Sigma1(e) and r2,r2,r6 - add r9,r9,r12 @ h+=K256[i] + str r3,[sp,#2*4] + add r3,r3,r0 eor r2,r2,r8 @ Ch(e,f,g) - eor r0,r10,r10,ror#11 - add r9,r9,r2 @ h+=Ch(e,f,g) -#if 2==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 2<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r10,r11 @ a^b, b^c in next round -#else - ldr r2,[sp,#4*4] @ from future BODY_16_xx - eor r12,r10,r11 @ a^b, b^c in next round - ldr r1,[sp,#1*4] @ from future BODY_16_xx -#endif - eor r0,r0,r10,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r5,r5,r9 @ d+=h - eor r3,r3,r11 @ Maj(a,b,c) - add r9,r9,r0,ror#2 @ h+=Sigma0(a) - @ add r9,r9,r3 @ h+=Maj(a,b,c) + add r3,r3,r9 + mov r9,r10,ror#2 + add r3,r3,r2 + eor r9,r9,r10,ror#13 + add r3,r3,r12 + eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 2>=15 + ldr r1,[sp,#4*4] @ from BODY_16_xx +#endif + orr r0,r10,r11 + and r2,r10,r11 + and r0,r0,r4 + add r9,r9,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r5,r5,r3 + add r9,r9,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 3 -# if 3==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r5,r5,ror#5 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r5,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 3 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 3==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r5,r5,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r5,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r8,r8,r2 @ h+=X[i] - str r2,[sp,#3*4] + ldrb r3,[r1,#3] @ 3 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r5,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r5,ror#11 eor r2,r6,r7 - add r8,r8,r0,ror#6 @ h+=Sigma1(e) +#if 3>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 3==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r5,ror#25 @ Sigma1(e) and r2,r2,r5 - add r8,r8,r3 @ h+=K256[i] + str r3,[sp,#3*4] + add r3,r3,r0 eor r2,r2,r7 @ Ch(e,f,g) - eor r0,r9,r9,ror#11 - add r8,r8,r2 @ h+=Ch(e,f,g) -#if 3==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 3<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r9,r10 @ a^b, b^c in next round -#else - ldr r2,[sp,#5*4] @ from future BODY_16_xx - eor r3,r9,r10 @ a^b, b^c in next round - ldr r1,[sp,#2*4] @ from future BODY_16_xx -#endif - eor r0,r0,r9,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r4,r4,r8 @ d+=h - eor r12,r12,r10 @ Maj(a,b,c) - add r8,r8,r0,ror#2 @ h+=Sigma0(a) - @ add r8,r8,r12 @ h+=Maj(a,b,c) + add r3,r3,r8 + mov r8,r9,ror#2 + add r3,r3,r2 + eor r8,r8,r9,ror#13 + add r3,r3,r12 + eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 3>=15 + ldr r1,[sp,#5*4] @ from BODY_16_xx +#endif + orr r0,r9,r10 + and r2,r9,r10 + and r0,r0,r11 + add r8,r8,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r4,r4,r3 + add r8,r8,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 4 -# if 4==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r4,r4,ror#5 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r4,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 4 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#3] @ 4 ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 4==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r4,r4,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r4,ror#19 @ Sigma1(e) + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 #endif + mov r0,r4,ror#6 ldr r12,[r14],#4 @ *K256++ - add r7,r7,r2 @ h+=X[i] - str r2,[sp,#4*4] + eor r0,r0,r4,ror#11 eor r2,r5,r6 - add r7,r7,r0,ror#6 @ h+=Sigma1(e) +#if 4>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 4==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r4,ror#25 @ Sigma1(e) and r2,r2,r4 - add r7,r7,r12 @ h+=K256[i] + str r3,[sp,#4*4] + add r3,r3,r0 eor r2,r2,r6 @ Ch(e,f,g) - eor r0,r8,r8,ror#11 - add r7,r7,r2 @ h+=Ch(e,f,g) -#if 4==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 4<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r8,r9 @ a^b, b^c in next round -#else - ldr r2,[sp,#6*4] @ from future BODY_16_xx - eor r12,r8,r9 @ a^b, b^c in next round - ldr r1,[sp,#3*4] @ from future BODY_16_xx -#endif - eor r0,r0,r8,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r11,r11,r7 @ d+=h - eor r3,r3,r9 @ Maj(a,b,c) - add r7,r7,r0,ror#2 @ h+=Sigma0(a) - @ add r7,r7,r3 @ h+=Maj(a,b,c) + add r3,r3,r7 + mov r7,r8,ror#2 + add r3,r3,r2 + eor r7,r7,r8,ror#13 + add r3,r3,r12 + eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 4>=15 + ldr r1,[sp,#6*4] @ from BODY_16_xx +#endif + orr r0,r8,r9 + and r2,r8,r9 + and r0,r0,r10 + add r7,r7,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r11,r11,r3 + add r7,r7,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 5 -# if 5==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r11,r11,ror#5 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r11,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 5 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 5==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r11,r11,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r11,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r6,r6,r2 @ h+=X[i] - str r2,[sp,#5*4] + ldrb r3,[r1,#3] @ 5 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r11,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r11,ror#11 eor r2,r4,r5 - add r6,r6,r0,ror#6 @ h+=Sigma1(e) +#if 5>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 5==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r11,ror#25 @ Sigma1(e) and r2,r2,r11 - add r6,r6,r3 @ h+=K256[i] + str r3,[sp,#5*4] + add r3,r3,r0 eor r2,r2,r5 @ Ch(e,f,g) - eor r0,r7,r7,ror#11 - add r6,r6,r2 @ h+=Ch(e,f,g) -#if 5==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 5<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r7,r8 @ a^b, b^c in next round -#else - ldr r2,[sp,#7*4] @ from future BODY_16_xx - eor r3,r7,r8 @ a^b, b^c in next round - ldr r1,[sp,#4*4] @ from future BODY_16_xx -#endif - eor r0,r0,r7,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r10,r10,r6 @ d+=h - eor r12,r12,r8 @ Maj(a,b,c) - add r6,r6,r0,ror#2 @ h+=Sigma0(a) - @ add r6,r6,r12 @ h+=Maj(a,b,c) + add r3,r3,r6 + mov r6,r7,ror#2 + add r3,r3,r2 + eor r6,r6,r7,ror#13 + add r3,r3,r12 + eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 5>=15 + ldr r1,[sp,#7*4] @ from BODY_16_xx +#endif + orr r0,r7,r8 + and r2,r7,r8 + and r0,r0,r9 + add r6,r6,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r10,r10,r3 + add r6,r6,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 6 -# if 6==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r10,r10,ror#5 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r10,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 6 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#3] @ 6 ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 6==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r10,r10,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r10,ror#19 @ Sigma1(e) + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 #endif + mov r0,r10,ror#6 ldr r12,[r14],#4 @ *K256++ - add r5,r5,r2 @ h+=X[i] - str r2,[sp,#6*4] + eor r0,r0,r10,ror#11 eor r2,r11,r4 - add r5,r5,r0,ror#6 @ h+=Sigma1(e) +#if 6>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 6==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r10,ror#25 @ Sigma1(e) and r2,r2,r10 - add r5,r5,r12 @ h+=K256[i] + str r3,[sp,#6*4] + add r3,r3,r0 eor r2,r2,r4 @ Ch(e,f,g) - eor r0,r6,r6,ror#11 - add r5,r5,r2 @ h+=Ch(e,f,g) -#if 6==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 6<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r6,r7 @ a^b, b^c in next round -#else - ldr r2,[sp,#8*4] @ from future BODY_16_xx - eor r12,r6,r7 @ a^b, b^c in next round - ldr r1,[sp,#5*4] @ from future BODY_16_xx -#endif - eor r0,r0,r6,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r9,r9,r5 @ d+=h - eor r3,r3,r7 @ Maj(a,b,c) - add r5,r5,r0,ror#2 @ h+=Sigma0(a) - @ add r5,r5,r3 @ h+=Maj(a,b,c) + add r3,r3,r5 + mov r5,r6,ror#2 + add r3,r3,r2 + eor r5,r5,r6,ror#13 + add r3,r3,r12 + eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 6>=15 + ldr r1,[sp,#8*4] @ from BODY_16_xx +#endif + orr r0,r6,r7 + and r2,r6,r7 + and r0,r0,r8 + add r5,r5,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r9,r9,r3 + add r5,r5,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 7 -# if 7==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r9,r9,ror#5 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r9,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 7 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 7==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r9,r9,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r9,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r4,r4,r2 @ h+=X[i] - str r2,[sp,#7*4] + ldrb r3,[r1,#3] @ 7 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r9,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r9,ror#11 eor r2,r10,r11 - add r4,r4,r0,ror#6 @ h+=Sigma1(e) +#if 7>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 7==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r9,ror#25 @ Sigma1(e) and r2,r2,r9 - add r4,r4,r3 @ h+=K256[i] + str r3,[sp,#7*4] + add r3,r3,r0 eor r2,r2,r11 @ Ch(e,f,g) - eor r0,r5,r5,ror#11 - add r4,r4,r2 @ h+=Ch(e,f,g) -#if 7==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 7<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r5,r6 @ a^b, b^c in next round -#else - ldr r2,[sp,#9*4] @ from future BODY_16_xx - eor r3,r5,r6 @ a^b, b^c in next round - ldr r1,[sp,#6*4] @ from future BODY_16_xx -#endif - eor r0,r0,r5,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r8,r8,r4 @ d+=h - eor r12,r12,r6 @ Maj(a,b,c) - add r4,r4,r0,ror#2 @ h+=Sigma0(a) - @ add r4,r4,r12 @ h+=Maj(a,b,c) + add r3,r3,r4 + mov r4,r5,ror#2 + add r3,r3,r2 + eor r4,r4,r5,ror#13 + add r3,r3,r12 + eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 7>=15 + ldr r1,[sp,#9*4] @ from BODY_16_xx +#endif + orr r0,r5,r6 + and r2,r5,r6 + and r0,r0,r7 + add r4,r4,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r8,r8,r3 + add r4,r4,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 8 -# if 8==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r8,r8,ror#5 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r8,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 8 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#3] @ 8 ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 8==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r8,r8,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r8,ror#19 @ Sigma1(e) + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 #endif + mov r0,r8,ror#6 ldr r12,[r14],#4 @ *K256++ - add r11,r11,r2 @ h+=X[i] - str r2,[sp,#8*4] + eor r0,r0,r8,ror#11 eor r2,r9,r10 - add r11,r11,r0,ror#6 @ h+=Sigma1(e) +#if 8>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 8==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r8,ror#25 @ Sigma1(e) and r2,r2,r8 - add r11,r11,r12 @ h+=K256[i] + str r3,[sp,#8*4] + add r3,r3,r0 eor r2,r2,r10 @ Ch(e,f,g) - eor r0,r4,r4,ror#11 - add r11,r11,r2 @ h+=Ch(e,f,g) -#if 8==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 8<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r4,r5 @ a^b, b^c in next round -#else - ldr r2,[sp,#10*4] @ from future BODY_16_xx - eor r12,r4,r5 @ a^b, b^c in next round - ldr r1,[sp,#7*4] @ from future BODY_16_xx -#endif - eor r0,r0,r4,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r7,r7,r11 @ d+=h - eor r3,r3,r5 @ Maj(a,b,c) - add r11,r11,r0,ror#2 @ h+=Sigma0(a) - @ add r11,r11,r3 @ h+=Maj(a,b,c) + add r3,r3,r11 + mov r11,r4,ror#2 + add r3,r3,r2 + eor r11,r11,r4,ror#13 + add r3,r3,r12 + eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 8>=15 + ldr r1,[sp,#10*4] @ from BODY_16_xx +#endif + orr r0,r4,r5 + and r2,r4,r5 + and r0,r0,r6 + add r11,r11,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r7,r7,r3 + add r11,r11,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 9 -# if 9==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r7,r7,ror#5 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r7,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 9 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 9==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r7,r7,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r7,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r10,r10,r2 @ h+=X[i] - str r2,[sp,#9*4] + ldrb r3,[r1,#3] @ 9 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r7,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r7,ror#11 eor r2,r8,r9 - add r10,r10,r0,ror#6 @ h+=Sigma1(e) +#if 9>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 9==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r7,ror#25 @ Sigma1(e) and r2,r2,r7 - add r10,r10,r3 @ h+=K256[i] + str r3,[sp,#9*4] + add r3,r3,r0 eor r2,r2,r9 @ Ch(e,f,g) - eor r0,r11,r11,ror#11 - add r10,r10,r2 @ h+=Ch(e,f,g) -#if 9==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 9<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r11,r4 @ a^b, b^c in next round -#else - ldr r2,[sp,#11*4] @ from future BODY_16_xx - eor r3,r11,r4 @ a^b, b^c in next round - ldr r1,[sp,#8*4] @ from future BODY_16_xx -#endif - eor r0,r0,r11,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r6,r6,r10 @ d+=h - eor r12,r12,r4 @ Maj(a,b,c) - add r10,r10,r0,ror#2 @ h+=Sigma0(a) - @ add r10,r10,r12 @ h+=Maj(a,b,c) + add r3,r3,r10 + mov r10,r11,ror#2 + add r3,r3,r2 + eor r10,r10,r11,ror#13 + add r3,r3,r12 + eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 9>=15 + ldr r1,[sp,#11*4] @ from BODY_16_xx +#endif + orr r0,r11,r4 + and r2,r11,r4 + and r0,r0,r5 + add r10,r10,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r6,r6,r3 + add r10,r10,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 10 -# if 10==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r6,r6,ror#5 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r6,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 10 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#3] @ 10 ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 10==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r6,r6,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r6,ror#19 @ Sigma1(e) + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 #endif + mov r0,r6,ror#6 ldr r12,[r14],#4 @ *K256++ - add r9,r9,r2 @ h+=X[i] - str r2,[sp,#10*4] + eor r0,r0,r6,ror#11 eor r2,r7,r8 - add r9,r9,r0,ror#6 @ h+=Sigma1(e) +#if 10>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 10==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r6,ror#25 @ Sigma1(e) and r2,r2,r6 - add r9,r9,r12 @ h+=K256[i] + str r3,[sp,#10*4] + add r3,r3,r0 eor r2,r2,r8 @ Ch(e,f,g) - eor r0,r10,r10,ror#11 - add r9,r9,r2 @ h+=Ch(e,f,g) -#if 10==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 10<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r10,r11 @ a^b, b^c in next round -#else - ldr r2,[sp,#12*4] @ from future BODY_16_xx - eor r12,r10,r11 @ a^b, b^c in next round - ldr r1,[sp,#9*4] @ from future BODY_16_xx -#endif - eor r0,r0,r10,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r5,r5,r9 @ d+=h - eor r3,r3,r11 @ Maj(a,b,c) - add r9,r9,r0,ror#2 @ h+=Sigma0(a) - @ add r9,r9,r3 @ h+=Maj(a,b,c) + add r3,r3,r9 + mov r9,r10,ror#2 + add r3,r3,r2 + eor r9,r9,r10,ror#13 + add r3,r3,r12 + eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 10>=15 + ldr r1,[sp,#12*4] @ from BODY_16_xx +#endif + orr r0,r10,r11 + and r2,r10,r11 + and r0,r0,r4 + add r9,r9,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r5,r5,r3 + add r9,r9,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 11 -# if 11==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r5,r5,ror#5 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r5,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 11 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 11==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r5,r5,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r5,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r8,r8,r2 @ h+=X[i] - str r2,[sp,#11*4] + ldrb r3,[r1,#3] @ 11 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r5,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r5,ror#11 eor r2,r6,r7 - add r8,r8,r0,ror#6 @ h+=Sigma1(e) +#if 11>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 11==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r5,ror#25 @ Sigma1(e) and r2,r2,r5 - add r8,r8,r3 @ h+=K256[i] + str r3,[sp,#11*4] + add r3,r3,r0 eor r2,r2,r7 @ Ch(e,f,g) - eor r0,r9,r9,ror#11 - add r8,r8,r2 @ h+=Ch(e,f,g) -#if 11==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 11<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r9,r10 @ a^b, b^c in next round -#else - ldr r2,[sp,#13*4] @ from future BODY_16_xx - eor r3,r9,r10 @ a^b, b^c in next round - ldr r1,[sp,#10*4] @ from future BODY_16_xx -#endif - eor r0,r0,r9,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r4,r4,r8 @ d+=h - eor r12,r12,r10 @ Maj(a,b,c) - add r8,r8,r0,ror#2 @ h+=Sigma0(a) - @ add r8,r8,r12 @ h+=Maj(a,b,c) + add r3,r3,r8 + mov r8,r9,ror#2 + add r3,r3,r2 + eor r8,r8,r9,ror#13 + add r3,r3,r12 + eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 11>=15 + ldr r1,[sp,#13*4] @ from BODY_16_xx +#endif + orr r0,r9,r10 + and r2,r9,r10 + and r0,r0,r11 + add r8,r8,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r4,r4,r3 + add r8,r8,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 12 -# if 12==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r4,r4,ror#5 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r4,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 12 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#3] @ 12 ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 12==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r4,r4,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r4,ror#19 @ Sigma1(e) + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 #endif + mov r0,r4,ror#6 ldr r12,[r14],#4 @ *K256++ - add r7,r7,r2 @ h+=X[i] - str r2,[sp,#12*4] + eor r0,r0,r4,ror#11 eor r2,r5,r6 - add r7,r7,r0,ror#6 @ h+=Sigma1(e) +#if 12>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 12==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r4,ror#25 @ Sigma1(e) and r2,r2,r4 - add r7,r7,r12 @ h+=K256[i] + str r3,[sp,#12*4] + add r3,r3,r0 eor r2,r2,r6 @ Ch(e,f,g) - eor r0,r8,r8,ror#11 - add r7,r7,r2 @ h+=Ch(e,f,g) -#if 12==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 12<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r8,r9 @ a^b, b^c in next round -#else - ldr r2,[sp,#14*4] @ from future BODY_16_xx - eor r12,r8,r9 @ a^b, b^c in next round - ldr r1,[sp,#11*4] @ from future BODY_16_xx -#endif - eor r0,r0,r8,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r11,r11,r7 @ d+=h - eor r3,r3,r9 @ Maj(a,b,c) - add r7,r7,r0,ror#2 @ h+=Sigma0(a) - @ add r7,r7,r3 @ h+=Maj(a,b,c) + add r3,r3,r7 + mov r7,r8,ror#2 + add r3,r3,r2 + eor r7,r7,r8,ror#13 + add r3,r3,r12 + eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 12>=15 + ldr r1,[sp,#14*4] @ from BODY_16_xx +#endif + orr r0,r8,r9 + and r2,r8,r9 + and r0,r0,r10 + add r7,r7,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r11,r11,r3 + add r7,r7,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 13 -# if 13==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r11,r11,ror#5 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r11,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 13 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 13==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r11,r11,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r11,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r6,r6,r2 @ h+=X[i] - str r2,[sp,#13*4] + ldrb r3,[r1,#3] @ 13 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r11,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r11,ror#11 eor r2,r4,r5 - add r6,r6,r0,ror#6 @ h+=Sigma1(e) +#if 13>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 13==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r11,ror#25 @ Sigma1(e) and r2,r2,r11 - add r6,r6,r3 @ h+=K256[i] + str r3,[sp,#13*4] + add r3,r3,r0 eor r2,r2,r5 @ Ch(e,f,g) - eor r0,r7,r7,ror#11 - add r6,r6,r2 @ h+=Ch(e,f,g) -#if 13==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 13<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r7,r8 @ a^b, b^c in next round -#else - ldr r2,[sp,#15*4] @ from future BODY_16_xx - eor r3,r7,r8 @ a^b, b^c in next round - ldr r1,[sp,#12*4] @ from future BODY_16_xx -#endif - eor r0,r0,r7,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r10,r10,r6 @ d+=h - eor r12,r12,r8 @ Maj(a,b,c) - add r6,r6,r0,ror#2 @ h+=Sigma0(a) - @ add r6,r6,r12 @ h+=Maj(a,b,c) + add r3,r3,r6 + mov r6,r7,ror#2 + add r3,r3,r2 + eor r6,r6,r7,ror#13 + add r3,r3,r12 + eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 13>=15 + ldr r1,[sp,#15*4] @ from BODY_16_xx +#endif + orr r0,r7,r8 + and r2,r7,r8 + and r0,r0,r9 + add r6,r6,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r10,r10,r3 + add r6,r6,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 14 -# if 14==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r10,r10,ror#5 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r10,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 14 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#3] @ 14 ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 14==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r10,r10,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r10,ror#19 @ Sigma1(e) + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 #endif + mov r0,r10,ror#6 ldr r12,[r14],#4 @ *K256++ - add r5,r5,r2 @ h+=X[i] - str r2,[sp,#14*4] + eor r0,r0,r10,ror#11 eor r2,r11,r4 - add r5,r5,r0,ror#6 @ h+=Sigma1(e) +#if 14>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 14==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r10,ror#25 @ Sigma1(e) and r2,r2,r10 - add r5,r5,r12 @ h+=K256[i] + str r3,[sp,#14*4] + add r3,r3,r0 eor r2,r2,r4 @ Ch(e,f,g) - eor r0,r6,r6,ror#11 - add r5,r5,r2 @ h+=Ch(e,f,g) -#if 14==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 14<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r6,r7 @ a^b, b^c in next round -#else - ldr r2,[sp,#0*4] @ from future BODY_16_xx - eor r12,r6,r7 @ a^b, b^c in next round - ldr r1,[sp,#13*4] @ from future BODY_16_xx -#endif - eor r0,r0,r6,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r9,r9,r5 @ d+=h - eor r3,r3,r7 @ Maj(a,b,c) - add r5,r5,r0,ror#2 @ h+=Sigma0(a) - @ add r5,r5,r3 @ h+=Maj(a,b,c) + add r3,r3,r5 + mov r5,r6,ror#2 + add r3,r3,r2 + eor r5,r5,r6,ror#13 + add r3,r3,r12 + eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 14>=15 + ldr r1,[sp,#0*4] @ from BODY_16_xx +#endif + orr r0,r6,r7 + and r2,r6,r7 + and r0,r0,r8 + add r5,r5,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r9,r9,r3 + add r5,r5,r0 #if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 15 -# if 15==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r9,r9,ror#5 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r9,ror#19 @ Sigma1(e) - rev r2,r2 + ldr r3,[r1],#4 #else - @ ldrb r2,[r1,#3] @ 15 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 15==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r9,r9,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r9,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r4,r4,r2 @ h+=X[i] - str r2,[sp,#15*4] + ldrb r3,[r1,#3] @ 15 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r9,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r9,ror#11 eor r2,r10,r11 - add r4,r4,r0,ror#6 @ h+=Sigma1(e) +#if 15>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 15==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r9,ror#25 @ Sigma1(e) and r2,r2,r9 - add r4,r4,r3 @ h+=K256[i] + str r3,[sp,#15*4] + add r3,r3,r0 eor r2,r2,r11 @ Ch(e,f,g) - eor r0,r5,r5,ror#11 - add r4,r4,r2 @ h+=Ch(e,f,g) -#if 15==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 15<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r5,r6 @ a^b, b^c in next round -#else - ldr r2,[sp,#1*4] @ from future BODY_16_xx - eor r3,r5,r6 @ a^b, b^c in next round - ldr r1,[sp,#14*4] @ from future BODY_16_xx -#endif - eor r0,r0,r5,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r8,r8,r4 @ d+=h - eor r12,r12,r6 @ Maj(a,b,c) - add r4,r4,r0,ror#2 @ h+=Sigma0(a) - @ add r4,r4,r12 @ h+=Maj(a,b,c) + add r3,r3,r4 + mov r4,r5,ror#2 + add r3,r3,r2 + eor r4,r4,r5,ror#13 + add r3,r3,r12 + eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 15>=15 + ldr r1,[sp,#1*4] @ from BODY_16_xx +#endif + orr r0,r5,r6 + and r2,r5,r6 + and r0,r0,r7 + add r4,r4,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r8,r8,r3 + add r4,r4,r0 .Lrounds_16_xx: - @ ldr r2,[sp,#1*4] @ 16 - @ ldr r1,[sp,#14*4] - mov r0,r2,ror#7 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#0*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#9*4] - - add r12,r12,r0 - eor r0,r8,r8,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r8,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] + @ ldr r1,[sp,#1*4] @ 16 + ldr r12,[sp,#14*4] + mov r0,r1,ror#7 + ldr r3,[sp,#0*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#9*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r8,ror#6 ldr r12,[r14],#4 @ *K256++ - add r11,r11,r2 @ h+=X[i] - str r2,[sp,#0*4] + eor r0,r0,r8,ror#11 eor r2,r9,r10 - add r11,r11,r0,ror#6 @ h+=Sigma1(e) +#if 16>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 16==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r8,ror#25 @ Sigma1(e) and r2,r2,r8 - add r11,r11,r12 @ h+=K256[i] + str r3,[sp,#0*4] + add r3,r3,r0 eor r2,r2,r10 @ Ch(e,f,g) - eor r0,r4,r4,ror#11 - add r11,r11,r2 @ h+=Ch(e,f,g) -#if 16==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 16<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r4,r5 @ a^b, b^c in next round -#else - ldr r2,[sp,#2*4] @ from future BODY_16_xx - eor r12,r4,r5 @ a^b, b^c in next round - ldr r1,[sp,#15*4] @ from future BODY_16_xx -#endif - eor r0,r0,r4,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r7,r7,r11 @ d+=h - eor r3,r3,r5 @ Maj(a,b,c) - add r11,r11,r0,ror#2 @ h+=Sigma0(a) - @ add r11,r11,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#2*4] @ 17 - @ ldr r1,[sp,#15*4] - mov r0,r2,ror#7 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#1*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#10*4] - + add r3,r3,r11 + mov r11,r4,ror#2 + add r3,r3,r2 + eor r11,r11,r4,ror#13 + add r3,r3,r12 + eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 16>=15 + ldr r1,[sp,#2*4] @ from BODY_16_xx +#endif + orr r0,r4,r5 + and r2,r4,r5 + and r0,r0,r6 + add r11,r11,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r7,r7,r3 + add r11,r11,r0 + @ ldr r1,[sp,#2*4] @ 17 + ldr r12,[sp,#15*4] + mov r0,r1,ror#7 + ldr r3,[sp,#1*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#10*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r0,r7,r7,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r7,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r10,r10,r2 @ h+=X[i] - str r2,[sp,#1*4] + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r7,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r7,ror#11 eor r2,r8,r9 - add r10,r10,r0,ror#6 @ h+=Sigma1(e) +#if 17>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 17==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r7,ror#25 @ Sigma1(e) and r2,r2,r7 - add r10,r10,r3 @ h+=K256[i] + str r3,[sp,#1*4] + add r3,r3,r0 eor r2,r2,r9 @ Ch(e,f,g) - eor r0,r11,r11,ror#11 - add r10,r10,r2 @ h+=Ch(e,f,g) -#if 17==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 17<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r11,r4 @ a^b, b^c in next round -#else - ldr r2,[sp,#3*4] @ from future BODY_16_xx - eor r3,r11,r4 @ a^b, b^c in next round - ldr r1,[sp,#0*4] @ from future BODY_16_xx -#endif - eor r0,r0,r11,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r6,r6,r10 @ d+=h - eor r12,r12,r4 @ Maj(a,b,c) - add r10,r10,r0,ror#2 @ h+=Sigma0(a) - @ add r10,r10,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#3*4] @ 18 - @ ldr r1,[sp,#0*4] - mov r0,r2,ror#7 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#2*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#11*4] - - add r12,r12,r0 - eor r0,r6,r6,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r6,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] + add r3,r3,r10 + mov r10,r11,ror#2 + add r3,r3,r2 + eor r10,r10,r11,ror#13 + add r3,r3,r12 + eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 17>=15 + ldr r1,[sp,#3*4] @ from BODY_16_xx +#endif + orr r0,r11,r4 + and r2,r11,r4 + and r0,r0,r5 + add r10,r10,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r6,r6,r3 + add r10,r10,r0 + @ ldr r1,[sp,#3*4] @ 18 + ldr r12,[sp,#0*4] + mov r0,r1,ror#7 + ldr r3,[sp,#2*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#11*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r6,ror#6 ldr r12,[r14],#4 @ *K256++ - add r9,r9,r2 @ h+=X[i] - str r2,[sp,#2*4] + eor r0,r0,r6,ror#11 eor r2,r7,r8 - add r9,r9,r0,ror#6 @ h+=Sigma1(e) +#if 18>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 18==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r6,ror#25 @ Sigma1(e) and r2,r2,r6 - add r9,r9,r12 @ h+=K256[i] + str r3,[sp,#2*4] + add r3,r3,r0 eor r2,r2,r8 @ Ch(e,f,g) - eor r0,r10,r10,ror#11 - add r9,r9,r2 @ h+=Ch(e,f,g) -#if 18==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 18<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r10,r11 @ a^b, b^c in next round -#else - ldr r2,[sp,#4*4] @ from future BODY_16_xx - eor r12,r10,r11 @ a^b, b^c in next round - ldr r1,[sp,#1*4] @ from future BODY_16_xx -#endif - eor r0,r0,r10,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r5,r5,r9 @ d+=h - eor r3,r3,r11 @ Maj(a,b,c) - add r9,r9,r0,ror#2 @ h+=Sigma0(a) - @ add r9,r9,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#4*4] @ 19 - @ ldr r1,[sp,#1*4] - mov r0,r2,ror#7 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#3*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#12*4] - + add r3,r3,r9 + mov r9,r10,ror#2 + add r3,r3,r2 + eor r9,r9,r10,ror#13 + add r3,r3,r12 + eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 18>=15 + ldr r1,[sp,#4*4] @ from BODY_16_xx +#endif + orr r0,r10,r11 + and r2,r10,r11 + and r0,r0,r4 + add r9,r9,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r5,r5,r3 + add r9,r9,r0 + @ ldr r1,[sp,#4*4] @ 19 + ldr r12,[sp,#1*4] + mov r0,r1,ror#7 + ldr r3,[sp,#3*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#12*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r0,r5,r5,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r5,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r8,r8,r2 @ h+=X[i] - str r2,[sp,#3*4] + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r5,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r5,ror#11 eor r2,r6,r7 - add r8,r8,r0,ror#6 @ h+=Sigma1(e) +#if 19>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 19==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r5,ror#25 @ Sigma1(e) and r2,r2,r5 - add r8,r8,r3 @ h+=K256[i] + str r3,[sp,#3*4] + add r3,r3,r0 eor r2,r2,r7 @ Ch(e,f,g) - eor r0,r9,r9,ror#11 - add r8,r8,r2 @ h+=Ch(e,f,g) -#if 19==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 19<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r9,r10 @ a^b, b^c in next round -#else - ldr r2,[sp,#5*4] @ from future BODY_16_xx - eor r3,r9,r10 @ a^b, b^c in next round - ldr r1,[sp,#2*4] @ from future BODY_16_xx -#endif - eor r0,r0,r9,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r4,r4,r8 @ d+=h - eor r12,r12,r10 @ Maj(a,b,c) - add r8,r8,r0,ror#2 @ h+=Sigma0(a) - @ add r8,r8,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#5*4] @ 20 - @ ldr r1,[sp,#2*4] - mov r0,r2,ror#7 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#4*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#13*4] - - add r12,r12,r0 - eor r0,r4,r4,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r4,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] + add r3,r3,r8 + mov r8,r9,ror#2 + add r3,r3,r2 + eor r8,r8,r9,ror#13 + add r3,r3,r12 + eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 19>=15 + ldr r1,[sp,#5*4] @ from BODY_16_xx +#endif + orr r0,r9,r10 + and r2,r9,r10 + and r0,r0,r11 + add r8,r8,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r4,r4,r3 + add r8,r8,r0 + @ ldr r1,[sp,#5*4] @ 20 + ldr r12,[sp,#2*4] + mov r0,r1,ror#7 + ldr r3,[sp,#4*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#13*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r4,ror#6 ldr r12,[r14],#4 @ *K256++ - add r7,r7,r2 @ h+=X[i] - str r2,[sp,#4*4] + eor r0,r0,r4,ror#11 eor r2,r5,r6 - add r7,r7,r0,ror#6 @ h+=Sigma1(e) +#if 20>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 20==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r4,ror#25 @ Sigma1(e) and r2,r2,r4 - add r7,r7,r12 @ h+=K256[i] + str r3,[sp,#4*4] + add r3,r3,r0 eor r2,r2,r6 @ Ch(e,f,g) - eor r0,r8,r8,ror#11 - add r7,r7,r2 @ h+=Ch(e,f,g) -#if 20==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 20<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r8,r9 @ a^b, b^c in next round -#else - ldr r2,[sp,#6*4] @ from future BODY_16_xx - eor r12,r8,r9 @ a^b, b^c in next round - ldr r1,[sp,#3*4] @ from future BODY_16_xx -#endif - eor r0,r0,r8,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r11,r11,r7 @ d+=h - eor r3,r3,r9 @ Maj(a,b,c) - add r7,r7,r0,ror#2 @ h+=Sigma0(a) - @ add r7,r7,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#6*4] @ 21 - @ ldr r1,[sp,#3*4] - mov r0,r2,ror#7 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#5*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#14*4] - + add r3,r3,r7 + mov r7,r8,ror#2 + add r3,r3,r2 + eor r7,r7,r8,ror#13 + add r3,r3,r12 + eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 20>=15 + ldr r1,[sp,#6*4] @ from BODY_16_xx +#endif + orr r0,r8,r9 + and r2,r8,r9 + and r0,r0,r10 + add r7,r7,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r11,r11,r3 + add r7,r7,r0 + @ ldr r1,[sp,#6*4] @ 21 + ldr r12,[sp,#3*4] + mov r0,r1,ror#7 + ldr r3,[sp,#5*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#14*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r0,r11,r11,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r11,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r6,r6,r2 @ h+=X[i] - str r2,[sp,#5*4] + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r11,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r11,ror#11 eor r2,r4,r5 - add r6,r6,r0,ror#6 @ h+=Sigma1(e) +#if 21>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 21==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r11,ror#25 @ Sigma1(e) and r2,r2,r11 - add r6,r6,r3 @ h+=K256[i] + str r3,[sp,#5*4] + add r3,r3,r0 eor r2,r2,r5 @ Ch(e,f,g) - eor r0,r7,r7,ror#11 - add r6,r6,r2 @ h+=Ch(e,f,g) -#if 21==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 21<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r7,r8 @ a^b, b^c in next round -#else - ldr r2,[sp,#7*4] @ from future BODY_16_xx - eor r3,r7,r8 @ a^b, b^c in next round - ldr r1,[sp,#4*4] @ from future BODY_16_xx -#endif - eor r0,r0,r7,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r10,r10,r6 @ d+=h - eor r12,r12,r8 @ Maj(a,b,c) - add r6,r6,r0,ror#2 @ h+=Sigma0(a) - @ add r6,r6,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#7*4] @ 22 - @ ldr r1,[sp,#4*4] - mov r0,r2,ror#7 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#6*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#15*4] - - add r12,r12,r0 - eor r0,r10,r10,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r10,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] + add r3,r3,r6 + mov r6,r7,ror#2 + add r3,r3,r2 + eor r6,r6,r7,ror#13 + add r3,r3,r12 + eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 21>=15 + ldr r1,[sp,#7*4] @ from BODY_16_xx +#endif + orr r0,r7,r8 + and r2,r7,r8 + and r0,r0,r9 + add r6,r6,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r10,r10,r3 + add r6,r6,r0 + @ ldr r1,[sp,#7*4] @ 22 + ldr r12,[sp,#4*4] + mov r0,r1,ror#7 + ldr r3,[sp,#6*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#15*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r10,ror#6 ldr r12,[r14],#4 @ *K256++ - add r5,r5,r2 @ h+=X[i] - str r2,[sp,#6*4] + eor r0,r0,r10,ror#11 eor r2,r11,r4 - add r5,r5,r0,ror#6 @ h+=Sigma1(e) +#if 22>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 22==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r10,ror#25 @ Sigma1(e) and r2,r2,r10 - add r5,r5,r12 @ h+=K256[i] + str r3,[sp,#6*4] + add r3,r3,r0 eor r2,r2,r4 @ Ch(e,f,g) - eor r0,r6,r6,ror#11 - add r5,r5,r2 @ h+=Ch(e,f,g) -#if 22==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 22<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r6,r7 @ a^b, b^c in next round -#else - ldr r2,[sp,#8*4] @ from future BODY_16_xx - eor r12,r6,r7 @ a^b, b^c in next round - ldr r1,[sp,#5*4] @ from future BODY_16_xx -#endif - eor r0,r0,r6,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r9,r9,r5 @ d+=h - eor r3,r3,r7 @ Maj(a,b,c) - add r5,r5,r0,ror#2 @ h+=Sigma0(a) - @ add r5,r5,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#8*4] @ 23 - @ ldr r1,[sp,#5*4] - mov r0,r2,ror#7 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#7*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#0*4] - + add r3,r3,r5 + mov r5,r6,ror#2 + add r3,r3,r2 + eor r5,r5,r6,ror#13 + add r3,r3,r12 + eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 22>=15 + ldr r1,[sp,#8*4] @ from BODY_16_xx +#endif + orr r0,r6,r7 + and r2,r6,r7 + and r0,r0,r8 + add r5,r5,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r9,r9,r3 + add r5,r5,r0 + @ ldr r1,[sp,#8*4] @ 23 + ldr r12,[sp,#5*4] + mov r0,r1,ror#7 + ldr r3,[sp,#7*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#0*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r0,r9,r9,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r9,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r4,r4,r2 @ h+=X[i] - str r2,[sp,#7*4] + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r9,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r9,ror#11 eor r2,r10,r11 - add r4,r4,r0,ror#6 @ h+=Sigma1(e) +#if 23>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 23==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r9,ror#25 @ Sigma1(e) and r2,r2,r9 - add r4,r4,r3 @ h+=K256[i] + str r3,[sp,#7*4] + add r3,r3,r0 eor r2,r2,r11 @ Ch(e,f,g) - eor r0,r5,r5,ror#11 - add r4,r4,r2 @ h+=Ch(e,f,g) -#if 23==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 23<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r5,r6 @ a^b, b^c in next round -#else - ldr r2,[sp,#9*4] @ from future BODY_16_xx - eor r3,r5,r6 @ a^b, b^c in next round - ldr r1,[sp,#6*4] @ from future BODY_16_xx -#endif - eor r0,r0,r5,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r8,r8,r4 @ d+=h - eor r12,r12,r6 @ Maj(a,b,c) - add r4,r4,r0,ror#2 @ h+=Sigma0(a) - @ add r4,r4,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#9*4] @ 24 - @ ldr r1,[sp,#6*4] - mov r0,r2,ror#7 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#8*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#1*4] - - add r12,r12,r0 - eor r0,r8,r8,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r8,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] + add r3,r3,r4 + mov r4,r5,ror#2 + add r3,r3,r2 + eor r4,r4,r5,ror#13 + add r3,r3,r12 + eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 23>=15 + ldr r1,[sp,#9*4] @ from BODY_16_xx +#endif + orr r0,r5,r6 + and r2,r5,r6 + and r0,r0,r7 + add r4,r4,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r8,r8,r3 + add r4,r4,r0 + @ ldr r1,[sp,#9*4] @ 24 + ldr r12,[sp,#6*4] + mov r0,r1,ror#7 + ldr r3,[sp,#8*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#1*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r8,ror#6 ldr r12,[r14],#4 @ *K256++ - add r11,r11,r2 @ h+=X[i] - str r2,[sp,#8*4] + eor r0,r0,r8,ror#11 eor r2,r9,r10 - add r11,r11,r0,ror#6 @ h+=Sigma1(e) +#if 24>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 24==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r8,ror#25 @ Sigma1(e) and r2,r2,r8 - add r11,r11,r12 @ h+=K256[i] + str r3,[sp,#8*4] + add r3,r3,r0 eor r2,r2,r10 @ Ch(e,f,g) - eor r0,r4,r4,ror#11 - add r11,r11,r2 @ h+=Ch(e,f,g) -#if 24==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 24<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r4,r5 @ a^b, b^c in next round -#else - ldr r2,[sp,#10*4] @ from future BODY_16_xx - eor r12,r4,r5 @ a^b, b^c in next round - ldr r1,[sp,#7*4] @ from future BODY_16_xx -#endif - eor r0,r0,r4,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r7,r7,r11 @ d+=h - eor r3,r3,r5 @ Maj(a,b,c) - add r11,r11,r0,ror#2 @ h+=Sigma0(a) - @ add r11,r11,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#10*4] @ 25 - @ ldr r1,[sp,#7*4] - mov r0,r2,ror#7 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#9*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#2*4] - + add r3,r3,r11 + mov r11,r4,ror#2 + add r3,r3,r2 + eor r11,r11,r4,ror#13 + add r3,r3,r12 + eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 24>=15 + ldr r1,[sp,#10*4] @ from BODY_16_xx +#endif + orr r0,r4,r5 + and r2,r4,r5 + and r0,r0,r6 + add r11,r11,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r7,r7,r3 + add r11,r11,r0 + @ ldr r1,[sp,#10*4] @ 25 + ldr r12,[sp,#7*4] + mov r0,r1,ror#7 + ldr r3,[sp,#9*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#2*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r0,r7,r7,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r7,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r10,r10,r2 @ h+=X[i] - str r2,[sp,#9*4] + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r7,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r7,ror#11 eor r2,r8,r9 - add r10,r10,r0,ror#6 @ h+=Sigma1(e) +#if 25>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 25==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r7,ror#25 @ Sigma1(e) and r2,r2,r7 - add r10,r10,r3 @ h+=K256[i] + str r3,[sp,#9*4] + add r3,r3,r0 eor r2,r2,r9 @ Ch(e,f,g) - eor r0,r11,r11,ror#11 - add r10,r10,r2 @ h+=Ch(e,f,g) -#if 25==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 25<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r11,r4 @ a^b, b^c in next round -#else - ldr r2,[sp,#11*4] @ from future BODY_16_xx - eor r3,r11,r4 @ a^b, b^c in next round - ldr r1,[sp,#8*4] @ from future BODY_16_xx -#endif - eor r0,r0,r11,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r6,r6,r10 @ d+=h - eor r12,r12,r4 @ Maj(a,b,c) - add r10,r10,r0,ror#2 @ h+=Sigma0(a) - @ add r10,r10,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#11*4] @ 26 - @ ldr r1,[sp,#8*4] - mov r0,r2,ror#7 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#10*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#3*4] - - add r12,r12,r0 - eor r0,r6,r6,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r6,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] + add r3,r3,r10 + mov r10,r11,ror#2 + add r3,r3,r2 + eor r10,r10,r11,ror#13 + add r3,r3,r12 + eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 25>=15 + ldr r1,[sp,#11*4] @ from BODY_16_xx +#endif + orr r0,r11,r4 + and r2,r11,r4 + and r0,r0,r5 + add r10,r10,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r6,r6,r3 + add r10,r10,r0 + @ ldr r1,[sp,#11*4] @ 26 + ldr r12,[sp,#8*4] + mov r0,r1,ror#7 + ldr r3,[sp,#10*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#3*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r6,ror#6 ldr r12,[r14],#4 @ *K256++ - add r9,r9,r2 @ h+=X[i] - str r2,[sp,#10*4] + eor r0,r0,r6,ror#11 eor r2,r7,r8 - add r9,r9,r0,ror#6 @ h+=Sigma1(e) +#if 26>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 26==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r6,ror#25 @ Sigma1(e) and r2,r2,r6 - add r9,r9,r12 @ h+=K256[i] + str r3,[sp,#10*4] + add r3,r3,r0 eor r2,r2,r8 @ Ch(e,f,g) - eor r0,r10,r10,ror#11 - add r9,r9,r2 @ h+=Ch(e,f,g) -#if 26==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 26<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r10,r11 @ a^b, b^c in next round -#else - ldr r2,[sp,#12*4] @ from future BODY_16_xx - eor r12,r10,r11 @ a^b, b^c in next round - ldr r1,[sp,#9*4] @ from future BODY_16_xx -#endif - eor r0,r0,r10,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r5,r5,r9 @ d+=h - eor r3,r3,r11 @ Maj(a,b,c) - add r9,r9,r0,ror#2 @ h+=Sigma0(a) - @ add r9,r9,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#12*4] @ 27 - @ ldr r1,[sp,#9*4] - mov r0,r2,ror#7 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#11*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#4*4] - + add r3,r3,r9 + mov r9,r10,ror#2 + add r3,r3,r2 + eor r9,r9,r10,ror#13 + add r3,r3,r12 + eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 26>=15 + ldr r1,[sp,#12*4] @ from BODY_16_xx +#endif + orr r0,r10,r11 + and r2,r10,r11 + and r0,r0,r4 + add r9,r9,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r5,r5,r3 + add r9,r9,r0 + @ ldr r1,[sp,#12*4] @ 27 + ldr r12,[sp,#9*4] + mov r0,r1,ror#7 + ldr r3,[sp,#11*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#4*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r0,r5,r5,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r5,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r8,r8,r2 @ h+=X[i] - str r2,[sp,#11*4] + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r5,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r5,ror#11 eor r2,r6,r7 - add r8,r8,r0,ror#6 @ h+=Sigma1(e) +#if 27>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 27==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r5,ror#25 @ Sigma1(e) and r2,r2,r5 - add r8,r8,r3 @ h+=K256[i] + str r3,[sp,#11*4] + add r3,r3,r0 eor r2,r2,r7 @ Ch(e,f,g) - eor r0,r9,r9,ror#11 - add r8,r8,r2 @ h+=Ch(e,f,g) -#if 27==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 27<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r9,r10 @ a^b, b^c in next round -#else - ldr r2,[sp,#13*4] @ from future BODY_16_xx - eor r3,r9,r10 @ a^b, b^c in next round - ldr r1,[sp,#10*4] @ from future BODY_16_xx -#endif - eor r0,r0,r9,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r4,r4,r8 @ d+=h - eor r12,r12,r10 @ Maj(a,b,c) - add r8,r8,r0,ror#2 @ h+=Sigma0(a) - @ add r8,r8,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#13*4] @ 28 - @ ldr r1,[sp,#10*4] - mov r0,r2,ror#7 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#12*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#5*4] - - add r12,r12,r0 - eor r0,r4,r4,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r4,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] + add r3,r3,r8 + mov r8,r9,ror#2 + add r3,r3,r2 + eor r8,r8,r9,ror#13 + add r3,r3,r12 + eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 27>=15 + ldr r1,[sp,#13*4] @ from BODY_16_xx +#endif + orr r0,r9,r10 + and r2,r9,r10 + and r0,r0,r11 + add r8,r8,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r4,r4,r3 + add r8,r8,r0 + @ ldr r1,[sp,#13*4] @ 28 + ldr r12,[sp,#10*4] + mov r0,r1,ror#7 + ldr r3,[sp,#12*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#5*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r4,ror#6 ldr r12,[r14],#4 @ *K256++ - add r7,r7,r2 @ h+=X[i] - str r2,[sp,#12*4] + eor r0,r0,r4,ror#11 eor r2,r5,r6 - add r7,r7,r0,ror#6 @ h+=Sigma1(e) +#if 28>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 28==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r4,ror#25 @ Sigma1(e) and r2,r2,r4 - add r7,r7,r12 @ h+=K256[i] + str r3,[sp,#12*4] + add r3,r3,r0 eor r2,r2,r6 @ Ch(e,f,g) - eor r0,r8,r8,ror#11 - add r7,r7,r2 @ h+=Ch(e,f,g) -#if 28==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 28<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r8,r9 @ a^b, b^c in next round -#else - ldr r2,[sp,#14*4] @ from future BODY_16_xx - eor r12,r8,r9 @ a^b, b^c in next round - ldr r1,[sp,#11*4] @ from future BODY_16_xx -#endif - eor r0,r0,r8,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r11,r11,r7 @ d+=h - eor r3,r3,r9 @ Maj(a,b,c) - add r7,r7,r0,ror#2 @ h+=Sigma0(a) - @ add r7,r7,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#14*4] @ 29 - @ ldr r1,[sp,#11*4] - mov r0,r2,ror#7 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#13*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#6*4] - + add r3,r3,r7 + mov r7,r8,ror#2 + add r3,r3,r2 + eor r7,r7,r8,ror#13 + add r3,r3,r12 + eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 28>=15 + ldr r1,[sp,#14*4] @ from BODY_16_xx +#endif + orr r0,r8,r9 + and r2,r8,r9 + and r0,r0,r10 + add r7,r7,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r11,r11,r3 + add r7,r7,r0 + @ ldr r1,[sp,#14*4] @ 29 + ldr r12,[sp,#11*4] + mov r0,r1,ror#7 + ldr r3,[sp,#13*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#6*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r0,r11,r11,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r11,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r6,r6,r2 @ h+=X[i] - str r2,[sp,#13*4] + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r11,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r11,ror#11 eor r2,r4,r5 - add r6,r6,r0,ror#6 @ h+=Sigma1(e) +#if 29>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 29==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r11,ror#25 @ Sigma1(e) and r2,r2,r11 - add r6,r6,r3 @ h+=K256[i] + str r3,[sp,#13*4] + add r3,r3,r0 eor r2,r2,r5 @ Ch(e,f,g) - eor r0,r7,r7,ror#11 - add r6,r6,r2 @ h+=Ch(e,f,g) -#if 29==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 29<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r7,r8 @ a^b, b^c in next round -#else - ldr r2,[sp,#15*4] @ from future BODY_16_xx - eor r3,r7,r8 @ a^b, b^c in next round - ldr r1,[sp,#12*4] @ from future BODY_16_xx -#endif - eor r0,r0,r7,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r10,r10,r6 @ d+=h - eor r12,r12,r8 @ Maj(a,b,c) - add r6,r6,r0,ror#2 @ h+=Sigma0(a) - @ add r6,r6,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#15*4] @ 30 - @ ldr r1,[sp,#12*4] - mov r0,r2,ror#7 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#14*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#7*4] - - add r12,r12,r0 - eor r0,r10,r10,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r10,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] + add r3,r3,r6 + mov r6,r7,ror#2 + add r3,r3,r2 + eor r6,r6,r7,ror#13 + add r3,r3,r12 + eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 29>=15 + ldr r1,[sp,#15*4] @ from BODY_16_xx +#endif + orr r0,r7,r8 + and r2,r7,r8 + and r0,r0,r9 + add r6,r6,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r10,r10,r3 + add r6,r6,r0 + @ ldr r1,[sp,#15*4] @ 30 + ldr r12,[sp,#12*4] + mov r0,r1,ror#7 + ldr r3,[sp,#14*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#7*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r10,ror#6 ldr r12,[r14],#4 @ *K256++ - add r5,r5,r2 @ h+=X[i] - str r2,[sp,#14*4] + eor r0,r0,r10,ror#11 eor r2,r11,r4 - add r5,r5,r0,ror#6 @ h+=Sigma1(e) +#if 30>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 30==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r10,ror#25 @ Sigma1(e) and r2,r2,r10 - add r5,r5,r12 @ h+=K256[i] + str r3,[sp,#14*4] + add r3,r3,r0 eor r2,r2,r4 @ Ch(e,f,g) - eor r0,r6,r6,ror#11 - add r5,r5,r2 @ h+=Ch(e,f,g) -#if 30==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? -#endif -#if 30<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r6,r7 @ a^b, b^c in next round -#else - ldr r2,[sp,#0*4] @ from future BODY_16_xx - eor r12,r6,r7 @ a^b, b^c in next round - ldr r1,[sp,#13*4] @ from future BODY_16_xx -#endif - eor r0,r0,r6,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r9,r9,r5 @ d+=h - eor r3,r3,r7 @ Maj(a,b,c) - add r5,r5,r0,ror#2 @ h+=Sigma0(a) - @ add r5,r5,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#0*4] @ 31 - @ ldr r1,[sp,#13*4] - mov r0,r2,ror#7 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#15*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#8*4] - + add r3,r3,r5 + mov r5,r6,ror#2 + add r3,r3,r2 + eor r5,r5,r6,ror#13 + add r3,r3,r12 + eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 30>=15 + ldr r1,[sp,#0*4] @ from BODY_16_xx +#endif + orr r0,r6,r7 + and r2,r6,r7 + and r0,r0,r8 + add r5,r5,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r9,r9,r3 + add r5,r5,r0 + @ ldr r1,[sp,#0*4] @ 31 + ldr r12,[sp,#13*4] + mov r0,r1,ror#7 + ldr r3,[sp,#15*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#8*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r0,r9,r9,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r9,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r4,r4,r2 @ h+=X[i] - str r2,[sp,#15*4] + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r9,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r9,ror#11 eor r2,r10,r11 - add r4,r4,r0,ror#6 @ h+=Sigma1(e) +#if 31>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 31==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r9,ror#25 @ Sigma1(e) and r2,r2,r9 - add r4,r4,r3 @ h+=K256[i] + str r3,[sp,#15*4] + add r3,r3,r0 eor r2,r2,r11 @ Ch(e,f,g) - eor r0,r5,r5,ror#11 - add r4,r4,r2 @ h+=Ch(e,f,g) -#if 31==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 31<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r5,r6 @ a^b, b^c in next round -#else - ldr r2,[sp,#1*4] @ from future BODY_16_xx - eor r3,r5,r6 @ a^b, b^c in next round - ldr r1,[sp,#14*4] @ from future BODY_16_xx -#endif - eor r0,r0,r5,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r8,r8,r4 @ d+=h - eor r12,r12,r6 @ Maj(a,b,c) - add r4,r4,r0,ror#2 @ h+=Sigma0(a) - @ add r4,r4,r12 @ h+=Maj(a,b,c) - ldreq r3,[sp,#16*4] @ pull ctx + add r3,r3,r4 + mov r4,r5,ror#2 + add r3,r3,r2 + eor r4,r4,r5,ror#13 + add r3,r3,r12 + eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 31>=15 + ldr r1,[sp,#1*4] @ from BODY_16_xx +#endif + orr r0,r5,r6 + and r2,r5,r6 + and r0,r0,r7 + add r4,r4,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r8,r8,r3 + add r4,r4,r0 + and r12,r12,#0xff + cmp r12,#0xf2 bne .Lrounds_16_xx - add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r3,[sp,#16*4] @ pull ctx ldr r0,[r3,#0] ldr r2,[r3,#4] ldr r12,[r3,#8] @@ -1770,921 +1512,6 @@ sha256_block_data_order: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.size sha256_block_data_order,.-sha256_block_data_order -#if __ARM_ARCH__>=7 -.fpu neon - -.type sha256_block_data_order_neon,%function -.align 4 -sha256_block_data_order_neon: -.LNEON: - stmdb sp!,{r4-r12,lr} - - mov r12,sp - sub sp,sp,#16*4+16 @ alloca - sub r14,r3,#256+32 @ K256 - bic sp,sp,#15 @ align for 128-bit stores - - vld1.8 {q0},[r1]! - vld1.8 {q1},[r1]! - vld1.8 {q2},[r1]! - vld1.8 {q3},[r1]! - vld1.32 {q8},[r14,:128]! - vld1.32 {q9},[r14,:128]! - vld1.32 {q10},[r14,:128]! - vld1.32 {q11},[r14,:128]! - vrev32.8 q0,q0 @ yes, even on - str r0,[sp,#64] - vrev32.8 q1,q1 @ big-endian - str r1,[sp,#68] - mov r1,sp - vrev32.8 q2,q2 - str r2,[sp,#72] - vrev32.8 q3,q3 - str r12,[sp,#76] @ save original sp - vadd.i32 q8,q8,q0 - vadd.i32 q9,q9,q1 - vst1.32 {q8},[r1,:128]! - vadd.i32 q10,q10,q2 - vst1.32 {q9},[r1,:128]! - vadd.i32 q11,q11,q3 - vst1.32 {q10},[r1,:128]! - vst1.32 {q11},[r1,:128]! - - ldmia r0,{r4-r11} - sub r1,r1,#64 - ldr r2,[sp,#0] - eor r12,r12,r12 - eor r3,r5,r6 - b .L_00_48 - -.align 4 -.L_00_48: - vext.8 q8,q0,q1,#4 - add r11,r11,r2 - eor r2,r9,r10 - eor r0,r8,r8,ror#5 - vext.8 q9,q2,q3,#4 - add r4,r4,r12 - and r2,r2,r8 - eor r12,r0,r8,ror#19 - vshr.u32 q10,q8,#7 - eor r0,r4,r4,ror#11 - eor r2,r2,r10 - vadd.i32 q0,q0,q9 - add r11,r11,r12,ror#6 - eor r12,r4,r5 - vshr.u32 q9,q8,#3 - eor r0,r0,r4,ror#20 - add r11,r11,r2 - vsli.32 q10,q8,#25 - ldr r2,[sp,#4] - and r3,r3,r12 - vshr.u32 q11,q8,#18 - add r7,r7,r11 - add r11,r11,r0,ror#2 - eor r3,r3,r5 - veor q9,q9,q10 - add r10,r10,r2 - vsli.32 q11,q8,#14 - eor r2,r8,r9 - eor r0,r7,r7,ror#5 - vshr.u32 d24,d7,#17 - add r11,r11,r3 - and r2,r2,r7 - veor q9,q9,q11 - eor r3,r0,r7,ror#19 - eor r0,r11,r11,ror#11 - vsli.32 d24,d7,#15 - eor r2,r2,r9 - add r10,r10,r3,ror#6 - vshr.u32 d25,d7,#10 - eor r3,r11,r4 - eor r0,r0,r11,ror#20 - vadd.i32 q0,q0,q9 - add r10,r10,r2 - ldr r2,[sp,#8] - veor d25,d25,d24 - and r12,r12,r3 - add r6,r6,r10 - vshr.u32 d24,d7,#19 - add r10,r10,r0,ror#2 - eor r12,r12,r4 - vsli.32 d24,d7,#13 - add r9,r9,r2 - eor r2,r7,r8 - veor d25,d25,d24 - eor r0,r6,r6,ror#5 - add r10,r10,r12 - vadd.i32 d0,d0,d25 - and r2,r2,r6 - eor r12,r0,r6,ror#19 - vshr.u32 d24,d0,#17 - eor r0,r10,r10,ror#11 - eor r2,r2,r8 - vsli.32 d24,d0,#15 - add r9,r9,r12,ror#6 - eor r12,r10,r11 - vshr.u32 d25,d0,#10 - eor r0,r0,r10,ror#20 - add r9,r9,r2 - veor d25,d25,d24 - ldr r2,[sp,#12] - and r3,r3,r12 - vshr.u32 d24,d0,#19 - add r5,r5,r9 - add r9,r9,r0,ror#2 - eor r3,r3,r11 - vld1.32 {q8},[r14,:128]! - add r8,r8,r2 - vsli.32 d24,d0,#13 - eor r2,r6,r7 - eor r0,r5,r5,ror#5 - veor d25,d25,d24 - add r9,r9,r3 - and r2,r2,r5 - vadd.i32 d1,d1,d25 - eor r3,r0,r5,ror#19 - eor r0,r9,r9,ror#11 - vadd.i32 q8,q8,q0 - eor r2,r2,r7 - add r8,r8,r3,ror#6 - eor r3,r9,r10 - eor r0,r0,r9,ror#20 - add r8,r8,r2 - ldr r2,[sp,#16] - and r12,r12,r3 - add r4,r4,r8 - vst1.32 {q8},[r1,:128]! - add r8,r8,r0,ror#2 - eor r12,r12,r10 - vext.8 q8,q1,q2,#4 - add r7,r7,r2 - eor r2,r5,r6 - eor r0,r4,r4,ror#5 - vext.8 q9,q3,q0,#4 - add r8,r8,r12 - and r2,r2,r4 - eor r12,r0,r4,ror#19 - vshr.u32 q10,q8,#7 - eor r0,r8,r8,ror#11 - eor r2,r2,r6 - vadd.i32 q1,q1,q9 - add r7,r7,r12,ror#6 - eor r12,r8,r9 - vshr.u32 q9,q8,#3 - eor r0,r0,r8,ror#20 - add r7,r7,r2 - vsli.32 q10,q8,#25 - ldr r2,[sp,#20] - and r3,r3,r12 - vshr.u32 q11,q8,#18 - add r11,r11,r7 - add r7,r7,r0,ror#2 - eor r3,r3,r9 - veor q9,q9,q10 - add r6,r6,r2 - vsli.32 q11,q8,#14 - eor r2,r4,r5 - eor r0,r11,r11,ror#5 - vshr.u32 d24,d1,#17 - add r7,r7,r3 - and r2,r2,r11 - veor q9,q9,q11 - eor r3,r0,r11,ror#19 - eor r0,r7,r7,ror#11 - vsli.32 d24,d1,#15 - eor r2,r2,r5 - add r6,r6,r3,ror#6 - vshr.u32 d25,d1,#10 - eor r3,r7,r8 - eor r0,r0,r7,ror#20 - vadd.i32 q1,q1,q9 - add r6,r6,r2 - ldr r2,[sp,#24] - veor d25,d25,d24 - and r12,r12,r3 - add r10,r10,r6 - vshr.u32 d24,d1,#19 - add r6,r6,r0,ror#2 - eor r12,r12,r8 - vsli.32 d24,d1,#13 - add r5,r5,r2 - eor r2,r11,r4 - veor d25,d25,d24 - eor r0,r10,r10,ror#5 - add r6,r6,r12 - vadd.i32 d2,d2,d25 - and r2,r2,r10 - eor r12,r0,r10,ror#19 - vshr.u32 d24,d2,#17 - eor r0,r6,r6,ror#11 - eor r2,r2,r4 - vsli.32 d24,d2,#15 - add r5,r5,r12,ror#6 - eor r12,r6,r7 - vshr.u32 d25,d2,#10 - eor r0,r0,r6,ror#20 - add r5,r5,r2 - veor d25,d25,d24 - ldr r2,[sp,#28] - and r3,r3,r12 - vshr.u32 d24,d2,#19 - add r9,r9,r5 - add r5,r5,r0,ror#2 - eor r3,r3,r7 - vld1.32 {q8},[r14,:128]! - add r4,r4,r2 - vsli.32 d24,d2,#13 - eor r2,r10,r11 - eor r0,r9,r9,ror#5 - veor d25,d25,d24 - add r5,r5,r3 - and r2,r2,r9 - vadd.i32 d3,d3,d25 - eor r3,r0,r9,ror#19 - eor r0,r5,r5,ror#11 - vadd.i32 q8,q8,q1 - eor r2,r2,r11 - add r4,r4,r3,ror#6 - eor r3,r5,r6 - eor r0,r0,r5,ror#20 - add r4,r4,r2 - ldr r2,[sp,#32] - and r12,r12,r3 - add r8,r8,r4 - vst1.32 {q8},[r1,:128]! - add r4,r4,r0,ror#2 - eor r12,r12,r6 - vext.8 q8,q2,q3,#4 - add r11,r11,r2 - eor r2,r9,r10 - eor r0,r8,r8,ror#5 - vext.8 q9,q0,q1,#4 - add r4,r4,r12 - and r2,r2,r8 - eor r12,r0,r8,ror#19 - vshr.u32 q10,q8,#7 - eor r0,r4,r4,ror#11 - eor r2,r2,r10 - vadd.i32 q2,q2,q9 - add r11,r11,r12,ror#6 - eor r12,r4,r5 - vshr.u32 q9,q8,#3 - eor r0,r0,r4,ror#20 - add r11,r11,r2 - vsli.32 q10,q8,#25 - ldr r2,[sp,#36] - and r3,r3,r12 - vshr.u32 q11,q8,#18 - add r7,r7,r11 - add r11,r11,r0,ror#2 - eor r3,r3,r5 - veor q9,q9,q10 - add r10,r10,r2 - vsli.32 q11,q8,#14 - eor r2,r8,r9 - eor r0,r7,r7,ror#5 - vshr.u32 d24,d3,#17 - add r11,r11,r3 - and r2,r2,r7 - veor q9,q9,q11 - eor r3,r0,r7,ror#19 - eor r0,r11,r11,ror#11 - vsli.32 d24,d3,#15 - eor r2,r2,r9 - add r10,r10,r3,ror#6 - vshr.u32 d25,d3,#10 - eor r3,r11,r4 - eor r0,r0,r11,ror#20 - vadd.i32 q2,q2,q9 - add r10,r10,r2 - ldr r2,[sp,#40] - veor d25,d25,d24 - and r12,r12,r3 - add r6,r6,r10 - vshr.u32 d24,d3,#19 - add r10,r10,r0,ror#2 - eor r12,r12,r4 - vsli.32 d24,d3,#13 - add r9,r9,r2 - eor r2,r7,r8 - veor d25,d25,d24 - eor r0,r6,r6,ror#5 - add r10,r10,r12 - vadd.i32 d4,d4,d25 - and r2,r2,r6 - eor r12,r0,r6,ror#19 - vshr.u32 d24,d4,#17 - eor r0,r10,r10,ror#11 - eor r2,r2,r8 - vsli.32 d24,d4,#15 - add r9,r9,r12,ror#6 - eor r12,r10,r11 - vshr.u32 d25,d4,#10 - eor r0,r0,r10,ror#20 - add r9,r9,r2 - veor d25,d25,d24 - ldr r2,[sp,#44] - and r3,r3,r12 - vshr.u32 d24,d4,#19 - add r5,r5,r9 - add r9,r9,r0,ror#2 - eor r3,r3,r11 - vld1.32 {q8},[r14,:128]! - add r8,r8,r2 - vsli.32 d24,d4,#13 - eor r2,r6,r7 - eor r0,r5,r5,ror#5 - veor d25,d25,d24 - add r9,r9,r3 - and r2,r2,r5 - vadd.i32 d5,d5,d25 - eor r3,r0,r5,ror#19 - eor r0,r9,r9,ror#11 - vadd.i32 q8,q8,q2 - eor r2,r2,r7 - add r8,r8,r3,ror#6 - eor r3,r9,r10 - eor r0,r0,r9,ror#20 - add r8,r8,r2 - ldr r2,[sp,#48] - and r12,r12,r3 - add r4,r4,r8 - vst1.32 {q8},[r1,:128]! - add r8,r8,r0,ror#2 - eor r12,r12,r10 - vext.8 q8,q3,q0,#4 - add r7,r7,r2 - eor r2,r5,r6 - eor r0,r4,r4,ror#5 - vext.8 q9,q1,q2,#4 - add r8,r8,r12 - and r2,r2,r4 - eor r12,r0,r4,ror#19 - vshr.u32 q10,q8,#7 - eor r0,r8,r8,ror#11 - eor r2,r2,r6 - vadd.i32 q3,q3,q9 - add r7,r7,r12,ror#6 - eor r12,r8,r9 - vshr.u32 q9,q8,#3 - eor r0,r0,r8,ror#20 - add r7,r7,r2 - vsli.32 q10,q8,#25 - ldr r2,[sp,#52] - and r3,r3,r12 - vshr.u32 q11,q8,#18 - add r11,r11,r7 - add r7,r7,r0,ror#2 - eor r3,r3,r9 - veor q9,q9,q10 - add r6,r6,r2 - vsli.32 q11,q8,#14 - eor r2,r4,r5 - eor r0,r11,r11,ror#5 - vshr.u32 d24,d5,#17 - add r7,r7,r3 - and r2,r2,r11 - veor q9,q9,q11 - eor r3,r0,r11,ror#19 - eor r0,r7,r7,ror#11 - vsli.32 d24,d5,#15 - eor r2,r2,r5 - add r6,r6,r3,ror#6 - vshr.u32 d25,d5,#10 - eor r3,r7,r8 - eor r0,r0,r7,ror#20 - vadd.i32 q3,q3,q9 - add r6,r6,r2 - ldr r2,[sp,#56] - veor d25,d25,d24 - and r12,r12,r3 - add r10,r10,r6 - vshr.u32 d24,d5,#19 - add r6,r6,r0,ror#2 - eor r12,r12,r8 - vsli.32 d24,d5,#13 - add r5,r5,r2 - eor r2,r11,r4 - veor d25,d25,d24 - eor r0,r10,r10,ror#5 - add r6,r6,r12 - vadd.i32 d6,d6,d25 - and r2,r2,r10 - eor r12,r0,r10,ror#19 - vshr.u32 d24,d6,#17 - eor r0,r6,r6,ror#11 - eor r2,r2,r4 - vsli.32 d24,d6,#15 - add r5,r5,r12,ror#6 - eor r12,r6,r7 - vshr.u32 d25,d6,#10 - eor r0,r0,r6,ror#20 - add r5,r5,r2 - veor d25,d25,d24 - ldr r2,[sp,#60] - and r3,r3,r12 - vshr.u32 d24,d6,#19 - add r9,r9,r5 - add r5,r5,r0,ror#2 - eor r3,r3,r7 - vld1.32 {q8},[r14,:128]! - add r4,r4,r2 - vsli.32 d24,d6,#13 - eor r2,r10,r11 - eor r0,r9,r9,ror#5 - veor d25,d25,d24 - add r5,r5,r3 - and r2,r2,r9 - vadd.i32 d7,d7,d25 - eor r3,r0,r9,ror#19 - eor r0,r5,r5,ror#11 - vadd.i32 q8,q8,q3 - eor r2,r2,r11 - add r4,r4,r3,ror#6 - eor r3,r5,r6 - eor r0,r0,r5,ror#20 - add r4,r4,r2 - ldr r2,[r14] - and r12,r12,r3 - add r8,r8,r4 - vst1.32 {q8},[r1,:128]! - add r4,r4,r0,ror#2 - eor r12,r12,r6 - teq r2,#0 @ check for K256 terminator - ldr r2,[sp,#0] - sub r1,r1,#64 - bne .L_00_48 - - ldr r1,[sp,#68] - ldr r0,[sp,#72] - sub r14,r14,#256 @ rewind r14 - teq r1,r0 - subeq r1,r1,#64 @ avoid SEGV - vld1.8 {q0},[r1]! @ load next input block - vld1.8 {q1},[r1]! - vld1.8 {q2},[r1]! - vld1.8 {q3},[r1]! - strne r1,[sp,#68] - mov r1,sp - add r11,r11,r2 - eor r2,r9,r10 - eor r0,r8,r8,ror#5 - add r4,r4,r12 - vld1.32 {q8},[r14,:128]! - and r2,r2,r8 - eor r12,r0,r8,ror#19 - eor r0,r4,r4,ror#11 - eor r2,r2,r10 - vrev32.8 q0,q0 - add r11,r11,r12,ror#6 - eor r12,r4,r5 - eor r0,r0,r4,ror#20 - add r11,r11,r2 - vadd.i32 q8,q8,q0 - ldr r2,[sp,#4] - and r3,r3,r12 - add r7,r7,r11 - add r11,r11,r0,ror#2 - eor r3,r3,r5 - add r10,r10,r2 - eor r2,r8,r9 - eor r0,r7,r7,ror#5 - add r11,r11,r3 - and r2,r2,r7 - eor r3,r0,r7,ror#19 - eor r0,r11,r11,ror#11 - eor r2,r2,r9 - add r10,r10,r3,ror#6 - eor r3,r11,r4 - eor r0,r0,r11,ror#20 - add r10,r10,r2 - ldr r2,[sp,#8] - and r12,r12,r3 - add r6,r6,r10 - add r10,r10,r0,ror#2 - eor r12,r12,r4 - add r9,r9,r2 - eor r2,r7,r8 - eor r0,r6,r6,ror#5 - add r10,r10,r12 - and r2,r2,r6 - eor r12,r0,r6,ror#19 - eor r0,r10,r10,ror#11 - eor r2,r2,r8 - add r9,r9,r12,ror#6 - eor r12,r10,r11 - eor r0,r0,r10,ror#20 - add r9,r9,r2 - ldr r2,[sp,#12] - and r3,r3,r12 - add r5,r5,r9 - add r9,r9,r0,ror#2 - eor r3,r3,r11 - add r8,r8,r2 - eor r2,r6,r7 - eor r0,r5,r5,ror#5 - add r9,r9,r3 - and r2,r2,r5 - eor r3,r0,r5,ror#19 - eor r0,r9,r9,ror#11 - eor r2,r2,r7 - add r8,r8,r3,ror#6 - eor r3,r9,r10 - eor r0,r0,r9,ror#20 - add r8,r8,r2 - ldr r2,[sp,#16] - and r12,r12,r3 - add r4,r4,r8 - add r8,r8,r0,ror#2 - eor r12,r12,r10 - vst1.32 {q8},[r1,:128]! - add r7,r7,r2 - eor r2,r5,r6 - eor r0,r4,r4,ror#5 - add r8,r8,r12 - vld1.32 {q8},[r14,:128]! - and r2,r2,r4 - eor r12,r0,r4,ror#19 - eor r0,r8,r8,ror#11 - eor r2,r2,r6 - vrev32.8 q1,q1 - add r7,r7,r12,ror#6 - eor r12,r8,r9 - eor r0,r0,r8,ror#20 - add r7,r7,r2 - vadd.i32 q8,q8,q1 - ldr r2,[sp,#20] - and r3,r3,r12 - add r11,r11,r7 - add r7,r7,r0,ror#2 - eor r3,r3,r9 - add r6,r6,r2 - eor r2,r4,r5 - eor r0,r11,r11,ror#5 - add r7,r7,r3 - and r2,r2,r11 - eor r3,r0,r11,ror#19 - eor r0,r7,r7,ror#11 - eor r2,r2,r5 - add r6,r6,r3,ror#6 - eor r3,r7,r8 - eor r0,r0,r7,ror#20 - add r6,r6,r2 - ldr r2,[sp,#24] - and r12,r12,r3 - add r10,r10,r6 - add r6,r6,r0,ror#2 - eor r12,r12,r8 - add r5,r5,r2 - eor r2,r11,r4 - eor r0,r10,r10,ror#5 - add r6,r6,r12 - and r2,r2,r10 - eor r12,r0,r10,ror#19 - eor r0,r6,r6,ror#11 - eor r2,r2,r4 - add r5,r5,r12,ror#6 - eor r12,r6,r7 - eor r0,r0,r6,ror#20 - add r5,r5,r2 - ldr r2,[sp,#28] - and r3,r3,r12 - add r9,r9,r5 - add r5,r5,r0,ror#2 - eor r3,r3,r7 - add r4,r4,r2 - eor r2,r10,r11 - eor r0,r9,r9,ror#5 - add r5,r5,r3 - and r2,r2,r9 - eor r3,r0,r9,ror#19 - eor r0,r5,r5,ror#11 - eor r2,r2,r11 - add r4,r4,r3,ror#6 - eor r3,r5,r6 - eor r0,r0,r5,ror#20 - add r4,r4,r2 - ldr r2,[sp,#32] - and r12,r12,r3 - add r8,r8,r4 - add r4,r4,r0,ror#2 - eor r12,r12,r6 - vst1.32 {q8},[r1,:128]! - add r11,r11,r2 - eor r2,r9,r10 - eor r0,r8,r8,ror#5 - add r4,r4,r12 - vld1.32 {q8},[r14,:128]! - and r2,r2,r8 - eor r12,r0,r8,ror#19 - eor r0,r4,r4,ror#11 - eor r2,r2,r10 - vrev32.8 q2,q2 - add r11,r11,r12,ror#6 - eor r12,r4,r5 - eor r0,r0,r4,ror#20 - add r11,r11,r2 - vadd.i32 q8,q8,q2 - ldr r2,[sp,#36] - and r3,r3,r12 - add r7,r7,r11 - add r11,r11,r0,ror#2 - eor r3,r3,r5 - add r10,r10,r2 - eor r2,r8,r9 - eor r0,r7,r7,ror#5 - add r11,r11,r3 - and r2,r2,r7 - eor r3,r0,r7,ror#19 - eor r0,r11,r11,ror#11 - eor r2,r2,r9 - add r10,r10,r3,ror#6 - eor r3,r11,r4 - eor r0,r0,r11,ror#20 - add r10,r10,r2 - ldr r2,[sp,#40] - and r12,r12,r3 - add r6,r6,r10 - add r10,r10,r0,ror#2 - eor r12,r12,r4 - add r9,r9,r2 - eor r2,r7,r8 - eor r0,r6,r6,ror#5 - add r10,r10,r12 - and r2,r2,r6 - eor r12,r0,r6,ror#19 - eor r0,r10,r10,ror#11 - eor r2,r2,r8 - add r9,r9,r12,ror#6 - eor r12,r10,r11 - eor r0,r0,r10,ror#20 - add r9,r9,r2 - ldr r2,[sp,#44] - and r3,r3,r12 - add r5,r5,r9 - add r9,r9,r0,ror#2 - eor r3,r3,r11 - add r8,r8,r2 - eor r2,r6,r7 - eor r0,r5,r5,ror#5 - add r9,r9,r3 - and r2,r2,r5 - eor r3,r0,r5,ror#19 - eor r0,r9,r9,ror#11 - eor r2,r2,r7 - add r8,r8,r3,ror#6 - eor r3,r9,r10 - eor r0,r0,r9,ror#20 - add r8,r8,r2 - ldr r2,[sp,#48] - and r12,r12,r3 - add r4,r4,r8 - add r8,r8,r0,ror#2 - eor r12,r12,r10 - vst1.32 {q8},[r1,:128]! - add r7,r7,r2 - eor r2,r5,r6 - eor r0,r4,r4,ror#5 - add r8,r8,r12 - vld1.32 {q8},[r14,:128]! - and r2,r2,r4 - eor r12,r0,r4,ror#19 - eor r0,r8,r8,ror#11 - eor r2,r2,r6 - vrev32.8 q3,q3 - add r7,r7,r12,ror#6 - eor r12,r8,r9 - eor r0,r0,r8,ror#20 - add r7,r7,r2 - vadd.i32 q8,q8,q3 - ldr r2,[sp,#52] - and r3,r3,r12 - add r11,r11,r7 - add r7,r7,r0,ror#2 - eor r3,r3,r9 - add r6,r6,r2 - eor r2,r4,r5 - eor r0,r11,r11,ror#5 - add r7,r7,r3 - and r2,r2,r11 - eor r3,r0,r11,ror#19 - eor r0,r7,r7,ror#11 - eor r2,r2,r5 - add r6,r6,r3,ror#6 - eor r3,r7,r8 - eor r0,r0,r7,ror#20 - add r6,r6,r2 - ldr r2,[sp,#56] - and r12,r12,r3 - add r10,r10,r6 - add r6,r6,r0,ror#2 - eor r12,r12,r8 - add r5,r5,r2 - eor r2,r11,r4 - eor r0,r10,r10,ror#5 - add r6,r6,r12 - and r2,r2,r10 - eor r12,r0,r10,ror#19 - eor r0,r6,r6,ror#11 - eor r2,r2,r4 - add r5,r5,r12,ror#6 - eor r12,r6,r7 - eor r0,r0,r6,ror#20 - add r5,r5,r2 - ldr r2,[sp,#60] - and r3,r3,r12 - add r9,r9,r5 - add r5,r5,r0,ror#2 - eor r3,r3,r7 - add r4,r4,r2 - eor r2,r10,r11 - eor r0,r9,r9,ror#5 - add r5,r5,r3 - and r2,r2,r9 - eor r3,r0,r9,ror#19 - eor r0,r5,r5,ror#11 - eor r2,r2,r11 - add r4,r4,r3,ror#6 - eor r3,r5,r6 - eor r0,r0,r5,ror#20 - add r4,r4,r2 - ldr r2,[sp,#64] - and r12,r12,r3 - add r8,r8,r4 - add r4,r4,r0,ror#2 - eor r12,r12,r6 - vst1.32 {q8},[r1,:128]! - ldr r0,[r2,#0] - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - ldr r12,[r2,#4] - ldr r3,[r2,#8] - ldr r1,[r2,#12] - add r4,r4,r0 @ accumulate - ldr r0,[r2,#16] - add r5,r5,r12 - ldr r12,[r2,#20] - add r6,r6,r3 - ldr r3,[r2,#24] - add r7,r7,r1 - ldr r1,[r2,#28] - add r8,r8,r0 - str r4,[r2],#4 - add r9,r9,r12 - str r5,[r2],#4 - add r10,r10,r3 - str r6,[r2],#4 - add r11,r11,r1 - str r7,[r2],#4 - stmia r2,{r8-r11} - - movne r1,sp - ldrne r2,[sp,#0] - eorne r12,r12,r12 - ldreq sp,[sp,#76] @ restore original sp - eorne r3,r5,r6 - bne .L_00_48 - - ldmia sp!,{r4-r12,pc} -.size sha256_block_data_order_neon,.-sha256_block_data_order_neon -#endif -#if __ARM_ARCH__>=7 -.type sha256_block_data_order_armv8,%function -.align 5 -sha256_block_data_order_armv8: -.LARMv8: - vld1.32 {q0,q1},[r0] - sub r3,r3,#sha256_block_data_order-K256 - -.Loop_v8: - vld1.8 {q8-q9},[r1]! - vld1.8 {q10-q11},[r1]! - vld1.32 {q12},[r3]! - vrev32.8 q8,q8 - vrev32.8 q9,q9 - vrev32.8 q10,q10 - vrev32.8 q11,q11 - vmov q14,q0 @ offload - vmov q15,q1 - teq r1,r2 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q8 - .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 - vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q9 - .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 - vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q10 - .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 - vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q11 - .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 - vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q8 - .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 - vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q9 - .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 - vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q10 - .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 - vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q11 - .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 - vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q8 - .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 - vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q9 - .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 - vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q10 - .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 - vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q11 - .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 - vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q8 - vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q9 - vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - - vld1.32 {q13},[r3] - vadd.i32 q12,q12,q10 - sub r3,r3,#256-16 @ rewind - vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - - vadd.i32 q13,q13,q11 - vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - - vadd.i32 q0,q0,q14 - vadd.i32 q1,q1,q15 - bne .Loop_v8 - - vst1.32 {q0,q1},[r0] - - bx lr @ bx lr -.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 -#endif -.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " +.size sha256_block_data_order,.-sha256_block_data_order +.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by " .align 2 -.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha256-armv8.S b/app/openssl/crypto/sha/asm/sha256-armv8.S deleted file mode 100644 index bd43b1fe..00000000 --- a/app/openssl/crypto/sha/asm/sha256-armv8.S +++ /dev/null @@ -1,1141 +0,0 @@ -#include "arm_arch.h" - -.text - -.globl sha256_block_data_order -.type sha256_block_data_order,%function -.align 6 -sha256_block_data_order: - ldr x16,.LOPENSSL_armcap_P - adr x17,.LOPENSSL_armcap_P - add x16,x16,x17 - ldr w16,[x16] - tst w16,#ARMV8_SHA256 - b.ne .Lv8_entry - stp x29,x30,[sp,#-128]! - add x29,sp,#0 - - stp x19,x20,[sp,#16] - stp x21,x22,[sp,#32] - stp x23,x24,[sp,#48] - stp x25,x26,[sp,#64] - stp x27,x28,[sp,#80] - sub sp,sp,#4*4 - - ldp w20,w21,[x0] // load context - ldp w22,w23,[x0,#2*4] - ldp w24,w25,[x0,#4*4] - add x2,x1,x2,lsl#6 // end of input - ldp w26,w27,[x0,#6*4] - adr x30,K256 - stp x0,x2,[x29,#96] - -.Loop: - ldp w3,w4,[x1],#2*4 - ldr w19,[x30],#4 // *K++ - eor w28,w21,w22 // magic seed - str x1,[x29,#112] -#ifndef __ARMEB__ - rev w3,w3 // 0 -#endif - ror w16,w24,#6 - add w27,w27,w19 // h+=K[i] - eor w6,w24,w24,ror#14 - and w17,w25,w24 - bic w19,w26,w24 - add w27,w27,w3 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w20,w21 // a^b, b^c in next round - eor w16,w16,w6,ror#11 // Sigma1(e) - ror w6,w20,#2 - add w27,w27,w17 // h+=Ch(e,f,g) - eor w17,w20,w20,ror#9 - add w27,w27,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w23,w23,w27 // d+=h - eor w28,w28,w21 // Maj(a,b,c) - eor w17,w6,w17,ror#13 // Sigma0(a) - add w27,w27,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w27,w27,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w4,w4 // 1 -#endif - ldp w5,w6,[x1],#2*4 - add w27,w27,w17 // h+=Sigma0(a) - ror w16,w23,#6 - add w26,w26,w28 // h+=K[i] - eor w7,w23,w23,ror#14 - and w17,w24,w23 - bic w28,w25,w23 - add w26,w26,w4 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w27,w20 // a^b, b^c in next round - eor w16,w16,w7,ror#11 // Sigma1(e) - ror w7,w27,#2 - add w26,w26,w17 // h+=Ch(e,f,g) - eor w17,w27,w27,ror#9 - add w26,w26,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w22,w22,w26 // d+=h - eor w19,w19,w20 // Maj(a,b,c) - eor w17,w7,w17,ror#13 // Sigma0(a) - add w26,w26,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w26,w26,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w5,w5 // 2 -#endif - add w26,w26,w17 // h+=Sigma0(a) - ror w16,w22,#6 - add w25,w25,w19 // h+=K[i] - eor w8,w22,w22,ror#14 - and w17,w23,w22 - bic w19,w24,w22 - add w25,w25,w5 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w26,w27 // a^b, b^c in next round - eor w16,w16,w8,ror#11 // Sigma1(e) - ror w8,w26,#2 - add w25,w25,w17 // h+=Ch(e,f,g) - eor w17,w26,w26,ror#9 - add w25,w25,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w21,w21,w25 // d+=h - eor w28,w28,w27 // Maj(a,b,c) - eor w17,w8,w17,ror#13 // Sigma0(a) - add w25,w25,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w25,w25,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w6,w6 // 3 -#endif - ldp w7,w8,[x1],#2*4 - add w25,w25,w17 // h+=Sigma0(a) - ror w16,w21,#6 - add w24,w24,w28 // h+=K[i] - eor w9,w21,w21,ror#14 - and w17,w22,w21 - bic w28,w23,w21 - add w24,w24,w6 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w25,w26 // a^b, b^c in next round - eor w16,w16,w9,ror#11 // Sigma1(e) - ror w9,w25,#2 - add w24,w24,w17 // h+=Ch(e,f,g) - eor w17,w25,w25,ror#9 - add w24,w24,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w20,w20,w24 // d+=h - eor w19,w19,w26 // Maj(a,b,c) - eor w17,w9,w17,ror#13 // Sigma0(a) - add w24,w24,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w24,w24,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w7,w7 // 4 -#endif - add w24,w24,w17 // h+=Sigma0(a) - ror w16,w20,#6 - add w23,w23,w19 // h+=K[i] - eor w10,w20,w20,ror#14 - and w17,w21,w20 - bic w19,w22,w20 - add w23,w23,w7 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w24,w25 // a^b, b^c in next round - eor w16,w16,w10,ror#11 // Sigma1(e) - ror w10,w24,#2 - add w23,w23,w17 // h+=Ch(e,f,g) - eor w17,w24,w24,ror#9 - add w23,w23,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w27,w27,w23 // d+=h - eor w28,w28,w25 // Maj(a,b,c) - eor w17,w10,w17,ror#13 // Sigma0(a) - add w23,w23,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w23,w23,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w8,w8 // 5 -#endif - ldp w9,w10,[x1],#2*4 - add w23,w23,w17 // h+=Sigma0(a) - ror w16,w27,#6 - add w22,w22,w28 // h+=K[i] - eor w11,w27,w27,ror#14 - and w17,w20,w27 - bic w28,w21,w27 - add w22,w22,w8 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w23,w24 // a^b, b^c in next round - eor w16,w16,w11,ror#11 // Sigma1(e) - ror w11,w23,#2 - add w22,w22,w17 // h+=Ch(e,f,g) - eor w17,w23,w23,ror#9 - add w22,w22,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w26,w26,w22 // d+=h - eor w19,w19,w24 // Maj(a,b,c) - eor w17,w11,w17,ror#13 // Sigma0(a) - add w22,w22,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w22,w22,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w9,w9 // 6 -#endif - add w22,w22,w17 // h+=Sigma0(a) - ror w16,w26,#6 - add w21,w21,w19 // h+=K[i] - eor w12,w26,w26,ror#14 - and w17,w27,w26 - bic w19,w20,w26 - add w21,w21,w9 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w22,w23 // a^b, b^c in next round - eor w16,w16,w12,ror#11 // Sigma1(e) - ror w12,w22,#2 - add w21,w21,w17 // h+=Ch(e,f,g) - eor w17,w22,w22,ror#9 - add w21,w21,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w25,w25,w21 // d+=h - eor w28,w28,w23 // Maj(a,b,c) - eor w17,w12,w17,ror#13 // Sigma0(a) - add w21,w21,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w21,w21,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w10,w10 // 7 -#endif - ldp w11,w12,[x1],#2*4 - add w21,w21,w17 // h+=Sigma0(a) - ror w16,w25,#6 - add w20,w20,w28 // h+=K[i] - eor w13,w25,w25,ror#14 - and w17,w26,w25 - bic w28,w27,w25 - add w20,w20,w10 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w21,w22 // a^b, b^c in next round - eor w16,w16,w13,ror#11 // Sigma1(e) - ror w13,w21,#2 - add w20,w20,w17 // h+=Ch(e,f,g) - eor w17,w21,w21,ror#9 - add w20,w20,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w24,w24,w20 // d+=h - eor w19,w19,w22 // Maj(a,b,c) - eor w17,w13,w17,ror#13 // Sigma0(a) - add w20,w20,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w20,w20,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w11,w11 // 8 -#endif - add w20,w20,w17 // h+=Sigma0(a) - ror w16,w24,#6 - add w27,w27,w19 // h+=K[i] - eor w14,w24,w24,ror#14 - and w17,w25,w24 - bic w19,w26,w24 - add w27,w27,w11 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w20,w21 // a^b, b^c in next round - eor w16,w16,w14,ror#11 // Sigma1(e) - ror w14,w20,#2 - add w27,w27,w17 // h+=Ch(e,f,g) - eor w17,w20,w20,ror#9 - add w27,w27,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w23,w23,w27 // d+=h - eor w28,w28,w21 // Maj(a,b,c) - eor w17,w14,w17,ror#13 // Sigma0(a) - add w27,w27,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w27,w27,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w12,w12 // 9 -#endif - ldp w13,w14,[x1],#2*4 - add w27,w27,w17 // h+=Sigma0(a) - ror w16,w23,#6 - add w26,w26,w28 // h+=K[i] - eor w15,w23,w23,ror#14 - and w17,w24,w23 - bic w28,w25,w23 - add w26,w26,w12 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w27,w20 // a^b, b^c in next round - eor w16,w16,w15,ror#11 // Sigma1(e) - ror w15,w27,#2 - add w26,w26,w17 // h+=Ch(e,f,g) - eor w17,w27,w27,ror#9 - add w26,w26,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w22,w22,w26 // d+=h - eor w19,w19,w20 // Maj(a,b,c) - eor w17,w15,w17,ror#13 // Sigma0(a) - add w26,w26,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w26,w26,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w13,w13 // 10 -#endif - add w26,w26,w17 // h+=Sigma0(a) - ror w16,w22,#6 - add w25,w25,w19 // h+=K[i] - eor w0,w22,w22,ror#14 - and w17,w23,w22 - bic w19,w24,w22 - add w25,w25,w13 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w26,w27 // a^b, b^c in next round - eor w16,w16,w0,ror#11 // Sigma1(e) - ror w0,w26,#2 - add w25,w25,w17 // h+=Ch(e,f,g) - eor w17,w26,w26,ror#9 - add w25,w25,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w21,w21,w25 // d+=h - eor w28,w28,w27 // Maj(a,b,c) - eor w17,w0,w17,ror#13 // Sigma0(a) - add w25,w25,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w25,w25,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w14,w14 // 11 -#endif - ldp w15,w0,[x1],#2*4 - add w25,w25,w17 // h+=Sigma0(a) - str w6,[sp,#12] - ror w16,w21,#6 - add w24,w24,w28 // h+=K[i] - eor w6,w21,w21,ror#14 - and w17,w22,w21 - bic w28,w23,w21 - add w24,w24,w14 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w25,w26 // a^b, b^c in next round - eor w16,w16,w6,ror#11 // Sigma1(e) - ror w6,w25,#2 - add w24,w24,w17 // h+=Ch(e,f,g) - eor w17,w25,w25,ror#9 - add w24,w24,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w20,w20,w24 // d+=h - eor w19,w19,w26 // Maj(a,b,c) - eor w17,w6,w17,ror#13 // Sigma0(a) - add w24,w24,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w24,w24,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w15,w15 // 12 -#endif - add w24,w24,w17 // h+=Sigma0(a) - str w7,[sp,#0] - ror w16,w20,#6 - add w23,w23,w19 // h+=K[i] - eor w7,w20,w20,ror#14 - and w17,w21,w20 - bic w19,w22,w20 - add w23,w23,w15 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w24,w25 // a^b, b^c in next round - eor w16,w16,w7,ror#11 // Sigma1(e) - ror w7,w24,#2 - add w23,w23,w17 // h+=Ch(e,f,g) - eor w17,w24,w24,ror#9 - add w23,w23,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w27,w27,w23 // d+=h - eor w28,w28,w25 // Maj(a,b,c) - eor w17,w7,w17,ror#13 // Sigma0(a) - add w23,w23,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w23,w23,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w0,w0 // 13 -#endif - ldp w1,w2,[x1] - add w23,w23,w17 // h+=Sigma0(a) - str w8,[sp,#4] - ror w16,w27,#6 - add w22,w22,w28 // h+=K[i] - eor w8,w27,w27,ror#14 - and w17,w20,w27 - bic w28,w21,w27 - add w22,w22,w0 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w23,w24 // a^b, b^c in next round - eor w16,w16,w8,ror#11 // Sigma1(e) - ror w8,w23,#2 - add w22,w22,w17 // h+=Ch(e,f,g) - eor w17,w23,w23,ror#9 - add w22,w22,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w26,w26,w22 // d+=h - eor w19,w19,w24 // Maj(a,b,c) - eor w17,w8,w17,ror#13 // Sigma0(a) - add w22,w22,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w22,w22,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w1,w1 // 14 -#endif - ldr w6,[sp,#12] - add w22,w22,w17 // h+=Sigma0(a) - str w9,[sp,#8] - ror w16,w26,#6 - add w21,w21,w19 // h+=K[i] - eor w9,w26,w26,ror#14 - and w17,w27,w26 - bic w19,w20,w26 - add w21,w21,w1 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w22,w23 // a^b, b^c in next round - eor w16,w16,w9,ror#11 // Sigma1(e) - ror w9,w22,#2 - add w21,w21,w17 // h+=Ch(e,f,g) - eor w17,w22,w22,ror#9 - add w21,w21,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w25,w25,w21 // d+=h - eor w28,w28,w23 // Maj(a,b,c) - eor w17,w9,w17,ror#13 // Sigma0(a) - add w21,w21,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w21,w21,w17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev w2,w2 // 15 -#endif - ldr w7,[sp,#0] - add w21,w21,w17 // h+=Sigma0(a) - str w10,[sp,#12] - ror w16,w25,#6 - add w20,w20,w28 // h+=K[i] - ror w9,w4,#7 - and w17,w26,w25 - ror w8,w1,#17 - bic w28,w27,w25 - ror w10,w21,#2 - add w20,w20,w2 // h+=X[i] - eor w16,w16,w25,ror#11 - eor w9,w9,w4,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w21,w22 // a^b, b^c in next round - eor w16,w16,w25,ror#25 // Sigma1(e) - eor w10,w10,w21,ror#13 - add w20,w20,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w8,w8,w1,ror#19 - eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) - add w20,w20,w16 // h+=Sigma1(e) - eor w19,w19,w22 // Maj(a,b,c) - eor w17,w10,w21,ror#22 // Sigma0(a) - eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) - add w3,w3,w12 - add w24,w24,w20 // d+=h - add w20,w20,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w3,w3,w9 - add w20,w20,w17 // h+=Sigma0(a) - add w3,w3,w8 -.Loop_16_xx: - ldr w8,[sp,#4] - str w11,[sp,#0] - ror w16,w24,#6 - add w27,w27,w19 // h+=K[i] - ror w10,w5,#7 - and w17,w25,w24 - ror w9,w2,#17 - bic w19,w26,w24 - ror w11,w20,#2 - add w27,w27,w3 // h+=X[i] - eor w16,w16,w24,ror#11 - eor w10,w10,w5,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w20,w21 // a^b, b^c in next round - eor w16,w16,w24,ror#25 // Sigma1(e) - eor w11,w11,w20,ror#13 - add w27,w27,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w9,w9,w2,ror#19 - eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) - add w27,w27,w16 // h+=Sigma1(e) - eor w28,w28,w21 // Maj(a,b,c) - eor w17,w11,w20,ror#22 // Sigma0(a) - eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) - add w4,w4,w13 - add w23,w23,w27 // d+=h - add w27,w27,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w4,w4,w10 - add w27,w27,w17 // h+=Sigma0(a) - add w4,w4,w9 - ldr w9,[sp,#8] - str w12,[sp,#4] - ror w16,w23,#6 - add w26,w26,w28 // h+=K[i] - ror w11,w6,#7 - and w17,w24,w23 - ror w10,w3,#17 - bic w28,w25,w23 - ror w12,w27,#2 - add w26,w26,w4 // h+=X[i] - eor w16,w16,w23,ror#11 - eor w11,w11,w6,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w27,w20 // a^b, b^c in next round - eor w16,w16,w23,ror#25 // Sigma1(e) - eor w12,w12,w27,ror#13 - add w26,w26,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w10,w10,w3,ror#19 - eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) - add w26,w26,w16 // h+=Sigma1(e) - eor w19,w19,w20 // Maj(a,b,c) - eor w17,w12,w27,ror#22 // Sigma0(a) - eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) - add w5,w5,w14 - add w22,w22,w26 // d+=h - add w26,w26,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w5,w5,w11 - add w26,w26,w17 // h+=Sigma0(a) - add w5,w5,w10 - ldr w10,[sp,#12] - str w13,[sp,#8] - ror w16,w22,#6 - add w25,w25,w19 // h+=K[i] - ror w12,w7,#7 - and w17,w23,w22 - ror w11,w4,#17 - bic w19,w24,w22 - ror w13,w26,#2 - add w25,w25,w5 // h+=X[i] - eor w16,w16,w22,ror#11 - eor w12,w12,w7,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w26,w27 // a^b, b^c in next round - eor w16,w16,w22,ror#25 // Sigma1(e) - eor w13,w13,w26,ror#13 - add w25,w25,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w11,w11,w4,ror#19 - eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) - add w25,w25,w16 // h+=Sigma1(e) - eor w28,w28,w27 // Maj(a,b,c) - eor w17,w13,w26,ror#22 // Sigma0(a) - eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) - add w6,w6,w15 - add w21,w21,w25 // d+=h - add w25,w25,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w6,w6,w12 - add w25,w25,w17 // h+=Sigma0(a) - add w6,w6,w11 - ldr w11,[sp,#0] - str w14,[sp,#12] - ror w16,w21,#6 - add w24,w24,w28 // h+=K[i] - ror w13,w8,#7 - and w17,w22,w21 - ror w12,w5,#17 - bic w28,w23,w21 - ror w14,w25,#2 - add w24,w24,w6 // h+=X[i] - eor w16,w16,w21,ror#11 - eor w13,w13,w8,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w25,w26 // a^b, b^c in next round - eor w16,w16,w21,ror#25 // Sigma1(e) - eor w14,w14,w25,ror#13 - add w24,w24,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w12,w12,w5,ror#19 - eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) - add w24,w24,w16 // h+=Sigma1(e) - eor w19,w19,w26 // Maj(a,b,c) - eor w17,w14,w25,ror#22 // Sigma0(a) - eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) - add w7,w7,w0 - add w20,w20,w24 // d+=h - add w24,w24,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w7,w7,w13 - add w24,w24,w17 // h+=Sigma0(a) - add w7,w7,w12 - ldr w12,[sp,#4] - str w15,[sp,#0] - ror w16,w20,#6 - add w23,w23,w19 // h+=K[i] - ror w14,w9,#7 - and w17,w21,w20 - ror w13,w6,#17 - bic w19,w22,w20 - ror w15,w24,#2 - add w23,w23,w7 // h+=X[i] - eor w16,w16,w20,ror#11 - eor w14,w14,w9,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w24,w25 // a^b, b^c in next round - eor w16,w16,w20,ror#25 // Sigma1(e) - eor w15,w15,w24,ror#13 - add w23,w23,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w13,w13,w6,ror#19 - eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) - add w23,w23,w16 // h+=Sigma1(e) - eor w28,w28,w25 // Maj(a,b,c) - eor w17,w15,w24,ror#22 // Sigma0(a) - eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) - add w8,w8,w1 - add w27,w27,w23 // d+=h - add w23,w23,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w8,w8,w14 - add w23,w23,w17 // h+=Sigma0(a) - add w8,w8,w13 - ldr w13,[sp,#8] - str w0,[sp,#4] - ror w16,w27,#6 - add w22,w22,w28 // h+=K[i] - ror w15,w10,#7 - and w17,w20,w27 - ror w14,w7,#17 - bic w28,w21,w27 - ror w0,w23,#2 - add w22,w22,w8 // h+=X[i] - eor w16,w16,w27,ror#11 - eor w15,w15,w10,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w23,w24 // a^b, b^c in next round - eor w16,w16,w27,ror#25 // Sigma1(e) - eor w0,w0,w23,ror#13 - add w22,w22,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w14,w14,w7,ror#19 - eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) - add w22,w22,w16 // h+=Sigma1(e) - eor w19,w19,w24 // Maj(a,b,c) - eor w17,w0,w23,ror#22 // Sigma0(a) - eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) - add w9,w9,w2 - add w26,w26,w22 // d+=h - add w22,w22,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w9,w9,w15 - add w22,w22,w17 // h+=Sigma0(a) - add w9,w9,w14 - ldr w14,[sp,#12] - str w1,[sp,#8] - ror w16,w26,#6 - add w21,w21,w19 // h+=K[i] - ror w0,w11,#7 - and w17,w27,w26 - ror w15,w8,#17 - bic w19,w20,w26 - ror w1,w22,#2 - add w21,w21,w9 // h+=X[i] - eor w16,w16,w26,ror#11 - eor w0,w0,w11,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w22,w23 // a^b, b^c in next round - eor w16,w16,w26,ror#25 // Sigma1(e) - eor w1,w1,w22,ror#13 - add w21,w21,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w15,w15,w8,ror#19 - eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) - add w21,w21,w16 // h+=Sigma1(e) - eor w28,w28,w23 // Maj(a,b,c) - eor w17,w1,w22,ror#22 // Sigma0(a) - eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) - add w10,w10,w3 - add w25,w25,w21 // d+=h - add w21,w21,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w10,w10,w0 - add w21,w21,w17 // h+=Sigma0(a) - add w10,w10,w15 - ldr w15,[sp,#0] - str w2,[sp,#12] - ror w16,w25,#6 - add w20,w20,w28 // h+=K[i] - ror w1,w12,#7 - and w17,w26,w25 - ror w0,w9,#17 - bic w28,w27,w25 - ror w2,w21,#2 - add w20,w20,w10 // h+=X[i] - eor w16,w16,w25,ror#11 - eor w1,w1,w12,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w21,w22 // a^b, b^c in next round - eor w16,w16,w25,ror#25 // Sigma1(e) - eor w2,w2,w21,ror#13 - add w20,w20,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w0,w0,w9,ror#19 - eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) - add w20,w20,w16 // h+=Sigma1(e) - eor w19,w19,w22 // Maj(a,b,c) - eor w17,w2,w21,ror#22 // Sigma0(a) - eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) - add w11,w11,w4 - add w24,w24,w20 // d+=h - add w20,w20,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w11,w11,w1 - add w20,w20,w17 // h+=Sigma0(a) - add w11,w11,w0 - ldr w0,[sp,#4] - str w3,[sp,#0] - ror w16,w24,#6 - add w27,w27,w19 // h+=K[i] - ror w2,w13,#7 - and w17,w25,w24 - ror w1,w10,#17 - bic w19,w26,w24 - ror w3,w20,#2 - add w27,w27,w11 // h+=X[i] - eor w16,w16,w24,ror#11 - eor w2,w2,w13,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w20,w21 // a^b, b^c in next round - eor w16,w16,w24,ror#25 // Sigma1(e) - eor w3,w3,w20,ror#13 - add w27,w27,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w1,w1,w10,ror#19 - eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) - add w27,w27,w16 // h+=Sigma1(e) - eor w28,w28,w21 // Maj(a,b,c) - eor w17,w3,w20,ror#22 // Sigma0(a) - eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) - add w12,w12,w5 - add w23,w23,w27 // d+=h - add w27,w27,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w12,w12,w2 - add w27,w27,w17 // h+=Sigma0(a) - add w12,w12,w1 - ldr w1,[sp,#8] - str w4,[sp,#4] - ror w16,w23,#6 - add w26,w26,w28 // h+=K[i] - ror w3,w14,#7 - and w17,w24,w23 - ror w2,w11,#17 - bic w28,w25,w23 - ror w4,w27,#2 - add w26,w26,w12 // h+=X[i] - eor w16,w16,w23,ror#11 - eor w3,w3,w14,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w27,w20 // a^b, b^c in next round - eor w16,w16,w23,ror#25 // Sigma1(e) - eor w4,w4,w27,ror#13 - add w26,w26,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w2,w2,w11,ror#19 - eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) - add w26,w26,w16 // h+=Sigma1(e) - eor w19,w19,w20 // Maj(a,b,c) - eor w17,w4,w27,ror#22 // Sigma0(a) - eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) - add w13,w13,w6 - add w22,w22,w26 // d+=h - add w26,w26,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w13,w13,w3 - add w26,w26,w17 // h+=Sigma0(a) - add w13,w13,w2 - ldr w2,[sp,#12] - str w5,[sp,#8] - ror w16,w22,#6 - add w25,w25,w19 // h+=K[i] - ror w4,w15,#7 - and w17,w23,w22 - ror w3,w12,#17 - bic w19,w24,w22 - ror w5,w26,#2 - add w25,w25,w13 // h+=X[i] - eor w16,w16,w22,ror#11 - eor w4,w4,w15,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w26,w27 // a^b, b^c in next round - eor w16,w16,w22,ror#25 // Sigma1(e) - eor w5,w5,w26,ror#13 - add w25,w25,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w3,w3,w12,ror#19 - eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) - add w25,w25,w16 // h+=Sigma1(e) - eor w28,w28,w27 // Maj(a,b,c) - eor w17,w5,w26,ror#22 // Sigma0(a) - eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) - add w14,w14,w7 - add w21,w21,w25 // d+=h - add w25,w25,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w14,w14,w4 - add w25,w25,w17 // h+=Sigma0(a) - add w14,w14,w3 - ldr w3,[sp,#0] - str w6,[sp,#12] - ror w16,w21,#6 - add w24,w24,w28 // h+=K[i] - ror w5,w0,#7 - and w17,w22,w21 - ror w4,w13,#17 - bic w28,w23,w21 - ror w6,w25,#2 - add w24,w24,w14 // h+=X[i] - eor w16,w16,w21,ror#11 - eor w5,w5,w0,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w25,w26 // a^b, b^c in next round - eor w16,w16,w21,ror#25 // Sigma1(e) - eor w6,w6,w25,ror#13 - add w24,w24,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w4,w4,w13,ror#19 - eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) - add w24,w24,w16 // h+=Sigma1(e) - eor w19,w19,w26 // Maj(a,b,c) - eor w17,w6,w25,ror#22 // Sigma0(a) - eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) - add w15,w15,w8 - add w20,w20,w24 // d+=h - add w24,w24,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w15,w15,w5 - add w24,w24,w17 // h+=Sigma0(a) - add w15,w15,w4 - ldr w4,[sp,#4] - str w7,[sp,#0] - ror w16,w20,#6 - add w23,w23,w19 // h+=K[i] - ror w6,w1,#7 - and w17,w21,w20 - ror w5,w14,#17 - bic w19,w22,w20 - ror w7,w24,#2 - add w23,w23,w15 // h+=X[i] - eor w16,w16,w20,ror#11 - eor w6,w6,w1,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w24,w25 // a^b, b^c in next round - eor w16,w16,w20,ror#25 // Sigma1(e) - eor w7,w7,w24,ror#13 - add w23,w23,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w5,w5,w14,ror#19 - eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) - add w23,w23,w16 // h+=Sigma1(e) - eor w28,w28,w25 // Maj(a,b,c) - eor w17,w7,w24,ror#22 // Sigma0(a) - eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) - add w0,w0,w9 - add w27,w27,w23 // d+=h - add w23,w23,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w0,w0,w6 - add w23,w23,w17 // h+=Sigma0(a) - add w0,w0,w5 - ldr w5,[sp,#8] - str w8,[sp,#4] - ror w16,w27,#6 - add w22,w22,w28 // h+=K[i] - ror w7,w2,#7 - and w17,w20,w27 - ror w6,w15,#17 - bic w28,w21,w27 - ror w8,w23,#2 - add w22,w22,w0 // h+=X[i] - eor w16,w16,w27,ror#11 - eor w7,w7,w2,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w23,w24 // a^b, b^c in next round - eor w16,w16,w27,ror#25 // Sigma1(e) - eor w8,w8,w23,ror#13 - add w22,w22,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w6,w6,w15,ror#19 - eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) - add w22,w22,w16 // h+=Sigma1(e) - eor w19,w19,w24 // Maj(a,b,c) - eor w17,w8,w23,ror#22 // Sigma0(a) - eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) - add w1,w1,w10 - add w26,w26,w22 // d+=h - add w22,w22,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w1,w1,w7 - add w22,w22,w17 // h+=Sigma0(a) - add w1,w1,w6 - ldr w6,[sp,#12] - str w9,[sp,#8] - ror w16,w26,#6 - add w21,w21,w19 // h+=K[i] - ror w8,w3,#7 - and w17,w27,w26 - ror w7,w0,#17 - bic w19,w20,w26 - ror w9,w22,#2 - add w21,w21,w1 // h+=X[i] - eor w16,w16,w26,ror#11 - eor w8,w8,w3,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w22,w23 // a^b, b^c in next round - eor w16,w16,w26,ror#25 // Sigma1(e) - eor w9,w9,w22,ror#13 - add w21,w21,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w7,w7,w0,ror#19 - eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) - add w21,w21,w16 // h+=Sigma1(e) - eor w28,w28,w23 // Maj(a,b,c) - eor w17,w9,w22,ror#22 // Sigma0(a) - eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) - add w2,w2,w11 - add w25,w25,w21 // d+=h - add w21,w21,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w2,w2,w8 - add w21,w21,w17 // h+=Sigma0(a) - add w2,w2,w7 - ldr w7,[sp,#0] - str w10,[sp,#12] - ror w16,w25,#6 - add w20,w20,w28 // h+=K[i] - ror w9,w4,#7 - and w17,w26,w25 - ror w8,w1,#17 - bic w28,w27,w25 - ror w10,w21,#2 - add w20,w20,w2 // h+=X[i] - eor w16,w16,w25,ror#11 - eor w9,w9,w4,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w21,w22 // a^b, b^c in next round - eor w16,w16,w25,ror#25 // Sigma1(e) - eor w10,w10,w21,ror#13 - add w20,w20,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w8,w8,w1,ror#19 - eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) - add w20,w20,w16 // h+=Sigma1(e) - eor w19,w19,w22 // Maj(a,b,c) - eor w17,w10,w21,ror#22 // Sigma0(a) - eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) - add w3,w3,w12 - add w24,w24,w20 // d+=h - add w20,w20,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w3,w3,w9 - add w20,w20,w17 // h+=Sigma0(a) - add w3,w3,w8 - cbnz w19,.Loop_16_xx - - ldp x0,x2,[x29,#96] - ldr x1,[x29,#112] - sub x30,x30,#260 // rewind - - ldp w3,w4,[x0] - ldp w5,w6,[x0,#2*4] - add x1,x1,#14*4 // advance input pointer - ldp w7,w8,[x0,#4*4] - add w20,w20,w3 - ldp w9,w10,[x0,#6*4] - add w21,w21,w4 - add w22,w22,w5 - add w23,w23,w6 - stp w20,w21,[x0] - add w24,w24,w7 - add w25,w25,w8 - stp w22,w23,[x0,#2*4] - add w26,w26,w9 - add w27,w27,w10 - cmp x1,x2 - stp w24,w25,[x0,#4*4] - stp w26,w27,[x0,#6*4] - b.ne .Loop - - ldp x19,x20,[x29,#16] - add sp,sp,#4*4 - ldp x21,x22,[x29,#32] - ldp x23,x24,[x29,#48] - ldp x25,x26,[x29,#64] - ldp x27,x28,[x29,#80] - ldp x29,x30,[sp],#128 - ret -.size sha256_block_data_order,.-sha256_block_data_order - -.align 6 -.type K256,%object -K256: - .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 - .long 0 //terminator -.size K256,.-K256 -.align 3 -.LOPENSSL_armcap_P: - .quad OPENSSL_armcap_P-. -.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by " -.align 2 -.type sha256_block_armv8,%function -.align 6 -sha256_block_armv8: -.Lv8_entry: - stp x29,x30,[sp,#-16]! - add x29,sp,#0 - - ld1 {v0.4s,v1.4s},[x0] - adr x3,K256 - -.Loop_hw: - ld1 {v4.16b-v7.16b},[x1],#64 - sub x2,x2,#1 - ld1 {v16.4s},[x3],#16 - rev32 v4.16b,v4.16b - rev32 v5.16b,v5.16b - rev32 v6.16b,v6.16b - rev32 v7.16b,v7.16b - orr v18.16b,v0.16b,v0.16b // offload - orr v19.16b,v1.16b,v1.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v4.4s - .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v5.4s - .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v6.4s - .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v7.4s - .inst 0x5e282887 //sha256su0 v7.16b,v4.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v4.4s - .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v5.4s - .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v6.4s - .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v7.4s - .inst 0x5e282887 //sha256su0 v7.16b,v4.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v4.4s - .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v5.4s - .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v6.4s - .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v7.4s - .inst 0x5e282887 //sha256su0 v7.16b,v4.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v4.4s - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v5.4s - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - - ld1 {v17.4s},[x3] - add v16.4s,v16.4s,v6.4s - sub x3,x3,#64*4-16 // rewind - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - - add v17.4s,v17.4s,v7.4s - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - - add v0.4s,v0.4s,v18.4s - add v1.4s,v1.4s,v19.4s - - cbnz x2,.Loop_hw - - st1 {v0.4s,v1.4s},[x0] - - ldr x29,[sp],#16 - ret -.size sha256_block_armv8,.-sha256_block_armv8 -.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.pl b/app/openssl/crypto/sha/asm/sha512-armv4.pl index 71aa9356..7faf37b1 100644 --- a/app/openssl/crypto/sha/asm/sha512-armv4.pl +++ b/app/openssl/crypto/sha/asm/sha512-armv4.pl @@ -565,7 +565,7 @@ $code.=<<___; bne .Loop_neon vldmia sp!,{d8-d15} @ epilogue - ret @ bx lr + bx lr #endif ___ } @@ -578,6 +578,5 @@ ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -$code =~ s/\bret\b/bx lr/gm; print $code; close STDOUT; # enforce flush diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.s b/app/openssl/crypto/sha/asm/sha512-armv4.s index fd462771..57301922 100644 --- a/app/openssl/crypto/sha/asm/sha512-armv4.s +++ b/app/openssl/crypto/sha/asm/sha512-armv4.s @@ -1775,7 +1775,7 @@ sha512_block_data_order: bne .Loop_neon vldmia sp!,{d8-d15} @ epilogue - bx lr @ .word 0xe12fff1e + .word 0xe12fff1e #endif .size sha512_block_data_order,.-sha512_block_data_order .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " diff --git a/app/openssl/crypto/sha/asm/sha512-armv8.S b/app/openssl/crypto/sha/asm/sha512-armv8.S deleted file mode 100644 index 6b0d1940..00000000 --- a/app/openssl/crypto/sha/asm/sha512-armv8.S +++ /dev/null @@ -1,1021 +0,0 @@ -#include "arm_arch.h" - -.text - -.globl sha512_block_data_order -.type sha512_block_data_order,%function -.align 6 -sha512_block_data_order: - stp x29,x30,[sp,#-128]! - add x29,sp,#0 - - stp x19,x20,[sp,#16] - stp x21,x22,[sp,#32] - stp x23,x24,[sp,#48] - stp x25,x26,[sp,#64] - stp x27,x28,[sp,#80] - sub sp,sp,#4*8 - - ldp x20,x21,[x0] // load context - ldp x22,x23,[x0,#2*8] - ldp x24,x25,[x0,#4*8] - add x2,x1,x2,lsl#7 // end of input - ldp x26,x27,[x0,#6*8] - adr x30,K512 - stp x0,x2,[x29,#96] - -.Loop: - ldp x3,x4,[x1],#2*8 - ldr x19,[x30],#8 // *K++ - eor x28,x21,x22 // magic seed - str x1,[x29,#112] -#ifndef __ARMEB__ - rev x3,x3 // 0 -#endif - ror x16,x24,#14 - add x27,x27,x19 // h+=K[i] - eor x6,x24,x24,ror#23 - and x17,x25,x24 - bic x19,x26,x24 - add x27,x27,x3 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x20,x21 // a^b, b^c in next round - eor x16,x16,x6,ror#18 // Sigma1(e) - ror x6,x20,#28 - add x27,x27,x17 // h+=Ch(e,f,g) - eor x17,x20,x20,ror#5 - add x27,x27,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x23,x23,x27 // d+=h - eor x28,x28,x21 // Maj(a,b,c) - eor x17,x6,x17,ror#34 // Sigma0(a) - add x27,x27,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x27,x27,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x4,x4 // 1 -#endif - ldp x5,x6,[x1],#2*8 - add x27,x27,x17 // h+=Sigma0(a) - ror x16,x23,#14 - add x26,x26,x28 // h+=K[i] - eor x7,x23,x23,ror#23 - and x17,x24,x23 - bic x28,x25,x23 - add x26,x26,x4 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x27,x20 // a^b, b^c in next round - eor x16,x16,x7,ror#18 // Sigma1(e) - ror x7,x27,#28 - add x26,x26,x17 // h+=Ch(e,f,g) - eor x17,x27,x27,ror#5 - add x26,x26,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x22,x22,x26 // d+=h - eor x19,x19,x20 // Maj(a,b,c) - eor x17,x7,x17,ror#34 // Sigma0(a) - add x26,x26,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x26,x26,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x5,x5 // 2 -#endif - add x26,x26,x17 // h+=Sigma0(a) - ror x16,x22,#14 - add x25,x25,x19 // h+=K[i] - eor x8,x22,x22,ror#23 - and x17,x23,x22 - bic x19,x24,x22 - add x25,x25,x5 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x26,x27 // a^b, b^c in next round - eor x16,x16,x8,ror#18 // Sigma1(e) - ror x8,x26,#28 - add x25,x25,x17 // h+=Ch(e,f,g) - eor x17,x26,x26,ror#5 - add x25,x25,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x21,x21,x25 // d+=h - eor x28,x28,x27 // Maj(a,b,c) - eor x17,x8,x17,ror#34 // Sigma0(a) - add x25,x25,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x25,x25,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x6,x6 // 3 -#endif - ldp x7,x8,[x1],#2*8 - add x25,x25,x17 // h+=Sigma0(a) - ror x16,x21,#14 - add x24,x24,x28 // h+=K[i] - eor x9,x21,x21,ror#23 - and x17,x22,x21 - bic x28,x23,x21 - add x24,x24,x6 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x25,x26 // a^b, b^c in next round - eor x16,x16,x9,ror#18 // Sigma1(e) - ror x9,x25,#28 - add x24,x24,x17 // h+=Ch(e,f,g) - eor x17,x25,x25,ror#5 - add x24,x24,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x20,x20,x24 // d+=h - eor x19,x19,x26 // Maj(a,b,c) - eor x17,x9,x17,ror#34 // Sigma0(a) - add x24,x24,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x24,x24,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x7,x7 // 4 -#endif - add x24,x24,x17 // h+=Sigma0(a) - ror x16,x20,#14 - add x23,x23,x19 // h+=K[i] - eor x10,x20,x20,ror#23 - and x17,x21,x20 - bic x19,x22,x20 - add x23,x23,x7 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x24,x25 // a^b, b^c in next round - eor x16,x16,x10,ror#18 // Sigma1(e) - ror x10,x24,#28 - add x23,x23,x17 // h+=Ch(e,f,g) - eor x17,x24,x24,ror#5 - add x23,x23,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x27,x27,x23 // d+=h - eor x28,x28,x25 // Maj(a,b,c) - eor x17,x10,x17,ror#34 // Sigma0(a) - add x23,x23,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x23,x23,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x8,x8 // 5 -#endif - ldp x9,x10,[x1],#2*8 - add x23,x23,x17 // h+=Sigma0(a) - ror x16,x27,#14 - add x22,x22,x28 // h+=K[i] - eor x11,x27,x27,ror#23 - and x17,x20,x27 - bic x28,x21,x27 - add x22,x22,x8 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x23,x24 // a^b, b^c in next round - eor x16,x16,x11,ror#18 // Sigma1(e) - ror x11,x23,#28 - add x22,x22,x17 // h+=Ch(e,f,g) - eor x17,x23,x23,ror#5 - add x22,x22,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x26,x26,x22 // d+=h - eor x19,x19,x24 // Maj(a,b,c) - eor x17,x11,x17,ror#34 // Sigma0(a) - add x22,x22,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x22,x22,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x9,x9 // 6 -#endif - add x22,x22,x17 // h+=Sigma0(a) - ror x16,x26,#14 - add x21,x21,x19 // h+=K[i] - eor x12,x26,x26,ror#23 - and x17,x27,x26 - bic x19,x20,x26 - add x21,x21,x9 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x22,x23 // a^b, b^c in next round - eor x16,x16,x12,ror#18 // Sigma1(e) - ror x12,x22,#28 - add x21,x21,x17 // h+=Ch(e,f,g) - eor x17,x22,x22,ror#5 - add x21,x21,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x25,x25,x21 // d+=h - eor x28,x28,x23 // Maj(a,b,c) - eor x17,x12,x17,ror#34 // Sigma0(a) - add x21,x21,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x21,x21,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x10,x10 // 7 -#endif - ldp x11,x12,[x1],#2*8 - add x21,x21,x17 // h+=Sigma0(a) - ror x16,x25,#14 - add x20,x20,x28 // h+=K[i] - eor x13,x25,x25,ror#23 - and x17,x26,x25 - bic x28,x27,x25 - add x20,x20,x10 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x21,x22 // a^b, b^c in next round - eor x16,x16,x13,ror#18 // Sigma1(e) - ror x13,x21,#28 - add x20,x20,x17 // h+=Ch(e,f,g) - eor x17,x21,x21,ror#5 - add x20,x20,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x24,x24,x20 // d+=h - eor x19,x19,x22 // Maj(a,b,c) - eor x17,x13,x17,ror#34 // Sigma0(a) - add x20,x20,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x20,x20,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x11,x11 // 8 -#endif - add x20,x20,x17 // h+=Sigma0(a) - ror x16,x24,#14 - add x27,x27,x19 // h+=K[i] - eor x14,x24,x24,ror#23 - and x17,x25,x24 - bic x19,x26,x24 - add x27,x27,x11 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x20,x21 // a^b, b^c in next round - eor x16,x16,x14,ror#18 // Sigma1(e) - ror x14,x20,#28 - add x27,x27,x17 // h+=Ch(e,f,g) - eor x17,x20,x20,ror#5 - add x27,x27,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x23,x23,x27 // d+=h - eor x28,x28,x21 // Maj(a,b,c) - eor x17,x14,x17,ror#34 // Sigma0(a) - add x27,x27,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x27,x27,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x12,x12 // 9 -#endif - ldp x13,x14,[x1],#2*8 - add x27,x27,x17 // h+=Sigma0(a) - ror x16,x23,#14 - add x26,x26,x28 // h+=K[i] - eor x15,x23,x23,ror#23 - and x17,x24,x23 - bic x28,x25,x23 - add x26,x26,x12 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x27,x20 // a^b, b^c in next round - eor x16,x16,x15,ror#18 // Sigma1(e) - ror x15,x27,#28 - add x26,x26,x17 // h+=Ch(e,f,g) - eor x17,x27,x27,ror#5 - add x26,x26,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x22,x22,x26 // d+=h - eor x19,x19,x20 // Maj(a,b,c) - eor x17,x15,x17,ror#34 // Sigma0(a) - add x26,x26,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x26,x26,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x13,x13 // 10 -#endif - add x26,x26,x17 // h+=Sigma0(a) - ror x16,x22,#14 - add x25,x25,x19 // h+=K[i] - eor x0,x22,x22,ror#23 - and x17,x23,x22 - bic x19,x24,x22 - add x25,x25,x13 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x26,x27 // a^b, b^c in next round - eor x16,x16,x0,ror#18 // Sigma1(e) - ror x0,x26,#28 - add x25,x25,x17 // h+=Ch(e,f,g) - eor x17,x26,x26,ror#5 - add x25,x25,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x21,x21,x25 // d+=h - eor x28,x28,x27 // Maj(a,b,c) - eor x17,x0,x17,ror#34 // Sigma0(a) - add x25,x25,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x25,x25,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x14,x14 // 11 -#endif - ldp x15,x0,[x1],#2*8 - add x25,x25,x17 // h+=Sigma0(a) - str x6,[sp,#24] - ror x16,x21,#14 - add x24,x24,x28 // h+=K[i] - eor x6,x21,x21,ror#23 - and x17,x22,x21 - bic x28,x23,x21 - add x24,x24,x14 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x25,x26 // a^b, b^c in next round - eor x16,x16,x6,ror#18 // Sigma1(e) - ror x6,x25,#28 - add x24,x24,x17 // h+=Ch(e,f,g) - eor x17,x25,x25,ror#5 - add x24,x24,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x20,x20,x24 // d+=h - eor x19,x19,x26 // Maj(a,b,c) - eor x17,x6,x17,ror#34 // Sigma0(a) - add x24,x24,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x24,x24,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x15,x15 // 12 -#endif - add x24,x24,x17 // h+=Sigma0(a) - str x7,[sp,#0] - ror x16,x20,#14 - add x23,x23,x19 // h+=K[i] - eor x7,x20,x20,ror#23 - and x17,x21,x20 - bic x19,x22,x20 - add x23,x23,x15 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x24,x25 // a^b, b^c in next round - eor x16,x16,x7,ror#18 // Sigma1(e) - ror x7,x24,#28 - add x23,x23,x17 // h+=Ch(e,f,g) - eor x17,x24,x24,ror#5 - add x23,x23,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x27,x27,x23 // d+=h - eor x28,x28,x25 // Maj(a,b,c) - eor x17,x7,x17,ror#34 // Sigma0(a) - add x23,x23,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x23,x23,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x0,x0 // 13 -#endif - ldp x1,x2,[x1] - add x23,x23,x17 // h+=Sigma0(a) - str x8,[sp,#8] - ror x16,x27,#14 - add x22,x22,x28 // h+=K[i] - eor x8,x27,x27,ror#23 - and x17,x20,x27 - bic x28,x21,x27 - add x22,x22,x0 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x23,x24 // a^b, b^c in next round - eor x16,x16,x8,ror#18 // Sigma1(e) - ror x8,x23,#28 - add x22,x22,x17 // h+=Ch(e,f,g) - eor x17,x23,x23,ror#5 - add x22,x22,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x26,x26,x22 // d+=h - eor x19,x19,x24 // Maj(a,b,c) - eor x17,x8,x17,ror#34 // Sigma0(a) - add x22,x22,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x22,x22,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x1,x1 // 14 -#endif - ldr x6,[sp,#24] - add x22,x22,x17 // h+=Sigma0(a) - str x9,[sp,#16] - ror x16,x26,#14 - add x21,x21,x19 // h+=K[i] - eor x9,x26,x26,ror#23 - and x17,x27,x26 - bic x19,x20,x26 - add x21,x21,x1 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x22,x23 // a^b, b^c in next round - eor x16,x16,x9,ror#18 // Sigma1(e) - ror x9,x22,#28 - add x21,x21,x17 // h+=Ch(e,f,g) - eor x17,x22,x22,ror#5 - add x21,x21,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x25,x25,x21 // d+=h - eor x28,x28,x23 // Maj(a,b,c) - eor x17,x9,x17,ror#34 // Sigma0(a) - add x21,x21,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x21,x21,x17 // h+=Sigma0(a) -#ifndef __ARMEB__ - rev x2,x2 // 15 -#endif - ldr x7,[sp,#0] - add x21,x21,x17 // h+=Sigma0(a) - str x10,[sp,#24] - ror x16,x25,#14 - add x20,x20,x28 // h+=K[i] - ror x9,x4,#1 - and x17,x26,x25 - ror x8,x1,#19 - bic x28,x27,x25 - ror x10,x21,#28 - add x20,x20,x2 // h+=X[i] - eor x16,x16,x25,ror#18 - eor x9,x9,x4,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x21,x22 // a^b, b^c in next round - eor x16,x16,x25,ror#41 // Sigma1(e) - eor x10,x10,x21,ror#34 - add x20,x20,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x8,x8,x1,ror#61 - eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) - add x20,x20,x16 // h+=Sigma1(e) - eor x19,x19,x22 // Maj(a,b,c) - eor x17,x10,x21,ror#39 // Sigma0(a) - eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) - add x3,x3,x12 - add x24,x24,x20 // d+=h - add x20,x20,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x3,x3,x9 - add x20,x20,x17 // h+=Sigma0(a) - add x3,x3,x8 -.Loop_16_xx: - ldr x8,[sp,#8] - str x11,[sp,#0] - ror x16,x24,#14 - add x27,x27,x19 // h+=K[i] - ror x10,x5,#1 - and x17,x25,x24 - ror x9,x2,#19 - bic x19,x26,x24 - ror x11,x20,#28 - add x27,x27,x3 // h+=X[i] - eor x16,x16,x24,ror#18 - eor x10,x10,x5,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x20,x21 // a^b, b^c in next round - eor x16,x16,x24,ror#41 // Sigma1(e) - eor x11,x11,x20,ror#34 - add x27,x27,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x9,x9,x2,ror#61 - eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) - add x27,x27,x16 // h+=Sigma1(e) - eor x28,x28,x21 // Maj(a,b,c) - eor x17,x11,x20,ror#39 // Sigma0(a) - eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) - add x4,x4,x13 - add x23,x23,x27 // d+=h - add x27,x27,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x4,x4,x10 - add x27,x27,x17 // h+=Sigma0(a) - add x4,x4,x9 - ldr x9,[sp,#16] - str x12,[sp,#8] - ror x16,x23,#14 - add x26,x26,x28 // h+=K[i] - ror x11,x6,#1 - and x17,x24,x23 - ror x10,x3,#19 - bic x28,x25,x23 - ror x12,x27,#28 - add x26,x26,x4 // h+=X[i] - eor x16,x16,x23,ror#18 - eor x11,x11,x6,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x27,x20 // a^b, b^c in next round - eor x16,x16,x23,ror#41 // Sigma1(e) - eor x12,x12,x27,ror#34 - add x26,x26,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x10,x10,x3,ror#61 - eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) - add x26,x26,x16 // h+=Sigma1(e) - eor x19,x19,x20 // Maj(a,b,c) - eor x17,x12,x27,ror#39 // Sigma0(a) - eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) - add x5,x5,x14 - add x22,x22,x26 // d+=h - add x26,x26,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x5,x5,x11 - add x26,x26,x17 // h+=Sigma0(a) - add x5,x5,x10 - ldr x10,[sp,#24] - str x13,[sp,#16] - ror x16,x22,#14 - add x25,x25,x19 // h+=K[i] - ror x12,x7,#1 - and x17,x23,x22 - ror x11,x4,#19 - bic x19,x24,x22 - ror x13,x26,#28 - add x25,x25,x5 // h+=X[i] - eor x16,x16,x22,ror#18 - eor x12,x12,x7,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x26,x27 // a^b, b^c in next round - eor x16,x16,x22,ror#41 // Sigma1(e) - eor x13,x13,x26,ror#34 - add x25,x25,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x11,x11,x4,ror#61 - eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) - add x25,x25,x16 // h+=Sigma1(e) - eor x28,x28,x27 // Maj(a,b,c) - eor x17,x13,x26,ror#39 // Sigma0(a) - eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) - add x6,x6,x15 - add x21,x21,x25 // d+=h - add x25,x25,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x6,x6,x12 - add x25,x25,x17 // h+=Sigma0(a) - add x6,x6,x11 - ldr x11,[sp,#0] - str x14,[sp,#24] - ror x16,x21,#14 - add x24,x24,x28 // h+=K[i] - ror x13,x8,#1 - and x17,x22,x21 - ror x12,x5,#19 - bic x28,x23,x21 - ror x14,x25,#28 - add x24,x24,x6 // h+=X[i] - eor x16,x16,x21,ror#18 - eor x13,x13,x8,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x25,x26 // a^b, b^c in next round - eor x16,x16,x21,ror#41 // Sigma1(e) - eor x14,x14,x25,ror#34 - add x24,x24,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x12,x12,x5,ror#61 - eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) - add x24,x24,x16 // h+=Sigma1(e) - eor x19,x19,x26 // Maj(a,b,c) - eor x17,x14,x25,ror#39 // Sigma0(a) - eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) - add x7,x7,x0 - add x20,x20,x24 // d+=h - add x24,x24,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x7,x7,x13 - add x24,x24,x17 // h+=Sigma0(a) - add x7,x7,x12 - ldr x12,[sp,#8] - str x15,[sp,#0] - ror x16,x20,#14 - add x23,x23,x19 // h+=K[i] - ror x14,x9,#1 - and x17,x21,x20 - ror x13,x6,#19 - bic x19,x22,x20 - ror x15,x24,#28 - add x23,x23,x7 // h+=X[i] - eor x16,x16,x20,ror#18 - eor x14,x14,x9,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x24,x25 // a^b, b^c in next round - eor x16,x16,x20,ror#41 // Sigma1(e) - eor x15,x15,x24,ror#34 - add x23,x23,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x13,x13,x6,ror#61 - eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) - add x23,x23,x16 // h+=Sigma1(e) - eor x28,x28,x25 // Maj(a,b,c) - eor x17,x15,x24,ror#39 // Sigma0(a) - eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) - add x8,x8,x1 - add x27,x27,x23 // d+=h - add x23,x23,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x8,x8,x14 - add x23,x23,x17 // h+=Sigma0(a) - add x8,x8,x13 - ldr x13,[sp,#16] - str x0,[sp,#8] - ror x16,x27,#14 - add x22,x22,x28 // h+=K[i] - ror x15,x10,#1 - and x17,x20,x27 - ror x14,x7,#19 - bic x28,x21,x27 - ror x0,x23,#28 - add x22,x22,x8 // h+=X[i] - eor x16,x16,x27,ror#18 - eor x15,x15,x10,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x23,x24 // a^b, b^c in next round - eor x16,x16,x27,ror#41 // Sigma1(e) - eor x0,x0,x23,ror#34 - add x22,x22,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x14,x14,x7,ror#61 - eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) - add x22,x22,x16 // h+=Sigma1(e) - eor x19,x19,x24 // Maj(a,b,c) - eor x17,x0,x23,ror#39 // Sigma0(a) - eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) - add x9,x9,x2 - add x26,x26,x22 // d+=h - add x22,x22,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x9,x9,x15 - add x22,x22,x17 // h+=Sigma0(a) - add x9,x9,x14 - ldr x14,[sp,#24] - str x1,[sp,#16] - ror x16,x26,#14 - add x21,x21,x19 // h+=K[i] - ror x0,x11,#1 - and x17,x27,x26 - ror x15,x8,#19 - bic x19,x20,x26 - ror x1,x22,#28 - add x21,x21,x9 // h+=X[i] - eor x16,x16,x26,ror#18 - eor x0,x0,x11,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x22,x23 // a^b, b^c in next round - eor x16,x16,x26,ror#41 // Sigma1(e) - eor x1,x1,x22,ror#34 - add x21,x21,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x15,x15,x8,ror#61 - eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) - add x21,x21,x16 // h+=Sigma1(e) - eor x28,x28,x23 // Maj(a,b,c) - eor x17,x1,x22,ror#39 // Sigma0(a) - eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) - add x10,x10,x3 - add x25,x25,x21 // d+=h - add x21,x21,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x10,x10,x0 - add x21,x21,x17 // h+=Sigma0(a) - add x10,x10,x15 - ldr x15,[sp,#0] - str x2,[sp,#24] - ror x16,x25,#14 - add x20,x20,x28 // h+=K[i] - ror x1,x12,#1 - and x17,x26,x25 - ror x0,x9,#19 - bic x28,x27,x25 - ror x2,x21,#28 - add x20,x20,x10 // h+=X[i] - eor x16,x16,x25,ror#18 - eor x1,x1,x12,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x21,x22 // a^b, b^c in next round - eor x16,x16,x25,ror#41 // Sigma1(e) - eor x2,x2,x21,ror#34 - add x20,x20,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x0,x0,x9,ror#61 - eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) - add x20,x20,x16 // h+=Sigma1(e) - eor x19,x19,x22 // Maj(a,b,c) - eor x17,x2,x21,ror#39 // Sigma0(a) - eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) - add x11,x11,x4 - add x24,x24,x20 // d+=h - add x20,x20,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x11,x11,x1 - add x20,x20,x17 // h+=Sigma0(a) - add x11,x11,x0 - ldr x0,[sp,#8] - str x3,[sp,#0] - ror x16,x24,#14 - add x27,x27,x19 // h+=K[i] - ror x2,x13,#1 - and x17,x25,x24 - ror x1,x10,#19 - bic x19,x26,x24 - ror x3,x20,#28 - add x27,x27,x11 // h+=X[i] - eor x16,x16,x24,ror#18 - eor x2,x2,x13,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x20,x21 // a^b, b^c in next round - eor x16,x16,x24,ror#41 // Sigma1(e) - eor x3,x3,x20,ror#34 - add x27,x27,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x1,x1,x10,ror#61 - eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) - add x27,x27,x16 // h+=Sigma1(e) - eor x28,x28,x21 // Maj(a,b,c) - eor x17,x3,x20,ror#39 // Sigma0(a) - eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) - add x12,x12,x5 - add x23,x23,x27 // d+=h - add x27,x27,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x12,x12,x2 - add x27,x27,x17 // h+=Sigma0(a) - add x12,x12,x1 - ldr x1,[sp,#16] - str x4,[sp,#8] - ror x16,x23,#14 - add x26,x26,x28 // h+=K[i] - ror x3,x14,#1 - and x17,x24,x23 - ror x2,x11,#19 - bic x28,x25,x23 - ror x4,x27,#28 - add x26,x26,x12 // h+=X[i] - eor x16,x16,x23,ror#18 - eor x3,x3,x14,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x27,x20 // a^b, b^c in next round - eor x16,x16,x23,ror#41 // Sigma1(e) - eor x4,x4,x27,ror#34 - add x26,x26,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x2,x2,x11,ror#61 - eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) - add x26,x26,x16 // h+=Sigma1(e) - eor x19,x19,x20 // Maj(a,b,c) - eor x17,x4,x27,ror#39 // Sigma0(a) - eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) - add x13,x13,x6 - add x22,x22,x26 // d+=h - add x26,x26,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x13,x13,x3 - add x26,x26,x17 // h+=Sigma0(a) - add x13,x13,x2 - ldr x2,[sp,#24] - str x5,[sp,#16] - ror x16,x22,#14 - add x25,x25,x19 // h+=K[i] - ror x4,x15,#1 - and x17,x23,x22 - ror x3,x12,#19 - bic x19,x24,x22 - ror x5,x26,#28 - add x25,x25,x13 // h+=X[i] - eor x16,x16,x22,ror#18 - eor x4,x4,x15,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x26,x27 // a^b, b^c in next round - eor x16,x16,x22,ror#41 // Sigma1(e) - eor x5,x5,x26,ror#34 - add x25,x25,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x3,x3,x12,ror#61 - eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) - add x25,x25,x16 // h+=Sigma1(e) - eor x28,x28,x27 // Maj(a,b,c) - eor x17,x5,x26,ror#39 // Sigma0(a) - eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) - add x14,x14,x7 - add x21,x21,x25 // d+=h - add x25,x25,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x14,x14,x4 - add x25,x25,x17 // h+=Sigma0(a) - add x14,x14,x3 - ldr x3,[sp,#0] - str x6,[sp,#24] - ror x16,x21,#14 - add x24,x24,x28 // h+=K[i] - ror x5,x0,#1 - and x17,x22,x21 - ror x4,x13,#19 - bic x28,x23,x21 - ror x6,x25,#28 - add x24,x24,x14 // h+=X[i] - eor x16,x16,x21,ror#18 - eor x5,x5,x0,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x25,x26 // a^b, b^c in next round - eor x16,x16,x21,ror#41 // Sigma1(e) - eor x6,x6,x25,ror#34 - add x24,x24,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x4,x4,x13,ror#61 - eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) - add x24,x24,x16 // h+=Sigma1(e) - eor x19,x19,x26 // Maj(a,b,c) - eor x17,x6,x25,ror#39 // Sigma0(a) - eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) - add x15,x15,x8 - add x20,x20,x24 // d+=h - add x24,x24,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x15,x15,x5 - add x24,x24,x17 // h+=Sigma0(a) - add x15,x15,x4 - ldr x4,[sp,#8] - str x7,[sp,#0] - ror x16,x20,#14 - add x23,x23,x19 // h+=K[i] - ror x6,x1,#1 - and x17,x21,x20 - ror x5,x14,#19 - bic x19,x22,x20 - ror x7,x24,#28 - add x23,x23,x15 // h+=X[i] - eor x16,x16,x20,ror#18 - eor x6,x6,x1,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x24,x25 // a^b, b^c in next round - eor x16,x16,x20,ror#41 // Sigma1(e) - eor x7,x7,x24,ror#34 - add x23,x23,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x5,x5,x14,ror#61 - eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) - add x23,x23,x16 // h+=Sigma1(e) - eor x28,x28,x25 // Maj(a,b,c) - eor x17,x7,x24,ror#39 // Sigma0(a) - eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) - add x0,x0,x9 - add x27,x27,x23 // d+=h - add x23,x23,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x0,x0,x6 - add x23,x23,x17 // h+=Sigma0(a) - add x0,x0,x5 - ldr x5,[sp,#16] - str x8,[sp,#8] - ror x16,x27,#14 - add x22,x22,x28 // h+=K[i] - ror x7,x2,#1 - and x17,x20,x27 - ror x6,x15,#19 - bic x28,x21,x27 - ror x8,x23,#28 - add x22,x22,x0 // h+=X[i] - eor x16,x16,x27,ror#18 - eor x7,x7,x2,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x23,x24 // a^b, b^c in next round - eor x16,x16,x27,ror#41 // Sigma1(e) - eor x8,x8,x23,ror#34 - add x22,x22,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x6,x6,x15,ror#61 - eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) - add x22,x22,x16 // h+=Sigma1(e) - eor x19,x19,x24 // Maj(a,b,c) - eor x17,x8,x23,ror#39 // Sigma0(a) - eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) - add x1,x1,x10 - add x26,x26,x22 // d+=h - add x22,x22,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x1,x1,x7 - add x22,x22,x17 // h+=Sigma0(a) - add x1,x1,x6 - ldr x6,[sp,#24] - str x9,[sp,#16] - ror x16,x26,#14 - add x21,x21,x19 // h+=K[i] - ror x8,x3,#1 - and x17,x27,x26 - ror x7,x0,#19 - bic x19,x20,x26 - ror x9,x22,#28 - add x21,x21,x1 // h+=X[i] - eor x16,x16,x26,ror#18 - eor x8,x8,x3,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x22,x23 // a^b, b^c in next round - eor x16,x16,x26,ror#41 // Sigma1(e) - eor x9,x9,x22,ror#34 - add x21,x21,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x7,x7,x0,ror#61 - eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) - add x21,x21,x16 // h+=Sigma1(e) - eor x28,x28,x23 // Maj(a,b,c) - eor x17,x9,x22,ror#39 // Sigma0(a) - eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) - add x2,x2,x11 - add x25,x25,x21 // d+=h - add x21,x21,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x2,x2,x8 - add x21,x21,x17 // h+=Sigma0(a) - add x2,x2,x7 - ldr x7,[sp,#0] - str x10,[sp,#24] - ror x16,x25,#14 - add x20,x20,x28 // h+=K[i] - ror x9,x4,#1 - and x17,x26,x25 - ror x8,x1,#19 - bic x28,x27,x25 - ror x10,x21,#28 - add x20,x20,x2 // h+=X[i] - eor x16,x16,x25,ror#18 - eor x9,x9,x4,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x21,x22 // a^b, b^c in next round - eor x16,x16,x25,ror#41 // Sigma1(e) - eor x10,x10,x21,ror#34 - add x20,x20,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x8,x8,x1,ror#61 - eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) - add x20,x20,x16 // h+=Sigma1(e) - eor x19,x19,x22 // Maj(a,b,c) - eor x17,x10,x21,ror#39 // Sigma0(a) - eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) - add x3,x3,x12 - add x24,x24,x20 // d+=h - add x20,x20,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x3,x3,x9 - add x20,x20,x17 // h+=Sigma0(a) - add x3,x3,x8 - cbnz x19,.Loop_16_xx - - ldp x0,x2,[x29,#96] - ldr x1,[x29,#112] - sub x30,x30,#648 // rewind - - ldp x3,x4,[x0] - ldp x5,x6,[x0,#2*8] - add x1,x1,#14*8 // advance input pointer - ldp x7,x8,[x0,#4*8] - add x20,x20,x3 - ldp x9,x10,[x0,#6*8] - add x21,x21,x4 - add x22,x22,x5 - add x23,x23,x6 - stp x20,x21,[x0] - add x24,x24,x7 - add x25,x25,x8 - stp x22,x23,[x0,#2*8] - add x26,x26,x9 - add x27,x27,x10 - cmp x1,x2 - stp x24,x25,[x0,#4*8] - stp x26,x27,[x0,#6*8] - b.ne .Loop - - ldp x19,x20,[x29,#16] - add sp,sp,#4*8 - ldp x21,x22,[x29,#32] - ldp x23,x24,[x29,#48] - ldp x25,x26,[x29,#64] - ldp x27,x28,[x29,#80] - ldp x29,x30,[sp],#128 - ret -.size sha512_block_data_order,.-sha512_block_data_order - -.align 6 -.type K512,%object -K512: - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 - .quad 0 // terminator -.size K512,.-K512 -.align 3 -.LOPENSSL_armcap_P: - .quad OPENSSL_armcap_P-. -.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by " -.align 2 -.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha512-armv8.pl b/app/openssl/crypto/sha/asm/sha512-armv8.pl deleted file mode 100644 index 6935ed65..00000000 --- a/app/openssl/crypto/sha/asm/sha512-armv8.pl +++ /dev/null @@ -1,414 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# SHA256/512 for ARMv8. -# -# Performance in cycles per processed byte and improvement coefficient -# over code generated with "default" compiler: -# -# SHA256-hw SHA256(*) SHA512 -# Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) -# Cortex-A5x n/a n/a n/a -# -# (*) Software SHA256 results are of lesser relevance, presented -# mostly for informational purposes. -# (**) The result is a trade-off: it's possible to improve it by -# 10%, but at the cost of 20% loss on Cortex-A5x. - -$flavour=shift; -$output=shift; -open STDOUT,">$output"; - -if ($output =~ /512/) { - $BITS=512; - $SZ=8; - @Sigma0=(28,34,39); - @Sigma1=(14,18,41); - @sigma0=(1, 8, 7); - @sigma1=(19,61, 6); - $rounds=80; - $reg_t="x"; -} else { - $BITS=256; - $SZ=4; - @Sigma0=( 2,13,22); - @Sigma1=( 6,11,25); - @sigma0=( 7,18, 3); - @sigma1=(17,19,10); - $rounds=64; - $reg_t="w"; -} - -$func="sha${BITS}_block_data_order"; - -($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30)); - -@X=map("$reg_t$_",(3..15,0..2)); -@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27)); -($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28)); - -sub BODY_00_xx { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; -my $j=($i+1)&15; -my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]); - $T0=@X[$i+3] if ($i<11); - -$code.=<<___ if ($i<16); -#ifndef __ARMEB__ - rev @X[$i],@X[$i] // $i -#endif -___ -$code.=<<___ if ($i<13 && ($i&1)); - ldp @X[$i+1],@X[$i+2],[$inp],#2*$SZ -___ -$code.=<<___ if ($i==13); - ldp @X[14],@X[15],[$inp] -___ -$code.=<<___ if ($i>=14); - ldr @X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`] -___ -$code.=<<___ if ($i>0 && $i<16); - add $a,$a,$t1 // h+=Sigma0(a) -___ -$code.=<<___ if ($i>=11); - str @X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`] -___ -# While ARMv8 specifies merged rotate-n-logical operation such as -# 'eor x,y,z,ror#n', it was found to negatively affect performance -# on Apple A7. The reason seems to be that it requires even 'y' to -# be available earlier. This means that such merged instruction is -# not necessarily best choice on critical path... On the other hand -# Cortex-A5x handles merged instructions much better than disjoint -# rotate and logical... See (**) footnote above. -$code.=<<___ if ($i<15); - ror $t0,$e,#$Sigma1[0] - add $h,$h,$t2 // h+=K[i] - eor $T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]` - and $t1,$f,$e - bic $t2,$g,$e - add $h,$h,@X[$i&15] // h+=X[i] - orr $t1,$t1,$t2 // Ch(e,f,g) - eor $t2,$a,$b // a^b, b^c in next round - eor $t0,$t0,$T0,ror#$Sigma1[1] // Sigma1(e) - ror $T0,$a,#$Sigma0[0] - add $h,$h,$t1 // h+=Ch(e,f,g) - eor $t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]` - add $h,$h,$t0 // h+=Sigma1(e) - and $t3,$t3,$t2 // (b^c)&=(a^b) - add $d,$d,$h // d+=h - eor $t3,$t3,$b // Maj(a,b,c) - eor $t1,$T0,$t1,ror#$Sigma0[1] // Sigma0(a) - add $h,$h,$t3 // h+=Maj(a,b,c) - ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round - //add $h,$h,$t1 // h+=Sigma0(a) -___ -$code.=<<___ if ($i>=15); - ror $t0,$e,#$Sigma1[0] - add $h,$h,$t2 // h+=K[i] - ror $T1,@X[($j+1)&15],#$sigma0[0] - and $t1,$f,$e - ror $T2,@X[($j+14)&15],#$sigma1[0] - bic $t2,$g,$e - ror $T0,$a,#$Sigma0[0] - add $h,$h,@X[$i&15] // h+=X[i] - eor $t0,$t0,$e,ror#$Sigma1[1] - eor $T1,$T1,@X[($j+1)&15],ror#$sigma0[1] - orr $t1,$t1,$t2 // Ch(e,f,g) - eor $t2,$a,$b // a^b, b^c in next round - eor $t0,$t0,$e,ror#$Sigma1[2] // Sigma1(e) - eor $T0,$T0,$a,ror#$Sigma0[1] - add $h,$h,$t1 // h+=Ch(e,f,g) - and $t3,$t3,$t2 // (b^c)&=(a^b) - eor $T2,$T2,@X[($j+14)&15],ror#$sigma1[1] - eor $T1,$T1,@X[($j+1)&15],lsr#$sigma0[2] // sigma0(X[i+1]) - add $h,$h,$t0 // h+=Sigma1(e) - eor $t3,$t3,$b // Maj(a,b,c) - eor $t1,$T0,$a,ror#$Sigma0[2] // Sigma0(a) - eor $T2,$T2,@X[($j+14)&15],lsr#$sigma1[2] // sigma1(X[i+14]) - add @X[$j],@X[$j],@X[($j+9)&15] - add $d,$d,$h // d+=h - add $h,$h,$t3 // h+=Maj(a,b,c) - ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round - add @X[$j],@X[$j],$T1 - add $h,$h,$t1 // h+=Sigma0(a) - add @X[$j],@X[$j],$T2 -___ - ($t2,$t3)=($t3,$t2); -} - -$code.=<<___; -#include "arm_arch.h" - -.text - -.globl $func -.type $func,%function -.align 6 -$func: -___ -$code.=<<___ if ($SZ==4); - ldr x16,.LOPENSSL_armcap_P - adr x17,.LOPENSSL_armcap_P - add x16,x16,x17 - ldr w16,[x16] - tst w16,#ARMV8_SHA256 - b.ne .Lv8_entry -___ -$code.=<<___; - stp x29,x30,[sp,#-128]! - add x29,sp,#0 - - stp x19,x20,[sp,#16] - stp x21,x22,[sp,#32] - stp x23,x24,[sp,#48] - stp x25,x26,[sp,#64] - stp x27,x28,[sp,#80] - sub sp,sp,#4*$SZ - - ldp $A,$B,[$ctx] // load context - ldp $C,$D,[$ctx,#2*$SZ] - ldp $E,$F,[$ctx,#4*$SZ] - add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input - ldp $G,$H,[$ctx,#6*$SZ] - adr $Ktbl,K$BITS - stp $ctx,$num,[x29,#96] - -.Loop: - ldp @X[0],@X[1],[$inp],#2*$SZ - ldr $t2,[$Ktbl],#$SZ // *K++ - eor $t3,$B,$C // magic seed - str $inp,[x29,#112] -___ -for ($i=0;$i<16;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } -$code.=".Loop_16_xx:\n"; -for (;$i<32;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - cbnz $t2,.Loop_16_xx - - ldp $ctx,$num,[x29,#96] - ldr $inp,[x29,#112] - sub $Ktbl,$Ktbl,#`$SZ*($rounds+1)` // rewind - - ldp @X[0],@X[1],[$ctx] - ldp @X[2],@X[3],[$ctx,#2*$SZ] - add $inp,$inp,#14*$SZ // advance input pointer - ldp @X[4],@X[5],[$ctx,#4*$SZ] - add $A,$A,@X[0] - ldp @X[6],@X[7],[$ctx,#6*$SZ] - add $B,$B,@X[1] - add $C,$C,@X[2] - add $D,$D,@X[3] - stp $A,$B,[$ctx] - add $E,$E,@X[4] - add $F,$F,@X[5] - stp $C,$D,[$ctx,#2*$SZ] - add $G,$G,@X[6] - add $H,$H,@X[7] - cmp $inp,$num - stp $E,$F,[$ctx,#4*$SZ] - stp $G,$H,[$ctx,#6*$SZ] - b.ne .Loop - - ldp x19,x20,[x29,#16] - add sp,sp,#4*$SZ - ldp x21,x22,[x29,#32] - ldp x23,x24,[x29,#48] - ldp x25,x26,[x29,#64] - ldp x27,x28,[x29,#80] - ldp x29,x30,[sp],#128 - ret -.size $func,.-$func - -.align 6 -.type K$BITS,%object -K$BITS: -___ -$code.=<<___ if ($SZ==8); - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 - .quad 0 // terminator -___ -$code.=<<___ if ($SZ==4); - .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 - .long 0 //terminator -___ -$code.=<<___; -.size K$BITS,.-K$BITS -.align 3 -.LOPENSSL_armcap_P: - .quad OPENSSL_armcap_P-. -.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by " -.align 2 -___ - -if ($SZ==4) { -my $Ktbl="x3"; - -my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2)); -my @MSG=map("v$_.16b",(4..7)); -my ($W0,$W1)=("v16.4s","v17.4s"); -my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b"); - -$code.=<<___; -.type sha256_block_armv8,%function -.align 6 -sha256_block_armv8: -.Lv8_entry: - stp x29,x30,[sp,#-16]! - add x29,sp,#0 - - ld1.32 {$ABCD,$EFGH},[$ctx] - adr $Ktbl,K256 - -.Loop_hw: - ld1 {@MSG[0]-@MSG[3]},[$inp],#64 - sub $num,$num,#1 - ld1.32 {$W0},[$Ktbl],#16 - rev32 @MSG[0],@MSG[0] - rev32 @MSG[1],@MSG[1] - rev32 @MSG[2],@MSG[2] - rev32 @MSG[3],@MSG[3] - orr $ABCD_SAVE,$ABCD,$ABCD // offload - orr $EFGH_SAVE,$EFGH,$EFGH -___ -for($i=0;$i<12;$i++) { -$code.=<<___; - ld1.32 {$W1},[$Ktbl],#16 - add.i32 $W0,$W0,@MSG[0] - sha256su0 @MSG[0],@MSG[1] - orr $abcd,$ABCD,$ABCD - sha256h $ABCD,$EFGH,$W0 - sha256h2 $EFGH,$abcd,$W0 - sha256su1 @MSG[0],@MSG[2],@MSG[3] -___ - ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); -} -$code.=<<___; - ld1.32 {$W1},[$Ktbl],#16 - add.i32 $W0,$W0,@MSG[0] - orr $abcd,$ABCD,$ABCD - sha256h $ABCD,$EFGH,$W0 - sha256h2 $EFGH,$abcd,$W0 - - ld1.32 {$W0},[$Ktbl],#16 - add.i32 $W1,$W1,@MSG[1] - orr $abcd,$ABCD,$ABCD - sha256h $ABCD,$EFGH,$W1 - sha256h2 $EFGH,$abcd,$W1 - - ld1.32 {$W1},[$Ktbl] - add.i32 $W0,$W0,@MSG[2] - sub $Ktbl,$Ktbl,#$rounds*$SZ-16 // rewind - orr $abcd,$ABCD,$ABCD - sha256h $ABCD,$EFGH,$W0 - sha256h2 $EFGH,$abcd,$W0 - - add.i32 $W1,$W1,@MSG[3] - orr $abcd,$ABCD,$ABCD - sha256h $ABCD,$EFGH,$W1 - sha256h2 $EFGH,$abcd,$W1 - - add.i32 $ABCD,$ABCD,$ABCD_SAVE - add.i32 $EFGH,$EFGH,$EFGH_SAVE - - cbnz $num,.Loop_hw - - st1.32 {$ABCD,$EFGH},[$ctx] - - ldr x29,[sp],#16 - ret -.size sha256_block_armv8,.-sha256_block_armv8 -___ -} - -$code.=<<___; -.comm OPENSSL_armcap_P,4,4 -___ - -{ my %opcode = ( - "sha256h" => 0x5e004000, "sha256h2" => 0x5e005000, - "sha256su0" => 0x5e282800, "sha256su1" => 0x5e006000 ); - - sub unsha256 { - my ($mnemonic,$arg)=@_; - - $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o - && - sprintf ".inst\t0x%08x\t//%s %s", - $opcode{$mnemonic}|$1|($2<<5)|($3<<16), - $mnemonic,$arg; - } -} - -foreach(split("\n",$code)) { - - s/\`([^\`]*)\`/eval($1)/geo; - - s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo; - - s/\.\w?32\b//o and s/\.16b/\.4s/go; - m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go; - - print $_,"\n"; -} - -close STDOUT; diff --git a/app/openssl/crypto/srp/srp_vfy.c b/app/openssl/crypto/srp/srp_vfy.c index fdca19ff..4a3d13ed 100644 --- a/app/openssl/crypto/srp/srp_vfy.c +++ b/app/openssl/crypto/srp/srp_vfy.c @@ -93,9 +93,6 @@ static int t_fromb64(unsigned char *a, const char *src) else a[i] = loc - b64table; ++i; } - /* if nothing valid to process we have a zero length response */ - if (i == 0) - return 0; size = i; i = size - 1; j = size; diff --git a/app/openssl/crypto/x509v3/v3_purp.c b/app/openssl/crypto/x509v3/v3_purp.c index f59bfc18..ad688657 100644 --- a/app/openssl/crypto/x509v3/v3_purp.c +++ b/app/openssl/crypto/x509v3/v3_purp.c @@ -389,8 +389,8 @@ static void x509v3_cache_extensions(X509 *x) /* Handle proxy certificates */ if((pci=X509_get_ext_d2i(x, NID_proxyCertInfo, NULL, NULL))) { if (x->ex_flags & EXFLAG_CA - || X509_get_ext_by_NID(x, NID_subject_alt_name, -1) >= 0 - || X509_get_ext_by_NID(x, NID_issuer_alt_name, -1) >= 0) { + || X509_get_ext_by_NID(x, NID_subject_alt_name, 0) >= 0 + || X509_get_ext_by_NID(x, NID_issuer_alt_name, 0) >= 0) { x->ex_flags |= EXFLAG_INVALID; } if (pci->pcPathLengthConstraint) { @@ -670,7 +670,7 @@ static int check_purpose_timestamp_sign(const X509_PURPOSE *xp, const X509 *x, return 0; /* Extended Key Usage MUST be critical */ - i_ext = X509_get_ext_by_NID((X509 *) x, NID_ext_key_usage, -1); + i_ext = X509_get_ext_by_NID((X509 *) x, NID_ext_key_usage, 0); if (i_ext >= 0) { X509_EXTENSION *ext = X509_get_ext((X509 *) x, i_ext); diff --git a/app/openssl/import_openssl.sh b/app/openssl/import_openssl.sh index f16596bc..02d2ab1c 100755 --- a/app/openssl/import_openssl.sh +++ b/app/openssl/import_openssl.sh @@ -128,16 +128,7 @@ function default_asm_file () { function gen_asm_arm () { local OUT OUT=$(default_asm_file "$@") - $PERL_EXE "$1" void "$OUT" > "$OUT" -} - -# Generate an ARMv8 64-bit assembly file. -# $1: generator (perl script) -# $2: [optional] output file name -function gen_asm_arm64 () { - local OUT - OUT=$(default_asm_file "$@") - $PERL_EXE "$1" linux64 "$OUT" > "$OUT" + $PERL_EXE "$1" > "$OUT" } function gen_asm_mips () { @@ -186,54 +177,6 @@ function print_autogenerated_header() { echo "#" } -function run_verbose() { - echo Running: $@ - $@ -} - -function scan_opensslconf_for_flags() { - for flag in "$@"; do - awk "/^#define ${flag}$/ { print \$2 }" crypto/opensslconf.h - done -} - -CRYPTO_CONF_FLAGS=( -OPENSSL_CPUID_OBJ -DES_LONG -DES_PTR -DES_RISC1 -DES_RISC2 -DES_UNROLL -RC4_INT -RC4_CHUNK -RC4_INDEX -) - -function check_asm_flags() { - local arch="$1" - local target="$2" - local unsorted_flags - local expected_flags - local actual_flags - local defines="OPENSSL_CRYPTO_DEFINES_$arch" - - PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS $target - - unsorted_flags="$(awk '/^CFLAG=/ { sub(/^CFLAG= .*-Wall /, ""); gsub(/-D/, ""); print; }' Makefile)" - unsorted_flags="$unsorted_flags $(scan_opensslconf_for_flags "${CRYPTO_CONF_FLAGS[@]}")" - - expected_flags="$(echo $unsorted_flags | tr ' ' '\n' | sort | tr '\n' ' ')" - actual_flags="$(echo ${!defines} | tr ' ' '\n' | sort | tr '\n' ' ')" - - if [[ $actual_flags != $expected_flags ]]; then - echo ${defines} is wrong! - echo " $actual_flags" - echo Please update to: - echo " $expected_flags" - exit 1 - fi -} - # Run Configure and generate headers # $1: 32 for 32-bit arch, 64 for 64-bit arch, trusty for Trusty # $2: 1 if building for static version @@ -249,9 +192,9 @@ function generate_build_config_headers() { fi if [[ $1 == trusty ]] ; then - PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS_TRUSTY + PERL=/usr/bin/perl ./Configure $CONFIGURE_ARGS_TRUSTY else - PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS ${!configure_args_bits} ${!configure_args_stat} + PERL=/usr/bin/perl ./Configure $CONFIGURE_ARGS ${!configure_args_bits} ${!configure_args_stat} fi rm -f apps/CA.pl.bak crypto/opensslconf.h.bak @@ -481,16 +424,8 @@ function import() { declare -r OPENSSL_SOURCE=$1 untar $OPENSSL_SOURCE readonly applypatches $OPENSSL_DIR - convert_iso8859_to_utf8 $OPENSSL_DIR cd $OPENSSL_DIR - - # Check the ASM flags for each arch - check_asm_flags arm linux-armv4 - check_asm_flags arm64 linux-aarch64 - check_asm_flags x86 linux-elf - check_asm_flags x86_64 linux-x86_64 - generate_build_config_mk generate_opensslconf_h @@ -508,23 +443,14 @@ function import() { # Generate arm asm gen_asm_arm crypto/aes/asm/aes-armv4.pl - gen_asm_arm crypto/aes/asm/aesv8-armx.pl gen_asm_arm crypto/aes/asm/bsaes-armv7.pl gen_asm_arm crypto/bn/asm/armv4-gf2m.pl gen_asm_arm crypto/bn/asm/armv4-mont.pl gen_asm_arm crypto/modes/asm/ghash-armv4.pl - gen_asm_arm crypto/modes/asm/ghashv8-armx.pl gen_asm_arm crypto/sha/asm/sha1-armv4-large.pl gen_asm_arm crypto/sha/asm/sha256-armv4.pl gen_asm_arm crypto/sha/asm/sha512-armv4.pl - # Generate armv8 asm - gen_asm_arm64 crypto/aes/asm/aesv8-armx.pl crypto/aes/asm/aesv8-armx-64.S - gen_asm_arm64 crypto/modes/asm/ghashv8-armx.pl crypto/modes/asm/ghashv8-armx-64.S - gen_asm_arm64 crypto/sha/asm/sha1-armv8.pl - gen_asm_arm64 crypto/sha/asm/sha512-armv8.pl crypto/sha/asm/sha256-armv8.S - gen_asm_arm64 crypto/sha/asm/sha512-armv8.pl - # Generate mips asm gen_asm_mips crypto/aes/asm/aes-mips.pl gen_asm_mips crypto/bn/asm/mips.pl crypto/bn/asm/bn-mips.S @@ -659,6 +585,7 @@ function untar() { # Process new source tar -zxf $OPENSSL_SOURCE + convert_iso8859_to_utf8 $OPENSSL_DIR cp -RfP $OPENSSL_DIR $OPENSSL_DIR_ORIG if [ ! -z $readonly ]; then find $OPENSSL_DIR_ORIG -type f -print0 | xargs -0 chmod a-w @@ -683,13 +610,12 @@ function applypatches () { cd $dir # Apply appropriate patches - patches=(../patches/[0-9][0-9][0-9][0-9]-*.patch) - for i in "${patches[@]}"; do - if [[ $skip_patch != ${i##*/} ]]; then + for i in $OPENSSL_PATCHES; do + if [ ! "$skip_patch" = "patches/$i" ]; then echo "Applying patch $i" - patch -p1 < $i || die "Could not apply $i. Fix source and run: $0 regenerate patches/${i##*/}" + patch -p1 < ../patches/$i || die "Could not apply patches/$i. Fix source and run: $0 regenerate patches/$i" else - echo "Skiping patch ${i##*/}" + echo "Skiping patch $i" fi done diff --git a/app/openssl/include/openssl/bio.h b/app/openssl/include/openssl/bio.h index d05fa22a..05699ab2 100644 --- a/app/openssl/include/openssl/bio.h +++ b/app/openssl/include/openssl/bio.h @@ -266,9 +266,6 @@ void BIO_clear_flags(BIO *b, int flags); #define BIO_RR_CONNECT 0x02 /* Returned from the accept BIO when an accept would have blocked */ #define BIO_RR_ACCEPT 0x03 -/* Returned from the SSL bio when the channel id retrieval code cannot find the - * private key. */ -#define BIO_RR_SSL_CHANNEL_ID_LOOKUP 0x04 /* These are passed by the BIO callback */ #define BIO_CB_FREE 0x01 diff --git a/app/openssl/include/openssl/opensslconf-32.h b/app/openssl/include/openssl/opensslconf-32.h index caf6f1b8..d6625489 100644 --- a/app/openssl/include/openssl/opensslconf-32.h +++ b/app/openssl/include/openssl/opensslconf-32.h @@ -53,9 +53,6 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -140,9 +137,6 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -# define NO_RIPEMD -# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/include/openssl/opensslconf-64.h b/app/openssl/include/openssl/opensslconf-64.h index 88fb0419..70c5a2cb 100644 --- a/app/openssl/include/openssl/opensslconf-64.h +++ b/app/openssl/include/openssl/opensslconf-64.h @@ -53,9 +53,6 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -140,9 +137,6 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -# define NO_RIPEMD -# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/include/openssl/opensslconf-static-32.h b/app/openssl/include/openssl/opensslconf-static-32.h index caf6f1b8..d6625489 100644 --- a/app/openssl/include/openssl/opensslconf-static-32.h +++ b/app/openssl/include/openssl/opensslconf-static-32.h @@ -53,9 +53,6 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -140,9 +137,6 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -# define NO_RIPEMD -# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/include/openssl/opensslconf-static-64.h b/app/openssl/include/openssl/opensslconf-static-64.h index 88fb0419..70c5a2cb 100644 --- a/app/openssl/include/openssl/opensslconf-static-64.h +++ b/app/openssl/include/openssl/opensslconf-static-64.h @@ -53,9 +53,6 @@ #ifndef OPENSSL_NO_RFC3779 # define OPENSSL_NO_RFC3779 #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif #ifndef OPENSSL_NO_RSAX # define OPENSSL_NO_RSAX #endif @@ -140,9 +137,6 @@ # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) # define NO_RFC3779 # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -# define NO_RIPEMD -# endif # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) # define NO_RSAX # endif diff --git a/app/openssl/include/openssl/opensslv.h b/app/openssl/include/openssl/opensslv.h index c3b6acec..ebe71807 100644 --- a/app/openssl/include/openssl/opensslv.h +++ b/app/openssl/include/openssl/opensslv.h @@ -25,11 +25,11 @@ * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -#define OPENSSL_VERSION_NUMBER 0x1000108fL +#define OPENSSL_VERSION_NUMBER 0x1000107fL #ifdef OPENSSL_FIPS -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h-fips 5 Jun 2014" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g-fips 7 Apr 2014" #else -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h 5 Jun 2014" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g 7 Apr 2014" #endif #define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/app/openssl/include/openssl/pkcs7.h b/app/openssl/include/openssl/pkcs7.h index 04f60379..e4d44319 100644 --- a/app/openssl/include/openssl/pkcs7.h +++ b/app/openssl/include/openssl/pkcs7.h @@ -453,7 +453,6 @@ void ERR_load_PKCS7_strings(void); #define PKCS7_R_ERROR_SETTING_CIPHER 121 #define PKCS7_R_INVALID_MIME_TYPE 131 #define PKCS7_R_INVALID_NULL_POINTER 143 -#define PKCS7_R_INVALID_SIGNED_DATA_TYPE 155 #define PKCS7_R_MIME_NO_CONTENT_TYPE 132 #define PKCS7_R_MIME_PARSE_ERROR 133 #define PKCS7_R_MIME_SIG_PARSE_ERROR 134 diff --git a/app/openssl/include/openssl/ssl.h b/app/openssl/include/openssl/ssl.h index a85841b3..54b0eb6c 100644 --- a/app/openssl/include/openssl/ssl.h +++ b/app/openssl/include/openssl/ssl.h @@ -544,13 +544,6 @@ struct ssl_session_st #ifndef OPENSSL_NO_SRP char *srp_username; #endif - - /* original_handshake_hash contains the handshake hash (either - * SHA-1+MD5 or SHA-2, depending on TLS version) for the original, full - * handshake that created a session. This is used by Channel IDs during - * resumption. */ - unsigned char original_handshake_hash[EVP_MAX_MD_SIZE]; - unsigned int original_handshake_hash_len; }; #endif @@ -560,7 +553,7 @@ struct ssl_session_st /* Allow initial connection to servers that don't support RI */ #define SSL_OP_LEGACY_SERVER_CONNECT 0x00000004L #define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L -#define SSL_OP_TLSEXT_PADDING 0x00000010L +#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x00000010L #define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x00000020L #define SSL_OP_SAFARI_ECDHE_ECDSA_BUG 0x00000040L #define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x00000080L @@ -569,8 +562,6 @@ struct ssl_session_st /* Hasn't done anything since OpenSSL 0.9.7h, retained for compatibility */ #define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0 -/* Refers to ancient SSLREF and SSLv2, retained for compatibility */ -#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x0 /* SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS is vestigial. Previously it disabled the * insertion of empty records in CBC mode, but the empty records were commonly @@ -657,14 +648,12 @@ struct ssl_session_st * TLS only.) "Released" buffers are put onto a free-list in the context * or just freed (depending on the context's setting for freelist_max_len). */ #define SSL_MODE_RELEASE_BUFFERS 0x00000010L - /* Send the current time in the Random fields of the ClientHello and * ServerHello records for compatibility with hypothetical implementations * that require it. */ #define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L #define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L - /* When set, clients may send application data before receipt of CCS * and Finished. This mode enables full-handshakes to 'complete' in * one RTT. */ @@ -877,9 +866,6 @@ struct ssl_ctx_st /* get client cert callback */ int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey); - /* get channel id callback */ - void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey); - /* cookie generate callback */ int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int *cookie_len); @@ -1042,10 +1028,6 @@ struct ssl_ctx_st /* If true, a client will advertise the Channel ID extension and a * server will echo it. */ char tlsext_channel_id_enabled; - /* tlsext_channel_id_enabled_new is a hack to support both old and new - * ChannelID signatures. It indicates that a client should advertise the - * new ChannelID extension number. */ - char tlsext_channel_id_enabled_new; /* The client's Channel ID private key. */ EVP_PKEY *tlsext_channel_id_private; #endif @@ -1104,8 +1086,6 @@ void SSL_CTX_set_info_callback(SSL_CTX *ctx, void (*cb)(const SSL *ssl,int type, void (*SSL_CTX_get_info_callback(SSL_CTX *ctx))(const SSL *ssl,int type,int val); void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey)); int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL *ssl, X509 **x509, EVP_PKEY **pkey); -void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey)); -void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL *ssl, EVP_PKEY **pkey); #ifndef OPENSSL_NO_ENGINE int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e); #endif @@ -1182,14 +1162,12 @@ const char *SSL_get_psk_identity(const SSL *s); #define SSL_WRITING 2 #define SSL_READING 3 #define SSL_X509_LOOKUP 4 -#define SSL_CHANNEL_ID_LOOKUP 5 /* These will only be used when doing non-blocking IO */ #define SSL_want_nothing(s) (SSL_want(s) == SSL_NOTHING) #define SSL_want_read(s) (SSL_want(s) == SSL_READING) #define SSL_want_write(s) (SSL_want(s) == SSL_WRITING) #define SSL_want_x509_lookup(s) (SSL_want(s) == SSL_X509_LOOKUP) -#define SSL_want_channel_id_lookup(s) (SSL_want(s) == SSL_CHANNEL_ID_LOOKUP) #define SSL_MAC_FLAG_READ_MAC_STREAM 1 #define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 @@ -1624,7 +1602,6 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_ERROR_ZERO_RETURN 6 #define SSL_ERROR_WANT_CONNECT 7 #define SSL_ERROR_WANT_ACCEPT 8 -#define SSL_ERROR_WANT_CHANNEL_ID_LOOKUP 9 #define SSL_CTRL_NEED_TMP_RSA 1 #define SSL_CTRL_SET_TMP_RSA 2 @@ -1762,11 +1739,10 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_set_tmp_ecdh(ssl,ecdh) \ SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh) -/* SSL_enable_tls_channel_id either configures a TLS server to accept TLS client - * IDs from clients, or configure a client to send TLS client IDs to server. - * Returns 1 on success. */ -#define SSL_enable_tls_channel_id(s) \ - SSL_ctrl(s,SSL_CTRL_CHANNEL_ID,0,NULL) +/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client + * IDs from clients. Returns 1 on success. */ +#define SSL_enable_tls_channel_id(ctx) \ + SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL) /* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to * compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on * success. */ @@ -1816,7 +1792,7 @@ int SSL_CIPHER_get_bits(const SSL_CIPHER *c,int *alg_bits); char * SSL_CIPHER_get_version(const SSL_CIPHER *c); const char * SSL_CIPHER_get_name(const SSL_CIPHER *c); unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c); -const char * SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); +const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); int SSL_get_fd(const SSL *s); int SSL_get_rfd(const SSL *s); @@ -2731,6 +2707,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_WRONG_VERSION_NUMBER 267 #define SSL_R_X509_LIB 268 #define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS 269 +#define SSL_R_UNEXPECTED_CCS 388 #ifdef __cplusplus } diff --git a/app/openssl/include/openssl/ssl3.h b/app/openssl/include/openssl/ssl3.h index 83d59bff..f205f73d 100644 --- a/app/openssl/include/openssl/ssl3.h +++ b/app/openssl/include/openssl/ssl3.h @@ -388,6 +388,9 @@ typedef struct ssl3_buffer_st #define TLS1_FLAGS_TLS_PADDING_BUG 0x0008 #define TLS1_FLAGS_SKIP_CERT_VERIFY 0x0010 #define TLS1_FLAGS_KEEP_HANDSHAKE 0x0020 +/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at + * this point in the handshake. If this flag is not set then received CCS + * records will cause a fatal error for the connection. */ #define SSL3_FLAGS_CCS_OK 0x0080 /* SSL3_FLAGS_SGC_RESTART_DONE is set when we @@ -555,11 +558,6 @@ typedef struct ssl3_state_st * for Channel IDs and that tlsext_channel_id will be valid after the * handshake. */ char tlsext_channel_id_valid; - /* tlsext_channel_id_new means that the updated Channel ID extension - * was negotiated. This is a temporary hack in the code to support both - * forms of Channel ID extension while we transition to the new format, - * which fixed a security issue. */ - char tlsext_channel_id_new; /* For a server: * If |tlsext_channel_id_valid| is true, then this contains the * verified Channel ID from the client: a P256 point, (x,y), where @@ -680,11 +678,11 @@ typedef struct ssl3_state_st #define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT) +#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT) #ifndef OPENSSL_NO_NEXTPROTONEG #define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT) #define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT) #endif -#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANNEL_ID_A (0x220|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANNEL_ID_B (0x221|SSL_ST_ACCEPT) #define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT) diff --git a/app/openssl/include/openssl/tls1.h b/app/openssl/include/openssl/tls1.h index b9a0899e..ec8948d5 100644 --- a/app/openssl/include/openssl/tls1.h +++ b/app/openssl/include/openssl/tls1.h @@ -259,7 +259,6 @@ extern "C" { /* This is not an IANA defined extension number */ #define TLSEXT_TYPE_channel_id 30031 -#define TLSEXT_TYPE_channel_id_new 30032 /* NameType value from RFC 3546 */ #define TLSEXT_NAMETYPE_host_name 0 @@ -532,11 +531,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 -/* ECDHE PSK ciphersuites from RFC5489 - * SHA-2 cipher suites are omitted because they cannot be used safely with - * SSLv3. */ -#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA 0x0300C035 -#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA 0x0300C036 +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256 0x0300C037 +#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384 0x0300C038 /* XXX * Inconsistency alert: @@ -689,9 +686,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256" #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384" -/* ECDHE PSK ciphersuites from RFC5489 */ -#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA "ECDHE-PSK-AES128-CBC-SHA" -#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA "ECDHE-PSK-AES256-CBC-SHA" +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256 "ECDHE-PSK-WITH-AES-128-CBC-SHA256" +#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384 "ECDHE-PSK-WITH-AES-256-CBC-SHA384" #define TLS_CT_RSA_SIGN 1 #define TLS_CT_DSS_SIGN 2 diff --git a/app/openssl/openssl.config b/app/openssl/openssl.config index 867711fe..aa028705 100644 --- a/app/openssl/openssl.config +++ b/app/openssl/openssl.config @@ -13,7 +13,6 @@ no-md2 \ no-mdc2 \ no-rc5 \ no-rdrand \ -no-ripemd \ no-rfc3779 \ no-rsax \ no-sctp \ @@ -54,7 +53,6 @@ crypto/cast \ crypto/idea \ crypto/md2 \ crypto/rc5 \ -crypto/ripemd \ crypto/seed \ crypto/whrlpool \ demos \ @@ -96,6 +94,7 @@ README.ASN1 \ README.ENGINE \ apps/CA.com \ apps/Makefile \ +apps/Makefile.save \ apps/install-apps.com \ apps/makeapps.com \ apps/openssl-vms.cnf \ @@ -105,10 +104,14 @@ apps/vms_decc_init.c \ config \ crypto/LPdir_vms.c \ crypto/Makefile \ +crypto/Makefile.save \ crypto/aes/Makefile \ +crypto/aes/Makefile.save \ crypto/asn1/Makefile \ +crypto/asn1/Makefile.save \ crypto/bf/INSTALL \ crypto/bf/Makefile \ +crypto/bf/Makefile.save \ crypto/bf/README \ crypto/bf/VERSION \ crypto/bf/asm/readme \ @@ -118,77 +121,117 @@ crypto/bf/bfs.cpp \ crypto/bf/bfspeed.c \ crypto/bf/bftest.c \ crypto/bio/Makefile \ +crypto/bio/Makefile.save \ crypto/bio/bss_rtcp.c \ crypto/bn/Makefile \ +crypto/bn/Makefile.save \ crypto/bn/asm/vms.mar \ crypto/bn/bn_x931p.c \ crypto/bn/vms-helper.c \ crypto/buffer/Makefile \ +crypto/buffer/Makefile.save \ crypto/cmac/Makefile \ +crypto/cmac/Makefile.save \ crypto/cms/Makefile \ +crypto/cms/Makefile.save \ crypto/comp/Makefile \ +crypto/comp/Makefile.save \ crypto/conf/Makefile \ +crypto/conf/Makefile.save \ crypto/crypto-lib.com \ crypto/des/Makefile \ +crypto/des/Makefile.save \ crypto/des/des-lib.com \ crypto/dh/Makefile \ +crypto/dh/Makefile.save \ crypto/dh/dh_prn.c \ crypto/dsa/Makefile \ +crypto/dsa/Makefile.save \ crypto/dso/Makefile \ +crypto/dso/Makefile.save \ crypto/dso/dso_beos.c \ crypto/dso/dso_vms.c \ crypto/dso/dso_win32.c \ crypto/ec/Makefile \ +crypto/ec/Makefile.save \ crypto/ec/ecp_nistp224.c \ crypto/ec/ecp_nistp256.c \ crypto/ec/ecp_nistp521.c \ crypto/ec/ecp_nistputil.c \ crypto/ecdh/Makefile \ +crypto/ecdh/Makefile.save \ crypto/ecdsa/Makefile \ +crypto/ecdsa/Makefile.save \ crypto/engine/Makefile \ +crypto/engine/Makefile.save \ crypto/engine/eng_rdrand.c \ crypto/engine/eng_rsax.c \ crypto/err/Makefile \ +crypto/err/Makefile.save \ crypto/evp/Makefile \ +crypto/evp/Makefile.save \ crypto/evp/evp_fips.c \ crypto/evp/m_md2.c \ crypto/evp/m_sha.c \ crypto/fips_err.h \ crypto/fips_ers.c \ crypto/hmac/Makefile \ +crypto/hmac/Makefile.save \ crypto/install-crypto.com \ crypto/jpake/Makefile \ crypto/krb5/Makefile \ +crypto/krb5/Makefile.save \ crypto/lhash/Makefile \ +crypto/lhash/Makefile.save \ crypto/md4/Makefile \ +crypto/md4/Makefile.save \ crypto/md5/Makefile \ +crypto/md5/Makefile.save \ crypto/mdc2/Makefile \ +crypto/mdc2/Makefile.save \ crypto/modes/Makefile \ +crypto/modes/Makefile.save \ crypto/modes/cts128.c \ crypto/modes/modes.h \ crypto/o_fips.c \ crypto/objects/Makefile \ +crypto/objects/Makefile.save \ crypto/ocsp/Makefile \ +crypto/ocsp/Makefile.save \ crypto/pem/Makefile \ +crypto/pem/Makefile.save \ crypto/pkcs12/Makefile \ +crypto/pkcs12/Makefile.save \ crypto/pkcs7/Makefile \ +crypto/pkcs7/Makefile.save \ crypto/pkcs7/bio_pk7.c \ crypto/ppccap.c \ crypto/pqueue/Makefile \ +crypto/pqueue/Makefile.save \ crypto/rand/Makefile \ +crypto/rand/Makefile.save \ crypto/rand/rand_vms.c \ crypto/rc2/Makefile \ +crypto/rc2/Makefile.save \ crypto/rc4/Makefile \ +crypto/rc4/Makefile.save \ +crypto/ripemd/Makefile \ +crypto/ripemd/Makefile.save \ crypto/rsa/Makefile \ +crypto/rsa/Makefile.save \ crypto/sha/Makefile \ +crypto/sha/Makefile.save \ crypto/sha/sha_one.c \ crypto/srp/Makefile \ +crypto/srp/Makefile.save \ crypto/srp/srptest.c \ crypto/stack/Makefile \ +crypto/stack/Makefile.save \ crypto/store/Makefile \ crypto/threads/pthreads-vms.com \ crypto/threads/win32.bat \ crypto/ts/Makefile \ +crypto/ts/Makefile.save \ crypto/ts/ts.h \ crypto/ts/ts_asn1.c \ crypto/ts/ts_conf.c \ @@ -201,10 +244,14 @@ crypto/ts/ts_rsp_utils.c \ crypto/ts/ts_rsp_verify.c \ crypto/ts/ts_verify_ctx.c \ crypto/txt_db/Makefile \ +crypto/txt_db/Makefile.save \ crypto/ui/Makefile \ +crypto/ui/Makefile.save \ crypto/vms_rms.h crypto/x509/Makefile \ +crypto/x509/Makefile.save \ crypto/x509v3/Makefile \ +crypto/x509v3/Makefile.save \ include/openssl/camellia.h \ include/openssl/cast.h \ include/openssl/idea.h \ @@ -216,11 +263,11 @@ makevms.com \ openssl.doxy \ openssl.spec \ ssl/Makefile \ -ssl/heartbeat_test.c \ +ssl/Makefile.save \ ssl/install-ssl.com \ ssl/ssl-lib.com \ ssl/ssl_task.c \ -" +" NEEDED_SOURCES="\ apps \ @@ -238,24 +285,19 @@ NO_WINDOWS_BRAINDEATH \ " OPENSSL_CRYPTO_DEFINES_arm="\ -AES_ASM \ -BSAES_ASM \ -DES_UNROLL \ -GHASH_ASM \ OPENSSL_BN_ASM_GF2m \ OPENSSL_BN_ASM_MONT \ OPENSSL_CPUID_OBJ \ +GHASH_ASM \ +AES_ASM \ +BSAES_ASM \ SHA1_ASM \ SHA256_ASM \ SHA512_ASM \ " OPENSSL_CRYPTO_DEFINES_arm64="\ -DES_UNROLL \ -OPENSSL_CPUID_OBJ \ -SHA1_ASM \ -SHA256_ASM \ -SHA512_ASM \ +OPENSSL_NO_ASM \ " OPENSSL_CRYPTO_DEFINES_mips="\ @@ -266,40 +308,39 @@ SHA256_ASM \ " OPENSSL_CRYPTO_DEFINES_x86="\ -AES_ASM \ -DES_PTR \ -DES_RISC1 \ -DES_UNROLL \ -GHASH_ASM \ -MD5_ASM \ +OPENSSL_IA32_SSE2 \ OPENSSL_BN_ASM_GF2m \ OPENSSL_BN_ASM_MONT \ OPENSSL_BN_ASM_PART_WORDS \ -OPENSSL_CPUID_OBJ \ -OPENSSL_IA32_SSE2 \ -RC4_INDEX \ -RMD160_ASM \ +AES_ASM \ +VPAES_ASM \ +GHASH_ASM \ SHA1_ASM \ SHA256_ASM \ SHA512_ASM \ -VPAES_ASM \ +MD5_ASM \ +DES_PTR \ +DES_RISC1 \ +DES_UNROLL \ +OPENSSL_CPUID_OBJ \ " OPENSSL_CRYPTO_DEFINES_x86_64="\ -AES_ASM \ -BSAES_ASM \ -DES_UNROLL \ -GHASH_ASM \ -MD5_ASM \ OPENSSL_BN_ASM_GF2m \ OPENSSL_BN_ASM_MONT \ OPENSSL_BN_ASM_MONT5 \ -OPENSSL_CPUID_OBJ \ -OPENSSL_IA32_SSE2 \ +AES_ASM \ +VPAES_ASM \ +BSAES_ASM \ +GHASH_ASM \ SHA1_ASM \ SHA256_ASM \ SHA512_ASM \ -VPAES_ASM \ +MD5_ASM \ +DES_PTR \ +DES_RISC1 \ +DES_UNROLL \ +OPENSSL_CPUID_OBJ \ " OPENSSL_CRYPTO_INCLUDES="\ @@ -635,6 +676,7 @@ crypto/evp/m_md4.c \ crypto/evp/m_md5.c \ crypto/evp/m_mdc2.c \ crypto/evp/m_null.c \ +crypto/evp/m_ripemd.c \ crypto/evp/m_sha1.c \ crypto/evp/m_sigver.c \ crypto/evp/m_wp.c \ @@ -733,6 +775,8 @@ crypto/rc2/rc2ofb64.c \ crypto/rc4/rc4_enc.c \ crypto/rc4/rc4_skey.c \ crypto/rc4/rc4_utl.c \ +crypto/ripemd/rmd_dgst.c \ +crypto/ripemd/rmd_one.c \ crypto/rsa/rsa_ameth.c \ crypto/rsa/rsa_asn1.c \ crypto/rsa/rsa_chk.c \ @@ -829,14 +873,12 @@ crypto/x509v3/v3err.c \ OPENSSL_CRYPTO_SOURCES_arm="\ crypto/aes/asm/aes-armv4.S \ -crypto/aes/asm/aesv8-armx.S \ crypto/aes/asm/bsaes-armv7.S \ crypto/armcap.c \ crypto/armv4cpuid.S \ crypto/bn/asm/armv4-gf2m.S \ crypto/bn/asm/armv4-mont.S \ crypto/modes/asm/ghash-armv4.S \ -crypto/modes/asm/ghashv8-armx.S \ crypto/sha/asm/sha1-armv4-large.S \ crypto/sha/asm/sha256-armv4.S \ crypto/sha/asm/sha512-armv4.S \ @@ -848,13 +890,6 @@ crypto/mem_clr.c \ " OPENSSL_CRYPTO_SOURCES_arm64="\ -crypto/armcap.c \ -crypto/arm64cpuid.S \ -crypto/aes/asm/aesv8-armx-64.S \ -crypto/modes/asm/ghashv8-armx-64.S \ -crypto/sha/asm/sha1-armv8.S \ -crypto/sha/asm/sha256-armv8.S \ -crypto/sha/asm/sha512-armv8.S \ " OPENSSL_CRYPTO_SOURCES_EXCLUDES_arm64="\ @@ -1047,4 +1082,23 @@ apps/version.c \ apps/x509.c \ " +OPENSSL_PATCHES="\ +progs.patch \ +handshake_cutthrough.patch \ +jsse.patch \ +channelid.patch \ +eng_dyn_dirs.patch \ +fix_clang_build.patch \ +tls12_digests.patch \ +alpn.patch \ +cbc_record_splitting.patch \ +dsa_nonce.patch \ +ecdhe_psk.patch \ +wincrypt.patch \ +tls_psk_hint.patch \ +arm_asm.patch \ +psk_client_callback_128_byte_id_bug.patch \ +early_ccs.patch \ +" + source ./openssl.trusty.config diff --git a/app/openssl/openssl.version b/app/openssl/openssl.version index ab2e62bf..2e849911 100644 --- a/app/openssl/openssl.version +++ b/app/openssl/openssl.version @@ -1 +1 @@ -OPENSSL_VERSION=1.0.1h +OPENSSL_VERSION=1.0.1g diff --git a/app/openssl/patches/README b/app/openssl/patches/README index 13e9bd8b..2ff69282 100644 --- a/app/openssl/patches/README +++ b/app/openssl/patches/README @@ -53,19 +53,6 @@ ecdhe_psk.patch Adds support for ECDHE Pre-Shared Key (PSK) TLS cipher suites. -ecdhe_psk_part2.patch - -Removes ECHDE-PSK cipher suites with SHA-2 because they cannot be used with -SSLv3 (and there's no way to express that in OpenSSL's configuration). Adds -SHA-1 based ECDHE-PSK AES-CBC cipher suites instead. - -arm_asm.patch - -Adds newer ARM assembly pack with BSAES for ARMv7 and acceleration for ARMv8 -Based on branch available at: -https://git.linaro.org/people/ard.biesheuvel/openssl.git/shortlog/refs/heads/openssl-1.0.1f-with-arm-patches -c7b582ef23eb6f4386664e841e6e406d984c38d3^..cb8b1ab03e5c179a719afe83f03fecb1c2c78730 - tls_psk_hint.patch Fixes issues with TLS-PSK identity hint implementation where diff --git a/app/openssl/ssl/bio_ssl.c b/app/openssl/ssl/bio_ssl.c index 06a13de4..e9552cae 100644 --- a/app/openssl/ssl/bio_ssl.c +++ b/app/openssl/ssl/bio_ssl.c @@ -206,10 +206,6 @@ static int ssl_read(BIO *b, char *out, int outl) BIO_set_retry_special(b); retry_reason=BIO_RR_SSL_X509_LOOKUP; break; - case SSL_ERROR_WANT_CHANNEL_ID_LOOKUP: - BIO_set_retry_special(b); - retry_reason=BIO_RR_SSL_CHANNEL_ID_LOOKUP; - break; case SSL_ERROR_WANT_ACCEPT: BIO_set_retry_special(b); retry_reason=BIO_RR_ACCEPT; @@ -284,10 +280,6 @@ static int ssl_write(BIO *b, const char *out, int outl) BIO_set_retry_special(b); retry_reason=BIO_RR_SSL_X509_LOOKUP; break; - case SSL_ERROR_WANT_CHANNEL_ID_LOOKUP: - BIO_set_retry_special(b); - retry_reason=BIO_RR_SSL_CHANNEL_ID_LOOKUP; - break; case SSL_ERROR_WANT_CONNECT: BIO_set_retry_special(b); retry_reason=BIO_RR_CONNECT; diff --git a/app/openssl/ssl/d1_both.c b/app/openssl/ssl/d1_both.c index 04aa2310..2e8cf681 100644 --- a/app/openssl/ssl/d1_both.c +++ b/app/openssl/ssl/d1_both.c @@ -627,16 +627,7 @@ dtls1_reassemble_fragment(SSL *s, struct hm_header_st* msg_hdr, int *ok) frag->msg_header.frag_off = 0; } else - { frag = (hm_fragment*) item->data; - if (frag->msg_header.msg_len != msg_hdr->msg_len) - { - item = NULL; - frag = NULL; - goto err; - } - } - /* If message is already reassembled, this must be a * retransmit and can be dropped. @@ -683,8 +674,8 @@ dtls1_reassemble_fragment(SSL *s, struct hm_header_st* msg_hdr, int *ok) item = pitem_new(seq64be, frag); if (item == NULL) { - i = -1; goto err; + i = -1; } pqueue_insert(s->d1->buffered_messages, item); @@ -793,7 +784,6 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) int i,al; struct hm_header_st msg_hdr; - redo: /* see if we have the required fragment already */ if ((frag_len = dtls1_retrieve_buffered_fragment(s,max,ok)) || *ok) { @@ -852,7 +842,8 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) s->msg_callback_arg); s->init_num = 0; - goto redo; + return dtls1_get_message_fragment(s, st1, stn, + max, ok); } else /* Incorrectly formated Hello request */ { diff --git a/app/openssl/ssl/d1_lib.c b/app/openssl/ssl/d1_lib.c index 6bde16fa..106939f2 100644 --- a/app/openssl/ssl/d1_lib.c +++ b/app/openssl/ssl/d1_lib.c @@ -176,12 +176,9 @@ static void dtls1_clear_queues(SSL *s) while ( (item = pqueue_pop(s->d1->buffered_app_data.q)) != NULL) { - rdata = (DTLS1_RECORD_DATA *) item->data; - if (rdata->rbuf.buf) - { - OPENSSL_free(rdata->rbuf.buf); - } - OPENSSL_free(item->data); + frag = (hm_fragment *)item->data; + OPENSSL_free(frag->fragment); + OPENSSL_free(frag); pitem_free(item); } } diff --git a/app/openssl/ssl/d1_pkt.c b/app/openssl/ssl/d1_pkt.c index 363fc8c8..5b84e97c 100644 --- a/app/openssl/ssl/d1_pkt.c +++ b/app/openssl/ssl/d1_pkt.c @@ -241,28 +241,27 @@ dtls1_buffer_record(SSL *s, record_pqueue *queue, unsigned char *priority) } #endif - s->packet = NULL; - s->packet_length = 0; - memset(&(s->s3->rbuf), 0, sizeof(SSL3_BUFFER)); - memset(&(s->s3->rrec), 0, sizeof(SSL3_RECORD)); - - if (!ssl3_setup_buffers(s)) + /* insert should not fail, since duplicates are dropped */ + if (pqueue_insert(queue->q, item) == NULL) { - SSLerr(SSL_F_DTLS1_BUFFER_RECORD, ERR_R_INTERNAL_ERROR); OPENSSL_free(rdata); pitem_free(item); return(0); } - /* insert should not fail, since duplicates are dropped */ - if (pqueue_insert(queue->q, item) == NULL) + s->packet = NULL; + s->packet_length = 0; + memset(&(s->s3->rbuf), 0, sizeof(SSL3_BUFFER)); + memset(&(s->s3->rrec), 0, sizeof(SSL3_RECORD)); + + if (!ssl3_setup_buffers(s)) { SSLerr(SSL_F_DTLS1_BUFFER_RECORD, ERR_R_INTERNAL_ERROR); OPENSSL_free(rdata); pitem_free(item); return(0); } - + return(1); } diff --git a/app/openssl/ssl/d1_srvr.c b/app/openssl/ssl/d1_srvr.c index c181db6d..09f47627 100644 --- a/app/openssl/ssl/d1_srvr.c +++ b/app/openssl/ssl/d1_srvr.c @@ -1356,7 +1356,6 @@ int dtls1_send_server_key_exchange(SSL *s) (unsigned char *)encodedPoint, encodedlen); OPENSSL_free(encodedPoint); - encodedPoint = NULL; p += encodedlen; } #endif diff --git a/app/openssl/ssl/s3_both.c b/app/openssl/ssl/s3_both.c index 607990d0..d9e18a31 100644 --- a/app/openssl/ssl/s3_both.c +++ b/app/openssl/ssl/s3_both.c @@ -561,7 +561,7 @@ long ssl3_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok) #endif /* Feed this message into MAC computation. */ - if (*((unsigned char*) s->init_buf->data) != SSL3_MT_ENCRYPTED_EXTENSIONS) + if (*(unsigned char*)s->init_buf->data != SSL3_MT_ENCRYPTED_EXTENSIONS) ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4); if (s->msg_callback) s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, s->init_buf->data, (size_t)s->init_num + 4, s, s->msg_callback_arg); diff --git a/app/openssl/ssl/s3_clnt.c b/app/openssl/ssl/s3_clnt.c index 486f538b..5e15b75c 100644 --- a/app/openssl/ssl/s3_clnt.c +++ b/app/openssl/ssl/s3_clnt.c @@ -215,12 +215,24 @@ int ssl3_connect(SSL *s) } #endif +// BEGIN android-added +#if 0 +/* Send app data in separate packet, otherwise, some particular site + * (only one site so far) closes the socket. http://b/2511073 + * Note: there is a very small chance that two TCP packets + * could be arriving at server combined into a single TCP packet, + * then trigger that site to break. We haven't encounter that though. + */ +// END android-added if (SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) { /* Send app data along with CCS/Finished */ s->s3->flags |= SSL3_FLAGS_DELAY_CLIENT_FINISHED; } +// BEGIN android-added +#endif +// END android-added for (;;) { state=s->state; @@ -546,20 +558,7 @@ int ssl3_connect(SSL *s) } else { - /* This is a non-resumption handshake. If it - * involves ChannelID, then record the - * handshake hashes at this point in the - * session so that any resumption of this - * session with ChannelID can sign those - * hashes. */ - if (s->s3->tlsext_channel_id_new) - { - ret = tls1_record_handshake_hashes_for_channel_id(s); - if (ret <= 0) - goto end; - } - if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) - && ssl3_can_cutthrough(s) + if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) && SSL_get_cipher_bits(s, NULL) >= 128 && s->s3->previous_server_finished_len == 0 /* no cutthrough on renegotiation (would complicate the state machine) */ ) { @@ -608,7 +607,6 @@ int ssl3_connect(SSL *s) case SSL3_ST_CR_FINISHED_A: case SSL3_ST_CR_FINISHED_B: - s->s3->flags |= SSL3_FLAGS_CCS_OK; ret=ssl3_get_finished(s,SSL3_ST_CR_FINISHED_A, SSL3_ST_CR_FINISHED_B); @@ -2304,7 +2302,7 @@ int ssl3_get_server_done(SSL *s) int ssl3_send_client_key_exchange(SSL *s) { unsigned char *p,*d; - int n = 0; + int n; unsigned long alg_k; unsigned long alg_a; #ifndef OPENSSL_NO_RSA @@ -2690,13 +2688,6 @@ int ssl3_send_client_key_exchange(SSL *s) unsigned int i; #endif - if (s->session->sess_cert == NULL) - { - ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_UNEXPECTED_MESSAGE); - SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,SSL_R_UNEXPECTED_MESSAGE); - goto err; - } - /* Did we send out the client's * ECDH share for use in premaster * computation as part of client certificate? @@ -3036,7 +3027,7 @@ int ssl3_send_client_key_exchange(SSL *s) } } #endif - else if (!(alg_k & SSL_kPSK) || ((alg_k & SSL_kPSK) && !(alg_a & SSL_aPSK))) + else if (!(alg_k & SSL_kPSK)) { ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); @@ -3500,29 +3491,10 @@ int ssl3_send_channel_id(SSL *s) if (s->state != SSL3_ST_CW_CHANNEL_ID_A) return ssl3_do_write(s, SSL3_RT_HANDSHAKE); - if (!s->tlsext_channel_id_private && s->ctx->channel_id_cb) - { - EVP_PKEY *key = NULL; - s->ctx->channel_id_cb(s, &key); - if (key != NULL) - { - s->tlsext_channel_id_private = key; - } - } - if (!s->tlsext_channel_id_private) - { - s->rwstate=SSL_CHANNEL_ID_LOOKUP; - return (-1); - } - s->rwstate=SSL_NOTHING; - d = (unsigned char *)s->init_buf->data; *(d++)=SSL3_MT_ENCRYPTED_EXTENSIONS; l2n3(2 + 2 + TLSEXT_CHANNEL_ID_SIZE, d); - if (s->s3->tlsext_channel_id_new) - s2n(TLSEXT_TYPE_channel_id_new, d); - else - s2n(TLSEXT_TYPE_channel_id, d); + s2n(TLSEXT_TYPE_channel_id, d); s2n(TLSEXT_CHANNEL_ID_SIZE, d); EVP_MD_CTX_init(&md_ctx); @@ -3533,9 +3505,9 @@ int ssl3_send_channel_id(SSL *s) SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY); goto err; } - /* i2d_PublicKey will produce an ANSI X9.62 public key which, for a - * P-256 key, is 0x04 (meaning uncompressed) followed by the x and y - * field elements as 32-byte, big-endian numbers. */ + // i2d_PublicKey will produce an ANSI X9.62 public key which, for a + // P-256 key, is 0x04 (meaning uncompressed) followed by the x and y + // field elements as 32-byte, big-endian numbers. if (public_key_len != 65) { SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CHANNEL_ID_NOT_P256); @@ -3581,14 +3553,14 @@ int ssl3_send_channel_id(SSL *s) } derp = der_sig; - sig = d2i_ECDSA_SIG(NULL, (const unsigned char**) &derp, sig_len); + sig = d2i_ECDSA_SIG(NULL, (const unsigned char**)&derp, sig_len); if (sig == NULL) { SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_D2I_ECDSA_SIG); goto err; } - /* The first byte of public_key will be 0x4, denoting an uncompressed key. */ + // The first byte of public_key will be 0x4, denoting an uncompressed key. memcpy(d, public_key + 1, 64); d += 64; memset(d, 0, 2 * 32); diff --git a/app/openssl/ssl/s3_enc.c b/app/openssl/ssl/s3_enc.c index 53b94b7c..90fbb180 100644 --- a/app/openssl/ssl/s3_enc.c +++ b/app/openssl/ssl/s3_enc.c @@ -728,7 +728,7 @@ int n_ssl3_mac(SSL *ssl, unsigned char *md, int send) } t=EVP_MD_CTX_size(hash); - if (t < 0 || t > 20) + if (t < 0) return -1; md_size=t; npad=(48/md_size)*md_size; diff --git a/app/openssl/ssl/s3_lib.c b/app/openssl/ssl/s3_lib.c index 896d1e19..f84da7f5 100644 --- a/app/openssl/ssl/s3_lib.c +++ b/app/openssl/ssl/s3_lib.c @@ -2828,34 +2828,35 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ #ifndef OPENSSL_NO_PSK /* ECDH PSK ciphersuites from RFC 5489 */ - /* Cipher C035 */ + + /* Cipher C037 */ { 1, - TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA, - TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA, + TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256, + TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256, SSL_kEECDH, SSL_aPSK, SSL_AES128, - SSL_SHA1, + SSL_SHA256, SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA256, 128, 128, }, - /* Cipher C036 */ + /* Cipher C038 */ { 1, - TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA, - TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA, + TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384, + TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384, SSL_kEECDH, SSL_aPSK, SSL_AES256, - SSL_SHA1, + SSL_SHA384, SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA384, 256, 256, }, @@ -3411,6 +3412,8 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg) break; #endif case SSL_CTRL_CHANNEL_ID: + if (!s->server) + break; s->tlsext_channel_id_enabled = 1; ret = 1; break; @@ -3426,7 +3429,7 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg) } if (s->tlsext_channel_id_private) EVP_PKEY_free(s->tlsext_channel_id_private); - s->tlsext_channel_id_private = EVP_PKEY_dup((EVP_PKEY*) parg); + s->tlsext_channel_id_private = (EVP_PKEY*) parg; ret = 1; break; @@ -3741,7 +3744,7 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg) } if (ctx->tlsext_channel_id_private) EVP_PKEY_free(ctx->tlsext_channel_id_private); - ctx->tlsext_channel_id_private = EVP_PKEY_dup((EVP_PKEY*) parg); + ctx->tlsext_channel_id_private = (EVP_PKEY*) parg; break; default: diff --git a/app/openssl/ssl/s3_pkt.c b/app/openssl/ssl/s3_pkt.c index 60c4f1a4..75997ac2 100644 --- a/app/openssl/ssl/s3_pkt.c +++ b/app/openssl/ssl/s3_pkt.c @@ -110,7 +110,6 @@ */ #include -#include #include #define USE_SOCKETS #include "ssl_locl.h" @@ -581,11 +580,10 @@ int ssl3_do_compress(SSL *ssl) int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) { const unsigned char *buf=buf_; - unsigned int n,nw; - int i,tot; + unsigned int tot,n,nw; + int i; s->rwstate=SSL_NOTHING; - OPENSSL_assert(s->s3->wnum <= INT_MAX); tot=s->s3->wnum; s->s3->wnum=0; @@ -600,22 +598,6 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) } } - /* ensure that if we end up with a smaller value of data to write - * out than the the original len from a write which didn't complete - * for non-blocking I/O and also somehow ended up avoiding - * the check for this in ssl3_write_pending/SSL_R_BAD_WRITE_RETRY as - * it must never be possible to end up with (len-tot) as a large - * number that will then promptly send beyond the end of the users - * buffer ... so we trap and report the error in a way the user - * will notice - */ - if (len < tot) - { - SSLerr(SSL_F_SSL3_WRITE_BYTES,SSL_R_BAD_LENGTH); - return(-1); - } - - n=(len-tot); for (;;) { @@ -686,6 +668,9 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, SSL3_BUFFER *wb=&(s->s3->wbuf); SSL_SESSION *sess; + if (wb->buf == NULL) + if (!ssl3_setup_write_buffer(s)) + return -1; /* first check if there is a SSL3_BUFFER still being written * out. This will happen with non blocking IO */ @@ -701,10 +686,6 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, /* if it went, fall through and send more stuff */ } - if (wb->buf == NULL) - if (!ssl3_setup_write_buffer(s)) - return -1; - if (len == 0) return 0; @@ -1086,7 +1067,7 @@ start: { s->rstate=SSL_ST_READ_HEADER; rr->off=0; - if (s->mode & SSL_MODE_RELEASE_BUFFERS && s->s3->rbuf.left == 0) + if (s->mode & SSL_MODE_RELEASE_BUFFERS) ssl3_release_read_buffer(s); } } @@ -1331,12 +1312,10 @@ start: if (!(s->s3->flags & SSL3_FLAGS_CCS_OK)) { al=SSL_AD_UNEXPECTED_MESSAGE; - SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_CCS_RECEIVED_EARLY); + SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_UNEXPECTED_CCS); goto f_err; } - s->s3->flags &= ~SSL3_FLAGS_CCS_OK; - rr->length=0; if (s->msg_callback) @@ -1471,7 +1450,12 @@ int ssl3_do_change_cipher_spec(SSL *s) if (s->s3->tmp.key_block == NULL) { - if (s->session == NULL || s->session->master_key_length == 0) + if (s->session->master_key_length == 0) + { + SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_UNEXPECTED_CCS); + return (0); + } + if (s->session == NULL) { /* might happen if dtls1_read_bytes() calls this */ SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_CCS_RECEIVED_EARLY); diff --git a/app/openssl/ssl/s3_srvr.c b/app/openssl/ssl/s3_srvr.c index f83c9366..1976efa7 100644 --- a/app/openssl/ssl/s3_srvr.c +++ b/app/openssl/ssl/s3_srvr.c @@ -675,8 +675,8 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_CERT_VRFY_A: case SSL3_ST_SR_CERT_VRFY_B: - s->s3->flags |= SSL3_FLAGS_CCS_OK; /* we should decide if we expected this one */ + s->s3->flags |= SSL3_FLAGS_CCS_OK; ret=ssl3_get_cert_verify(s); if (ret <= 0) goto end; @@ -694,6 +694,7 @@ int ssl3_accept(SSL *s) channel_id = s->s3->tlsext_channel_id_valid; #endif + s->s3->flags |= SSL3_FLAGS_CCS_OK; if (next_proto_neg) s->state=SSL3_ST_SR_NEXT_PROTO_A; else if (channel_id) @@ -728,7 +729,6 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_FINISHED_A: case SSL3_ST_SR_FINISHED_B: - s->s3->flags |= SSL3_FLAGS_CCS_OK; ret=ssl3_get_finished(s,SSL3_ST_SR_FINISHED_A, SSL3_ST_SR_FINISHED_B); if (ret <= 0) goto end; @@ -740,15 +740,6 @@ int ssl3_accept(SSL *s) #endif else s->state=SSL3_ST_SW_CHANGE_A; - /* If this is a full handshake with ChannelID then - * record the hashshake hashes in |s->session| in case - * we need them to verify a ChannelID signature on a - * resumption of this session in the future. */ - if (!s->hit && s->s3->tlsext_channel_id_new) - { - ret = tls1_record_handshake_hashes_for_channel_id(s); - if (ret <= 0) goto end; - } s->init_num=0; break; @@ -1477,22 +1468,6 @@ int ssl3_send_server_hello(SSL *s) if (s->state == SSL3_ST_SW_SRVR_HELLO_A) { - /* We only accept ChannelIDs on connections with ECDHE in order - * to avoid a known attack while we fix ChannelID itself. */ - if (s->s3 && - s->s3->tlsext_channel_id_valid && - (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kEECDH) == 0) - s->s3->tlsext_channel_id_valid = 0; - - /* If this is a resumption and the original handshake didn't - * support ChannelID then we didn't record the original - * handshake hashes in the session and so cannot resume with - * ChannelIDs. */ - if (s->hit && - s->s3->tlsext_channel_id_new && - s->session->original_handshake_hash_len == 0) - s->s3->tlsext_channel_id_valid = 0; - buf=(unsigned char *)s->init_buf->data; #ifdef OPENSSL_NO_TLSEXT p=s->s3->server_random; @@ -2168,11 +2143,6 @@ int ssl3_send_certificate_request(SSL *s) s->init_num=n+4; s->init_off=0; #ifdef NETSCAPE_HANG_BUG - if (!BUF_MEM_grow_clean(buf, s->init_num + 4)) - { - SSLerr(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST,ERR_R_BUF_LIB); - goto err; - } p=(unsigned char *)s->init_buf->data + s->init_num; /* do the header */ @@ -2915,8 +2885,6 @@ int ssl3_get_client_key_exchange(SSL *s) unsigned char premaster_secret[32], *start; size_t outlen=32, inlen; unsigned long alg_a; - int Ttag, Tclass; - long Tlen; /* Get our certificate private key*/ alg_a = s->s3->tmp.new_cipher->algorithm_auth; @@ -2938,16 +2906,28 @@ int ssl3_get_client_key_exchange(SSL *s) ERR_clear_error(); } /* Decrypt session key */ - if (ASN1_get_object((const unsigned char **)&p, &Tlen, &Ttag, &Tclass, n) != V_ASN1_CONSTRUCTED || - Ttag != V_ASN1_SEQUENCE || - Tclass != V_ASN1_UNIVERSAL) + if ((*p!=( V_ASN1_SEQUENCE| V_ASN1_CONSTRUCTED))) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); + goto gerr; + } + if (p[1] == 0x81) + { + start = p+3; + inlen = p[2]; + } + else if (p[1] < 0x80) + { + start = p+2; + inlen = p[1]; + } + else { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); goto gerr; } - start = p; - inlen = Tlen; if (EVP_PKEY_decrypt(pkey_ctx,premaster_secret,&outlen,start,inlen) <=0) + { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); goto gerr; @@ -3695,7 +3675,6 @@ int ssl3_get_channel_id(SSL *s) EC_POINT* point = NULL; ECDSA_SIG sig; BIGNUM x, y; - unsigned short expected_extension_type; if (s->state == SSL3_ST_SR_CHANNEL_ID_A && s->init_num == 0) { @@ -3753,11 +3732,7 @@ int ssl3_get_channel_id(SSL *s) n2s(p, extension_type); n2s(p, extension_len); - expected_extension_type = TLSEXT_TYPE_channel_id; - if (s->s3->tlsext_channel_id_new) - expected_extension_type = TLSEXT_TYPE_channel_id_new; - - if (extension_type != expected_extension_type || + if (extension_type != TLSEXT_TYPE_channel_id || extension_len != TLSEXT_CHANNEL_ID_SIZE) { SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_INVALID_MESSAGE); diff --git a/app/openssl/ssl/ssl.h b/app/openssl/ssl/ssl.h index a85841b3..54b0eb6c 100644 --- a/app/openssl/ssl/ssl.h +++ b/app/openssl/ssl/ssl.h @@ -544,13 +544,6 @@ struct ssl_session_st #ifndef OPENSSL_NO_SRP char *srp_username; #endif - - /* original_handshake_hash contains the handshake hash (either - * SHA-1+MD5 or SHA-2, depending on TLS version) for the original, full - * handshake that created a session. This is used by Channel IDs during - * resumption. */ - unsigned char original_handshake_hash[EVP_MAX_MD_SIZE]; - unsigned int original_handshake_hash_len; }; #endif @@ -560,7 +553,7 @@ struct ssl_session_st /* Allow initial connection to servers that don't support RI */ #define SSL_OP_LEGACY_SERVER_CONNECT 0x00000004L #define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L -#define SSL_OP_TLSEXT_PADDING 0x00000010L +#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x00000010L #define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x00000020L #define SSL_OP_SAFARI_ECDHE_ECDSA_BUG 0x00000040L #define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x00000080L @@ -569,8 +562,6 @@ struct ssl_session_st /* Hasn't done anything since OpenSSL 0.9.7h, retained for compatibility */ #define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0 -/* Refers to ancient SSLREF and SSLv2, retained for compatibility */ -#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x0 /* SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS is vestigial. Previously it disabled the * insertion of empty records in CBC mode, but the empty records were commonly @@ -657,14 +648,12 @@ struct ssl_session_st * TLS only.) "Released" buffers are put onto a free-list in the context * or just freed (depending on the context's setting for freelist_max_len). */ #define SSL_MODE_RELEASE_BUFFERS 0x00000010L - /* Send the current time in the Random fields of the ClientHello and * ServerHello records for compatibility with hypothetical implementations * that require it. */ #define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L #define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L - /* When set, clients may send application data before receipt of CCS * and Finished. This mode enables full-handshakes to 'complete' in * one RTT. */ @@ -877,9 +866,6 @@ struct ssl_ctx_st /* get client cert callback */ int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey); - /* get channel id callback */ - void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey); - /* cookie generate callback */ int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int *cookie_len); @@ -1042,10 +1028,6 @@ struct ssl_ctx_st /* If true, a client will advertise the Channel ID extension and a * server will echo it. */ char tlsext_channel_id_enabled; - /* tlsext_channel_id_enabled_new is a hack to support both old and new - * ChannelID signatures. It indicates that a client should advertise the - * new ChannelID extension number. */ - char tlsext_channel_id_enabled_new; /* The client's Channel ID private key. */ EVP_PKEY *tlsext_channel_id_private; #endif @@ -1104,8 +1086,6 @@ void SSL_CTX_set_info_callback(SSL_CTX *ctx, void (*cb)(const SSL *ssl,int type, void (*SSL_CTX_get_info_callback(SSL_CTX *ctx))(const SSL *ssl,int type,int val); void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey)); int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL *ssl, X509 **x509, EVP_PKEY **pkey); -void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey)); -void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL *ssl, EVP_PKEY **pkey); #ifndef OPENSSL_NO_ENGINE int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e); #endif @@ -1182,14 +1162,12 @@ const char *SSL_get_psk_identity(const SSL *s); #define SSL_WRITING 2 #define SSL_READING 3 #define SSL_X509_LOOKUP 4 -#define SSL_CHANNEL_ID_LOOKUP 5 /* These will only be used when doing non-blocking IO */ #define SSL_want_nothing(s) (SSL_want(s) == SSL_NOTHING) #define SSL_want_read(s) (SSL_want(s) == SSL_READING) #define SSL_want_write(s) (SSL_want(s) == SSL_WRITING) #define SSL_want_x509_lookup(s) (SSL_want(s) == SSL_X509_LOOKUP) -#define SSL_want_channel_id_lookup(s) (SSL_want(s) == SSL_CHANNEL_ID_LOOKUP) #define SSL_MAC_FLAG_READ_MAC_STREAM 1 #define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 @@ -1624,7 +1602,6 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_ERROR_ZERO_RETURN 6 #define SSL_ERROR_WANT_CONNECT 7 #define SSL_ERROR_WANT_ACCEPT 8 -#define SSL_ERROR_WANT_CHANNEL_ID_LOOKUP 9 #define SSL_CTRL_NEED_TMP_RSA 1 #define SSL_CTRL_SET_TMP_RSA 2 @@ -1762,11 +1739,10 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_set_tmp_ecdh(ssl,ecdh) \ SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh) -/* SSL_enable_tls_channel_id either configures a TLS server to accept TLS client - * IDs from clients, or configure a client to send TLS client IDs to server. - * Returns 1 on success. */ -#define SSL_enable_tls_channel_id(s) \ - SSL_ctrl(s,SSL_CTRL_CHANNEL_ID,0,NULL) +/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client + * IDs from clients. Returns 1 on success. */ +#define SSL_enable_tls_channel_id(ctx) \ + SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL) /* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to * compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on * success. */ @@ -1816,7 +1792,7 @@ int SSL_CIPHER_get_bits(const SSL_CIPHER *c,int *alg_bits); char * SSL_CIPHER_get_version(const SSL_CIPHER *c); const char * SSL_CIPHER_get_name(const SSL_CIPHER *c); unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c); -const char * SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); +const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); int SSL_get_fd(const SSL *s); int SSL_get_rfd(const SSL *s); @@ -2731,6 +2707,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_WRONG_VERSION_NUMBER 267 #define SSL_R_X509_LIB 268 #define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS 269 +#define SSL_R_UNEXPECTED_CCS 388 #ifdef __cplusplus } diff --git a/app/openssl/ssl/ssl3.h b/app/openssl/ssl/ssl3.h index 83d59bff..f205f73d 100644 --- a/app/openssl/ssl/ssl3.h +++ b/app/openssl/ssl/ssl3.h @@ -388,6 +388,9 @@ typedef struct ssl3_buffer_st #define TLS1_FLAGS_TLS_PADDING_BUG 0x0008 #define TLS1_FLAGS_SKIP_CERT_VERIFY 0x0010 #define TLS1_FLAGS_KEEP_HANDSHAKE 0x0020 +/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at + * this point in the handshake. If this flag is not set then received CCS + * records will cause a fatal error for the connection. */ #define SSL3_FLAGS_CCS_OK 0x0080 /* SSL3_FLAGS_SGC_RESTART_DONE is set when we @@ -555,11 +558,6 @@ typedef struct ssl3_state_st * for Channel IDs and that tlsext_channel_id will be valid after the * handshake. */ char tlsext_channel_id_valid; - /* tlsext_channel_id_new means that the updated Channel ID extension - * was negotiated. This is a temporary hack in the code to support both - * forms of Channel ID extension while we transition to the new format, - * which fixed a security issue. */ - char tlsext_channel_id_new; /* For a server: * If |tlsext_channel_id_valid| is true, then this contains the * verified Channel ID from the client: a P256 point, (x,y), where @@ -680,11 +678,11 @@ typedef struct ssl3_state_st #define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT) +#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT) #ifndef OPENSSL_NO_NEXTPROTONEG #define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT) #define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT) #endif -#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANNEL_ID_A (0x220|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANNEL_ID_B (0x221|SSL_ST_ACCEPT) #define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT) diff --git a/app/openssl/ssl/ssl_asn1.c b/app/openssl/ssl/ssl_asn1.c index f83e18f8..38540be1 100644 --- a/app/openssl/ssl/ssl_asn1.c +++ b/app/openssl/ssl/ssl_asn1.c @@ -117,13 +117,12 @@ typedef struct ssl_session_asn1_st #ifndef OPENSSL_NO_SRP ASN1_OCTET_STRING srp_username; #endif /* OPENSSL_NO_SRP */ - ASN1_OCTET_STRING original_handshake_hash; } SSL_SESSION_ASN1; int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) { #define LSIZE2 (sizeof(long)*2) - int v1=0,v2=0,v3=0,v4=0,v5=0,v7=0,v8=0,v14=0; + int v1=0,v2=0,v3=0,v4=0,v5=0,v7=0,v8=0; unsigned char buf[4],ibuf1[LSIZE2],ibuf2[LSIZE2]; unsigned char ibuf3[LSIZE2],ibuf4[LSIZE2],ibuf5[LSIZE2]; #ifndef OPENSSL_NO_TLSEXT @@ -273,13 +272,6 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) a.psk_identity.type=V_ASN1_OCTET_STRING; a.psk_identity.data=(unsigned char *)(in->psk_identity); } - - if (in->original_handshake_hash_len > 0) - { - a.original_handshake_hash.length = in->original_handshake_hash_len; - a.original_handshake_hash.type = V_ASN1_OCTET_STRING; - a.original_handshake_hash.data = in->original_handshake_hash; - } #endif /* OPENSSL_NO_PSK */ #ifndef OPENSSL_NO_SRP if (in->srp_username) @@ -333,8 +325,6 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) if (in->srp_username) M_ASN1_I2D_len_EXP_opt(&(a.srp_username), i2d_ASN1_OCTET_STRING,12,v12); #endif /* OPENSSL_NO_SRP */ - if (in->original_handshake_hash_len > 0) - M_ASN1_I2D_len_EXP_opt(&(a.original_handshake_hash),i2d_ASN1_OCTET_STRING,14,v14); M_ASN1_I2D_seq_total(); @@ -383,8 +373,6 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) if (in->srp_username) M_ASN1_I2D_put_EXP_opt(&(a.srp_username), i2d_ASN1_OCTET_STRING,12,v12); #endif /* OPENSSL_NO_SRP */ - if (in->original_handshake_hash_len > 0) - M_ASN1_I2D_put_EXP_opt(&(a.original_handshake_hash),i2d_ASN1_OCTET_STRING,14,v14); M_ASN1_I2D_finish(); } @@ -420,7 +408,6 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, if (os.length != 3) { c.error=SSL_R_CIPHER_CODE_WRONG_LENGTH; - c.line=__LINE__; goto err; } id=0x02000000L| @@ -433,7 +420,6 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, if (os.length != 2) { c.error=SSL_R_CIPHER_CODE_WRONG_LENGTH; - c.line=__LINE__; goto err; } id=0x03000000L| @@ -443,7 +429,6 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, else { c.error=SSL_R_UNKNOWN_SSL_VERSION; - c.line=__LINE__; goto err; } @@ -536,7 +521,6 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, if (os.length > SSL_MAX_SID_CTX_LENGTH) { c.error=SSL_R_BAD_LENGTH; - c.line=__LINE__; goto err; } else @@ -654,16 +638,5 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, ret->srp_username=NULL; #endif /* OPENSSL_NO_SRP */ - os.length=0; - os.data=NULL; - M_ASN1_D2I_get_EXP_opt(osp,d2i_ASN1_OCTET_STRING,14); - if (os.data && os.length < (int)sizeof(ret->original_handshake_hash)) - { - memcpy(ret->original_handshake_hash, os.data, os.length); - ret->original_handshake_hash_len = os.length; - OPENSSL_free(os.data); - os.data = NULL; - } - M_ASN1_D2I_Finish(a,SSL_SESSION_free,SSL_F_D2I_SSL_SESSION); } diff --git a/app/openssl/ssl/ssl_err.c b/app/openssl/ssl/ssl_err.c index ac0aad9b..bddd7949 100644 --- a/app/openssl/ssl/ssl_err.c +++ b/app/openssl/ssl/ssl_err.c @@ -553,7 +553,7 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_TLSV1_UNRECOGNIZED_NAME),"tlsv1 unrecognized name"}, {ERR_REASON(SSL_R_TLSV1_UNSUPPORTED_EXTENSION),"tlsv1 unsupported extension"}, {ERR_REASON(SSL_R_TLS_CLIENT_CERT_REQ_WITH_ANON_CIPHER),"tls client cert req with anon cipher"}, -{ERR_REASON(SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT),"peer does not accept heartbeats"}, +{ERR_REASON(SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT),"peer does not accept heartbearts"}, {ERR_REASON(SSL_R_TLS_HEARTBEAT_PENDING) ,"heartbeat request already pending"}, {ERR_REASON(SSL_R_TLS_ILLEGAL_EXPORTER_LABEL),"tls illegal exporter label"}, {ERR_REASON(SSL_R_TLS_INVALID_ECPOINTFORMAT_LIST),"tls invalid ecpointformat list"}, @@ -604,6 +604,7 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_WRONG_VERSION_NUMBER) ,"wrong version number"}, {ERR_REASON(SSL_R_X509_LIB) ,"x509 lib"}, {ERR_REASON(SSL_R_X509_VERIFICATION_SETUP_PROBLEMS),"x509 verification setup problems"}, +{ERR_REASON(SSL_R_UNEXPECTED_CCS),"unexpected CCS"}, {0,NULL} }; diff --git a/app/openssl/ssl/ssl_lib.c b/app/openssl/ssl/ssl_lib.c index 3de68a78..8d2c3a76 100644 --- a/app/openssl/ssl/ssl_lib.c +++ b/app/openssl/ssl/ssl_lib.c @@ -1403,10 +1403,6 @@ char *SSL_get_shared_ciphers(const SSL *s,char *buf,int len) p=buf; sk=s->session->ciphers; - - if (sk_SSL_CIPHER_num(sk) == 0) - return NULL; - for (i=0; iversion >= SSL3_VERSION && s->s3->in_read_app_data == 0 && /* cutthrough only applies to write() */ (SSL_get_mode((SSL*)s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) && /* cutthrough enabled */ - ssl3_can_cutthrough(s) && /* cutthrough allowed */ + SSL_get_cipher_bits(s, NULL) >= 128 && /* strong cipher choosen */ s->s3->previous_server_finished_len == 0 && /* not a renegotiation handshake */ (s->state == SSL3_ST_CR_SESSION_TICKET_A || /* ready to write app-data*/ s->state == SSL3_ST_CR_FINISHED_A)); } -int ssl3_can_cutthrough(const SSL *s) - { - const SSL_CIPHER *c; - - /* require a strong enough cipher */ - if (SSL_get_cipher_bits(s, NULL) < 128) - return 0; - - /* require ALPN or NPN extension */ - if (!s->s3->alpn_selected -#ifndef OPENSSL_NO_NEXTPROTONEG - && !s->s3->next_proto_neg_seen -#endif - ) - { - return 0; - } - - /* require a forward-secret cipher */ - c = SSL_get_current_cipher(s); - if (!c || (c->algorithm_mkey != SSL_kEDH && - c->algorithm_mkey != SSL_kEECDH)) - { - return 0; - } - - return 1; - } - /* Allocates new EVP_MD_CTX and sets pointer to it into given pointer * vairable, freeing EVP_MD_CTX previously stored in that variable, if * any. If EVP_MD pointer is passed, initializes ctx with this md diff --git a/app/openssl/ssl/ssl_locl.h b/app/openssl/ssl/ssl_locl.h index 6b7731a4..f79ab009 100644 --- a/app/openssl/ssl/ssl_locl.h +++ b/app/openssl/ssl/ssl_locl.h @@ -1070,7 +1070,6 @@ void ssl_free_wbio_buffer(SSL *s); int tls1_change_cipher_state(SSL *s, int which); int tls1_setup_key_block(SSL *s); int tls1_enc(SSL *s, int snd); -int tls1_handshake_digest(SSL *s, unsigned char *out, size_t out_len); int tls1_final_finish_mac(SSL *s, const char *str, int slen, unsigned char *p); int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *p); @@ -1127,10 +1126,8 @@ int tls12_get_sigid(const EVP_PKEY *pk); const EVP_MD *tls12_get_hash(unsigned char hash_alg); int tls1_channel_id_hash(EVP_MD_CTX *ctx, SSL *s); -int tls1_record_handshake_hashes_for_channel_id(SSL *s); #endif -int ssl3_can_cutthrough(const SSL *s); EVP_MD_CTX* ssl_replace_hash(EVP_MD_CTX **hash,const EVP_MD *md) ; void ssl_clear_hash_ctx(EVP_MD_CTX **hash); int ssl_add_serverhello_renegotiate_ext(SSL *s, unsigned char *p, int *len, diff --git a/app/openssl/ssl/ssl_sess.c b/app/openssl/ssl/ssl_sess.c index 7d170852..ec088404 100644 --- a/app/openssl/ssl/ssl_sess.c +++ b/app/openssl/ssl/ssl_sess.c @@ -1144,17 +1144,6 @@ int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL * ssl, X509 ** x509 , EVP_PK return ctx->client_cert_cb; } -void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, - void (*cb)(SSL *ssl, EVP_PKEY **pkey)) - { - ctx->channel_id_cb=cb; - } - -void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL * ssl, EVP_PKEY **pkey) - { - return ctx->channel_id_cb; - } - #ifndef OPENSSL_NO_ENGINE int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e) { diff --git a/app/openssl/ssl/t1_enc.c b/app/openssl/ssl/t1_enc.c index 22dd3cab..2ed2e076 100644 --- a/app/openssl/ssl/t1_enc.c +++ b/app/openssl/ssl/t1_enc.c @@ -895,79 +895,54 @@ int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *out) return((int)ret); } -/* tls1_handshake_digest calculates the current handshake hash and writes it to - * |out|, which has space for |out_len| bytes. It returns the number of bytes - * written or -1 in the event of an error. This function works on a copy of the - * underlying digests so can be called multiple times and prior to the final - * update etc. */ -int tls1_handshake_digest(SSL *s, unsigned char *out, size_t out_len) - { - const EVP_MD *md; - EVP_MD_CTX ctx; - int i, err = 0, len = 0; - long mask; - - EVP_MD_CTX_init(&ctx); - - for (i = 0; ssl_get_handshake_digest(i, &mask, &md); i++) - { - int hash_size; - unsigned int digest_len; - EVP_MD_CTX *hdgst = s->s3->handshake_dgst[i]; - - if ((mask & ssl_get_algorithm2(s)) == 0) - continue; - - hash_size = EVP_MD_size(md); - if (!hdgst || hash_size < 0 || (size_t)hash_size > out_len) - { - err = 1; - break; - } - - if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) || - !EVP_DigestFinal_ex(&ctx, out, &digest_len) || - digest_len != (unsigned int)hash_size) /* internal error */ - { - err = 1; - break; - } - out += digest_len; - out_len -= digest_len; - len += digest_len; - } - - EVP_MD_CTX_cleanup(&ctx); - - if (err != 0) - return -1; - return len; - } - int tls1_final_finish_mac(SSL *s, const char *str, int slen, unsigned char *out) { + unsigned int i; + EVP_MD_CTX ctx; unsigned char buf[2*EVP_MAX_MD_SIZE]; - unsigned char buf2[12]; + unsigned char *q,buf2[12]; + int idx; + long mask; int err=0; - int digests_len; + const EVP_MD *md; - if (s->s3->handshake_buffer) + q=buf; + + if (s->s3->handshake_buffer) if (!ssl3_digest_cached_records(s)) return 0; - digests_len = tls1_handshake_digest(s, buf, sizeof(buf)); - if (digests_len < 0) + EVP_MD_CTX_init(&ctx); + + for (idx=0;ssl_get_handshake_digest(idx,&mask,&md);idx++) { - err = 1; - digests_len = 0; + if (mask & ssl_get_algorithm2(s)) + { + int hashsize = EVP_MD_size(md); + EVP_MD_CTX *hdgst = s->s3->handshake_dgst[idx]; + if (!hdgst || hashsize < 0 || hashsize > (int)(sizeof buf - (size_t)(q-buf))) + { + /* internal error: 'buf' is too small for this cipersuite! */ + err = 1; + } + else + { + if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) || + !EVP_DigestFinal_ex(&ctx,q,&i) || + (i != (unsigned int)hashsize)) + err = 1; + q+=hashsize; + } + } } - + if (!tls1_PRF(ssl_get_algorithm2(s), - str,slen, buf, digests_len, NULL,0, NULL,0, NULL,0, + str,slen, buf,(int)(q-buf), NULL,0, NULL,0, NULL,0, s->session->master_key,s->session->master_key_length, out,buf2,sizeof buf2)) err = 1; + EVP_MD_CTX_cleanup(&ctx); if (err) return 0; @@ -1073,10 +1048,14 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send) if (!stream_mac) EVP_MD_CTX_cleanup(&hmac); #ifdef TLS_DEBUG +printf("sec="); +{unsigned int z; for (z=0; zdata[z]); printf("\n"); } +{unsigned int z; for (z=0; zlength; z++) printf("%02X ",buf[z]); printf("\n"); } #endif if (ssl->version != DTLS1_VERSION && ssl->version != DTLS1_BAD_VER) @@ -1206,7 +1185,7 @@ int tls1_export_keying_material(SSL *s, unsigned char *out, size_t olen, if (memcmp(val, TLS_MD_KEY_EXPANSION_CONST, TLS_MD_KEY_EXPANSION_CONST_SIZE) == 0) goto err1; - rv = tls1_PRF(ssl_get_algorithm2(s), + rv = tls1_PRF(s->s3->tmp.new_cipher->algorithm2, val, vallen, NULL, 0, NULL, 0, diff --git a/app/openssl/ssl/t1_lib.c b/app/openssl/ssl/t1_lib.c index 122a25f5..369e09f4 100644 --- a/app/openssl/ssl/t1_lib.c +++ b/app/openssl/ssl/t1_lib.c @@ -617,8 +617,6 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha #ifndef OPENSSL_NO_HEARTBEATS /* Add Heartbeat extension */ - if ((limit - ret - 4 - 1) < 0) - return NULL; s2n(TLSEXT_TYPE_heartbeat,ret); s2n(1,ret); /* Set mode: @@ -649,10 +647,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha * support for Channel ID. */ if (limit - ret - 4 < 0) return NULL; - if (s->ctx->tlsext_channel_id_enabled_new) - s2n(TLSEXT_TYPE_channel_id_new,ret); - else - s2n(TLSEXT_TYPE_channel_id,ret); + s2n(TLSEXT_TYPE_channel_id,ret); s2n(0,ret); } @@ -688,35 +683,36 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha ret += el; } #endif + +#ifdef TLSEXT_TYPE_padding /* Add padding to workaround bugs in F5 terminators. * See https://tools.ietf.org/html/draft-agl-tls-padding-03 * * NB: because this code works out the length of all existing * extensions it MUST always appear last. */ - if (s->options & SSL_OP_TLSEXT_PADDING) + { + int hlen = ret - (unsigned char *)s->init_buf->data; + /* The code in s23_clnt.c to build ClientHello messages includes the + * 5-byte record header in the buffer, while the code in s3_clnt.c does + * not. */ + if (s->state == SSL23_ST_CW_CLNT_HELLO_A) + hlen -= 5; + if (hlen > 0xff && hlen < 0x200) { - int hlen = ret - (unsigned char *)s->init_buf->data; - /* The code in s23_clnt.c to build ClientHello messages - * includes the 5-byte record header in the buffer, while - * the code in s3_clnt.c does not. - */ - if (s->state == SSL23_ST_CW_CLNT_HELLO_A) - hlen -= 5; - if (hlen > 0xff && hlen < 0x200) - { - hlen = 0x200 - hlen; - if (hlen >= 4) - hlen -= 4; - else - hlen = 0; + hlen = 0x200 - hlen; + if (hlen >= 4) + hlen -= 4; + else + hlen = 0; - s2n(TLSEXT_TYPE_padding, ret); - s2n(hlen, ret); - memset(ret, 0, hlen); - ret += hlen; - } + s2n(TLSEXT_TYPE_padding, ret); + s2n(hlen, ret); + memset(ret, 0, hlen); + ret += hlen; } + } +#endif if ((extdatalen = ret-p-2)== 0) return p; @@ -871,8 +867,6 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha /* Add Heartbeat extension if we've received one */ if (s->tlsext_heartbeat & SSL_TLSEXT_HB_ENABLED) { - if ((limit - ret - 4 - 1) < 0) - return NULL; s2n(TLSEXT_TYPE_heartbeat,ret); s2n(1,ret); /* Set mode: @@ -915,10 +909,7 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha { if (limit - ret - 4 < 0) return NULL; - if (s->s3->tlsext_channel_id_new) - s2n(TLSEXT_TYPE_channel_id_new,ret); - else - s2n(TLSEXT_TYPE_channel_id,ret); + s2n(TLSEXT_TYPE_channel_id,ret); s2n(0,ret); } @@ -1581,13 +1572,6 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in else if (type == TLSEXT_TYPE_channel_id && s->tlsext_channel_id_enabled) s->s3->tlsext_channel_id_valid = 1; - else if (type == TLSEXT_TYPE_channel_id_new && - s->tlsext_channel_id_enabled) - { - s->s3->tlsext_channel_id_valid = 1; - s->s3->tlsext_channel_id_new = 1; - } - else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation && s->ctx->alpn_select_cb && s->s3->tmp.finish_md_len == 0) @@ -1837,12 +1821,6 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in else if (type == TLSEXT_TYPE_channel_id) s->s3->tlsext_channel_id_valid = 1; - else if (type == TLSEXT_TYPE_channel_id_new) - { - s->s3->tlsext_channel_id_valid = 1; - s->s3->tlsext_channel_id_new = 1; - } - else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation) { unsigned len; @@ -2930,17 +2908,6 @@ tls1_channel_id_hash(EVP_MD_CTX *md, SSL *s) EVP_DigestUpdate(md, kClientIDMagic, sizeof(kClientIDMagic)); - if (s->hit && s->s3->tlsext_channel_id_new) - { - static const char kResumptionMagic[] = "Resumption"; - EVP_DigestUpdate(md, kResumptionMagic, - sizeof(kResumptionMagic)); - if (s->session->original_handshake_hash_len == 0) - return 0; - EVP_DigestUpdate(md, s->session->original_handshake_hash, - s->session->original_handshake_hash_len); - } - EVP_MD_CTX_init(&ctx); for (i = 0; i < SSL_MAX_DIGEST; i++) { @@ -2955,29 +2922,3 @@ tls1_channel_id_hash(EVP_MD_CTX *md, SSL *s) return 1; } #endif - -/* tls1_record_handshake_hashes_for_channel_id records the current handshake - * hashes in |s->session| so that Channel ID resumptions can sign that data. */ -int tls1_record_handshake_hashes_for_channel_id(SSL *s) - { - int digest_len; - /* This function should never be called for a resumed session because - * the handshake hashes that we wish to record are for the original, - * full handshake. */ - if (s->hit) - return -1; - /* It only makes sense to call this function if Channel IDs have been - * negotiated. */ - if (!s->s3->tlsext_channel_id_new) - return -1; - - digest_len = tls1_handshake_digest( - s, s->session->original_handshake_hash, - sizeof(s->session->original_handshake_hash)); - if (digest_len < 0) - return -1; - - s->session->original_handshake_hash_len = digest_len; - - return 1; - } diff --git a/app/openssl/ssl/tls1.h b/app/openssl/ssl/tls1.h index b9a0899e..ec8948d5 100644 --- a/app/openssl/ssl/tls1.h +++ b/app/openssl/ssl/tls1.h @@ -259,7 +259,6 @@ extern "C" { /* This is not an IANA defined extension number */ #define TLSEXT_TYPE_channel_id 30031 -#define TLSEXT_TYPE_channel_id_new 30032 /* NameType value from RFC 3546 */ #define TLSEXT_NAMETYPE_host_name 0 @@ -532,11 +531,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 -/* ECDHE PSK ciphersuites from RFC5489 - * SHA-2 cipher suites are omitted because they cannot be used safely with - * SSLv3. */ -#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA 0x0300C035 -#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA 0x0300C036 +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256 0x0300C037 +#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384 0x0300C038 /* XXX * Inconsistency alert: @@ -689,9 +686,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256" #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384" -/* ECDHE PSK ciphersuites from RFC5489 */ -#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA "ECDHE-PSK-AES128-CBC-SHA" -#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA "ECDHE-PSK-AES256-CBC-SHA" +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256 "ECDHE-PSK-WITH-AES-128-CBC-SHA256" +#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384 "ECDHE-PSK-WITH-AES-256-CBC-SHA384" #define TLS_CT_RSA_SIGN 1 #define TLS_CT_DSS_SIGN 2 diff --git a/app/openvpn/doc/android.txt b/app/openvpn/doc/android.txt index cf8b3c79..871e3997 100644 --- a/app/openvpn/doc/android.txt +++ b/app/openvpn/doc/android.txt @@ -2,7 +2,7 @@ This file documents the support in OpenVPN for Android 4.0 and up. This support is primarily used in the "OpenVPN for Android" app (http://code.google.com/p/ics-openvpn/). For building see the developer -README: http://code.google.com/p/ics-openvpn/source/browse/doc/README.txt. +README: http://code.google.com/p/ics-openvpn/source/browse/README.txt. Android provides the VPNService API (http://developer.android.com/reference/android/net/VpnService.html) @@ -74,12 +74,3 @@ are not specific to Android but are rarely used on other platform. For example using SIGUSR1 and management-hold to restart, pause, continue the VPN on network changes or the external key management --management-external-key option and inline files. - -Due to a bug in Android 4.4-4.4.2 there the Android Control will also -query what action the daemon should take when opening the fd. The GUI -should compare the last configuration of the tun device with the current -tun configuration and reply with either - -- NOACTION: Keep using the old fd -- OPEN_AFTER_CLOSE: First close the old fd and then open a new to workaround the bug -- OPEN_BEFORE_CLOSE: the normal behaviour when the VPN configuration changed diff --git a/app/openvpn/doc/openvpn.8 b/app/openvpn/doc/openvpn.8 index f2911c0e..aee0bc83 100644 --- a/app/openvpn/doc/openvpn.8 +++ b/app/openvpn/doc/openvpn.8 @@ -1011,6 +1011,13 @@ table (not supported on all OSes). address if OpenVPN is being run in client mode, and is undefined in server mode. .\"********************************************************* .TP +.B \-\-max-routes n +Allow a maximum number of n +.B \-\-route +options to be specified, either in the local configuration file, +or pulled from an OpenVPN server. By default, n=100. +.\"********************************************************* +.TP .B \-\-route-gateway gw|'dhcp' Specify a default gateway .B gw diff --git a/app/openvpn/openvpn.sln b/app/openvpn/openvpn.sln index f832e7a4..90c01b89 100644 --- a/app/openvpn/openvpn.sln +++ b/app/openvpn/openvpn.sln @@ -1,38 +1,38 @@ - -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual C++ Express 2010 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "openvpnserv", "src\openvpnserv\openvpnserv.vcxproj", "{9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "openvpn", "src\openvpn\openvpn.vcxproj", "{29DF226E-4D4E-440F-ADAF-5829CFD4CA94}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "msvc-generate", "build\msvc\msvc-generate\msvc-generate.vcxproj", "{8598C2C8-34C4-47A1-99B0-7C295A890615}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compat", "src\compat\compat.vcxproj", "{4B2E2719-E661-45D7-9203-F6F456B22F19}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Debug|Win32.ActiveCfg = Debug|Win32 - {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Debug|Win32.Build.0 = Debug|Win32 - {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Release|Win32.ActiveCfg = Release|Win32 - {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Release|Win32.Build.0 = Release|Win32 - {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Debug|Win32.ActiveCfg = Debug|Win32 - {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Debug|Win32.Build.0 = Debug|Win32 - {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Release|Win32.ActiveCfg = Release|Win32 - {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Release|Win32.Build.0 = Release|Win32 - {8598C2C8-34C4-47A1-99B0-7C295A890615}.Debug|Win32.ActiveCfg = Debug|Win32 - {8598C2C8-34C4-47A1-99B0-7C295A890615}.Debug|Win32.Build.0 = Debug|Win32 - {8598C2C8-34C4-47A1-99B0-7C295A890615}.Release|Win32.ActiveCfg = Release|Win32 - {8598C2C8-34C4-47A1-99B0-7C295A890615}.Release|Win32.Build.0 = Release|Win32 - {4B2E2719-E661-45D7-9203-F6F456B22F19}.Debug|Win32.ActiveCfg = Debug|Win32 - {4B2E2719-E661-45D7-9203-F6F456B22F19}.Debug|Win32.Build.0 = Debug|Win32 - {4B2E2719-E661-45D7-9203-F6F456B22F19}.Release|Win32.ActiveCfg = Release|Win32 - {4B2E2719-E661-45D7-9203-F6F456B22F19}.Release|Win32.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual C++ Express 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "openvpnserv", "src\openvpnserv\openvpnserv.vcxproj", "{9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "openvpn", "src\openvpn\openvpn.vcxproj", "{29DF226E-4D4E-440F-ADAF-5829CFD4CA94}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "msvc-generate", "build\msvc\msvc-generate\msvc-generate.vcxproj", "{8598C2C8-34C4-47A1-99B0-7C295A890615}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compat", "src\compat\compat.vcxproj", "{4B2E2719-E661-45D7-9203-F6F456B22F19}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Debug|Win32.ActiveCfg = Debug|Win32 + {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Debug|Win32.Build.0 = Debug|Win32 + {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Release|Win32.ActiveCfg = Release|Win32 + {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD}.Release|Win32.Build.0 = Release|Win32 + {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Debug|Win32.ActiveCfg = Debug|Win32 + {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Debug|Win32.Build.0 = Debug|Win32 + {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Release|Win32.ActiveCfg = Release|Win32 + {29DF226E-4D4E-440F-ADAF-5829CFD4CA94}.Release|Win32.Build.0 = Release|Win32 + {8598C2C8-34C4-47A1-99B0-7C295A890615}.Debug|Win32.ActiveCfg = Debug|Win32 + {8598C2C8-34C4-47A1-99B0-7C295A890615}.Debug|Win32.Build.0 = Debug|Win32 + {8598C2C8-34C4-47A1-99B0-7C295A890615}.Release|Win32.ActiveCfg = Release|Win32 + {8598C2C8-34C4-47A1-99B0-7C295A890615}.Release|Win32.Build.0 = Release|Win32 + {4B2E2719-E661-45D7-9203-F6F456B22F19}.Debug|Win32.ActiveCfg = Debug|Win32 + {4B2E2719-E661-45D7-9203-F6F456B22F19}.Debug|Win32.Build.0 = Debug|Win32 + {4B2E2719-E661-45D7-9203-F6F456B22F19}.Release|Win32.ActiveCfg = Release|Win32 + {4B2E2719-E661-45D7-9203-F6F456B22F19}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/app/openvpn/sample/sample-keys/pkcs12.p12 b/app/openvpn/sample/sample-keys/pkcs12.p12 index 253d4081..8df2ccb5 100644 Binary files a/app/openvpn/sample/sample-keys/pkcs12.p12 and b/app/openvpn/sample/sample-keys/pkcs12.p12 differ diff --git a/app/openvpn/src/compat/compat.vcxproj b/app/openvpn/src/compat/compat.vcxproj index d872fa75..42979c11 100644 --- a/app/openvpn/src/compat/compat.vcxproj +++ b/app/openvpn/src/compat/compat.vcxproj @@ -1,87 +1,87 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - {4B2E2719-E661-45D7-9203-F6F456B22F19} - compat - Win32Proj - - - - StaticLibrary - MultiByte - true - - - StaticLibrary - MultiByte - - - - - - - - - - - - - <_ProjectFileVersion>10.0.30319.1 - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - - - - Disabled - $(SOURCEBASE);$(SOURCEBASE)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) - WIN32;_DEBUG;_LIB;$(CPPFLAGS);%(PreprocessorDefinitions) - true - EnableFastChecks - MultiThreadedDebugDLL - - - Level3 - EditAndContinue - - - - - MaxSpeed - true - $(SOURCEBASE);$(SOURCEBASE)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) - WIN32;NDEBUG;_LIB;$(CPPFLAGS);%(PreprocessorDefinitions) - MultiThreadedDLL - true - - - Level3 - ProgramDatabase - - - - - - - - - - - - - - - - + + + + + Debug + Win32 + + + Release + Win32 + + + + {4B2E2719-E661-45D7-9203-F6F456B22F19} + compat + Win32Proj + + + + StaticLibrary + MultiByte + true + + + StaticLibrary + MultiByte + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + + + + Disabled + $(SOURCEBASE);$(SOURCEBASE)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_LIB;$(CPPFLAGS);%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebugDLL + + + Level3 + EditAndContinue + + + + + MaxSpeed + true + $(SOURCEBASE);$(SOURCEBASE)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_LIB;$(CPPFLAGS);%(PreprocessorDefinitions) + MultiThreadedDLL + true + + + Level3 + ProgramDatabase + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/app/openvpn/src/compat/compat.vcxproj.filters b/app/openvpn/src/compat/compat.vcxproj.filters index 9576c512..00bb0ffa 100644 --- a/app/openvpn/src/compat/compat.vcxproj.filters +++ b/app/openvpn/src/compat/compat.vcxproj.filters @@ -1,42 +1,42 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hpp;hxx;hm;inl;inc;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav - - - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - - - Header Files - - + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + \ No newline at end of file diff --git a/app/openvpn/src/openvpn/openvpn.vcxproj b/app/openvpn/src/openvpn/openvpn.vcxproj index 452876fc..3b2340ee 100755 --- a/app/openvpn/src/openvpn/openvpn.vcxproj +++ b/app/openvpn/src/openvpn/openvpn.vcxproj @@ -1,263 +1,263 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - {29DF226E-4D4E-440F-ADAF-5829CFD4CA94} - openvpn - Win32Proj - - - - Application - true - Unicode - - - Application - Unicode - - - - - - - - - - - - - <_ProjectFileVersion>10.0.30319.1 - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - true - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - false - - - - Disabled - $(SOURCEBASE);$(SOURCEBASE)/src/compat;$(SOURCEBASE)/include;$(TAP_WINDOWS_HOME)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) - WIN32;_DEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) - true - EnableFastChecks - MultiThreadedDebugDLL - - - Level3 - EditAndContinue - UNICODE - - - $(SOURCEBASE);%(AdditionalIncludeDirectories) - - - libeay32.lib;ssleay32.lib;lzo2.lib;pkcs11-helper.dll.lib;gdi32.lib;ws2_32.lib;wininet.lib;crypt32.lib;iphlpapi.lib;winmm.lib;%(AdditionalDependencies) - $(OPENSSL_HOME)/lib;$(LZO_HOME)/lib;$(PKCS11H_HOME)/lib;%(AdditionalLibraryDirectories) - true - Console - MachineX86 - - - - - MaxSpeed - true - $(SOURCEBASE);$(SOURCEBASE)/src/compat;$(SOURCEBASE)/include;$(TAP_WINDOWS_HOME)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) - WIN32;NDEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) - MultiThreadedDLL - true - - - Level3 - ProgramDatabase - UNICODE - - - $(SOURCEBASE);%(AdditionalIncludeDirectories) - - - libeay32.lib;ssleay32.lib;lzo2.lib;pkcs11-helper.dll.lib;gdi32.lib;ws2_32.lib;wininet.lib;crypt32.lib;iphlpapi.lib;winmm.lib;%(AdditionalDependencies) - $(OPENSSL_HOME)/lib;$(LZO_HOME)/lib;$(PKCS11H_HOME)/lib;%(AdditionalLibraryDirectories) - true - Console - true - true - MachineX86 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {8598c2c8-34c4-47a1-99b0-7c295a890615} - false - - - {4b2e2719-e661-45d7-9203-f6f456b22f19} - false - - - - - + + + + + Debug + Win32 + + + Release + Win32 + + + + {29DF226E-4D4E-440F-ADAF-5829CFD4CA94} + openvpn + Win32Proj + + + + Application + true + Unicode + + + Application + Unicode + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + false + + + + Disabled + $(SOURCEBASE);$(SOURCEBASE)/src/compat;$(SOURCEBASE)/include;$(TAP_WINDOWS_HOME)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebugDLL + + + Level3 + EditAndContinue + UNICODE + + + $(SOURCEBASE);%(AdditionalIncludeDirectories) + + + libeay32.lib;ssleay32.lib;lzo2.lib;pkcs11-helper.dll.lib;gdi32.lib;ws2_32.lib;wininet.lib;crypt32.lib;iphlpapi.lib;winmm.lib;%(AdditionalDependencies) + $(OPENSSL_HOME)/lib;$(LZO_HOME)/lib;$(PKCS11H_HOME)/lib;%(AdditionalLibraryDirectories) + true + Console + MachineX86 + + + + + MaxSpeed + true + $(SOURCEBASE);$(SOURCEBASE)/src/compat;$(SOURCEBASE)/include;$(TAP_WINDOWS_HOME)/include;$(OPENSSL_HOME)/include;$(LZO_HOME)/include;$(PKCS11H_HOME)/include;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) + MultiThreadedDLL + true + + + Level3 + ProgramDatabase + UNICODE + + + $(SOURCEBASE);%(AdditionalIncludeDirectories) + + + libeay32.lib;ssleay32.lib;lzo2.lib;pkcs11-helper.dll.lib;gdi32.lib;ws2_32.lib;wininet.lib;crypt32.lib;iphlpapi.lib;winmm.lib;%(AdditionalDependencies) + $(OPENSSL_HOME)/lib;$(LZO_HOME)/lib;$(PKCS11H_HOME)/lib;%(AdditionalLibraryDirectories) + true + Console + true + true + MachineX86 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8598c2c8-34c4-47a1-99b0-7c295a890615} + false + + + {4b2e2719-e661-45d7-9203-f6f456b22f19} + false + + + + + \ No newline at end of file diff --git a/app/openvpn/src/openvpn/openvpn.vcxproj.filters b/app/openvpn/src/openvpn/openvpn.vcxproj.filters index ec5e676c..40336ba8 100644 --- a/app/openvpn/src/openvpn/openvpn.vcxproj.filters +++ b/app/openvpn/src/openvpn/openvpn.vcxproj.filters @@ -1,458 +1,458 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hpp;hxx;hm;inl;inc;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav - - - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - - - Resource Files - - + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Resource Files + + \ No newline at end of file diff --git a/app/openvpn/src/openvpnserv/openvpnserv.vcxproj b/app/openvpn/src/openvpnserv/openvpnserv.vcxproj index f2c00718..0b75ed01 100644 --- a/app/openvpn/src/openvpnserv/openvpnserv.vcxproj +++ b/app/openvpn/src/openvpnserv/openvpnserv.vcxproj @@ -1,112 +1,112 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD} - openvpnserv - Win32Proj - - - - Application - MultiByte - true - - - Application - MultiByte - - - - - - - - - - - - - <_ProjectFileVersion>10.0.30319.1 - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - true - $(SolutionDir)$(Platform)-Output\$(Configuration)\ - $(Configuration)\ - false - - - - Disabled - $(SOURCEBASE);%(AdditionalIncludeDirectories) - WIN32;_DEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) - true - EnableFastChecks - MultiThreadedDebugDLL - - - Level3 - EditAndContinue - - - $(SOURCEBASE);%(AdditionalIncludeDirectories) - - - true - Console - MachineX86 - - - - - MaxSpeed - true - $(SOURCEBASE);%(AdditionalIncludeDirectories) - WIN32;NDEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) - MultiThreadedDLL - true - - - Level3 - ProgramDatabase - - - $(SOURCEBASE);%(AdditionalIncludeDirectories) - - - true - Console - true - true - MachineX86 - - - - - - - - - - - - - - - {8598c2c8-34c4-47a1-99b0-7c295a890615} - false - - - - - + + + + + Debug + Win32 + + + Release + Win32 + + + + {9C91EE0B-817D-420A-A1E6-15A5A9D98BAD} + openvpnserv + Win32Proj + + + + Application + MultiByte + true + + + Application + MultiByte + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Platform)-Output\$(Configuration)\ + $(Configuration)\ + false + + + + Disabled + $(SOURCEBASE);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebugDLL + + + Level3 + EditAndContinue + + + $(SOURCEBASE);%(AdditionalIncludeDirectories) + + + true + Console + MachineX86 + + + + + MaxSpeed + true + $(SOURCEBASE);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;$(CPPFLAGS);%(PreprocessorDefinitions) + MultiThreadedDLL + true + + + Level3 + ProgramDatabase + + + $(SOURCEBASE);%(AdditionalIncludeDirectories) + + + true + Console + true + true + MachineX86 + + + + + + + + + + + + + + + {8598c2c8-34c4-47a1-99b0-7c295a890615} + false + + + + + \ No newline at end of file diff --git a/app/openvpn/src/openvpnserv/openvpnserv.vcxproj.filters b/app/openvpn/src/openvpnserv/openvpnserv.vcxproj.filters index a6f8ecc6..0c89b4f4 100644 --- a/app/openvpn/src/openvpnserv/openvpnserv.vcxproj.filters +++ b/app/openvpn/src/openvpnserv/openvpnserv.vcxproj.filters @@ -1,35 +1,35 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hpp;hxx;hm;inl;inc;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav - - - - - Source Files - - - Source Files - - - - - Header Files - - - - - Resource Files - - + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav + + + + + Source Files + + + Source Files + + + + + Header Files + + + + + Resource Files + + \ No newline at end of file -- cgit v1.2.3