Diffstat (limited to 'app/openssl')
 116 files changed, 2162 insertions(+), 19811 deletions(-)
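As the hunks below show, this diff moves the imported tree from openssl-1.0.1h back to openssl-1.0.1g, dropping (among other things) the ARMv8/arm64 assembly sources. The various *-config-*.mk files are auto-generated and must not be edited by hand; per their header comments, they are regenerated from openssl.config via the import script. A minimal sketch of that invocation (the tarball path is a placeholder, exactly as written in the source comments):

    # Regenerate the auto-generated build config files after editing openssl.config
    ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz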
diff --git a/app/openssl/Apps-config-host.mk b/app/openssl/Apps-config-host.mk index 5c1604e0..37dcb78b 100644 --- a/app/openssl/Apps-config-host.mk +++ b/app/openssl/Apps-config-host.mk @@ -1,6 +1,6 @@  # Auto-generated - DO NOT EDIT!  # To regenerate, edit openssl.config, then run: -#     ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +#     ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz  #  # This script will append to the following variables:  # diff --git a/app/openssl/Apps-config-target.mk b/app/openssl/Apps-config-target.mk index 0c567d4d..bccd250d 100644 --- a/app/openssl/Apps-config-target.mk +++ b/app/openssl/Apps-config-target.mk @@ -1,6 +1,6 @@  # Auto-generated - DO NOT EDIT!  # To regenerate, edit openssl.config, then run: -#     ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +#     ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz  #  # This script will append to the following variables:  # diff --git a/app/openssl/Apps.mk b/app/openssl/Apps.mk index b2d871c1..3fb94dbe 100644 --- a/app/openssl/Apps.mk +++ b/app/openssl/Apps.mk @@ -1,12 +1,9 @@  # Copyright 2006 The Android Open Source Project -LOCAL_PATH := $(call my-dir) +LOCAL_PATH:= $(call my-dir)  include $(CLEAR_VARS) -LOCAL_MODULE := openssl -LOCAL_MULTILIB := both -LOCAL_MODULE_STEM_32 := openssl -LOCAL_MODULE_STEM_64 := openssl64 +LOCAL_MODULE:= openssl  LOCAL_CLANG := true  LOCAL_MODULE_TAGS := optional  LOCAL_SHARED_LIBRARIES := libssl libcrypto @@ -16,7 +13,7 @@ LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/A  include $(BUILD_EXECUTABLE)  include $(CLEAR_VARS) -LOCAL_MODULE := openssl +LOCAL_MODULE:= openssl  LOCAL_MODULE_TAGS := optional  LOCAL_SHARED_LIBRARIES := libssl-host libcrypto-host  include $(LOCAL_PATH)/Apps-config-host.mk diff --git a/app/openssl/Crypto-config-host.mk b/app/openssl/Crypto-config-host.mk index 5b643792..a377fec4 100644 --- a/app/openssl/Crypto-config-host.mk +++ b/app/openssl/Crypto-config-host.mk @@ -1,6 +1,6 @@  # Auto-generated - DO NOT EDIT!  
# To regenerate, edit openssl.config, then run: -#     ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +#     ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz  #  # This script will append to the following variables:  # @@ -332,6 +332,7 @@ common_src_files := \    crypto/evp/m_md5.c \    crypto/evp/m_mdc2.c \    crypto/evp/m_null.c \ +  crypto/evp/m_ripemd.c \    crypto/evp/m_sha1.c \    crypto/evp/m_sigver.c \    crypto/evp/m_wp.c \ @@ -437,6 +438,8 @@ common_src_files := \    crypto/rc4/rc4_enc.c \    crypto/rc4/rc4_skey.c \    crypto/rc4/rc4_utl.c \ +  crypto/ripemd/rmd_dgst.c \ +  crypto/ripemd/rmd_one.c \    crypto/rsa/rsa_ameth.c \    crypto/rsa/rsa_asn1.c \    crypto/rsa/rsa_chk.c \ @@ -543,7 +546,6 @@ common_c_includes := \  arm_cflags := \    -DAES_ASM \    -DBSAES_ASM \ -  -DDES_UNROLL \    -DGHASH_ASM \    -DOPENSSL_BN_ASM_GF2m \    -DOPENSSL_BN_ASM_MONT \ @@ -554,14 +556,12 @@ arm_cflags := \  arm_src_files := \    crypto/aes/asm/aes-armv4.S \ -  crypto/aes/asm/aesv8-armx.S \    crypto/aes/asm/bsaes-armv7.S \    crypto/armcap.c \    crypto/armv4cpuid.S \    crypto/bn/asm/armv4-gf2m.S \    crypto/bn/asm/armv4-mont.S \    crypto/modes/asm/ghash-armv4.S \ -  crypto/modes/asm/ghashv8-armx.S \    crypto/sha/asm/sha1-armv4-large.S \    crypto/sha/asm/sha256-armv4.S \    crypto/sha/asm/sha512-armv4.S \ @@ -571,20 +571,9 @@ arm_exclude_files := \    crypto/mem_clr.c \  arm64_cflags := \ -  -DDES_UNROLL \ -  -DOPENSSL_CPUID_OBJ \ -  -DSHA1_ASM \ -  -DSHA256_ASM \ -  -DSHA512_ASM \ +  -DOPENSSL_NO_ASM \ -arm64_src_files := \ -  crypto/aes/asm/aesv8-armx-64.S \ -  crypto/arm64cpuid.S \ -  crypto/armcap.c \ -  crypto/modes/asm/ghashv8-armx-64.S \ -  crypto/sha/asm/sha1-armv8.S \ -  crypto/sha/asm/sha256-armv8.S \ -  crypto/sha/asm/sha512-armv8.S \ +arm64_src_files :=  arm64_exclude_files := @@ -600,8 +589,6 @@ x86_cflags := \    -DOPENSSL_BN_ASM_PART_WORDS \    -DOPENSSL_CPUID_OBJ \    -DOPENSSL_IA32_SSE2 \ -  -DRC4_INDEX \ -  -DRMD160_ASM \    -DSHA1_ASM \    -DSHA256_ASM \    -DSHA512_ASM \ @@ -637,6 +624,8 @@ x86_exclude_files := \  x86_64_cflags := \    -DAES_ASM \    -DBSAES_ASM \ +  -DDES_PTR \ +  -DDES_RISC1 \    -DDES_UNROLL \    -DGHASH_ASM \    -DMD5_ASM \ @@ -644,7 +633,6 @@ x86_64_cflags := \    -DOPENSSL_BN_ASM_MONT \    -DOPENSSL_BN_ASM_MONT5 \    -DOPENSSL_CPUID_OBJ \ -  -DOPENSSL_IA32_SSE2 \    -DSHA1_ASM \    -DSHA256_ASM \    -DSHA512_ASM \ diff --git a/app/openssl/Crypto-config-target.mk b/app/openssl/Crypto-config-target.mk index bd29dfe5..2c5b01e5 100644 --- a/app/openssl/Crypto-config-target.mk +++ b/app/openssl/Crypto-config-target.mk @@ -1,6 +1,6 @@  # Auto-generated - DO NOT EDIT!  
# To regenerate, edit openssl.config, then run: -#     ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +#     ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz  #  # This script will append to the following variables:  # @@ -332,6 +332,7 @@ common_src_files := \    crypto/evp/m_md5.c \    crypto/evp/m_mdc2.c \    crypto/evp/m_null.c \ +  crypto/evp/m_ripemd.c \    crypto/evp/m_sha1.c \    crypto/evp/m_sigver.c \    crypto/evp/m_wp.c \ @@ -437,6 +438,8 @@ common_src_files := \    crypto/rc4/rc4_enc.c \    crypto/rc4/rc4_skey.c \    crypto/rc4/rc4_utl.c \ +  crypto/ripemd/rmd_dgst.c \ +  crypto/ripemd/rmd_one.c \    crypto/rsa/rsa_ameth.c \    crypto/rsa/rsa_asn1.c \    crypto/rsa/rsa_chk.c \ @@ -543,7 +546,6 @@ common_c_includes := \  arm_cflags := \    -DAES_ASM \    -DBSAES_ASM \ -  -DDES_UNROLL \    -DGHASH_ASM \    -DOPENSSL_BN_ASM_GF2m \    -DOPENSSL_BN_ASM_MONT \ @@ -554,14 +556,12 @@ arm_cflags := \  arm_src_files := \    crypto/aes/asm/aes-armv4.S \ -  crypto/aes/asm/aesv8-armx.S \    crypto/aes/asm/bsaes-armv7.S \    crypto/armcap.c \    crypto/armv4cpuid.S \    crypto/bn/asm/armv4-gf2m.S \    crypto/bn/asm/armv4-mont.S \    crypto/modes/asm/ghash-armv4.S \ -  crypto/modes/asm/ghashv8-armx.S \    crypto/sha/asm/sha1-armv4-large.S \    crypto/sha/asm/sha256-armv4.S \    crypto/sha/asm/sha512-armv4.S \ @@ -571,20 +571,9 @@ arm_exclude_files := \    crypto/mem_clr.c \  arm64_cflags := \ -  -DDES_UNROLL \ -  -DOPENSSL_CPUID_OBJ \ -  -DSHA1_ASM \ -  -DSHA256_ASM \ -  -DSHA512_ASM \ +  -DOPENSSL_NO_ASM \ -arm64_src_files := \ -  crypto/aes/asm/aesv8-armx-64.S \ -  crypto/arm64cpuid.S \ -  crypto/armcap.c \ -  crypto/modes/asm/ghashv8-armx-64.S \ -  crypto/sha/asm/sha1-armv8.S \ -  crypto/sha/asm/sha256-armv8.S \ -  crypto/sha/asm/sha512-armv8.S \ +arm64_src_files :=  arm64_exclude_files := @@ -600,8 +589,6 @@ x86_cflags := \    -DOPENSSL_BN_ASM_PART_WORDS \    -DOPENSSL_CPUID_OBJ \    -DOPENSSL_IA32_SSE2 \ -  -DRC4_INDEX \ -  -DRMD160_ASM \    -DSHA1_ASM \    -DSHA256_ASM \    -DSHA512_ASM \ @@ -637,6 +624,8 @@ x86_exclude_files := \  x86_64_cflags := \    -DAES_ASM \    -DBSAES_ASM \ +  -DDES_PTR \ +  -DDES_RISC1 \    -DDES_UNROLL \    -DGHASH_ASM \    -DMD5_ASM \ @@ -644,7 +633,6 @@ x86_64_cflags := \    -DOPENSSL_BN_ASM_MONT \    -DOPENSSL_BN_ASM_MONT5 \    -DOPENSSL_CPUID_OBJ \ -  -DOPENSSL_IA32_SSE2 \    -DSHA1_ASM \    -DSHA256_ASM \    -DSHA512_ASM \ diff --git a/app/openssl/Crypto-config-trusty.mk b/app/openssl/Crypto-config-trusty.mk index 59915986..dc5b12c2 100644 --- a/app/openssl/Crypto-config-trusty.mk +++ b/app/openssl/Crypto-config-trusty.mk @@ -1,6 +1,6 @@  # Auto-generated - DO NOT EDIT!  # To regenerate, edit openssl.config, then run: -#     ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +#     ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz  #  # This script will append to the following variables:  # diff --git a/app/openssl/Crypto.mk b/app/openssl/Crypto.mk index 6565f97c..4214b91e 100644 --- a/app/openssl/Crypto.mk +++ b/app/openssl/Crypto.mk @@ -9,7 +9,7 @@ LOCAL_SHARED_LIBRARIES := $(log_shared_libraries)  LOCAL_SDK_VERSION := 9  LOCAL_MODULE_TAGS := optional -LOCAL_MODULE := libcrypto_static +LOCAL_MODULE:= libcrypto_static  LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk  include $(LOCAL_PATH)/Crypto-config-target.mk  include $(LOCAL_PATH)/android-config.mk @@ -31,7 +31,7 @@ LOCAL_SHARED_LIBRARIES := $(log_shared_libraries)  # in the NDK.  
ifeq (,$(TARGET_BUILD_APPS))  LOCAL_CLANG := true -ifeq ($(HOST_OS), darwin_does_not_wrok) +ifeq ($(HOST_OS), darwin_XXX)  LOCAL_ASFLAGS += -no-integrated-as  LOCAL_CFLAGS += -no-integrated-as  endif @@ -41,7 +41,7 @@ endif  LOCAL_LDFLAGS += -ldl  LOCAL_MODULE_TAGS := optional -LOCAL_MODULE := libcrypto +LOCAL_MODULE:= libcrypto  LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk  include $(LOCAL_PATH)/Crypto-config-target.mk  include $(LOCAL_PATH)/android-config.mk @@ -50,16 +50,16 @@ include $(BUILD_SHARED_LIBRARY)  #######################################  # host shared library -#include $(CLEAR_VARS) -#LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) -#LOCAL_CFLAGS += -DPURIFY -#LOCAL_LDLIBS += -ldl -#LOCAL_MODULE_TAGS := optional -#LOCAL_MODULE := libcrypto-host -#LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk -#include $(LOCAL_PATH)/Crypto-config-host.mk -#include $(LOCAL_PATH)/android-config.mk -#include $(BUILD_HOST_SHARED_LIBRARY) +# include $(CLEAR_VARS) +# LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) +# LOCAL_CFLAGS += -DPURIFY +# LOCAL_LDLIBS += -ldl +# LOCAL_MODULE_TAGS := optional +# LOCAL_MODULE:= libcrypto-host +# LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk +# include $(LOCAL_PATH)/Crypto-config-host.mk +# include $(LOCAL_PATH)/android-config.mk +# include $(BUILD_HOST_SHARED_LIBRARY)  ########################################  # host static library, which is used by some SDK tools. @@ -69,9 +69,8 @@ include $(BUILD_SHARED_LIBRARY)  # LOCAL_CFLAGS += -DPURIFY  # LOCAL_LDLIBS += -ldl  # LOCAL_MODULE_TAGS := optional -# LOCAL_MODULE := libcrypto_static +# LOCAL_MODULE:= libcrypto_static  # LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk  # include $(LOCAL_PATH)/Crypto-config-host.mk  # include $(LOCAL_PATH)/android-config.mk  # include $(BUILD_HOST_STATIC_LIBRARY) - diff --git a/app/openssl/Ssl-config-host.mk b/app/openssl/Ssl-config-host.mk index 57ea3775..95035487 100644 --- a/app/openssl/Ssl-config-host.mk +++ b/app/openssl/Ssl-config-host.mk @@ -1,6 +1,6 @@  # Auto-generated - DO NOT EDIT!  # To regenerate, edit openssl.config, then run: -#     ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +#     ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz  #  # This script will append to the following variables:  # diff --git a/app/openssl/Ssl-config-target.mk b/app/openssl/Ssl-config-target.mk index c08a971d..32439d3f 100644 --- a/app/openssl/Ssl-config-target.mk +++ b/app/openssl/Ssl-config-target.mk @@ -1,6 +1,6 @@  # Auto-generated - DO NOT EDIT!  
# To regenerate, edit openssl.config, then run: -#     ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +#     ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz  #  # This script will append to the following variables:  # diff --git a/app/openssl/Ssl.mk b/app/openssl/Ssl.mk index 6c04950a..8ce82d9b 100644 --- a/app/openssl/Ssl.mk +++ b/app/openssl/Ssl.mk @@ -12,7 +12,7 @@ LOCAL_CFLAGS += $(target_c_flags)  LOCAL_C_INCLUDES += $(target_c_includes)  LOCAL_SHARED_LIBRARIES = $(log_shared_libraries)  LOCAL_MODULE_TAGS := optional -LOCAL_MODULE := libssl_static +LOCAL_MODULE:= libssl_static  LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk  include $(LOCAL_PATH)/Ssl-config-target.mk  include $(LOCAL_PATH)/android-config.mk @@ -35,20 +35,19 @@ endif  LOCAL_SHARED_LIBRARIES += libcrypto $(log_shared_libraries)  LOCAL_MODULE_TAGS := optional -LOCAL_MODULE := libssl +LOCAL_MODULE:= libssl  LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk  include $(LOCAL_PATH)/Ssl-config-target.mk  include $(LOCAL_PATH)/android-config.mk  include $(LOCAL_PATH)/ndk-build.mk  include $(BUILD_SHARED_LIBRARY) -  # #######################################  # # host shared library  # include $(CLEAR_VARS)  # LOCAL_SHARED_LIBRARIES += libcrypto-host $(log_shared_libraries)  # LOCAL_MODULE_TAGS := optional -# LOCAL_MODULE := libssl-host +# LOCAL_MODULE:= libssl-host  # LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk  # include $(LOCAL_PATH)/Ssl-config-host.mk  # include $(LOCAL_PATH)/android-config.mk @@ -57,12 +56,9 @@ include $(BUILD_SHARED_LIBRARY)  # #######################################  # # ssltest  # include $(CLEAR_VARS) -# LOCAL_SRC_FILES := ssl/ssltest.c +# LOCAL_SRC_FILES:= ssl/ssltest.c  # LOCAL_SHARED_LIBRARIES := libssl libcrypto $(log_shared_libraries) -# LOCAL_MODULE := ssltest -# LOCAL_MULTILIB := both -# LOCAL_MODULE_STEM_32 := ssltest -# LOCAL_MODULE_STEM_64 := ssltest64 +# LOCAL_MODULE:= ssltest  # LOCAL_MODULE_TAGS := optional  # LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk  # include $(LOCAL_PATH)/Ssl-config-host.mk diff --git a/app/openssl/apps/enc.c b/app/openssl/apps/enc.c index 19ea3df9..719acc32 100644 --- a/app/openssl/apps/enc.c +++ b/app/openssl/apps/enc.c @@ -331,12 +331,6 @@ bad:          setup_engine(bio_err, engine, 0);  #endif -	if (cipher && EVP_CIPHER_flags(cipher) & EVP_CIPH_FLAG_AEAD_CIPHER) -		{ -		BIO_printf(bio_err, "AEAD ciphers not supported by the enc utility\n"); -		goto end; -		} -  	if (md && (dgst=EVP_get_digestbyname(md)) == NULL)  		{  		BIO_printf(bio_err,"%s is an unsupported message digest type\n",md); diff --git a/app/openssl/apps/ocsp.c b/app/openssl/apps/ocsp.c index 767f12c6..83c5a767 100644 --- a/app/openssl/apps/ocsp.c +++ b/app/openssl/apps/ocsp.c @@ -127,7 +127,6 @@ int MAIN(int argc, char **argv)  	ENGINE *e = NULL;  	char **args;  	char *host = NULL, *port = NULL, *path = "/"; -	char *thost = NULL, *tport = NULL, *tpath = NULL;  	char *reqin = NULL, *respin = NULL;  	char *reqout = NULL, *respout = NULL;  	char *signfile = NULL, *keyfile = NULL; @@ -205,12 +204,6 @@ int MAIN(int argc, char **argv)  			}  		else if (!strcmp(*args, "-url"))  			{ -			if (thost) -				OPENSSL_free(thost); -			if (tport) -				OPENSSL_free(tport); -			if (tpath) -				OPENSSL_free(tpath);  			if (args[1])  				{  				args++; @@ -219,9 +212,6 @@ int MAIN(int argc, char **argv)  					BIO_printf(bio_err, "Error parsing 
URL\n");  					badarg = 1;  					} -				thost = host; -				tport = port; -				tpath = path;  				}  			else badarg = 1;  			} @@ -930,12 +920,12 @@ end:  	sk_X509_pop_free(verify_other, X509_free);  	sk_CONF_VALUE_pop_free(headers, X509V3_conf_free); -	if (thost) -		OPENSSL_free(thost); -	if (tport) -		OPENSSL_free(tport); -	if (tpath) -		OPENSSL_free(tpath); +	if (use_ssl != -1) +		{ +		OPENSSL_free(host); +		OPENSSL_free(port); +		OPENSSL_free(path); +		}  	OPENSSL_EXIT(ret);  } diff --git a/app/openssl/apps/req.c b/app/openssl/apps/req.c index d41385d7..5e034a85 100644 --- a/app/openssl/apps/req.c +++ b/app/openssl/apps/req.c @@ -1489,13 +1489,7 @@ start:  #ifdef CHARSET_EBCDIC  	ebcdic2ascii(buf, buf, i);  #endif -	if(!req_check_len(i, n_min, n_max)) -		{ -		if (batch || value) -			return 0; -		goto start; -		} - +	if(!req_check_len(i, n_min, n_max)) goto start;  	if (!X509_NAME_add_entry_by_NID(n,nid, chtype,  				(unsigned char *) buf, -1,-1,mval)) goto err;  	ret=1; @@ -1554,12 +1548,7 @@ start:  #ifdef CHARSET_EBCDIC  	ebcdic2ascii(buf, buf, i);  #endif -	if(!req_check_len(i, n_min, n_max)) -		{ -		if (batch || value) -			return 0; -		goto start; -		} +	if(!req_check_len(i, n_min, n_max)) goto start;  	if(!X509_REQ_add1_attr_by_NID(req, nid, chtype,  					(unsigned char *)buf, -1)) { diff --git a/app/openssl/apps/s_cb.c b/app/openssl/apps/s_cb.c index 146a9607..84c3b447 100644 --- a/app/openssl/apps/s_cb.c +++ b/app/openssl/apps/s_cb.c @@ -747,10 +747,6 @@ void MS_CALLBACK tlsext_cb(SSL *s, int client_server, int type,  		break;  #endif -		case TLSEXT_TYPE_padding: -		extname = "TLS padding"; -		break; -  		default:  		extname = "unknown";  		break; diff --git a/app/openssl/apps/s_socket.c b/app/openssl/apps/s_socket.c index 94eb40f3..380efdb1 100644 --- a/app/openssl/apps/s_socket.c +++ b/app/openssl/apps/s_socket.c @@ -274,7 +274,7 @@ static int init_client_ip(int *sock, unsigned char ip[4], int port, int type)  		{  		i=0;  		i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i)); -		if (i < 0) { closesocket(s); perror("keepalive"); return(0); } +		if (i < 0) { perror("keepalive"); return(0); }  		}  #endif @@ -450,7 +450,6 @@ redoit:  		if ((*host=(char *)OPENSSL_malloc(strlen(h1->h_name)+1)) == NULL)  			{  			perror("OPENSSL_malloc"); -			closesocket(ret);  			return(0);  			}  		BUF_strlcpy(*host,h1->h_name,strlen(h1->h_name)+1); @@ -459,13 +458,11 @@ redoit:  		if (h2 == NULL)  			{  			BIO_printf(bio_err,"gethostbyname failure\n"); -			closesocket(ret);  			return(0);  			}  		if (h2->h_addrtype != AF_INET)  			{  			BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n"); -			closesocket(ret);  			return(0);  			}  		} diff --git a/app/openssl/apps/smime.c b/app/openssl/apps/smime.c index d1fe32d3..c583f8a0 100644 --- a/app/openssl/apps/smime.c +++ b/app/openssl/apps/smime.c @@ -541,8 +541,8 @@ int MAIN(int argc, char **argv)  		{  		if (!cipher)  			{ -#ifndef OPENSSL_NO_DES			 -			cipher = EVP_des_ede3_cbc(); +#ifndef OPENSSL_NO_RC2			 +			cipher = EVP_rc2_40_cbc();  #else  			BIO_printf(bio_err, "No cipher selected\n");  			goto end; diff --git a/app/openssl/build-config-32.mk b/app/openssl/build-config-32.mk index d035f1e4..4f7484b9 100644 --- a/app/openssl/build-config-32.mk +++ b/app/openssl/build-config-32.mk @@ -1,6 +1,6 @@  # Auto-generated - DO NOT EDIT!  
# To regenerate, edit openssl.config, then run: -#     ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +#     ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz  #  openssl_cflags_32 := \    -DOPENSSL_THREADS \ @@ -24,7 +24,6 @@ openssl_cflags_32 := \    -DOPENSSL_NO_RC5 \    -DOPENSSL_NO_RDRAND \    -DOPENSSL_NO_RFC3779 \ -  -DOPENSSL_NO_RIPEMD \    -DOPENSSL_NO_RSAX \    -DOPENSSL_NO_SCTP \    -DOPENSSL_NO_SEED \ @@ -53,7 +52,6 @@ openssl_cflags_static_32 := \    -DOPENSSL_NO_RC5 \    -DOPENSSL_NO_RDRAND \    -DOPENSSL_NO_RFC3779 \ -  -DOPENSSL_NO_RIPEMD \    -DOPENSSL_NO_RSAX \    -DOPENSSL_NO_SCTP \    -DOPENSSL_NO_SEED \ diff --git a/app/openssl/build-config-64.mk b/app/openssl/build-config-64.mk index 45a8141d..c0e6f6de 100644 --- a/app/openssl/build-config-64.mk +++ b/app/openssl/build-config-64.mk @@ -1,6 +1,6 @@  # Auto-generated - DO NOT EDIT!  # To regenerate, edit openssl.config, then run: -#     ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +#     ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz  #  openssl_cflags_64 := \    -DOPENSSL_THREADS \ @@ -24,7 +24,6 @@ openssl_cflags_64 := \    -DOPENSSL_NO_RC5 \    -DOPENSSL_NO_RDRAND \    -DOPENSSL_NO_RFC3779 \ -  -DOPENSSL_NO_RIPEMD \    -DOPENSSL_NO_RSAX \    -DOPENSSL_NO_SCTP \    -DOPENSSL_NO_SEED \ @@ -53,7 +52,6 @@ openssl_cflags_static_64 := \    -DOPENSSL_NO_RC5 \    -DOPENSSL_NO_RDRAND \    -DOPENSSL_NO_RFC3779 \ -  -DOPENSSL_NO_RIPEMD \    -DOPENSSL_NO_RSAX \    -DOPENSSL_NO_SCTP \    -DOPENSSL_NO_SEED \ diff --git a/app/openssl/build-config-trusty.mk b/app/openssl/build-config-trusty.mk index 4d6fb58c..e5809a3b 100644 --- a/app/openssl/build-config-trusty.mk +++ b/app/openssl/build-config-trusty.mk @@ -1,6 +1,6 @@  # Auto-generated - DO NOT EDIT!  # To regenerate, edit openssl.config, then run: -#     ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz +#     ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz  #  openssl_cflags_trusty := \    -DL_ENDIAN \ diff --git a/app/openssl/check-all-builds.sh b/app/openssl/check-all-builds.sh index 9743872a..cff2ba5d 100755 --- a/app/openssl/check-all-builds.sh +++ b/app/openssl/check-all-builds.sh @@ -143,7 +143,7 @@ esac  # NOTE: x86_64 is not ready yet, while the toolchain is in  # prebuilts/ it doesn't have a sysroot which means it requires  # a platform build to get Bionic and stuff. -ANDROID_ARCHS="arm arm64 x86 x86_64 mips" +ANDROID_ARCHS="arm x86 mips"  BUILD_TYPES=  for ARCH in $ANDROID_ARCHS; do @@ -311,14 +311,11 @@ get_build_arch () {  # Out: GNU configuration target (e.g. arm-linux-androideabi)  get_build_arch_target () {    case $1 in -    arm64) -      echo "aarch64-linux-android" -      ;;      arm)        echo "arm-linux-androideabi"        ;;      x86) -      echo "x86_64-linux-android" +      echo "i686-linux-android"        ;;      x86_64)        echo "x86_64-linux-android" @@ -332,8 +329,8 @@ get_build_arch_target () {    esac  } -GCC_VERSION=4.8 -CLANG_VERSION=3.2 +GCC_VERSION=4.7 +CLANG_VERSION=3.1  get_prebuilt_gcc_dir_for_arch () {    local arch=$1 @@ -344,9 +341,6 @@ get_prebuilt_gcc_dir_for_arch () {      x86_64)          arch=x86          ;; -    arm64) -        arch=aarch64 -        ;;    esac    echo "$ANDROID_BUILD_TOP/prebuilts/gcc/$ANDROID_HOST_TAG/$arch/$target-$GCC_VERSION"  } @@ -403,7 +397,7 @@ get_build_compiler () {    # Force -m32 flag when needed for 32-bit builds.    
case $1 in -    *-x86|*-generic32) +    *-linux-x86|*-darwin-x86|*-generic32)        result="$result -m32"        ;;    esac diff --git a/app/openssl/crypto/aes/asm/aes-armv4.pl b/app/openssl/crypto/aes/asm/aes-armv4.pl index 4f891708..86b86c4a 100644 --- a/app/openssl/crypto/aes/asm/aes-armv4.pl +++ b/app/openssl/crypto/aes/asm/aes-armv4.pl @@ -1,7 +1,7 @@  #!/usr/bin/env perl  # ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL  # project. The module is, however, dual licensed under OpenSSL and  # CRYPTOGAMS licenses depending on where you obtain it. For further  # details see http://www.openssl.org/~appro/cryptogams/. @@ -51,23 +51,9 @@ $key="r11";  $rounds="r12";  $code=<<___; -#ifndef __KERNEL__ -# include "arm_arch.h" -#else -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -#endif - +#include "arm_arch.h"  .text -#if __ARM_ARCH__<7 -.code	32 -#else -.syntax	unified -# ifdef __thumb2__ -.thumb -# else  .code	32 -# endif -#endif  .type	AES_Te,%object  .align	5 @@ -181,11 +167,7 @@ AES_Te:  .type   AES_encrypt,%function  .align	5  AES_encrypt: -#if __ARM_ARCH__<7  	sub	r3,pc,#8		@ AES_encrypt -#else -	adr	r3,AES_encrypt -#endif  	stmdb   sp!,{r1,r4-r12,lr}  	mov	$rounds,r0		@ inp  	mov	$key,r2 @@ -427,21 +409,11 @@ _armv4_AES_encrypt:  .align	5  private_AES_set_encrypt_key:  _armv4_AES_set_encrypt_key: -#if __ARM_ARCH__<7  	sub	r3,pc,#8		@ AES_set_encrypt_key -#else -	adr	r3,private_AES_set_encrypt_key -#endif  	teq	r0,#0 -#if __ARM_ARCH__>=7 -	itt	eq			@ Thumb2 thing, sanity check in ARM -#endif  	moveq	r0,#-1  	beq	.Labrt  	teq	r2,#0 -#if __ARM_ARCH__>=7 -	itt	eq			@ Thumb2 thing, sanity check in ARM -#endif  	moveq	r0,#-1  	beq	.Labrt @@ -450,9 +422,6 @@ _armv4_AES_set_encrypt_key:  	teq	r1,#192  	beq	.Lok  	teq	r1,#256 -#if __ARM_ARCH__>=7 -	itt	ne			@ Thumb2 thing, sanity check in ARM -#endif  	movne	r0,#-1  	bne	.Labrt @@ -607,9 +576,6 @@ _armv4_AES_set_encrypt_key:  	str	$s2,[$key,#-16]  	subs	$rounds,$rounds,#1  	str	$s3,[$key,#-12] -#if __ARM_ARCH__>=7 -	itt	eq				@ Thumb2 thing, sanity check in ARM -#endif  	subeq	r2,$key,#216  	beq	.Ldone @@ -679,9 +645,6 @@ _armv4_AES_set_encrypt_key:  	str	$s2,[$key,#-24]  	subs	$rounds,$rounds,#1  	str	$s3,[$key,#-20] -#if __ARM_ARCH__>=7 -	itt	eq				@ Thumb2 thing, sanity check in ARM -#endif  	subeq	r2,$key,#256  	beq	.Ldone @@ -711,17 +674,11 @@ _armv4_AES_set_encrypt_key:  	str	$i3,[$key,#-4]  	b	.L256_loop -.align	2  .Ldone:	mov	r0,#0  	ldmia   sp!,{r4-r12,lr} -.Labrt: -#if __ARM_ARCH__>=5 -	ret				@ bx lr -#else -	tst	lr,#1 +.Labrt:	tst	lr,#1  	moveq	pc,lr			@ be binary compatible with V4, yet  	bx	lr			@ interoperable with Thumb ISA:-) -#endif  .size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key  .global private_AES_set_decrypt_key @@ -731,57 +688,34 @@ private_AES_set_decrypt_key:  	str	lr,[sp,#-4]!            
@ push lr  	bl	_armv4_AES_set_encrypt_key  	teq	r0,#0 -	ldr	lr,[sp],#4              @ pop lr +	ldrne	lr,[sp],#4              @ pop lr  	bne	.Labrt -	mov	r0,r2			@ AES_set_encrypt_key preserves r2, -	mov	r1,r2			@ which is AES_KEY *key -	b	_armv4_AES_set_enc2dec_key -.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +	stmdb   sp!,{r4-r12} -@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) -.global	AES_set_enc2dec_key -.type	AES_set_enc2dec_key,%function -.align	5 -AES_set_enc2dec_key: -_armv4_AES_set_enc2dec_key: -	stmdb   sp!,{r4-r12,lr} - -	ldr	$rounds,[r0,#240] -	mov	$i1,r0			@ input -	add	$i2,r0,$rounds,lsl#4 -	mov	$key,r1			@ ouput -	add	$tbl,r1,$rounds,lsl#4 -	str	$rounds,[r1,#240] - -.Linv:	ldr	$s0,[$i1],#16 -	ldr	$s1,[$i1,#-12] -	ldr	$s2,[$i1,#-8] -	ldr	$s3,[$i1,#-4] -	ldr	$t1,[$i2],#-16 -	ldr	$t2,[$i2,#16+4] -	ldr	$t3,[$i2,#16+8] -	ldr	$i3,[$i2,#16+12] -	str	$s0,[$tbl],#-16 -	str	$s1,[$tbl,#16+4] -	str	$s2,[$tbl,#16+8] -	str	$s3,[$tbl,#16+12] -	str	$t1,[$key],#16 -	str	$t2,[$key,#-12] -	str	$t3,[$key,#-8] -	str	$i3,[$key,#-4] -	teq	$i1,$i2 -	bne	.Linv +	ldr	$rounds,[r2,#240]	@ AES_set_encrypt_key preserves r2, +	mov	$key,r2			@ which is AES_KEY *key +	mov	$i1,r2 +	add	$i2,r2,$rounds,lsl#4 -	ldr	$s0,[$i1] +.Linv:	ldr	$s0,[$i1]  	ldr	$s1,[$i1,#4]  	ldr	$s2,[$i1,#8]  	ldr	$s3,[$i1,#12] -	str	$s0,[$key] -	str	$s1,[$key,#4] -	str	$s2,[$key,#8] -	str	$s3,[$key,#12] -	sub	$key,$key,$rounds,lsl#3 +	ldr	$t1,[$i2] +	ldr	$t2,[$i2,#4] +	ldr	$t3,[$i2,#8] +	ldr	$i3,[$i2,#12] +	str	$s0,[$i2],#-16 +	str	$s1,[$i2,#16+4] +	str	$s2,[$i2,#16+8] +	str	$s3,[$i2,#16+12] +	str	$t1,[$i1],#16 +	str	$t2,[$i1,#-12] +	str	$t3,[$i1,#-8] +	str	$i3,[$i1,#-4] +	teq	$i1,$i2 +	bne	.Linv  ___  $mask80=$i1;  $mask1b=$i2; @@ -839,7 +773,7 @@ $code.=<<___;  	moveq	pc,lr			@ be binary compatible with V4, yet  	bx	lr			@ interoperable with Thumb ISA:-)  #endif -.size	AES_set_enc2dec_key,.-AES_set_enc2dec_key +.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key  .type	AES_Td,%object  .align	5 @@ -949,11 +883,7 @@ AES_Td:  .type   AES_decrypt,%function  .align	5  AES_decrypt: -#if __ARM_ARCH__<7  	sub	r3,pc,#8		@ AES_decrypt -#else -	adr	r3,AES_decrypt -#endif  	stmdb   sp!,{r1,r4-r12,lr}  	mov	$rounds,r0		@ inp  	mov	$key,r2 @@ -1150,9 +1080,8 @@ _armv4_AES_decrypt:  	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]  	and	$i3,lr,$s1,lsr#8 -	add	$s1,$tbl,$s1,lsr#24  	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0] -	ldrb	$s1,[$s1]		@ Td4[s1>>24] +	ldrb	$s1,[$tbl,$s1,lsr#24]	@ Td4[s1>>24]  	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]  	eor	$s0,$i1,$s0,lsl#24  	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8] @@ -1165,8 +1094,7 @@ _armv4_AES_decrypt:  	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]  	and	$i3,lr,$s2,lsr#16 -	add	$s2,$tbl,$s2,lsr#24 -	ldrb	$s2,[$s2]		@ Td4[s2>>24] +	ldrb	$s2,[$tbl,$s2,lsr#24]	@ Td4[s2>>24]  	eor	$s0,$s0,$i1,lsl#8  	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]  	eor	$s1,$i2,$s1,lsl#16 @@ -1178,9 +1106,8 @@ _armv4_AES_decrypt:  	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]  	and	$i3,lr,$s3		@ i2 -	add	$s3,$tbl,$s3,lsr#24  	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0] -	ldrb	$s3,[$s3]		@ Td4[s3>>24] +	ldrb	$s3,[$tbl,$s3,lsr#24]	@ Td4[s3>>24]  	eor	$s0,$s0,$i1,lsl#16  	ldr	$i1,[$key,#0]  	eor	$s1,$s1,$i2,lsl#8 @@ -1203,15 +1130,5 @@ _armv4_AES_decrypt:  ___  $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4 -$code =~ s/\bret\b/bx\tlr/gm; - -open SELF,$0; -while(<SELF>) { -	next if (/^#!/); -	last if (!s/^#/@/ and !/^$/); -	print; -} -close SELF; -  print $code;  close STDOUT;	# enforce flush diff --git 
a/app/openssl/crypto/aes/asm/aes-armv4.s b/app/openssl/crypto/aes/asm/aes-armv4.s index 333a5227..2697d4ce 100644 --- a/app/openssl/crypto/aes/asm/aes-armv4.s +++ b/app/openssl/crypto/aes/asm/aes-armv4.s @@ -1,53 +1,6 @@ - -@ ==================================================================== -@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see http://www.openssl.org/~appro/cryptogams/. -@ ==================================================================== - -@ AES for ARMv4 - -@ January 2007. -@ -@ Code uses single 1K S-box and is >2 times faster than code generated -@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which -@ allows to merge logical or arithmetic operation with shift or rotate -@ in one instruction and emit combined result every cycle. The module -@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit -@ key [on single-issue Xscale PXA250 core]. - -@ May 2007. -@ -@ AES_set_[en|de]crypt_key is added. - -@ July 2010. -@ -@ Rescheduling for dual-issue pipeline resulted in 12% improvement on -@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. - -@ February 2011. -@ -@ Profiler-assisted and platform-specific optimization resulted in 16% -@ improvement on Cortex A8 core and ~21.5 cycles per byte. - -#ifndef __KERNEL__ -# include "arm_arch.h" -#else -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -#endif - +#include "arm_arch.h"  .text -#if __ARM_ARCH__<7 -.code	32 -#else -.syntax	unified -# ifdef __thumb2__ -.thumb -# else  .code	32 -# endif -#endif  .type	AES_Te,%object  .align	5 @@ -161,11 +114,7 @@ AES_Te:  .type   AES_encrypt,%function  .align	5  AES_encrypt: -#if __ARM_ARCH__<7  	sub	r3,pc,#8		@ AES_encrypt -#else -	adr	r3,AES_encrypt -#endif  	stmdb   sp!,{r1,r4-r12,lr}  	mov	r12,r0		@ inp  	mov	r11,r2 @@ -407,21 +356,11 @@ _armv4_AES_encrypt:  .align	5  private_AES_set_encrypt_key:  _armv4_AES_set_encrypt_key: -#if __ARM_ARCH__<7  	sub	r3,pc,#8		@ AES_set_encrypt_key -#else -	adr	r3,private_AES_set_encrypt_key -#endif  	teq	r0,#0 -#if __ARM_ARCH__>=7 -	itt	eq			@ Thumb2 thing, sanity check in ARM -#endif  	moveq	r0,#-1  	beq	.Labrt  	teq	r2,#0 -#if __ARM_ARCH__>=7 -	itt	eq			@ Thumb2 thing, sanity check in ARM -#endif  	moveq	r0,#-1  	beq	.Labrt @@ -430,9 +369,6 @@ _armv4_AES_set_encrypt_key:  	teq	r1,#192  	beq	.Lok  	teq	r1,#256 -#if __ARM_ARCH__>=7 -	itt	ne			@ Thumb2 thing, sanity check in ARM -#endif  	movne	r0,#-1  	bne	.Labrt @@ -587,9 +523,6 @@ _armv4_AES_set_encrypt_key:  	str	r2,[r11,#-16]  	subs	r12,r12,#1  	str	r3,[r11,#-12] -#if __ARM_ARCH__>=7 -	itt	eq				@ Thumb2 thing, sanity check in ARM -#endif  	subeq	r2,r11,#216  	beq	.Ldone @@ -659,9 +592,6 @@ _armv4_AES_set_encrypt_key:  	str	r2,[r11,#-24]  	subs	r12,r12,#1  	str	r3,[r11,#-20] -#if __ARM_ARCH__>=7 -	itt	eq				@ Thumb2 thing, sanity check in ARM -#endif  	subeq	r2,r11,#256  	beq	.Ldone @@ -691,17 +621,11 @@ _armv4_AES_set_encrypt_key:  	str	r9,[r11,#-4]  	b	.L256_loop -.align	2  .Ldone:	mov	r0,#0  	ldmia   sp!,{r4-r12,lr} -.Labrt: -#if __ARM_ARCH__>=5 -	bx	lr				@ .word	0xe12fff1e -#else -	tst	lr,#1 +.Labrt:	tst	lr,#1  	moveq	pc,lr			@ be binary compatible with V4, yet  	.word	0xe12fff1e			@ interoperable with Thumb ISA:-) -#endif  .size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key  .global private_AES_set_decrypt_key @@ -711,57 +635,34 @@ private_AES_set_decrypt_key:  	str	lr,[sp,#-4]!   
         @ push lr  	bl	_armv4_AES_set_encrypt_key  	teq	r0,#0 -	ldr	lr,[sp],#4              @ pop lr +	ldrne	lr,[sp],#4              @ pop lr  	bne	.Labrt -	mov	r0,r2			@ AES_set_encrypt_key preserves r2, -	mov	r1,r2			@ which is AES_KEY *key -	b	_armv4_AES_set_enc2dec_key -.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key - -@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) -.global	AES_set_enc2dec_key -.type	AES_set_enc2dec_key,%function -.align	5 -AES_set_enc2dec_key: -_armv4_AES_set_enc2dec_key: -	stmdb   sp!,{r4-r12,lr} - -	ldr	r12,[r0,#240] -	mov	r7,r0			@ input -	add	r8,r0,r12,lsl#4 -	mov	r11,r1			@ ouput -	add	r10,r1,r12,lsl#4 -	str	r12,[r1,#240] +	stmdb   sp!,{r4-r12} -.Linv:	ldr	r0,[r7],#16 -	ldr	r1,[r7,#-12] -	ldr	r2,[r7,#-8] -	ldr	r3,[r7,#-4] -	ldr	r4,[r8],#-16 -	ldr	r5,[r8,#16+4] -	ldr	r6,[r8,#16+8] -	ldr	r9,[r8,#16+12] -	str	r0,[r10],#-16 -	str	r1,[r10,#16+4] -	str	r2,[r10,#16+8] -	str	r3,[r10,#16+12] -	str	r4,[r11],#16 -	str	r5,[r11,#-12] -	str	r6,[r11,#-8] -	str	r9,[r11,#-4] -	teq	r7,r8 -	bne	.Linv +	ldr	r12,[r2,#240]	@ AES_set_encrypt_key preserves r2, +	mov	r11,r2			@ which is AES_KEY *key +	mov	r7,r2 +	add	r8,r2,r12,lsl#4 -	ldr	r0,[r7] +.Linv:	ldr	r0,[r7]  	ldr	r1,[r7,#4]  	ldr	r2,[r7,#8]  	ldr	r3,[r7,#12] -	str	r0,[r11] -	str	r1,[r11,#4] -	str	r2,[r11,#8] -	str	r3,[r11,#12] -	sub	r11,r11,r12,lsl#3 +	ldr	r4,[r8] +	ldr	r5,[r8,#4] +	ldr	r6,[r8,#8] +	ldr	r9,[r8,#12] +	str	r0,[r8],#-16 +	str	r1,[r8,#16+4] +	str	r2,[r8,#16+8] +	str	r3,[r8,#16+12] +	str	r4,[r7],#16 +	str	r5,[r7,#-12] +	str	r6,[r7,#-8] +	str	r9,[r7,#-4] +	teq	r7,r8 +	bne	.Linv  	ldr	r0,[r11,#16]!		@ prefetch tp1  	mov	r7,#0x80  	mov	r8,#0x1b @@ -814,7 +715,7 @@ _armv4_AES_set_enc2dec_key:  	moveq	pc,lr			@ be binary compatible with V4, yet  	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)  #endif -.size	AES_set_enc2dec_key,.-AES_set_enc2dec_key +.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key  .type	AES_Td,%object  .align	5 @@ -924,11 +825,7 @@ AES_Td:  .type   AES_decrypt,%function  .align	5  AES_decrypt: -#if __ARM_ARCH__<7  	sub	r3,pc,#8		@ AES_decrypt -#else -	adr	r3,AES_decrypt -#endif  	stmdb   sp!,{r1,r4-r12,lr}  	mov	r12,r0		@ inp  	mov	r11,r2 @@ -1125,9 +1022,8 @@ _armv4_AES_decrypt:  	ldrb	r6,[r10,r9]		@ Td4[s0>>0]  	and	r9,lr,r1,lsr#8 -	add	r1,r10,r1,lsr#24  	ldrb	r7,[r10,r7]		@ Td4[s1>>0] -	ldrb	r1,[r1]		@ Td4[s1>>24] +	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]  	ldrb	r8,[r10,r8]		@ Td4[s1>>16]  	eor	r0,r7,r0,lsl#24  	ldrb	r9,[r10,r9]		@ Td4[s1>>8] @@ -1140,8 +1036,7 @@ _armv4_AES_decrypt:  	ldrb	r8,[r10,r8]		@ Td4[s2>>0]  	and	r9,lr,r2,lsr#16 -	add	r2,r10,r2,lsr#24 -	ldrb	r2,[r2]		@ Td4[s2>>24] +	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]  	eor	r0,r0,r7,lsl#8  	ldrb	r9,[r10,r9]		@ Td4[s2>>16]  	eor	r1,r8,r1,lsl#16 @@ -1153,9 +1048,8 @@ _armv4_AES_decrypt:  	ldrb	r8,[r10,r8]		@ Td4[s3>>8]  	and	r9,lr,r3		@ i2 -	add	r3,r10,r3,lsr#24  	ldrb	r9,[r10,r9]		@ Td4[s3>>0] -	ldrb	r3,[r3]		@ Td4[s3>>24] +	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]  	eor	r0,r0,r7,lsl#16  	ldr	r7,[r11,#0]  	eor	r1,r1,r8,lsl#8 diff --git a/app/openssl/crypto/aes/asm/aesv8-armx-64.S b/app/openssl/crypto/aes/asm/aesv8-armx-64.S deleted file mode 100644 index be0a13df..00000000 --- a/app/openssl/crypto/aes/asm/aesv8-armx-64.S +++ /dev/null @@ -1,761 +0,0 @@ -#include "arm_arch.h" - -#if __ARM_ARCH__>=7 -.text -.arch	armv8-a+crypto -.align	5 -rcon: -.long	0x01,0x01,0x01,0x01 -.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat -.long	0x1b,0x1b,0x1b,0x1b - -.globl	aes_v8_set_encrypt_key 
-.type	aes_v8_set_encrypt_key,%function -.align	5 -aes_v8_set_encrypt_key: -.Lenc_key: -	stp	x29,x30,[sp,#-16]! -	add	x29,sp,#0 -	adr	x3,rcon -	cmp	w1,#192 - -	eor	v0.16b,v0.16b,v0.16b -	ld1	{v3.16b},[x0],#16 -	mov	w1,#8		// reuse w1 -	ld1	{v1.4s,v2.4s},[x3],#32 - -	b.lt	.Loop128 -	b.eq	.L192 -	b	.L256 - -.align	4 -.Loop128: -	tbl	v6.16b,{v3.16b},v2.16b -	ext	v5.16b,v0.16b,v3.16b,#12 -	st1	{v3.4s},[x2],#16 -	aese	v6.16b,v0.16b -	subs	w1,w1,#1 - -	eor	v3.16b,v3.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	eor	v3.16b,v3.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	 eor	v6.16b,v6.16b,v1.16b -	eor	v3.16b,v3.16b,v5.16b -	shl	v1.16b,v1.16b,#1 -	eor	v3.16b,v3.16b,v6.16b -	b.ne	.Loop128 - -	ld1	{v1.4s},[x3] - -	tbl	v6.16b,{v3.16b},v2.16b -	ext	v5.16b,v0.16b,v3.16b,#12 -	st1	{v3.4s},[x2],#16 -	aese	v6.16b,v0.16b - -	eor	v3.16b,v3.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	eor	v3.16b,v3.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	 eor	v6.16b,v6.16b,v1.16b -	eor	v3.16b,v3.16b,v5.16b -	shl	v1.16b,v1.16b,#1 -	eor	v3.16b,v3.16b,v6.16b - -	tbl	v6.16b,{v3.16b},v2.16b -	ext	v5.16b,v0.16b,v3.16b,#12 -	st1	{v3.4s},[x2],#16 -	aese	v6.16b,v0.16b - -	eor	v3.16b,v3.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	eor	v3.16b,v3.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	 eor	v6.16b,v6.16b,v1.16b -	eor	v3.16b,v3.16b,v5.16b -	eor	v3.16b,v3.16b,v6.16b -	st1	{v3.4s},[x2] -	add	x2,x2,#0x50 - -	mov	w12,#10 -	b	.Ldone - -.align	4 -.L192: -	ld1	{v4.8b},[x0],#8 -	movi	v6.16b,#8			// borrow v6.16b -	st1	{v3.4s},[x2],#16 -	sub	v2.16b,v2.16b,v6.16b	// adjust the mask - -.Loop192: -	tbl	v6.16b,{v4.16b},v2.16b -	ext	v5.16b,v0.16b,v3.16b,#12 -	st1	{v4.8b},[x2],#8 -	aese	v6.16b,v0.16b -	subs	w1,w1,#1 - -	eor	v3.16b,v3.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	eor	v3.16b,v3.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	eor	v3.16b,v3.16b,v5.16b - -	dup	v5.4s,v3.s[3] -	eor	v5.16b,v5.16b,v4.16b -	 eor	v6.16b,v6.16b,v1.16b -	ext	v4.16b,v0.16b,v4.16b,#12 -	shl	v1.16b,v1.16b,#1 -	eor	v4.16b,v4.16b,v5.16b -	eor	v3.16b,v3.16b,v6.16b -	eor	v4.16b,v4.16b,v6.16b -	st1	{v3.4s},[x2],#16 -	b.ne	.Loop192 - -	mov	w12,#12 -	add	x2,x2,#0x20 -	b	.Ldone - -.align	4 -.L256: -	ld1	{v4.16b},[x0] -	mov	w1,#7 -	mov	w12,#14 -	st1	{v3.4s},[x2],#16 - -.Loop256: -	tbl	v6.16b,{v4.16b},v2.16b -	ext	v5.16b,v0.16b,v3.16b,#12 -	st1	{v4.4s},[x2],#16 -	aese	v6.16b,v0.16b -	subs	w1,w1,#1 - -	eor	v3.16b,v3.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	eor	v3.16b,v3.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	 eor	v6.16b,v6.16b,v1.16b -	eor	v3.16b,v3.16b,v5.16b -	shl	v1.16b,v1.16b,#1 -	eor	v3.16b,v3.16b,v6.16b -	st1	{v3.4s},[x2],#16 -	b.eq	.Ldone - -	dup	v6.4s,v3.s[3]		// just splat -	ext	v5.16b,v0.16b,v4.16b,#12 -	aese	v6.16b,v0.16b - -	eor	v4.16b,v4.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	eor	v4.16b,v4.16b,v5.16b -	ext	v5.16b,v0.16b,v5.16b,#12 -	eor	v4.16b,v4.16b,v5.16b - -	eor	v4.16b,v4.16b,v6.16b -	b	.Loop256 - -.Ldone: -	str	w12,[x2] - -	eor	x0,x0,x0		// return value -	ldr	x29,[sp],#16 -	ret -.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key - -.globl	aes_v8_set_decrypt_key -.type	aes_v8_set_decrypt_key,%function -.align	5 -aes_v8_set_decrypt_key: -	stp	x29,x30,[sp,#-16]! 
-	add	x29,sp,#0 -	bl	.Lenc_key - -	sub	x2,x2,#240		// restore original x2 -	mov	x4,#-16 -	add	x0,x2,x12,lsl#4	// end of key schedule - -	ld1	{v0.4s},[x2] -	ld1	{v1.4s},[x0] -	st1	{v0.4s},[x0],x4 -	st1	{v1.4s},[x2],#16 - -.Loop_imc: -	ld1	{v0.4s},[x2] -	ld1	{v1.4s},[x0] -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	st1	{v0.4s},[x0],x4 -	st1	{v1.4s},[x2],#16 -	cmp	x0,x2 -	b.hi	.Loop_imc - -	ld1	{v0.4s},[x2] -	aesimc	v0.16b,v0.16b -	st1	{v0.4s},[x0] - -	eor	x0,x0,x0		// return value -	ldp	x29,x30,[sp],#16 -	ret -.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key -.globl	aes_v8_encrypt -.type	aes_v8_encrypt,%function -.align	5 -aes_v8_encrypt: -	ldr	w3,[x2,#240] -	ld1	{v0.4s},[x2],#16 -	ld1	{v2.16b},[x0] -	sub	w3,w3,#2 -	ld1	{v1.4s},[x2],#16 - -.Loop_enc: -	aese	v2.16b,v0.16b -	ld1	{v0.4s},[x2],#16 -	aesmc	v2.16b,v2.16b -	subs	w3,w3,#2 -	aese	v2.16b,v1.16b -	ld1	{v1.4s},[x2],#16 -	aesmc	v2.16b,v2.16b -	b.gt	.Loop_enc - -	aese	v2.16b,v0.16b -	ld1	{v0.4s},[x2] -	aesmc	v2.16b,v2.16b -	aese	v2.16b,v1.16b -	eor	v2.16b,v2.16b,v0.16b - -	st1	{v2.16b},[x1] -	ret -.size	aes_v8_encrypt,.-aes_v8_encrypt -.globl	aes_v8_decrypt -.type	aes_v8_decrypt,%function -.align	5 -aes_v8_decrypt: -	ldr	w3,[x2,#240] -	ld1	{v0.4s},[x2],#16 -	ld1	{v2.16b},[x0] -	sub	w3,w3,#2 -	ld1	{v1.4s},[x2],#16 - -.Loop_dec: -	aesd	v2.16b,v0.16b -	ld1	{v0.4s},[x2],#16 -	aesimc	v2.16b,v2.16b -	subs	w3,w3,#2 -	aesd	v2.16b,v1.16b -	ld1	{v1.4s},[x2],#16 -	aesimc	v2.16b,v2.16b -	b.gt	.Loop_dec - -	aesd	v2.16b,v0.16b -	ld1	{v0.4s},[x2] -	aesimc	v2.16b,v2.16b -	aesd	v2.16b,v1.16b -	eor	v2.16b,v2.16b,v0.16b - -	st1	{v2.16b},[x1] -	ret -.size	aes_v8_decrypt,.-aes_v8_decrypt -.globl	aes_v8_cbc_encrypt -.type	aes_v8_cbc_encrypt,%function -.align	5 -aes_v8_cbc_encrypt: -	stp	x29,x30,[sp,#-16]! -	add	x29,sp,#0 -	subs	x2,x2,#16 -	mov	x8,#16 -	b.lo	.Lcbc_abort -	csel	x8,xzr,x8,eq - -	cmp	w5,#0			// en- or decrypting? -	ldr	w5,[x3,#240] -	and	x2,x2,#-16 -	ld1	{v6.16b},[x4] -	ld1	{v0.16b},[x0],x8 - -	ld1	{v16.4s-v17.4s},[x3]		// load key schedule... 
-	sub	w5,w5,#6 -	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys -	sub	w5,w5,#2 -	ld1	{v18.4s-v19.4s},[x7],#32 -	ld1	{v20.4s-v21.4s},[x7],#32 -	ld1	{v22.4s-v23.4s},[x7],#32 -	ld1	{v7.4s},[x7] - -	add	x7,x3,#32 -	mov	w6,w5 -	b.eq	.Lcbc_dec - -	cmp	w5,#2 -	eor	v0.16b,v0.16b,v6.16b -	eor	v5.16b,v16.16b,v7.16b -	b.eq	.Lcbc_enc128 - -.Loop_cbc_enc: -	aese	v0.16b,v16.16b -	ld1	{v16.4s},[x7],#16 -	aesmc	v0.16b,v0.16b -	subs	w6,w6,#2 -	aese	v0.16b,v17.16b -	ld1	{v17.4s},[x7],#16 -	aesmc	v0.16b,v0.16b -	b.gt	.Loop_cbc_enc - -	aese	v0.16b,v16.16b -	aesmc	v0.16b,v0.16b -	 subs	x2,x2,#16 -	aese	v0.16b,v17.16b -	aesmc	v0.16b,v0.16b -	 csel	x8,xzr,x8,eq -	aese	v0.16b,v18.16b -	aesmc	v0.16b,v0.16b -	 add	x7,x3,#16 -	aese	v0.16b,v19.16b -	aesmc	v0.16b,v0.16b -	 ld1	{v16.16b},[x0],x8 -	aese	v0.16b,v20.16b -	aesmc	v0.16b,v0.16b -	 eor	v16.16b,v16.16b,v5.16b -	aese	v0.16b,v21.16b -	aesmc	v0.16b,v0.16b -	 ld1 {v17.4s},[x7],#16	// re-pre-load rndkey[1] -	aese	v0.16b,v22.16b -	aesmc	v0.16b,v0.16b -	aese	v0.16b,v23.16b - -	 mov	w6,w5 -	eor	v6.16b,v0.16b,v7.16b -	st1	{v6.16b},[x1],#16 -	b.hs	.Loop_cbc_enc - -	b	.Lcbc_done - -.align	5 -.Lcbc_enc128: -	ld1	{v2.4s-v3.4s},[x7] -	aese	v0.16b,v16.16b -	aesmc	v0.16b,v0.16b -	b	.Lenter_cbc_enc128 -.Loop_cbc_enc128: -	aese	v0.16b,v16.16b -	aesmc	v0.16b,v0.16b -	 st1	{v6.16b},[x1],#16 -.Lenter_cbc_enc128: -	aese	v0.16b,v17.16b -	aesmc	v0.16b,v0.16b -	 subs	x2,x2,#16 -	aese	v0.16b,v2.16b -	aesmc	v0.16b,v0.16b -	 csel	x8,xzr,x8,eq -	aese	v0.16b,v3.16b -	aesmc	v0.16b,v0.16b -	aese	v0.16b,v18.16b -	aesmc	v0.16b,v0.16b -	aese	v0.16b,v19.16b -	aesmc	v0.16b,v0.16b -	 ld1	{v16.16b},[x0],x8 -	aese	v0.16b,v20.16b -	aesmc	v0.16b,v0.16b -	aese	v0.16b,v21.16b -	aesmc	v0.16b,v0.16b -	aese	v0.16b,v22.16b -	aesmc	v0.16b,v0.16b -	 eor	v16.16b,v16.16b,v5.16b -	aese	v0.16b,v23.16b -	eor	v6.16b,v0.16b,v7.16b -	b.hs	.Loop_cbc_enc128 - -	st1	{v6.16b},[x1],#16 -	b	.Lcbc_done - -.align	5 -.Lcbc_dec128: -	ld1	{v4.4s-v5.4s},[x7] -	eor	v6.16b,v6.16b,v7.16b -	eor	v2.16b,v0.16b,v7.16b -	mov	x12,x8 - -.Loop2x_cbc_dec128: -	aesd	v0.16b,v16.16b -	aesd	v1.16b,v16.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	 subs	x2,x2,#32 -	aesd	v0.16b,v17.16b -	aesd	v1.16b,v17.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	 csel	x8,xzr,x8,lo -	aesd	v0.16b,v4.16b -	aesd	v1.16b,v4.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	 csel	x12,xzr,x12,ls -	aesd	v0.16b,v5.16b -	aesd	v1.16b,v5.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	aesd	v0.16b,v18.16b -	aesd	v1.16b,v18.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	aesd	v0.16b,v19.16b -	aesd	v1.16b,v19.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	aesd	v0.16b,v20.16b -	aesd	v1.16b,v20.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	aesd	v0.16b,v21.16b -	aesd	v1.16b,v21.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	aesd	v0.16b,v22.16b -	aesd	v1.16b,v22.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	aesd	v0.16b,v23.16b -	aesd	v1.16b,v23.16b - -	eor	v6.16b,v6.16b,v0.16b -	ld1	{v0.16b},[x0],x8 -	eor	v2.16b,v2.16b,v1.16b -	ld1	{v1.16b},[x0],x12 -	st1	{v6.16b},[x1],#16 -	eor	v6.16b,v3.16b,v7.16b -	st1	{v2.16b},[x1],#16 -	eor	v2.16b,v0.16b,v7.16b -	orr	v3.16b,v1.16b,v1.16b -	b.hs	.Loop2x_cbc_dec128 - -	adds	x2,x2,#32 -	eor	v6.16b,v6.16b,v7.16b -	b.eq	.Lcbc_done -	eor	v2.16b,v2.16b,v7.16b -	b	.Lcbc_dec_tail - -.align	5 -.Lcbc_dec: -	subs	x2,x2,#16 -	orr	v2.16b,v0.16b,v0.16b -	b.lo	.Lcbc_dec_tail - -	csel	x8,xzr,x8,eq -	cmp	w5,#2 -	ld1	{v1.16b},[x0],x8 -	orr	v3.16b,v1.16b,v1.16b -	b.eq	.Lcbc_dec128 - -.Loop2x_cbc_dec: -	aesd	
v0.16b,v16.16b -	aesd	v1.16b,v16.16b -	ld1	{v16.4s},[x7],#16 -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	subs	w6,w6,#2 -	aesd	v0.16b,v17.16b -	aesd	v1.16b,v17.16b -	ld1	{v17.4s},[x7],#16 -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	b.gt	.Loop2x_cbc_dec - -	aesd	v0.16b,v16.16b -	aesd	v1.16b,v16.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	 eor	v4.16b,v6.16b,v7.16b -	 eor	v5.16b,v2.16b,v7.16b -	aesd	v0.16b,v17.16b -	aesd	v1.16b,v17.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	 orr	v6.16b,v3.16b,v3.16b -	 subs	x2,x2,#32 -	aesd	v0.16b,v18.16b -	aesd	v1.16b,v18.16b -	aesimc	v0.16b,v0.16b -	 csel	x8,xzr,x8,lo -	aesimc	v1.16b,v1.16b -	 mov	x7,x3 -	aesd	v0.16b,v19.16b -	aesd	v1.16b,v19.16b -	aesimc	v0.16b,v0.16b -	 ld1	{v2.16b},[x0],x8 -	aesimc	v1.16b,v1.16b -	 csel	x8,xzr,x8,ls -	aesd	v0.16b,v20.16b -	aesd	v1.16b,v20.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	 ld1	{v3.16b},[x0],x8 -	aesd	v0.16b,v21.16b -	aesd	v1.16b,v21.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	 ld1 {v16.4s},[x7],#16	// re-pre-load rndkey[0] -	aesd	v0.16b,v22.16b -	aesd	v1.16b,v22.16b -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	 ld1 {v17.4s},[x7],#16	// re-pre-load rndkey[1] -	aesd	v0.16b,v23.16b -	aesd	v1.16b,v23.16b - -	 mov	w6,w5 -	eor	v4.16b,v4.16b,v0.16b -	eor	v5.16b,v5.16b,v1.16b -	 orr	v0.16b,v2.16b,v2.16b -	st1	{v4.16b},[x1],#16 -	 orr	v1.16b,v3.16b,v3.16b -	st1	{v5.16b},[x1],#16 -	b.hs	.Loop2x_cbc_dec - -	adds	x2,x2,#32 -	b.eq	.Lcbc_done - -.Lcbc_dec_tail: -	aesd	v0.16b,v16.16b -	ld1	{v16.4s},[x7],#16 -	aesimc	v0.16b,v0.16b -	subs	w6,w6,#2 -	aesd	v0.16b,v17.16b -	ld1	{v17.4s},[x7],#16 -	aesimc	v0.16b,v0.16b -	b.gt	.Lcbc_dec_tail - -	aesd	v0.16b,v16.16b -	aesimc	v0.16b,v0.16b -	aesd	v0.16b,v17.16b -	aesimc	v0.16b,v0.16b -	 eor	v4.16b,v6.16b,v7.16b -	aesd	v0.16b,v18.16b -	aesimc	v0.16b,v0.16b -	 orr	v6.16b,v2.16b,v2.16b -	aesd	v0.16b,v19.16b -	aesimc	v0.16b,v0.16b -	aesd	v0.16b,v20.16b -	aesimc	v0.16b,v0.16b -	aesd	v0.16b,v21.16b -	aesimc	v0.16b,v0.16b -	aesd	v0.16b,v22.16b -	aesimc	v0.16b,v0.16b -	aesd	v0.16b,v23.16b - -	eor	v4.16b,v4.16b,v0.16b -	st1	{v4.16b},[x1],#16 - -.Lcbc_done: -	st1	{v6.16b},[x4] -.Lcbc_abort: -	ldr	x29,[sp],#16 -	ret -.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt -.globl	aes_v8_ctr32_encrypt_blocks -.type	aes_v8_ctr32_encrypt_blocks,%function -.align	5 -aes_v8_ctr32_encrypt_blocks: -	stp		x29,x30,[sp,#-16]! -	add		x29,sp,#0 -	ldr		w5,[x3,#240] - -	ldr		w8, [x4, #12] -	ld1		{v0.4s},[x4] - -	ld1		{v16.4s-v17.4s},[x3]		// load key schedule... 
-	sub		w5,w5,#6 -	add		x7,x3,x5,lsl#4	// pointer to last 7 round keys -	sub		w5,w5,#2 -	ld1		{v18.4s-v19.4s},[x7],#32 -	ld1		{v20.4s-v21.4s},[x7],#32 -	ld1		{v22.4s-v23.4s},[x7],#32 -	ld1		{v7.4s},[x7] - -	add		x7,x3,#32 -	mov		w6,w5 - -	subs		x2,x2,#2 -	b.lo		.Lctr32_tail - -#ifndef __ARMEB__ -	rev		w8, w8 -#endif -	orr		v1.16b,v0.16b,v0.16b -	add		w8, w8, #1 -	orr		v6.16b,v0.16b,v0.16b -	rev		w10, w8 -	cmp		w5,#2 -	mov		v1.s[3],w10 -	b.eq		.Lctr32_128 - -.Loop2x_ctr32: -	aese		v0.16b,v16.16b -	aese		v1.16b,v16.16b -	ld1		{v16.4s},[x7],#16 -	aesmc		v0.16b,v0.16b -	aesmc		v1.16b,v1.16b -	subs		w6,w6,#2 -	aese		v0.16b,v17.16b -	aese		v1.16b,v17.16b -	ld1		{v17.4s},[x7],#16 -	aesmc		v0.16b,v0.16b -	aesmc		v1.16b,v1.16b -	b.gt		.Loop2x_ctr32 - -	aese		v0.16b,v16.16b -	aese		v1.16b,v16.16b -	aesmc		v4.16b,v0.16b -	 orr		v0.16b,v6.16b,v6.16b -	aesmc		v5.16b,v1.16b -	 orr		v1.16b,v6.16b,v6.16b -	aese		v4.16b,v17.16b -	aese		v5.16b,v17.16b -	 ld1		{v2.16b},[x0],#16 -	aesmc		v4.16b,v4.16b -	 ld1		{v3.16b},[x0],#16 -	aesmc		v5.16b,v5.16b -	 add		w8,w8,#1 -	aese		v4.16b,v18.16b -	aese		v5.16b,v18.16b -	 rev		w9,w8 -	aesmc		v4.16b,v4.16b -	aesmc		v5.16b,v5.16b -	 add		w8,w8,#1 -	aese		v4.16b,v19.16b -	aese		v5.16b,v19.16b -	 eor		v2.16b,v2.16b,v7.16b -	 rev		w10,w8 -	aesmc		v4.16b,v4.16b -	aesmc		v5.16b,v5.16b -	 eor		v3.16b,v3.16b,v7.16b -	 mov		x7,x3 -	aese		v4.16b,v20.16b -	aese		v5.16b,v20.16b -	 subs		x2,x2,#2 -	aesmc		v4.16b,v4.16b -	aesmc		v5.16b,v5.16b -	 ld1	 {v16.4s-v17.4s},[x7],#32	// re-pre-load rndkey[0-1] -	aese		v4.16b,v21.16b -	aese		v5.16b,v21.16b -	aesmc		v4.16b,v4.16b -	aesmc		v5.16b,v5.16b -	aese		v4.16b,v22.16b -	aese		v5.16b,v22.16b -	 mov	v0.s[3], w9 -	aesmc		v4.16b,v4.16b -	 mov	v1.s[3], w10 -	aesmc		v5.16b,v5.16b -	aese		v4.16b,v23.16b -	aese		v5.16b,v23.16b - -	 mov		w6,w5 -	eor		v2.16b,v2.16b,v4.16b -	eor		v3.16b,v3.16b,v5.16b -	st1		{v2.16b},[x1],#16 -	st1		{v3.16b},[x1],#16 -	b.hs		.Loop2x_ctr32 - -	adds		x2,x2,#2 -	b.eq		.Lctr32_done -	b		.Lctr32_tail - -.Lctr32_128: -	ld1		{v4.4s-v5.4s},[x7] - -.Loop2x_ctr32_128: -	aese		v0.16b,v16.16b -	aese		v1.16b,v16.16b -	aesmc		v0.16b,v0.16b -	 ld1		{v2.16b},[x0],#16 -	aesmc		v1.16b,v1.16b -	 ld1		{v3.16b},[x0],#16 -	aese		v0.16b,v17.16b -	aese		v1.16b,v17.16b -	 add		w8,w8,#1 -	aesmc		v0.16b,v0.16b -	aesmc		v1.16b,v1.16b -	 rev		w9,w8 -	aese		v0.16b,v4.16b -	aese		v1.16b,v4.16b -	 add		w8,w8,#1 -	aesmc		v0.16b,v0.16b -	aesmc		v1.16b,v1.16b -	 rev		w10,w8 -	aese		v0.16b,v5.16b -	aese		v1.16b,v5.16b -	 subs		x2,x2,#2 -	aesmc		v0.16b,v0.16b -	aesmc		v1.16b,v1.16b -	aese		v0.16b,v18.16b -	aese		v1.16b,v18.16b -	aesmc		v0.16b,v0.16b -	aesmc		v1.16b,v1.16b -	aese		v0.16b,v19.16b -	aese		v1.16b,v19.16b -	aesmc		v0.16b,v0.16b -	aesmc		v1.16b,v1.16b -	aese		v0.16b,v20.16b -	aese		v1.16b,v20.16b -	aesmc		v0.16b,v0.16b -	aesmc		v1.16b,v1.16b -	aese		v0.16b,v21.16b -	aese		v1.16b,v21.16b -	aesmc		v0.16b,v0.16b -	aesmc		v1.16b,v1.16b -	aese		v0.16b,v22.16b -	aese		v1.16b,v22.16b -	aesmc		v0.16b,v0.16b -	aesmc		v1.16b,v1.16b -	 eor		v2.16b,v2.16b,v7.16b -	aese		v0.16b,v23.16b -	 eor		v3.16b,v3.16b,v7.16b -	aese		v1.16b,v23.16b - -	eor		v2.16b,v2.16b,v0.16b -	orr		v0.16b,v6.16b,v6.16b -	eor		v3.16b,v3.16b,v1.16b -	orr		v1.16b,v6.16b,v6.16b -	st1		{v2.16b},[x1],#16 -	mov		v0.s[3], w9 -	st1		{v3.16b},[x1],#16 -	mov		v1.s[3], w10 -	b.hs		.Loop2x_ctr32_128 - -	adds		x2,x2,#2 -	b.eq		.Lctr32_done - -.Lctr32_tail: -	aese		v0.16b,v16.16b -	ld1		{v16.4s},[x7],#16 -	aesmc		v0.16b,v0.16b -	subs		w6,w6,#2 -	aese		v0.16b,v17.16b -	ld1		{v17.4s},[x7],#16 -	
aesmc		v0.16b,v0.16b -	b.gt		.Lctr32_tail - -	aese		v0.16b,v16.16b -	aesmc		v0.16b,v0.16b -	aese		v0.16b,v17.16b -	aesmc		v0.16b,v0.16b -	 ld1		{v2.16b},[x0] -	aese		v0.16b,v18.16b -	aesmc		v0.16b,v0.16b -	aese		v0.16b,v19.16b -	aesmc		v0.16b,v0.16b -	aese		v0.16b,v20.16b -	aesmc		v0.16b,v0.16b -	aese		v0.16b,v21.16b -	aesmc		v0.16b,v0.16b -	aese		v0.16b,v22.16b -	aesmc		v0.16b,v0.16b -	 eor		v2.16b,v2.16b,v7.16b -	aese		v0.16b,v23.16b - -	eor		v2.16b,v2.16b,v0.16b -	st1		{v2.16b},[x1] - -.Lctr32_done: -	ldr		x29,[sp],#16 -	ret -.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks -#endif diff --git a/app/openssl/crypto/aes/asm/aesv8-armx.S b/app/openssl/crypto/aes/asm/aesv8-armx.S deleted file mode 100644 index 1637e4d4..00000000 --- a/app/openssl/crypto/aes/asm/aesv8-armx.S +++ /dev/null @@ -1,767 +0,0 @@ -#include "arm_arch.h" - -#if __ARM_ARCH__>=7 -.text -.fpu	neon -.code	32 -.align	5 -rcon: -.long	0x01,0x01,0x01,0x01 -.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat -.long	0x1b,0x1b,0x1b,0x1b - -.globl	aes_v8_set_encrypt_key -.type	aes_v8_set_encrypt_key,%function -.align	5 -aes_v8_set_encrypt_key: -.Lenc_key: -	adr	r3,rcon -	cmp	r1,#192 - -	veor	q0,q0,q0 -	vld1.8	{q3},[r0]! -	mov	r1,#8		@ reuse r1 -	vld1.32	{q1,q2},[r3]! - -	blt	.Loop128 -	beq	.L192 -	b	.L256 - -.align	4 -.Loop128: -	vtbl.8	d20,{q3},d4 -	vtbl.8	d21,{q3},d5 -	vext.8	q9,q0,q3,#12 -	vst1.32	{q3},[r2]! -	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0 -	subs	r1,r1,#1 - -	veor	q3,q3,q9 -	vext.8	q9,q0,q9,#12 -	veor	q3,q3,q9 -	vext.8	q9,q0,q9,#12 -	 veor	q10,q10,q1 -	veor	q3,q3,q9 -	vshl.u8	q1,q1,#1 -	veor	q3,q3,q10 -	bne	.Loop128 - -	vld1.32	{q1},[r3] - -	vtbl.8	d20,{q3},d4 -	vtbl.8	d21,{q3},d5 -	vext.8	q9,q0,q3,#12 -	vst1.32	{q3},[r2]! -	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0 - -	veor	q3,q3,q9 -	vext.8	q9,q0,q9,#12 -	veor	q3,q3,q9 -	vext.8	q9,q0,q9,#12 -	 veor	q10,q10,q1 -	veor	q3,q3,q9 -	vshl.u8	q1,q1,#1 -	veor	q3,q3,q10 - -	vtbl.8	d20,{q3},d4 -	vtbl.8	d21,{q3},d5 -	vext.8	q9,q0,q3,#12 -	vst1.32	{q3},[r2]! -	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0 - -	veor	q3,q3,q9 -	vext.8	q9,q0,q9,#12 -	veor	q3,q3,q9 -	vext.8	q9,q0,q9,#12 -	 veor	q10,q10,q1 -	veor	q3,q3,q9 -	veor	q3,q3,q10 -	vst1.32	{q3},[r2] -	add	r2,r2,#0x50 - -	mov	r12,#10 -	b	.Ldone - -.align	4 -.L192: -	vld1.8	{d16},[r0]! -	vmov.i8	q10,#8			@ borrow q10 -	vst1.32	{q3},[r2]! -	vsub.i8	q2,q2,q10	@ adjust the mask - -.Loop192: -	vtbl.8	d20,{q8},d4 -	vtbl.8	d21,{q8},d5 -	vext.8	q9,q0,q3,#12 -	vst1.32	{d16},[r2]! -	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0 -	subs	r1,r1,#1 - -	veor	q3,q3,q9 -	vext.8	q9,q0,q9,#12 -	veor	q3,q3,q9 -	vext.8	q9,q0,q9,#12 -	veor	q3,q3,q9 - -	vdup.32	q9,d7[1] -	veor	q9,q9,q8 -	 veor	q10,q10,q1 -	vext.8	q8,q0,q8,#12 -	vshl.u8	q1,q1,#1 -	veor	q8,q8,q9 -	veor	q3,q3,q10 -	veor	q8,q8,q10 -	vst1.32	{q3},[r2]! -	bne	.Loop192 - -	mov	r12,#12 -	add	r2,r2,#0x20 -	b	.Ldone - -.align	4 -.L256: -	vld1.8	{q8},[r0] -	mov	r1,#7 -	mov	r12,#14 -	vst1.32	{q3},[r2]! - -.Loop256: -	vtbl.8	d20,{q8},d4 -	vtbl.8	d21,{q8},d5 -	vext.8	q9,q0,q3,#12 -	vst1.32	{q8},[r2]! -	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0 -	subs	r1,r1,#1 - -	veor	q3,q3,q9 -	vext.8	q9,q0,q9,#12 -	veor	q3,q3,q9 -	vext.8	q9,q0,q9,#12 -	 veor	q10,q10,q1 -	veor	q3,q3,q9 -	vshl.u8	q1,q1,#1 -	veor	q3,q3,q10 -	vst1.32	{q3},[r2]! 
-	beq	.Ldone - -	vdup.32	q10,d7[1] -	vext.8	q9,q0,q8,#12 -	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0 - -	veor	q8,q8,q9 -	vext.8	q9,q0,q9,#12 -	veor	q8,q8,q9 -	vext.8	q9,q0,q9,#12 -	veor	q8,q8,q9 - -	veor	q8,q8,q10 -	b	.Loop256 - -.Ldone: -	str	r12,[r2] - -	eor	r0,r0,r0		@ return value -	 -	bx	lr -.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key - -.globl	aes_v8_set_decrypt_key -.type	aes_v8_set_decrypt_key,%function -.align	5 -aes_v8_set_decrypt_key: -	stmdb	sp!,{r4,lr} -	bl	.Lenc_key - -	sub	r2,r2,#240		@ restore original r2 -	mov	r4,#-16 -	add	r0,r2,r12,lsl#4	@ end of key schedule - -	vld1.32	{q0},[r2] -	vld1.32	{q1},[r0] -	vst1.32	{q0},[r0],r4 -	vst1.32	{q1},[r2]! - -.Loop_imc: -	vld1.32	{q0},[r2] -	vld1.32	{q1},[r0] -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	vst1.32	{q0},[r0],r4 -	vst1.32	{q1},[r2]! -	cmp	r0,r2 -	bhi	.Loop_imc - -	vld1.32	{q0},[r2] -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	vst1.32	{q0},[r0] - -	eor	r0,r0,r0		@ return value -	ldmia	sp!,{r4,pc} -.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key -.globl	aes_v8_encrypt -.type	aes_v8_encrypt,%function -.align	5 -aes_v8_encrypt: -	ldr	r3,[r2,#240] -	vld1.32	{q0},[r2]! -	vld1.8	{q2},[r0] -	sub	r3,r3,#2 -	vld1.32	{q1},[r2]! - -.Loop_enc: -	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0 -	vld1.32	{q0},[r2]! -	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2 -	subs	r3,r3,#2 -	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1 -	vld1.32	{q1},[r2]! -	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2 -	bgt	.Loop_enc - -	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0 -	vld1.32	{q0},[r2] -	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2 -	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1 -	veor	q2,q2,q0 - -	vst1.8	{q2},[r1] -	bx	lr -.size	aes_v8_encrypt,.-aes_v8_encrypt -.globl	aes_v8_decrypt -.type	aes_v8_decrypt,%function -.align	5 -aes_v8_decrypt: -	ldr	r3,[r2,#240] -	vld1.32	{q0},[r2]! -	vld1.8	{q2},[r0] -	sub	r3,r3,#2 -	vld1.32	{q1},[r2]! - -.Loop_dec: -	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0 -	vld1.32	{q0},[r2]! -	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2 -	subs	r3,r3,#2 -	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1 -	vld1.32	{q1},[r2]! -	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2 -	bgt	.Loop_dec - -	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0 -	vld1.32	{q0},[r2] -	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2 -	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1 -	veor	q2,q2,q0 - -	vst1.8	{q2},[r1] -	bx	lr -.size	aes_v8_decrypt,.-aes_v8_decrypt -.globl	aes_v8_cbc_encrypt -.type	aes_v8_cbc_encrypt,%function -.align	5 -aes_v8_cbc_encrypt: -	mov	ip,sp -	stmdb	sp!,{r4-r8,lr} -	vstmdb	sp!,{d8-d15}            @ ABI specification says so -	ldmia	ip,{r4-r5}		@ load remaining args -	subs	r2,r2,#16 -	mov	r8,#16 -	blo	.Lcbc_abort -	moveq	r8,#0 - -	cmp	r5,#0			@ en- or decrypting? -	ldr	r5,[r3,#240] -	and	r2,r2,#-16 -	vld1.8	{q6},[r4] -	vld1.8	{q0},[r0],r8 - -	vld1.32	{q8-q9},[r3]		@ load key schedule... -	sub	r5,r5,#6 -	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys -	sub	r5,r5,#2 -	vld1.32	{q10-q11},[r7]! -	vld1.32	{q12-q13},[r7]! -	vld1.32	{q14-q15},[r7]! -	vld1.32	{q7},[r7] - -	add	r7,r3,#32 -	mov	r6,r5 -	beq	.Lcbc_dec - -	cmp	r5,#2 -	veor	q0,q0,q6 -	veor	q5,q8,q7 -	beq	.Lcbc_enc128 - -.Loop_cbc_enc: -	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8 -	vld1.32	{q8},[r7]! -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	subs	r6,r6,#2 -	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9 -	vld1.32	{q9},[r7]! 
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	bgt	.Loop_cbc_enc - -	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 subs	r2,r2,#16 -	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 moveq	r8,#0 -	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 add	r7,r3,#16 -	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 vld1.8	{q8},[r0],r8 -	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 veor	q8,q8,q5 -	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 vld1.32 {q9},[r7]!	@ re-pre-load rndkey[1] -	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15 - -	 mov	r6,r5 -	veor	q6,q0,q7 -	vst1.8	{q6},[r1]! -	bhs	.Loop_cbc_enc - -	b	.Lcbc_done - -.align	5 -.Lcbc_enc128: -	vld1.32	{q2-q3},[r7] -	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	b	.Lenter_cbc_enc128 -.Loop_cbc_enc128: -	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 vst1.8	{q6},[r1]! -.Lenter_cbc_enc128: -	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 subs	r2,r2,#16 -	.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 moveq	r8,#0 -	.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 vld1.8	{q8},[r0],r8 -	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 veor	q8,q8,q5 -	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15 -	veor	q6,q0,q7 -	bhs	.Loop_cbc_enc128 - -	vst1.8	{q6},[r1]! 
-	b	.Lcbc_done - -.align	5 -.Lcbc_dec128: -	vld1.32	{q4-q5},[r7] -	veor	q6,q6,q7 -	veor	q2,q0,q7 -	mov	r12,r8 - -.Loop2x_cbc_dec128: -	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8 -	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	 subs	r2,r2,#32 -	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9 -	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	 movlo	r8,#0 -	.byte	0x48,0x03,0xb0,0xf3	@ aesd q0,q4 -	.byte	0x48,0x23,0xb0,0xf3	@ aesd q1,q4 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	 movls	r12,#0 -	.byte	0x4a,0x03,0xb0,0xf3	@ aesd q0,q5 -	.byte	0x4a,0x23,0xb0,0xf3	@ aesd q1,q5 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	.byte	0x64,0x03,0xb0,0xf3	@ aesd q0,q10 -	.byte	0x64,0x23,0xb0,0xf3	@ aesd q1,q10 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	.byte	0x66,0x03,0xb0,0xf3	@ aesd q0,q11 -	.byte	0x66,0x23,0xb0,0xf3	@ aesd q1,q11 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12 -	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13 -	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14 -	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15 -	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15 - -	veor	q6,q6,q0 -	vld1.8	{q0},[r0],r8 -	veor	q2,q2,q1 -	vld1.8	{q1},[r0],r12 -	vst1.8	{q6},[r1]! -	veor	q6,q3,q7 -	vst1.8	{q2},[r1]! -	veor	q2,q0,q7 -	vorr	q3,q1,q1 -	bhs	.Loop2x_cbc_dec128 - -	adds	r2,r2,#32 -	veor	q6,q6,q7 -	beq	.Lcbc_done -	veor	q2,q2,q7 -	b	.Lcbc_dec_tail - -.align	5 -.Lcbc_dec: -	subs	r2,r2,#16 -	vorr	q2,q0,q0 -	blo	.Lcbc_dec_tail - -	moveq	r8,#0 -	cmp	r5,#2 -	vld1.8	{q1},[r0],r8 -	vorr	q3,q1,q1 -	beq	.Lcbc_dec128 - -.Loop2x_cbc_dec: -	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8 -	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8 -	vld1.32	{q8},[r7]! -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	subs	r6,r6,#2 -	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9 -	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9 -	vld1.32	{q9},[r7]! 
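.Lcbc_dec128 and .Loop2x_cbc_dec above interleave two blocks per iteration, which CBC decryption permits: P_i = D_K(C_i) XOR C_{i-1}, and the D_K computations are mutually independent, so two AESD pipelines can run back to back. A scalar sketch (same illustrative callback shape as in the previous sketch):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef void (*block_fn)(const uint8_t in[16], uint8_t out[16]);

void cbc_decrypt(const uint8_t *in, uint8_t *out, size_t len,
                 uint8_t iv[16], block_fn dec)
{
    uint8_t prev[16], tmp[16];
    memcpy(prev, iv, 16);
    for (size_t off = 0; off + 16 <= len; off += 16) {
        dec(in + off, tmp);                /* independent of other blocks */
        for (int i = 0; i < 16; i++)
            out[off + i] = tmp[i] ^ prev[i];
        memcpy(prev, in + off, 16);        /* ciphertext is the next chain value */
    }
    memcpy(iv, prev, 16);                  /* hand the IV back for chained calls */
}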
-	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	bgt	.Loop2x_cbc_dec - -	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8 -	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	 veor	q4,q6,q7 -	 veor	q5,q2,q7 -	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9 -	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	 vorr	q6,q3,q3 -	 subs	r2,r2,#32 -	.byte	0x64,0x03,0xb0,0xf3	@ aesd q0,q10 -	.byte	0x64,0x23,0xb0,0xf3	@ aesd q1,q10 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	 movlo	r8,#0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	 mov	r7,r3 -	.byte	0x66,0x03,0xb0,0xf3	@ aesd q0,q11 -	.byte	0x66,0x23,0xb0,0xf3	@ aesd q1,q11 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	 vld1.8	{q2},[r0],r8 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	 movls	r8,#0 -	.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12 -	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	 vld1.8	{q3},[r0],r8 -	.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13 -	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	 vld1.32 {q8},[r7]!	@ re-pre-load rndkey[0] -	.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14 -	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1 -	 vld1.32 {q9},[r7]!	@ re-pre-load rndkey[1] -	.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15 -	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15 - -	 mov	r6,r5 -	veor	q4,q4,q0 -	veor	q5,q5,q1 -	 vorr	q0,q2,q2 -	vst1.8	{q4},[r1]! -	 vorr	q1,q3,q3 -	vst1.8	{q5},[r1]! -	bhs	.Loop2x_cbc_dec - -	adds	r2,r2,#32 -	beq	.Lcbc_done - -.Lcbc_dec_tail: -	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8 -	vld1.32	{q8},[r7]! -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	subs	r6,r6,#2 -	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9 -	vld1.32	{q9},[r7]! -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	bgt	.Lcbc_dec_tail - -	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	 veor	q4,q6,q7 -	.byte	0x64,0x03,0xb0,0xf3	@ aesd q0,q10 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	 vorr	q6,q2,q2 -	.byte	0x66,0x03,0xb0,0xf3	@ aesd q0,q11 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14 -	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0 -	.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15 - -	veor	q4,q4,q0 -	vst1.8	{q4},[r1]! - -.Lcbc_done: -	vst1.8	{q6},[r4] -.Lcbc_abort: -	vldmia	sp!,{d8-d15} -	ldmia	sp!,{r4-r8,pc} -.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt -.globl	aes_v8_ctr32_encrypt_blocks -.type	aes_v8_ctr32_encrypt_blocks,%function -.align	5 -aes_v8_ctr32_encrypt_blocks: -	mov		ip,sp -	stmdb		sp!,{r4-r10,lr} -	vstmdb		sp!,{d8-d15}            @ ABI specification says so -	ldr		r4, [ip]		@ load remaining arg -	ldr		r5,[r3,#240] - -	ldr		r8, [r4, #12] -	vld1.32		{q0},[r4] - -	vld1.32		{q8-q9},[r3]		@ load key schedule... -	sub		r5,r5,#6 -	add		r7,r3,r5,lsl#4	@ pointer to last 7 round keys -	sub		r5,r5,#2 -	vld1.32		{q10-q11},[r7]! -	vld1.32		{q12-q13},[r7]! -	vld1.32		{q14-q15},[r7]! 
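Both the CBC and CTR entry points above read the round count from byte offset 240 (ldr r5,[r3,#240]) and park a second pointer at the last seven round keys via sub r5,r5,#6 / add r7,r3,r5,lsl#4, so q8-q15 and q7 can hold the whole tail of the schedule across the main loop. That arithmetic follows from OpenSSL's AES_KEY layout; a sketch (the struct name is mine, the layout mirrors OpenSSL's):

#include <stdint.h>

typedef struct {
    uint32_t rd_key[60];  /* up to 15 round keys of 4 words each */
    int      rounds;      /* lands at byte offset 240 */
} aes_key_layout;

static const uint32_t *last_seven_round_keys(const aes_key_layout *k)
{
    /* one round key = 4 words = 16 bytes, hence the lsl #4 in the asm */
    return k->rd_key + 4 * (k->rounds - 6);
}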
-	vld1.32		{q7},[r7] - -	add		r7,r3,#32 -	mov		r6,r5 - -	subs		r2,r2,#2 -	blo		.Lctr32_tail - -#ifndef __ARMEB__ -	rev		r8, r8 -#endif -	vorr		q1,q0,q0 -	add		r8, r8, #1 -	vorr		q6,q0,q0 -	rev		r10, r8 -	cmp		r5,#2 -	vmov.32	d3[1],r10 -	beq		.Lctr32_128 - -.Loop2x_ctr32: -	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8 -	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8 -	vld1.32		{q8},[r7]! -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	subs		r6,r6,#2 -	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9 -	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9 -	vld1.32		{q9},[r7]! -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	bgt		.Loop2x_ctr32 - -	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8 -	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8 -	.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0 -	 vorr		q0,q6,q6 -	.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1 -	 vorr		q1,q6,q6 -	.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9 -	.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9 -	 vld1.8		{q2},[r0]! -	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4 -	 vld1.8		{q3},[r0]! -	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5 -	 add		r8,r8,#1 -	.byte	0x24,0x83,0xb0,0xf3	@ aese q4,q10 -	.byte	0x24,0xa3,0xb0,0xf3	@ aese q5,q10 -	 rev		r9,r8 -	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4 -	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5 -	 add		r8,r8,#1 -	.byte	0x26,0x83,0xb0,0xf3	@ aese q4,q11 -	.byte	0x26,0xa3,0xb0,0xf3	@ aese q5,q11 -	 veor		q2,q2,q7 -	 rev		r10,r8 -	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4 -	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5 -	 veor		q3,q3,q7 -	 mov		r7,r3 -	.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12 -	.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12 -	 subs		r2,r2,#2 -	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4 -	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5 -	 vld1.32	 {q8-q9},[r7]!	@ re-pre-load rndkey[0-1] -	.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13 -	.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13 -	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4 -	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5 -	.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14 -	.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14 -	 vmov.32	d1[1], r9 -	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4 -	 vmov.32	d3[1], r10 -	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5 -	.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15 -	.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15 - -	 mov		r6,r5 -	veor		q2,q2,q4 -	veor		q3,q3,q5 -	vst1.8		{q2},[r1]! -	vst1.8		{q3},[r1]! -	bhs		.Loop2x_ctr32 - -	adds		r2,r2,#2 -	beq		.Lctr32_done -	b		.Lctr32_tail - -.Lctr32_128: -	vld1.32		{q4-q5},[r7] - -.Loop2x_ctr32_128: -	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8 -	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 vld1.8		{q2},[r0]! -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	 vld1.8		{q3},[r0]! 
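The counter handling above is the "ctr32" contract: only the low 32 bits of the counter block advance, and they sit big-endian in memory, hence the rev before and after each add and the vmov.32 into the top lane. A scalar sketch of the same bookkeeping (callback and names are illustrative; __builtin_bswap32 is the GCC/Clang builtin standing in for rev):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef void (*block_fn)(const uint8_t in[16], uint8_t out[16]);

void ctr32_encrypt(const uint8_t *in, uint8_t *out, size_t blocks,
                   uint8_t ivec[16], block_fn enc)
{
    uint32_t ctr;
    memcpy(&ctr, ivec + 12, 4);
    ctr = __builtin_bswap32(ctr);          /* big-endian word -> host order */
    while (blocks--) {
        uint8_t ks[16];
        enc(ivec, ks);                     /* keystream = E_K(counter block) */
        for (int i = 0; i < 16; i++)
            *out++ = *in++ ^ ks[i];
        ctr++;                             /* 32-bit increment; wraparound is
                                              left to the caller by design */
        uint32_t be = __builtin_bswap32(ctr);
        memcpy(ivec + 12, &be, 4);
    }
}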
-	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9 -	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9 -	 add		r8,r8,#1 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	 rev		r9,r8 -	.byte	0x08,0x03,0xb0,0xf3	@ aese q0,q4 -	.byte	0x08,0x23,0xb0,0xf3	@ aese q1,q4 -	 add		r8,r8,#1 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	 rev		r10,r8 -	.byte	0x0a,0x03,0xb0,0xf3	@ aese q0,q5 -	.byte	0x0a,0x23,0xb0,0xf3	@ aese q1,q5 -	 subs		r2,r2,#2 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10 -	.byte	0x24,0x23,0xb0,0xf3	@ aese q1,q10 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11 -	.byte	0x26,0x23,0xb0,0xf3	@ aese q1,q11 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12 -	.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13 -	.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14 -	.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1 -	 veor		q2,q2,q7 -	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15 -	 veor		q3,q3,q7 -	.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15 - -	veor		q2,q2,q0 -	vorr		q0,q6,q6 -	veor		q3,q3,q1 -	vorr		q1,q6,q6 -	vst1.8		{q2},[r1]! -	vmov.32	d1[1], r9 -	vst1.8		{q3},[r1]! -	vmov.32	d3[1], r10 -	bhs		.Loop2x_ctr32_128 - -	adds		r2,r2,#2 -	beq		.Lctr32_done - -.Lctr32_tail: -	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8 -	vld1.32		{q8},[r7]! -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	subs		r6,r6,#2 -	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9 -	vld1.32		{q9},[r7]! -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	bgt		.Lctr32_tail - -	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 vld1.8		{q2},[r0] -	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14 -	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0 -	 veor		q2,q2,q7 -	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15 - -	veor		q2,q2,q0 -	vst1.8		{q2},[r1] - -.Lctr32_done: -	vldmia		sp!,{d8-d15} -	ldmia		sp!,{r4-r10,pc} -.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks -#endif diff --git a/app/openssl/crypto/aes/asm/aesv8-armx.pl b/app/openssl/crypto/aes/asm/aesv8-armx.pl deleted file mode 100644 index 415dc04a..00000000 --- a/app/openssl/crypto/aes/asm/aesv8-armx.pl +++ /dev/null @@ -1,980 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. 
-# ==================================================================== -# -# This module implements support for ARMv8 AES instructions. The -# module is endian-agnostic in sense that it supports both big- and -# little-endian cases. As does it support both 32- and 64-bit modes -# of operation. Latter is achieved by limiting amount of utilized -# registers to 16, which implies additional instructions. This has -# no effect on mighty Apple A7, as results are literally equal to -# the theoretical estimates based on instruction latencies and issue -# rate. It remains to be seen how does it affect other platforms... -# -# Performance in cycles per byte processed with 128-bit key: -# -#		CBC enc		CBC dec		CTR -# Apple A7	2.39		1.20		1.20 -# Cortex-A5x	n/a		n/a		n/a - -$flavour = shift; -open STDOUT,">".shift; - -$prefix="aes_v8"; - -$code=<<___; -#include "arm_arch.h" - -#if __ARM_ARCH__>=7 -.text -___ -$code.=".arch	armv8-a+crypto\n"	if ($flavour =~ /64/); -$code.=".fpu	neon\n.code	32\n"	if ($flavour !~ /64/); - -# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax, -# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to -# maintain both 32- and 64-bit codes within single module and -# transliterate common code to either flavour with regex vodoo. -# -{{{ -my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12"); -my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)= -	$flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10)); - - -$code.=<<___; -.align	5 -rcon: -.long	0x01,0x01,0x01,0x01 -.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat -.long	0x1b,0x1b,0x1b,0x1b - -.globl	${prefix}_set_encrypt_key -.type	${prefix}_set_encrypt_key,%function -.align	5 -${prefix}_set_encrypt_key: -.Lenc_key: -___ -$code.=<<___	if ($flavour =~ /64/); -	stp	x29,x30,[sp,#-16]! 
-	add	x29,sp,#0 -___ -$code.=<<___; -	adr	$ptr,rcon -	cmp	$bits,#192 - -	veor	$zero,$zero,$zero -	vld1.8	{$in0},[$inp],#16 -	mov	$bits,#8		// reuse $bits -	vld1.32	{$rcon,$mask},[$ptr],#32 - -	b.lt	.Loop128 -	b.eq	.L192 -	b	.L256 - -.align	4 -.Loop128: -	vtbl.8	$key,{$in0},$mask -	vext.8	$tmp,$zero,$in0,#12 -	vst1.32	{$in0},[$out],#16 -	aese	$key,$zero -	subs	$bits,$bits,#1 - -	veor	$in0,$in0,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	veor	$in0,$in0,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	 veor	$key,$key,$rcon -	veor	$in0,$in0,$tmp -	vshl.u8	$rcon,$rcon,#1 -	veor	$in0,$in0,$key -	b.ne	.Loop128 - -	vld1.32	{$rcon},[$ptr] - -	vtbl.8	$key,{$in0},$mask -	vext.8	$tmp,$zero,$in0,#12 -	vst1.32	{$in0},[$out],#16 -	aese	$key,$zero - -	veor	$in0,$in0,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	veor	$in0,$in0,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	 veor	$key,$key,$rcon -	veor	$in0,$in0,$tmp -	vshl.u8	$rcon,$rcon,#1 -	veor	$in0,$in0,$key - -	vtbl.8	$key,{$in0},$mask -	vext.8	$tmp,$zero,$in0,#12 -	vst1.32	{$in0},[$out],#16 -	aese	$key,$zero - -	veor	$in0,$in0,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	veor	$in0,$in0,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	 veor	$key,$key,$rcon -	veor	$in0,$in0,$tmp -	veor	$in0,$in0,$key -	vst1.32	{$in0},[$out] -	add	$out,$out,#0x50 - -	mov	$rounds,#10 -	b	.Ldone - -.align	4 -.L192: -	vld1.8	{$in1},[$inp],#8 -	vmov.i8	$key,#8			// borrow $key -	vst1.32	{$in0},[$out],#16 -	vsub.i8	$mask,$mask,$key	// adjust the mask - -.Loop192: -	vtbl.8	$key,{$in1},$mask -	vext.8	$tmp,$zero,$in0,#12 -	vst1.32	{$in1},[$out],#8 -	aese	$key,$zero -	subs	$bits,$bits,#1 - -	veor	$in0,$in0,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	veor	$in0,$in0,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	veor	$in0,$in0,$tmp - -	vdup.32	$tmp,${in0}[3] -	veor	$tmp,$tmp,$in1 -	 veor	$key,$key,$rcon -	vext.8	$in1,$zero,$in1,#12 -	vshl.u8	$rcon,$rcon,#1 -	veor	$in1,$in1,$tmp -	veor	$in0,$in0,$key -	veor	$in1,$in1,$key -	vst1.32	{$in0},[$out],#16 -	b.ne	.Loop192 - -	mov	$rounds,#12 -	add	$out,$out,#0x20 -	b	.Ldone - -.align	4 -.L256: -	vld1.8	{$in1},[$inp] -	mov	$bits,#7 -	mov	$rounds,#14 -	vst1.32	{$in0},[$out],#16 - -.Loop256: -	vtbl.8	$key,{$in1},$mask -	vext.8	$tmp,$zero,$in0,#12 -	vst1.32	{$in1},[$out],#16 -	aese	$key,$zero -	subs	$bits,$bits,#1 - -	veor	$in0,$in0,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	veor	$in0,$in0,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	 veor	$key,$key,$rcon -	veor	$in0,$in0,$tmp -	vshl.u8	$rcon,$rcon,#1 -	veor	$in0,$in0,$key -	vst1.32	{$in0},[$out],#16 -	b.eq	.Ldone - -	vdup.32	$key,${in0}[3]		// just splat -	vext.8	$tmp,$zero,$in1,#12 -	aese	$key,$zero - -	veor	$in1,$in1,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	veor	$in1,$in1,$tmp -	vext.8	$tmp,$zero,$tmp,#12 -	veor	$in1,$in1,$tmp - -	veor	$in1,$in1,$key -	b	.Loop256 - -.Ldone: -	str	$rounds,[$out] - -	eor	x0,x0,x0		// return value -	`"ldr	x29,[sp],#16"		if ($flavour =~ /64/)` -	ret -.size	${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key - -.globl	${prefix}_set_decrypt_key -.type	${prefix}_set_decrypt_key,%function -.align	5 -${prefix}_set_decrypt_key: -___ -$code.=<<___	if ($flavour =~ /64/); -	stp	x29,x30,[sp,#-16]! 
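The rcon table and the vshl.u8 $rcon,$rcon,#1 doubling in .Loop128 above implement the AES round constants: each constant is the previous one times x in GF(2^8) modulo x^8+x^4+x^3+x+1, which is why the table carries an explicit 0x1b entry for the constants past 0x80 (a plain byte shift would lose the reduction). A self-contained check (function names are mine):

#include <stdint.h>
#include <stdio.h>

static uint8_t xtime(uint8_t x)  /* multiply by x in GF(2^8), poly 0x11b */
{
    return (uint8_t)((x << 1) ^ ((x & 0x80) ? 0x1b : 0x00));
}

int main(void)
{
    uint8_t rc = 0x01;
    for (int i = 1; i <= 10; i++) {  /* AES-128 key expansion needs ten */
        printf("rcon[%d] = 0x%02x\n", i, rc);
        rc = xtime(rc);
    }
    return 0;  /* prints 01 02 04 08 10 20 40 80 1b 36 */
}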
-	add	x29,sp,#0 -___ -$code.=<<___	if ($flavour !~ /64/); -	stmdb	sp!,{r4,lr} -___ -$code.=<<___; -	bl	.Lenc_key - -	sub	$out,$out,#240		// restore original $out -	mov	x4,#-16 -	add	$inp,$out,x12,lsl#4	// end of key schedule - -	vld1.32	{v0.16b},[$out] -	vld1.32	{v1.16b},[$inp] -	vst1.32	{v0.16b},[$inp],x4 -	vst1.32	{v1.16b},[$out],#16 - -.Loop_imc: -	vld1.32	{v0.16b},[$out] -	vld1.32	{v1.16b},[$inp] -	aesimc	v0.16b,v0.16b -	aesimc	v1.16b,v1.16b -	vst1.32	{v0.16b},[$inp],x4 -	vst1.32	{v1.16b},[$out],#16 -	cmp	$inp,$out -	b.hi	.Loop_imc - -	vld1.32	{v0.16b},[$out] -	aesimc	v0.16b,v0.16b -	vst1.32	{v0.16b},[$inp] - -	eor	x0,x0,x0		// return value -___ -$code.=<<___	if ($flavour !~ /64/); -	ldmia	sp!,{r4,pc} -___ -$code.=<<___	if ($flavour =~ /64/); -	ldp	x29,x30,[sp],#16 -	ret -___ -$code.=<<___; -.size	${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key -___ -}}} -{{{ -sub gen_block () { -my $dir = shift; -my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc"); -my ($inp,$out,$key)=map("x$_",(0..2)); -my $rounds="w3"; -my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3)); - -$code.=<<___; -.globl	${prefix}_${dir}crypt -.type	${prefix}_${dir}crypt,%function -.align	5 -${prefix}_${dir}crypt: -	ldr	$rounds,[$key,#240] -	vld1.32	{$rndkey0},[$key],#16 -	vld1.8	{$inout},[$inp] -	sub	$rounds,$rounds,#2 -	vld1.32	{$rndkey1},[$key],#16 - -.Loop_${dir}c: -	aes$e	$inout,$rndkey0 -	vld1.32	{$rndkey0},[$key],#16 -	aes$mc	$inout,$inout -	subs	$rounds,$rounds,#2 -	aes$e	$inout,$rndkey1 -	vld1.32	{$rndkey1},[$key],#16 -	aes$mc	$inout,$inout -	b.gt	.Loop_${dir}c - -	aes$e	$inout,$rndkey0 -	vld1.32	{$rndkey0},[$key] -	aes$mc	$inout,$inout -	aes$e	$inout,$rndkey1 -	veor	$inout,$inout,$rndkey0 - -	vst1.8	{$inout},[$out] -	ret -.size	${prefix}_${dir}crypt,.-${prefix}_${dir}crypt -___ -} -&gen_block("en"); -&gen_block("de"); -}}} -{{{ -my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5"; -my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12"); -my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); - -my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); - -### q8-q15	preloaded key schedule - -$code.=<<___; -.globl	${prefix}_cbc_encrypt -.type	${prefix}_cbc_encrypt,%function -.align	5 -${prefix}_cbc_encrypt: -___ -$code.=<<___	if ($flavour =~ /64/); -	stp	x29,x30,[sp,#-16]! -	add	x29,sp,#0 -___ -$code.=<<___	if ($flavour !~ /64/); -	mov	ip,sp -	stmdb	sp!,{r4-r8,lr} -	vstmdb	sp!,{d8-d15}            @ ABI specification says so -	ldmia	ip,{r4-r5}		@ load remaining args -___ -$code.=<<___; -	subs	$len,$len,#16 -	mov	$step,#16 -	b.lo	.Lcbc_abort -	cclr	$step,eq - -	cmp	$enc,#0			// en- or decrypting? -	ldr	$rounds,[$key,#240] -	and	$len,$len,#-16 -	vld1.8	{$ivec},[$ivp] -	vld1.8	{$dat},[$inp],$step - -	vld1.32	{q8-q9},[$key]		// load key schedule... 
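${prefix}_set_decrypt_key above derives the decryption schedule from the encryption one using AES's equivalent inverse cipher: the round keys are reversed in order (the two-pointer swap loop), and every key except the two outermost gets InvMixColumns applied (the aesimc pair in .Loop_imc plus the single middle key). A sketch with the corresponding ACLE intrinsic (function name and array layout assumed, as before):

#include <arm_neon.h>

void invert_key_schedule(uint8x16_t *rk, int rounds)
{
    for (int i = 0, j = rounds; i < j; i++, j--) {
        uint8x16_t t = rk[i];  /* reverse the order of the round keys */
        rk[i] = rk[j];
        rk[j] = t;
    }
    for (int i = 1; i < rounds; i++)
        rk[i] = vaesimcq_u8(rk[i]);  /* InvMixColumns on inner keys only */
}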
-	sub	$rounds,$rounds,#6 -	add	$key_,$key,x5,lsl#4	// pointer to last 7 round keys -	sub	$rounds,$rounds,#2 -	vld1.32	{q10-q11},[$key_],#32 -	vld1.32	{q12-q13},[$key_],#32 -	vld1.32	{q14-q15},[$key_],#32 -	vld1.32	{$rndlast},[$key_] - -	add	$key_,$key,#32 -	mov	$cnt,$rounds -	b.eq	.Lcbc_dec - -	cmp	$rounds,#2 -	veor	$dat,$dat,$ivec -	veor	$rndzero_n_last,q8,$rndlast -	b.eq	.Lcbc_enc128 - -.Loop_cbc_enc: -	aese	$dat,q8 -	vld1.32	{q8},[$key_],#16 -	aesmc	$dat,$dat -	subs	$cnt,$cnt,#2 -	aese	$dat,q9 -	vld1.32	{q9},[$key_],#16 -	aesmc	$dat,$dat -	b.gt	.Loop_cbc_enc - -	aese	$dat,q8 -	aesmc	$dat,$dat -	 subs	$len,$len,#16 -	aese	$dat,q9 -	aesmc	$dat,$dat -	 cclr	$step,eq -	aese	$dat,q10 -	aesmc	$dat,$dat -	 add	$key_,$key,#16 -	aese	$dat,q11 -	aesmc	$dat,$dat -	 vld1.8	{q8},[$inp],$step -	aese	$dat,q12 -	aesmc	$dat,$dat -	 veor	q8,q8,$rndzero_n_last -	aese	$dat,q13 -	aesmc	$dat,$dat -	 vld1.32 {q9},[$key_],#16	// re-pre-load rndkey[1] -	aese	$dat,q14 -	aesmc	$dat,$dat -	aese	$dat,q15 - -	 mov	$cnt,$rounds -	veor	$ivec,$dat,$rndlast -	vst1.8	{$ivec},[$out],#16 -	b.hs	.Loop_cbc_enc - -	b	.Lcbc_done - -.align	5 -.Lcbc_enc128: -	vld1.32	{$in0-$in1},[$key_] -	aese	$dat,q8 -	aesmc	$dat,$dat -	b	.Lenter_cbc_enc128 -.Loop_cbc_enc128: -	aese	$dat,q8 -	aesmc	$dat,$dat -	 vst1.8	{$ivec},[$out],#16 -.Lenter_cbc_enc128: -	aese	$dat,q9 -	aesmc	$dat,$dat -	 subs	$len,$len,#16 -	aese	$dat,$in0 -	aesmc	$dat,$dat -	 cclr	$step,eq -	aese	$dat,$in1 -	aesmc	$dat,$dat -	aese	$dat,q10 -	aesmc	$dat,$dat -	aese	$dat,q11 -	aesmc	$dat,$dat -	 vld1.8	{q8},[$inp],$step -	aese	$dat,q12 -	aesmc	$dat,$dat -	aese	$dat,q13 -	aesmc	$dat,$dat -	aese	$dat,q14 -	aesmc	$dat,$dat -	 veor	q8,q8,$rndzero_n_last -	aese	$dat,q15 -	veor	$ivec,$dat,$rndlast -	b.hs	.Loop_cbc_enc128 - -	vst1.8	{$ivec},[$out],#16 -	b	.Lcbc_done - -.align	5 -.Lcbc_dec128: -	vld1.32	{$tmp0-$tmp1},[$key_] -	veor	$ivec,$ivec,$rndlast -	veor	$in0,$dat0,$rndlast -	mov	$step1,$step - -.Loop2x_cbc_dec128: -	aesd	$dat0,q8 -	aesd	$dat1,q8 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	 subs	$len,$len,#32 -	aesd	$dat0,q9 -	aesd	$dat1,q9 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	 cclr	$step,lo -	aesd	$dat0,$tmp0 -	aesd	$dat1,$tmp0 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	 cclr	$step1,ls -	aesd	$dat0,$tmp1 -	aesd	$dat1,$tmp1 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	aesd	$dat0,q10 -	aesd	$dat1,q10 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	aesd	$dat0,q11 -	aesd	$dat1,q11 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	aesd	$dat0,q12 -	aesd	$dat1,q12 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	aesd	$dat0,q13 -	aesd	$dat1,q13 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	aesd	$dat0,q14 -	aesd	$dat1,q14 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	aesd	$dat0,q15 -	aesd	$dat1,q15 - -	veor	$ivec,$ivec,$dat0 -	vld1.8	{$dat0},[$inp],$step -	veor	$in0,$in0,$dat1 -	vld1.8	{$dat1},[$inp],$step1 -	vst1.8	{$ivec},[$out],#16 -	veor	$ivec,$in1,$rndlast -	vst1.8	{$in0},[$out],#16 -	veor	$in0,$dat0,$rndlast -	vorr	$in1,$dat1,$dat1 -	b.hs	.Loop2x_cbc_dec128 - -	adds	$len,$len,#32 -	veor	$ivec,$ivec,$rndlast -	b.eq	.Lcbc_done -	veor	$in0,$in0,$rndlast -	b	.Lcbc_dec_tail - -.align	5 -.Lcbc_dec: -	subs	$len,$len,#16 -	vorr	$in0,$dat,$dat -	b.lo	.Lcbc_dec_tail - -	cclr	$step,eq -	cmp	$rounds,#2 -	vld1.8	{$dat1},[$inp],$step -	vorr	$in1,$dat1,$dat1 -	b.eq	.Lcbc_dec128 - -.Loop2x_cbc_dec: -	aesd	$dat0,q8 -	aesd	$dat1,q8 -	vld1.32	{q8},[$key_],#16 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	subs	$cnt,$cnt,#2 -	aesd	$dat0,q9 -	aesd	$dat1,q9 -	vld1.32	{q9},[$key_],#16 -	aesimc	$dat0,$dat0 -	aesimc	
$dat1,$dat1 -	b.gt	.Loop2x_cbc_dec - -	aesd	$dat0,q8 -	aesd	$dat1,q8 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	 veor	$tmp0,$ivec,$rndlast -	 veor	$tmp1,$in0,$rndlast -	aesd	$dat0,q9 -	aesd	$dat1,q9 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	 vorr	$ivec,$in1,$in1 -	 subs	$len,$len,#32 -	aesd	$dat0,q10 -	aesd	$dat1,q10 -	aesimc	$dat0,$dat0 -	 cclr	$step,lo -	aesimc	$dat1,$dat1 -	 mov	$key_,$key -	aesd	$dat0,q11 -	aesd	$dat1,q11 -	aesimc	$dat0,$dat0 -	 vld1.8	{$in0},[$inp],$step -	aesimc	$dat1,$dat1 -	 cclr	$step,ls -	aesd	$dat0,q12 -	aesd	$dat1,q12 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	 vld1.8	{$in1},[$inp],$step -	aesd	$dat0,q13 -	aesd	$dat1,q13 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	 vld1.32 {q8},[$key_],#16	// re-pre-load rndkey[0] -	aesd	$dat0,q14 -	aesd	$dat1,q14 -	aesimc	$dat0,$dat0 -	aesimc	$dat1,$dat1 -	 vld1.32 {q9},[$key_],#16	// re-pre-load rndkey[1] -	aesd	$dat0,q15 -	aesd	$dat1,q15 - -	 mov	$cnt,$rounds -	veor	$tmp0,$tmp0,$dat0 -	veor	$tmp1,$tmp1,$dat1 -	 vorr	$dat0,$in0,$in0 -	vst1.8	{$tmp0},[$out],#16 -	 vorr	$dat1,$in1,$in1 -	vst1.8	{$tmp1},[$out],#16 -	b.hs	.Loop2x_cbc_dec - -	adds	$len,$len,#32 -	b.eq	.Lcbc_done - -.Lcbc_dec_tail: -	aesd	$dat,q8 -	vld1.32	{q8},[$key_],#16 -	aesimc	$dat,$dat -	subs	$cnt,$cnt,#2 -	aesd	$dat,q9 -	vld1.32	{q9},[$key_],#16 -	aesimc	$dat,$dat -	b.gt	.Lcbc_dec_tail - -	aesd	$dat,q8 -	aesimc	$dat,$dat -	aesd	$dat,q9 -	aesimc	$dat,$dat -	 veor	$tmp,$ivec,$rndlast -	aesd	$dat,q10 -	aesimc	$dat,$dat -	 vorr	$ivec,$in0,$in0 -	aesd	$dat,q11 -	aesimc	$dat,$dat -	aesd	$dat,q12 -	aesimc	$dat,$dat -	aesd	$dat,q13 -	aesimc	$dat,$dat -	aesd	$dat,q14 -	aesimc	$dat,$dat -	aesd	$dat,q15 - -	veor	$tmp,$tmp,$dat -	vst1.8	{$tmp},[$out],#16 - -.Lcbc_done: -	vst1.8	{$ivec},[$ivp] -.Lcbc_abort: -___ -$code.=<<___	if ($flavour !~ /64/); -	vldmia	sp!,{d8-d15} -	ldmia	sp!,{r4-r8,pc} -___ -$code.=<<___	if ($flavour =~ /64/); -	ldr	x29,[sp],#16 -	ret -___ -$code.=<<___; -.size	${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt -___ -}}} -{{{ -my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); -my ($rounds,$cnt,$key_,$ctr,$tctr,$tctr1)=("w5","w6","x7","w8","w9","w10"); -my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); - -my ($dat,$tmp)=($dat0,$tmp0); - -### q8-q15	preloaded key schedule - -$code.=<<___; -.globl	${prefix}_ctr32_encrypt_blocks -.type	${prefix}_ctr32_encrypt_blocks,%function -.align	5 -${prefix}_ctr32_encrypt_blocks: -___ -$code.=<<___	if ($flavour =~ /64/); -	stp		x29,x30,[sp,#-16]! -	add		x29,sp,#0 -___ -$code.=<<___	if ($flavour !~ /64/); -	mov		ip,sp -	stmdb		sp!,{r4-r10,lr} -	vstmdb		sp!,{d8-d15}            @ ABI specification says so -	ldr		r4, [ip]		@ load remaining arg -___ -$code.=<<___; -	ldr		$rounds,[$key,#240] - -	ldr		$ctr, [$ivp, #12] -	vld1.32		{$dat0},[$ivp] - -	vld1.32		{q8-q9},[$key]		// load key schedule... 
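A recurring idiom in these loops is cclr $step,eq (moveq r8,#0 in the 32-bit output) right after the length is decremented: once the block just fetched was the last one, the post-increment register is zeroed so the final vld1.8 re-reads the last block rather than touching bytes past the end of the input. The scalar equivalent (names illustrative):

#include <stddef.h>

/* post-increment for the next load: 16 while input remains, 0 after the
   last block has been consumed */
static size_t next_step(size_t remaining)
{
    return remaining ? 16 : 0;
}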
-	sub		$rounds,$rounds,#6 -	add		$key_,$key,x5,lsl#4	// pointer to last 7 round keys -	sub		$rounds,$rounds,#2 -	vld1.32		{q10-q11},[$key_],#32 -	vld1.32		{q12-q13},[$key_],#32 -	vld1.32		{q14-q15},[$key_],#32 -	vld1.32		{$rndlast},[$key_] - -	add		$key_,$key,#32 -	mov		$cnt,$rounds - -	subs		$len,$len,#2 -	b.lo		.Lctr32_tail - -#ifndef __ARMEB__ -	rev		$ctr, $ctr -#endif -	vorr		$dat1,$dat0,$dat0 -	add		$ctr, $ctr, #1 -	vorr		$ivec,$dat0,$dat0 -	rev		$tctr1, $ctr -	cmp		$rounds,#2 -	vmov.32		${dat1}[3],$tctr1 -	b.eq		.Lctr32_128 - -.Loop2x_ctr32: -	aese		$dat0,q8 -	aese		$dat1,q8 -	vld1.32		{q8},[$key_],#16 -	aesmc		$dat0,$dat0 -	aesmc		$dat1,$dat1 -	subs		$cnt,$cnt,#2 -	aese		$dat0,q9 -	aese		$dat1,q9 -	vld1.32		{q9},[$key_],#16 -	aesmc		$dat0,$dat0 -	aesmc		$dat1,$dat1 -	b.gt		.Loop2x_ctr32 - -	aese		$dat0,q8 -	aese		$dat1,q8 -	aesmc		$tmp0,$dat0 -	 vorr		$dat0,$ivec,$ivec -	aesmc		$tmp1,$dat1 -	 vorr		$dat1,$ivec,$ivec -	aese		$tmp0,q9 -	aese		$tmp1,q9 -	 vld1.8		{$in0},[$inp],#16 -	aesmc		$tmp0,$tmp0 -	 vld1.8		{$in1},[$inp],#16 -	aesmc		$tmp1,$tmp1 -	 add		$ctr,$ctr,#1 -	aese		$tmp0,q10 -	aese		$tmp1,q10 -	 rev		$tctr,$ctr -	aesmc		$tmp0,$tmp0 -	aesmc		$tmp1,$tmp1 -	 add		$ctr,$ctr,#1 -	aese		$tmp0,q11 -	aese		$tmp1,q11 -	 veor		$in0,$in0,$rndlast -	 rev		$tctr1,$ctr -	aesmc		$tmp0,$tmp0 -	aesmc		$tmp1,$tmp1 -	 veor		$in1,$in1,$rndlast -	 mov		$key_,$key -	aese		$tmp0,q12 -	aese		$tmp1,q12 -	 subs		$len,$len,#2 -	aesmc		$tmp0,$tmp0 -	aesmc		$tmp1,$tmp1 -	 vld1.32	 {q8-q9},[$key_],#32	// re-pre-load rndkey[0-1] -	aese		$tmp0,q13 -	aese		$tmp1,q13 -	aesmc		$tmp0,$tmp0 -	aesmc		$tmp1,$tmp1 -	aese		$tmp0,q14 -	aese		$tmp1,q14 -	 vmov.32	${dat0}[3], $tctr -	aesmc		$tmp0,$tmp0 -	 vmov.32	${dat1}[3], $tctr1 -	aesmc		$tmp1,$tmp1 -	aese		$tmp0,q15 -	aese		$tmp1,q15 - -	 mov		$cnt,$rounds -	veor		$in0,$in0,$tmp0 -	veor		$in1,$in1,$tmp1 -	vst1.8		{$in0},[$out],#16 -	vst1.8		{$in1},[$out],#16 -	b.hs		.Loop2x_ctr32 - -	adds		$len,$len,#2 -	b.eq		.Lctr32_done -	b		.Lctr32_tail - -.Lctr32_128: -	vld1.32		{$tmp0-$tmp1},[$key_] - -.Loop2x_ctr32_128: -	aese		$dat0,q8 -	aese		$dat1,q8 -	aesmc		$dat0,$dat0 -	 vld1.8		{$in0},[$inp],#16 -	aesmc		$dat1,$dat1 -	 vld1.8		{$in1},[$inp],#16 -	aese		$dat0,q9 -	aese		$dat1,q9 -	 add		$ctr,$ctr,#1 -	aesmc		$dat0,$dat0 -	aesmc		$dat1,$dat1 -	 rev		$tctr,$ctr -	aese		$dat0,$tmp0 -	aese		$dat1,$tmp0 -	 add		$ctr,$ctr,#1 -	aesmc		$dat0,$dat0 -	aesmc		$dat1,$dat1 -	 rev		$tctr1,$ctr -	aese		$dat0,$tmp1 -	aese		$dat1,$tmp1 -	 subs		$len,$len,#2 -	aesmc		$dat0,$dat0 -	aesmc		$dat1,$dat1 -	aese		$dat0,q10 -	aese		$dat1,q10 -	aesmc		$dat0,$dat0 -	aesmc		$dat1,$dat1 -	aese		$dat0,q11 -	aese		$dat1,q11 -	aesmc		$dat0,$dat0 -	aesmc		$dat1,$dat1 -	aese		$dat0,q12 -	aese		$dat1,q12 -	aesmc		$dat0,$dat0 -	aesmc		$dat1,$dat1 -	aese		$dat0,q13 -	aese		$dat1,q13 -	aesmc		$dat0,$dat0 -	aesmc		$dat1,$dat1 -	aese		$dat0,q14 -	aese		$dat1,q14 -	aesmc		$dat0,$dat0 -	aesmc		$dat1,$dat1 -	 veor		$in0,$in0,$rndlast -	aese		$dat0,q15 -	 veor		$in1,$in1,$rndlast -	aese		$dat1,q15 - -	veor		$in0,$in0,$dat0 -	vorr		$dat0,$ivec,$ivec -	veor		$in1,$in1,$dat1 -	vorr		$dat1,$ivec,$ivec -	vst1.8		{$in0},[$out],#16 -	vmov.32		${dat0}[3], $tctr -	vst1.8		{$in1},[$out],#16 -	vmov.32		${dat1}[3], $tctr1 -	b.hs		.Loop2x_ctr32_128 - -	adds		$len,$len,#2 -	b.eq		.Lctr32_done - -.Lctr32_tail: -	aese		$dat,q8 -	vld1.32		{q8},[$key_],#16 -	aesmc		$dat,$dat -	subs		$cnt,$cnt,#2 -	aese		$dat,q9 -	vld1.32		{q9},[$key_],#16 -	aesmc		$dat,$dat -	b.gt		.Lctr32_tail - -	aese		$dat,q8 -	aesmc		$dat,$dat -	aese		
$dat,q9 -	aesmc		$dat,$dat -	 vld1.8		{$in0},[$inp] -	aese		$dat,q10 -	aesmc		$dat,$dat -	aese		$dat,q11 -	aesmc		$dat,$dat -	aese		$dat,q12 -	aesmc		$dat,$dat -	aese		$dat,q13 -	aesmc		$dat,$dat -	aese		$dat,q14 -	aesmc		$dat,$dat -	 veor		$in0,$in0,$rndlast -	aese		$dat,q15 - -	veor		$in0,$in0,$dat -	vst1.8		{$in0},[$out] - -.Lctr32_done: -___ -$code.=<<___	if ($flavour !~ /64/); -	vldmia		sp!,{d8-d15} -	ldmia		sp!,{r4-r10,pc} -___ -$code.=<<___	if ($flavour =~ /64/); -	ldr		x29,[sp],#16 -	ret -___ -$code.=<<___; -.size	${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks -___ -}}} -$code.=<<___; -#endif -___ -######################################## -if ($flavour =~ /64/) {			######## 64-bit code -    my %opcode = ( -	"aesd"	=>	0x4e285800,	"aese"	=>	0x4e284800, -	"aesimc"=>	0x4e287800,	"aesmc"	=>	0x4e286800	); - -    local *unaes = sub { -	my ($mnemonic,$arg)=@_; - -	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o	&& -	sprintf ".inst\t0x%08x\t//%s %s", -			$opcode{$mnemonic}|$1|($2<<5), -			$mnemonic,$arg; -    }; - -    foreach(split("\n",$code)) { -        s/\`([^\`]*)\`/eval($1)/geo; - -	s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;	# old->new registers -        s/@\s/\/\//o;			# old->new style commentary - -	#s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or -	s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel	$1$2,$1zr,$1$2,$3/o	or -        s/vmov\.i8/movi/o	or	# fix up legacy mnemonics -        s/vext\.8/ext/o		or -        s/vrev32\.8/rev32/o	or -        s/vtst\.8/cmtst/o	or -        s/vshr/ushr/o		or -        s/^(\s+)v/$1/o		or	# strip off v prefix -	s/\bbx\s+lr\b/ret/o; - -	# fix up remainig legacy suffixes -	s/\.[ui]?8//o; -	m/\],#8/o and s/\.16b/\.8b/go; -        s/\.[ui]?32//o and s/\.16b/\.4s/go; -        s/\.[ui]?64//o and s/\.16b/\.2d/go; -	s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; - -        print $_,"\n"; -    } -} else {				######## 32-bit code -    my %opcode = ( -	"aesd"	=>	0xf3b00340,	"aese"	=>	0xf3b00300, -	"aesimc"=>	0xf3b003c0,	"aesmc"	=>	0xf3b00380	); - -    local *unaes = sub { -	my ($mnemonic,$arg)=@_; - -	if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) { -	    my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) -					 |(($2&7)<<1) |(($2&8)<<2); -	    # since ARMv7 instructions are always encoded little-endian. -	    # correct solution is to use .inst directive, but older -	    # assemblers don't implement it:-( -	    sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", -			$word&0xff,($word>>8)&0xff, -			($word>>16)&0xff,($word>>24)&0xff, -			$mnemonic,$arg; -	} -    }; - -    sub unvtbl { -	my $arg=shift; - -	$arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o && -	sprintf	"vtbl.8	d%d,{q%d},d%d\n\t". 
-		"vtbl.8	d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1;	 -    } - -    sub unvdup32 { -	my $arg=shift; - -	$arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && -	sprintf	"vdup.32	q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;	 -    } - -    sub unvmov32 { -	my $arg=shift; - -	$arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o && -	sprintf	"vmov.32	d%d[%d],%s",2*$1+($2>>1),$2&1,$3;	 -    } - -    foreach(split("\n",$code)) { -        s/\`([^\`]*)\`/eval($1)/geo; - -	s/\b[wx]([0-9]+)\b/r$1/go;		# new->old registers -	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers -        s/\/\/\s?/@ /o;				# new->old style commentary - -	# fix up remainig new-style suffixes -	s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo	or -	s/\],#[0-9]+/]!/o; - -	s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or -	s/cclr\s+([^,]+),\s*([a-z]+)/mov$2	$1,#0/o	or -	s/vtbl\.8\s+(.*)/unvtbl($1)/geo			or -	s/vdup\.32\s+(.*)/unvdup32($1)/geo		or -	s/vmov\.32\s+(.*)/unvmov32($1)/geo		or -	s/^(\s+)b\./$1b/o				or -	s/^(\s+)ret/$1bx\tlr/o; - -        print $_,"\n"; -    } -} - -close STDOUT; diff --git a/app/openssl/crypto/arm64cpuid.S b/app/openssl/crypto/arm64cpuid.S deleted file mode 100644 index 4778ac1d..00000000 --- a/app/openssl/crypto/arm64cpuid.S +++ /dev/null @@ -1,46 +0,0 @@ -#include "arm_arch.h" - -.text -.arch	armv8-a+crypto - -.align	5 -.global	_armv7_neon_probe -.type	_armv7_neon_probe,%function -_armv7_neon_probe: -	orr	v15.16b, v15.16b, v15.16b -	ret -.size	_armv7_neon_probe,.-_armv7_neon_probe - -.global	_armv7_tick -.type	_armv7_tick,%function -_armv7_tick: -	mrs	x0, CNTVCT_EL0 -	ret -.size	_armv7_tick,.-_armv7_tick - -.global	_armv8_aes_probe -.type	_armv8_aes_probe,%function -_armv8_aes_probe: -	aese	v0.16b, v0.16b -	ret -.size	_armv8_aes_probe,.-_armv8_aes_probe - -.global	_armv8_sha1_probe -.type	_armv8_sha1_probe,%function -_armv8_sha1_probe: -	sha1h	s0, s0 -	ret -.size	_armv8_sha1_probe,.-_armv8_sha1_probe - -.global	_armv8_sha256_probe -.type	_armv8_sha256_probe,%function -_armv8_sha256_probe: -	sha256su0	v0.4s, v0.4s -	ret -.size	_armv8_sha256_probe,.-_armv8_sha256_probe -.global	_armv8_pmull_probe -.type	_armv8_pmull_probe,%function -_armv8_pmull_probe: -	pmull	v0.1q, v0.1d, v0.1d -	ret -.size	_armv8_pmull_probe,.-_armv8_pmull_probe diff --git a/app/openssl/crypto/arm_arch.h b/app/openssl/crypto/arm_arch.h index 6fa87244..5a831076 100644 --- a/app/openssl/crypto/arm_arch.h +++ b/app/openssl/crypto/arm_arch.h @@ -10,24 +10,13 @@  #   define __ARMEL__  #  endif  # elif defined(__GNUC__) -#  if	defined(__aarch64__) -#   define __ARM_ARCH__ 8 -#   if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ -#    define __ARMEB__ -#   else -#    define __ARMEL__ -#   endif    /*     * Why doesn't gcc define __ARM_ARCH__? Instead it defines     * bunch of below macros. See all_architectires[] table in     * gcc/config/arm/arm.c. On a side note it defines     * __ARMEL__/__ARMEB__ for little-/big-endian.     
*/ -#  elif defined(__ARM_ARCH) -#   define __ARM_ARCH__ __ARM_ARCH -#  elif	defined(__ARM_ARCH_8A__) -#   define __ARM_ARCH__ 8 -#  elif	defined(__ARM_ARCH_7__)	|| defined(__ARM_ARCH_7A__)	|| \ +#  if	defined(__ARM_ARCH_7__)	|| defined(__ARM_ARCH_7A__)	|| \  	defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__)	|| \  	defined(__ARM_ARCH_7EM__)  #   define __ARM_ARCH__ 7 @@ -54,13 +43,9 @@  #if !__ASSEMBLER__  extern unsigned int OPENSSL_armcap_P; -#endif  #define ARMV7_NEON      (1<<0)  #define ARMV7_TICK      (1<<1) -#define ARMV8_AES       (1<<2) -#define ARMV8_SHA1      (1<<3) -#define ARMV8_SHA256    (1<<4) -#define ARMV8_PMULL     (1<<5) +#endif  #endif diff --git a/app/openssl/crypto/armcap.c b/app/openssl/crypto/armcap.c index 7e46d07a..9abaf396 100644 --- a/app/openssl/crypto/armcap.c +++ b/app/openssl/crypto/armcap.c @@ -19,13 +19,9 @@ static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }   * ARM compilers support inline assembler...   */  void _armv7_neon_probe(void); -void _armv8_aes_probe(void); -void _armv8_sha1_probe(void); -void _armv8_sha256_probe(void); -void _armv8_pmull_probe(void); -unsigned long _armv7_tick(void); +unsigned int _armv7_tick(void); -unsigned long OPENSSL_rdtsc(void) +unsigned int OPENSSL_rdtsc(void)  	{  	if (OPENSSL_armcap_P & ARMV7_TICK)  		return _armv7_tick(); @@ -33,41 +29,9 @@ unsigned long OPENSSL_rdtsc(void)  		return 0;  	} -/* - * Use a weak reference to getauxval() so we can use it if it is available but - * don't break the build if it is not. - */  #if defined(__GNUC__) && __GNUC__>=2  void OPENSSL_cpuid_setup(void) __attribute__((constructor)); -extern unsigned long getauxval(unsigned long type) __attribute__((weak)); -#else -static unsigned long (*getauxval)(unsigned long) = NULL;  #endif - -/* - * ARM puts the the feature bits for Crypto Extensions in AT_HWCAP2, whereas - * AArch64 used AT_HWCAP. 
- */ -#if defined(__arm__) || defined (__arm) -# define HWCAP			16	/* AT_HWCAP */ -# define HWCAP_NEON		(1 << 12) - -# define HWCAP_CE		26	/* AT_HWCAP2 */ -# define HWCAP_CE_AES		(1 << 0) -# define HWCAP_CE_PMULL		(1 << 1) -# define HWCAP_CE_SHA1		(1 << 2) -# define HWCAP_CE_SHA256	(1 << 3) -#elif defined(__aarch64__) -# define HWCAP			16	/* AT_HWCAP */ -# define HWCAP_NEON		(1 << 1) - -# define HWCAP_CE		HWCAP -# define HWCAP_CE_AES		(1 << 3) -# define HWCAP_CE_PMULL		(1 << 4) -# define HWCAP_CE_SHA1		(1 << 5) -# define HWCAP_CE_SHA256	(1 << 6) -#endif -  void OPENSSL_cpuid_setup(void)  	{  	char *e; @@ -80,7 +44,7 @@ void OPENSSL_cpuid_setup(void)  	if ((e=getenv("OPENSSL_armcap")))  		{ -		OPENSSL_armcap_P=(unsigned int)strtoul(e,NULL,0); +		OPENSSL_armcap_P=strtoul(e,NULL,0);  		return;  		} @@ -100,51 +64,10 @@ void OPENSSL_cpuid_setup(void)  	sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset);  	sigaction(SIGILL,&ill_act,&ill_oact); -	if (getauxval != NULL) -		{ -		if (getauxval(HWCAP) & HWCAP_NEON) -			{ -			unsigned long hwcap = getauxval(HWCAP_CE); - -			OPENSSL_armcap_P |= ARMV7_NEON; - -			if (hwcap & HWCAP_CE_AES) -				OPENSSL_armcap_P |= ARMV8_AES; - -			if (hwcap & HWCAP_CE_PMULL) -				OPENSSL_armcap_P |= ARMV8_PMULL; - -			if (hwcap & HWCAP_CE_SHA1) -				OPENSSL_armcap_P |= ARMV8_SHA1; - -			if (hwcap & HWCAP_CE_SHA256) -				OPENSSL_armcap_P |= ARMV8_SHA256; -			} -		} -	else if (sigsetjmp(ill_jmp,1) == 0) +	if (sigsetjmp(ill_jmp,1) == 0)  		{  		_armv7_neon_probe();  		OPENSSL_armcap_P |= ARMV7_NEON; -		if (sigsetjmp(ill_jmp,1) == 0) -			{ -			_armv8_pmull_probe(); -			OPENSSL_armcap_P |= ARMV8_PMULL|ARMV8_AES; -			} -		else if (sigsetjmp(ill_jmp,1) == 0) -			{ -			_armv8_aes_probe(); -			OPENSSL_armcap_P |= ARMV8_AES; -			} -		if (sigsetjmp(ill_jmp,1) == 0) -			{ -			_armv8_sha1_probe(); -			OPENSSL_armcap_P |= ARMV8_SHA1; -			} -		if (sigsetjmp(ill_jmp,1) == 0) -			{ -			_armv8_sha256_probe(); -			OPENSSL_armcap_P |= ARMV8_SHA256; -			}  		}  	if (sigsetjmp(ill_jmp,1) == 0)  		{ diff --git a/app/openssl/crypto/armv4cpuid.S b/app/openssl/crypto/armv4cpuid.S index add11d40..2d618dea 100644 --- a/app/openssl/crypto/armv4cpuid.S +++ b/app/openssl/crypto/armv4cpuid.S @@ -7,49 +7,17 @@  .global	_armv7_neon_probe  .type	_armv7_neon_probe,%function  _armv7_neon_probe: -	.byte	0xf0,0x01,0x60,0xf2	@ vorr	q8,q8,q8 -	.byte	0x1e,0xff,0x2f,0xe1	@ bx	lr +	.word	0xf26ee1fe	@ vorr	q15,q15,q15 +	.word	0xe12fff1e	@ bx	lr  .size	_armv7_neon_probe,.-_armv7_neon_probe  .global	_armv7_tick  .type	_armv7_tick,%function  _armv7_tick: -	mrrc	p15,1,r0,r1,c14		@ CNTVCT -#if __ARM_ARCH__>=5 -	bx	lr -#else -	.word	0xe12fff1e		@ bx	lr -#endif +	mrc	p15,0,r0,c9,c13,0 +	.word	0xe12fff1e	@ bx	lr  .size	_armv7_tick,.-_armv7_tick -.global	_armv8_aes_probe -.type	_armv8_aes_probe,%function -_armv8_aes_probe: -	.byte	0x00,0x03,0xb0,0xf3	@ aese.8	q0,q0 -	.byte	0x1e,0xff,0x2f,0xe1	@ bx	lr -.size	_armv8_aes_probe,.-_armv8_aes_probe - -.global	_armv8_sha1_probe -.type	_armv8_sha1_probe,%function -_armv8_sha1_probe: -	.byte	0x40,0x0c,0x00,0xf2	@ sha1c.32	q0,q0,q0 -	.byte	0x1e,0xff,0x2f,0xe1	@ bx	lr -.size	_armv8_sha1_probe,.-_armv8_sha1_probe - -.global	_armv8_sha256_probe -.type	_armv8_sha256_probe,%function -_armv8_sha256_probe: -	.byte	0x40,0x0c,0x00,0xf3	@ sha256h.32	q0,q0,q0 -	.byte	0x1e,0xff,0x2f,0xe1	@ bx lr -.size	_armv8_sha256_probe,.-_armv8_sha256_probe -.global	_armv8_pmull_probe -.type	_armv8_pmull_probe,%function -_armv8_pmull_probe: -	.byte	0x00,0x0e,0xa0,0xf2	@ vmull.p64	q0,d0,d0 -	.byte	
0x1e,0xff,0x2f,0xe1	@ bx	lr -.size	_armv8_pmull_probe,.-_armv8_pmull_probe - -.align	5  .global	OPENSSL_atomic_add  .type	OPENSSL_atomic_add,%function  OPENSSL_atomic_add: @@ -60,7 +28,7 @@ OPENSSL_atomic_add:  	cmp	r2,#0  	bne	.Ladd  	mov	r0,r3 -	bx	lr +	.word	0xe12fff1e	@ bx	lr  #else  	stmdb	sp!,{r4-r6,lr}  	ldr	r2,.Lspinlock @@ -113,13 +81,9 @@ OPENSSL_cleanse:  	adds	r1,r1,#4  	bne	.Little  .Lcleanse_done: -#if __ARM_ARCH__>=5 -	bx	lr -#else  	tst	lr,#1  	moveq	pc,lr  	.word	0xe12fff1e	@ bx	lr -#endif  .size	OPENSSL_cleanse,.-OPENSSL_cleanse  .global	OPENSSL_wipe_cpu @@ -133,53 +97,41 @@ OPENSSL_wipe_cpu:  	eor	ip,ip,ip  	tst	r0,#1  	beq	.Lwipe_done -	.byte	0x50,0x01,0x00,0xf3	@ veor	q0, q0, q0 -	.byte	0x52,0x21,0x02,0xf3	@ veor	q1, q1, q1 -	.byte	0x54,0x41,0x04,0xf3	@ veor	q2, q2, q2 -	.byte	0x56,0x61,0x06,0xf3	@ veor	q3, q3, q3 -	.byte	0xf0,0x01,0x40,0xf3	@ veor	q8, q8, q8 -	.byte	0xf2,0x21,0x42,0xf3	@ veor	q9, q9, q9 -	.byte	0xf4,0x41,0x44,0xf3	@ veor	q10, q10, q10 -	.byte	0xf6,0x61,0x46,0xf3	@ veor	q11, q11, q11 -	.byte	0xf8,0x81,0x48,0xf3	@ veor	q12, q12, q12 -	.byte	0xfa,0xa1,0x4a,0xf3	@ veor	q13, q13, q13 -	.byte	0xfc,0xc1,0x4c,0xf3	@ veor	q14, q14, q14 -	.byte	0xfe,0xe1,0x4e,0xf3	@ veor	q14, q14, q14 +	.word	0xf3000150	@ veor    q0, q0, q0 +	.word	0xf3022152	@ veor    q1, q1, q1 +	.word	0xf3044154	@ veor    q2, q2, q2 +	.word	0xf3066156	@ veor    q3, q3, q3 +	.word	0xf34001f0	@ veor    q8, q8, q8 +	.word	0xf34221f2	@ veor    q9, q9, q9 +	.word	0xf34441f4	@ veor    q10, q10, q10 +	.word	0xf34661f6	@ veor    q11, q11, q11 +	.word	0xf34881f8	@ veor    q12, q12, q12 +	.word	0xf34aa1fa	@ veor    q13, q13, q13 +	.word	0xf34cc1fc	@ veor    q14, q14, q14 +	.word	0xf34ee1fe	@ veor    q15, q15, q15  .Lwipe_done:  	mov	r0,sp -#if __ARM_ARCH__>=5 -	bx	lr -#else  	tst	lr,#1  	moveq	pc,lr  	.word	0xe12fff1e	@ bx	lr -#endif  .size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu  .global	OPENSSL_instrument_bus  .type	OPENSSL_instrument_bus,%function  OPENSSL_instrument_bus:  	eor	r0,r0,r0 -#if __ARM_ARCH__>=5 -	bx	lr -#else  	tst	lr,#1  	moveq	pc,lr  	.word	0xe12fff1e	@ bx	lr -#endif  .size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus  .global	OPENSSL_instrument_bus2  .type	OPENSSL_instrument_bus2,%function  OPENSSL_instrument_bus2:  	eor	r0,r0,r0 -#if __ARM_ARCH__>=5 -	bx	lr -#else  	tst	lr,#1  	moveq	pc,lr  	.word	0xe12fff1e	@ bx	lr -#endif  .size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2  .align	5 diff --git a/app/openssl/crypto/asn1/a_strnid.c b/app/openssl/crypto/asn1/a_strnid.c index 2afd5a41..2fc48c15 100644 --- a/app/openssl/crypto/asn1/a_strnid.c +++ b/app/openssl/crypto/asn1/a_strnid.c @@ -74,7 +74,7 @@ static int sk_table_cmp(const ASN1_STRING_TABLE * const *a,   * certain software (e.g. Netscape) has problems with them.   */ -static unsigned long global_mask = B_ASN1_UTF8STRING; +static unsigned long global_mask = 0xFFFFFFFFL;  void ASN1_STRING_set_default_mask(unsigned long mask)  { diff --git a/app/openssl/crypto/bio/bio.h b/app/openssl/crypto/bio/bio.h index d05fa22a..05699ab2 100644 --- a/app/openssl/crypto/bio/bio.h +++ b/app/openssl/crypto/bio/bio.h @@ -266,9 +266,6 @@ void BIO_clear_flags(BIO *b, int flags);  #define BIO_RR_CONNECT			0x02  /* Returned from the accept BIO when an accept would have blocked */  #define BIO_RR_ACCEPT			0x03 -/* Returned from the SSL bio when the channel id retrieval code cannot find the - * private key. 
*/ -#define BIO_RR_SSL_CHANNEL_ID_LOOKUP	0x04  /* These are passed by the BIO callback */  #define BIO_CB_FREE	0x01 diff --git a/app/openssl/crypto/bio/bss_dgram.c b/app/openssl/crypto/bio/bss_dgram.c index d9967e72..54c012c4 100644 --- a/app/openssl/crypto/bio/bss_dgram.c +++ b/app/openssl/crypto/bio/bss_dgram.c @@ -1333,7 +1333,7 @@ static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr)  	bio_dgram_sctp_data *data = NULL;  	socklen_t sockopt_len = 0;  	struct sctp_authkeyid authkeyid; -	struct sctp_authkey *authkey = NULL; +	struct sctp_authkey *authkey;  	data = (bio_dgram_sctp_data *)b->ptr; @@ -1388,11 +1388,6 @@ static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr)  		/* Add new key */  		sockopt_len = sizeof(struct sctp_authkey) + 64 * sizeof(uint8_t);  		authkey = OPENSSL_malloc(sockopt_len); -		if (authkey == NULL) -			{ -			ret = -1; -			break; -			}  		memset(authkey, 0x00, sockopt_len);  		authkey->sca_keynumber = authkeyid.scact_keynumber + 1;  #ifndef __FreeBSD__ @@ -1404,8 +1399,6 @@ static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr)  		memcpy(&authkey->sca_key[0], ptr, 64 * sizeof(uint8_t));  		ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_KEY, authkey, sockopt_len); -		OPENSSL_free(authkey); -		authkey = NULL;  		if (ret < 0) break;  		/* Reset active key */ diff --git a/app/openssl/crypto/bn/asm/armv4-gf2m.S b/app/openssl/crypto/bn/asm/armv4-gf2m.S index 0fa25b26..038f0864 100644 --- a/app/openssl/crypto/bn/asm/armv4-gf2m.S +++ b/app/openssl/crypto/bn/asm/armv4-gf2m.S @@ -5,6 +5,31 @@  #if __ARM_ARCH__>=7  .fpu	neon + +.type	mul_1x1_neon,%function +.align	5 +mul_1x1_neon: +	vshl.u64	d2,d16,#8	@ q1-q3 are slided  +	vmull.p8	q0,d16,d17	@ a·bb +	vshl.u64	d4,d16,#16 +	vmull.p8	q1,d2,d17	@ a<<8·bb +	vshl.u64	d6,d16,#24 +	vmull.p8	q2,d4,d17	@ a<<16·bb +	vshr.u64	d2,#8 +	vmull.p8	q3,d6,d17	@ a<<24·bb +	vshl.u64	d3,#24 +	veor		d0,d2 +	vshr.u64	d4,#16 +	veor		d0,d3 +	vshl.u64	d5,#16 +	veor		d0,d4 +	vshr.u64	d6,#24 +	veor		d0,d5 +	vshl.u64	d7,#8 +	veor		d0,d6 +	veor		d0,d7 +	.word	0xe12fff1e +.size	mul_1x1_neon,.-mul_1x1_neon  #endif  .type	mul_1x1_ialu,%function  .align	5 @@ -95,53 +120,40 @@ bn_GF2m_mul_2x2:  	tst	r12,#1  	beq	.Lialu -	ldr		r12, [sp]		@ 5th argument -	vmov.32		d26, r2, r1 -	vmov.32		d27, r12, r3 -	vmov.i64	d28, #0x0000ffffffffffff -	vmov.i64	d29, #0x00000000ffffffff -	vmov.i64	d30, #0x000000000000ffff - -	vext.8		d2, d26, d26, #1	@ A1 -	vmull.p8	q1, d2, d27		@ F = A1*B -	vext.8		d0, d27, d27, #1	@ B1 -	vmull.p8	q0, d26, d0		@ E = A*B1 -	vext.8		d4, d26, d26, #2	@ A2 -	vmull.p8	q2, d4, d27		@ H = A2*B -	vext.8		d16, d27, d27, #2	@ B2 -	vmull.p8	q8, d26, d16		@ G = A*B2 -	vext.8		d6, d26, d26, #3	@ A3 -	veor		q1, q1, q0		@ L = E + F -	vmull.p8	q3, d6, d27		@ J = A3*B -	vext.8		d0, d27, d27, #3	@ B3 -	veor		q2, q2, q8		@ M = G + H -	vmull.p8	q0, d26, d0		@ I = A*B3 -	veor		d2, d2, d3	@ t0 = (L) (P0 + P1) << 8 -	vand		d3, d3, d28 -	vext.8		d16, d27, d27, #4	@ B4 -	veor		d4, d4, d5	@ t1 = (M) (P2 + P3) << 16 -	vand		d5, d5, d29 -	vmull.p8	q8, d26, d16		@ K = A*B4 -	veor		q3, q3, q0		@ N = I + J -	veor		d2, d2, d3 -	veor		d4, d4, d5 -	veor		d6, d6, d7	@ t2 = (N) (P4 + P5) << 24 -	vand		d7, d7, d30 -	vext.8		q1, q1, q1, #15 -	veor		d16, d16, d17	@ t3 = (K) (P6 + P7) << 32 -	vmov.i64	d17, #0 -	vext.8		q2, q2, q2, #14 -	veor		d6, d6, d7 -	vmull.p8	q0, d26, d27		@ D = A*B -	vext.8		q8, q8, q8, #12 -	vext.8		q3, q3, q3, #13 -	veor		q1, q1, q2 -	veor		q3, q3, q8 -	veor		q0, q0, q1 -	veor		q0, q0, q3 - -	vst1.32		{q0}, [r0] -	bx	lr		@ 
bx lr +	veor	d18,d18 +	vmov.32	d19,r3,r3		@ two copies of b1 +	vmov.32	d18[0],r1		@ a1 + +	veor	d20,d20 +	vld1.32	d21[],[sp,:32]	@ two copies of b0 +	vmov.32	d20[0],r2		@ a0 +	mov	r12,lr + +	vmov	d16,d18 +	vmov	d17,d19 +	bl	mul_1x1_neon		@ a1·b1 +	vmov	d22,d0 + +	vmov	d16,d20 +	vmov	d17,d21 +	bl	mul_1x1_neon		@ a0·b0 +	vmov	d23,d0 + +	veor	d16,d20,d18 +	veor	d17,d21,d19 +	veor	d20,d23,d22 +	bl	mul_1x1_neon		@ (a0+a1)·(b0+b1) + +	veor	d0,d20			@ (a0+a1)·(b0+b1)-a0·b0-a1·b1 +	vshl.u64 d1,d0,#32 +	vshr.u64 d0,d0,#32 +	veor	d23,d1 +	veor	d22,d0 +	vst1.32	{d23[0]},[r0,:32]! +	vst1.32	{d23[1]},[r0,:32]! +	vst1.32	{d22[0]},[r0,:32]! +	vst1.32	{d22[1]},[r0,:32] +	bx	r12  .align	4  .Lialu:  #endif diff --git a/app/openssl/crypto/bn/asm/armv4-gf2m.pl b/app/openssl/crypto/bn/asm/armv4-gf2m.pl index 3f1f4f67..22ad1f85 100644 --- a/app/openssl/crypto/bn/asm/armv4-gf2m.pl +++ b/app/openssl/crypto/bn/asm/armv4-gf2m.pl @@ -20,21 +20,14 @@  # length, more for longer keys. Even though NEON 1x1 multiplication  # runs in even less cycles, ~30, improvement is measurable only on  # longer keys. One has to optimize code elsewhere to get NEON glow... -# -# April 2014 -# -# Double bn_GF2m_mul_2x2 performance by using algorithm from paper -# referred below, which improves ECDH and ECDSA verify benchmarks -# by 18-40%. -# -# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software -# Polynomial Multiplication on ARM Processors using the NEON Engine. -#  -# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf  while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}  open STDOUT,">$output"; +sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     } +sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   } +sub Q()     { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } +  $code=<<___;  #include "arm_arch.h" @@ -43,6 +36,31 @@ $code=<<___;  #if __ARM_ARCH__>=7  .fpu	neon + +.type	mul_1x1_neon,%function +.align	5 +mul_1x1_neon: +	vshl.u64	`&Dlo("q1")`,d16,#8	@ q1-q3 are slided $a +	vmull.p8	`&Q("d0")`,d16,d17	@ a·bb +	vshl.u64	`&Dlo("q2")`,d16,#16 +	vmull.p8	q1,`&Dlo("q1")`,d17	@ a<<8·bb +	vshl.u64	`&Dlo("q3")`,d16,#24 +	vmull.p8	q2,`&Dlo("q2")`,d17	@ a<<16·bb +	vshr.u64	`&Dlo("q1")`,#8 +	vmull.p8	q3,`&Dlo("q3")`,d17	@ a<<24·bb +	vshl.u64	`&Dhi("q1")`,#24 +	veor		d0,`&Dlo("q1")` +	vshr.u64	`&Dlo("q2")`,#16 +	veor		d0,`&Dhi("q1")` +	vshl.u64	`&Dhi("q2")`,#16 +	veor		d0,`&Dlo("q2")` +	vshr.u64	`&Dlo("q3")`,#24 +	veor		d0,`&Dhi("q2")` +	vshl.u64	`&Dhi("q3")`,#8 +	veor		d0,`&Dlo("q3")` +	veor		d0,`&Dhi("q3")` +	bx	lr +.size	mul_1x1_neon,.-mul_1x1_neon  #endif  ___  ################ @@ -141,9 +159,8 @@ ___  # void	bn_GF2m_mul_2x2(BN_ULONG *r,  #	BN_ULONG a1,BN_ULONG a0,  #	BN_ULONG b1,BN_ULONG b0);	# r[3..0]=a1a0·b1b0 -{ -my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12)); -my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31)); + +($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23));  $code.=<<___;  .global	bn_GF2m_mul_2x2 @@ -156,58 +173,44 @@ bn_GF2m_mul_2x2:  	tst	r12,#1  	beq	.Lialu -	ldr		r12, [sp]		@ 5th argument -	vmov.32		$a, r2, r1 -	vmov.32		$b, r12, r3 -	vmov.i64	$k48, #0x0000ffffffffffff -	vmov.i64	$k32, #0x00000000ffffffff -	vmov.i64	$k16, #0x000000000000ffff - -	vext.8		$t0#lo, $a, $a, #1	@ A1 -	vmull.p8	$t0, $t0#lo, $b		@ F = A1*B -	vext.8		$r#lo, $b, $b, #1	@ B1 -	vmull.p8	$r, $a, $r#lo		@ E = A*B1 -	vext.8		$t1#lo, $a, $a, #2	@ A2 -	vmull.p8	$t1, $t1#lo, $b		@ H = A2*B -	vext.8		$t3#lo, $b, $b, #2	@ B2 -	vmull.p8	$t3, $a, $t3#lo		@ G = A*B2 -	vext.8		$t2#lo, $a, $a, #3	@ A3 -	veor		$t0, $t0, $r		@ L = E 
+ F -	vmull.p8	$t2, $t2#lo, $b		@ J = A3*B -	vext.8		$r#lo, $b, $b, #3	@ B3 -	veor		$t1, $t1, $t3		@ M = G + H -	vmull.p8	$r, $a, $r#lo		@ I = A*B3 -	veor		$t0#lo, $t0#lo, $t0#hi	@ t0 = (L) (P0 + P1) << 8 -	vand		$t0#hi, $t0#hi, $k48 -	vext.8		$t3#lo, $b, $b, #4	@ B4 -	veor		$t1#lo, $t1#lo, $t1#hi	@ t1 = (M) (P2 + P3) << 16 -	vand		$t1#hi, $t1#hi, $k32 -	vmull.p8	$t3, $a, $t3#lo		@ K = A*B4 -	veor		$t2, $t2, $r		@ N = I + J -	veor		$t0#lo, $t0#lo, $t0#hi -	veor		$t1#lo, $t1#lo, $t1#hi -	veor		$t2#lo, $t2#lo, $t2#hi	@ t2 = (N) (P4 + P5) << 24 -	vand		$t2#hi, $t2#hi, $k16 -	vext.8		$t0, $t0, $t0, #15 -	veor		$t3#lo, $t3#lo, $t3#hi	@ t3 = (K) (P6 + P7) << 32 -	vmov.i64	$t3#hi, #0 -	vext.8		$t1, $t1, $t1, #14 -	veor		$t2#lo, $t2#lo, $t2#hi -	vmull.p8	$r, $a, $b		@ D = A*B -	vext.8		$t3, $t3, $t3, #12 -	vext.8		$t2, $t2, $t2, #13 -	veor		$t0, $t0, $t1 -	veor		$t2, $t2, $t3 -	veor		$r, $r, $t0 -	veor		$r, $r, $t2 - -	vst1.32		{$r}, [r0] -	ret		@ bx lr +	veor	$A1,$A1 +	vmov.32	$B1,r3,r3		@ two copies of b1 +	vmov.32	${A1}[0],r1		@ a1 + +	veor	$A0,$A0 +	vld1.32	${B0}[],[sp,:32]	@ two copies of b0 +	vmov.32	${A0}[0],r2		@ a0 +	mov	r12,lr + +	vmov	d16,$A1 +	vmov	d17,$B1 +	bl	mul_1x1_neon		@ a1·b1 +	vmov	$A1B1,d0 + +	vmov	d16,$A0 +	vmov	d17,$B0 +	bl	mul_1x1_neon		@ a0·b0 +	vmov	$A0B0,d0 + +	veor	d16,$A0,$A1 +	veor	d17,$B0,$B1 +	veor	$A0,$A0B0,$A1B1 +	bl	mul_1x1_neon		@ (a0+a1)·(b0+b1) + +	veor	d0,$A0			@ (a0+a1)·(b0+b1)-a0·b0-a1·b1 +	vshl.u64 d1,d0,#32 +	vshr.u64 d0,d0,#32 +	veor	$A0B0,d1 +	veor	$A1B1,d0 +	vst1.32	{${A0B0}[0]},[r0,:32]! +	vst1.32	{${A0B0}[1]},[r0,:32]! +	vst1.32	{${A1B1}[0]},[r0,:32]! +	vst1.32	{${A1B1}[1]},[r0,:32] +	bx	r12  .align	4  .Lialu:  #endif  ___ -}  $ret="r10";	# reassigned 1st argument  $code.=<<___;  	stmdb	sp!,{r4-r10,lr} @@ -269,13 +272,7 @@ $code.=<<___;  .comm	OPENSSL_armcap_P,4,4  ___ -foreach (split("\n",$code)) { -	s/\`([^\`]*)\`/eval $1/geo; - -	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or -	s/\bret\b/bx	lr/go		or -	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;    # make it possible to compile with -march=armv4 - -	print $_,"\n"; -} +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;    # make it possible to compile with -march=armv4 +print $code;  close STDOUT;   # enforce flush diff --git a/app/openssl/crypto/bn/asm/armv4-mont.pl b/app/openssl/crypto/bn/asm/armv4-mont.pl index 72bad8e3..f78a8b5f 100644 --- a/app/openssl/crypto/bn/asm/armv4-mont.pl +++ b/app/openssl/crypto/bn/asm/armv4-mont.pl @@ -1,7 +1,7 @@  #!/usr/bin/env perl  # ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL  # project. The module is, however, dual licensed under OpenSSL and  # CRYPTOGAMS licenses depending on where you obtain it. For further  # details see http://www.openssl.org/~appro/cryptogams/. @@ -23,21 +23,6 @@  # than 1/2KB. Windows CE port would be trivial, as it's exclusively  # about decorations, ABI and instruction syntax are identical. -# November 2013 -# -# Add NEON code path, which handles lengths divisible by 8. RSA/DSA -# performance improvement on Cortex-A8 is ~45-100% depending on key -# length, more for longer keys. On Cortex-A15 the span is ~10-105%. -# On Snapdragon S4 improvement was measured to vary from ~70% to -# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is -# rather because original integer-only code seems to perform -# suboptimally on S4. 
Situation on Cortex-A9 is unfortunately -# different. It's being looked into, but the trouble is that -# performance for vectors longer than 256 bits is actually couple -# of percent worse than for integer-only code. The code is chosen -# for execution on all NEON-capable processors, because gain on -# others outweighs the marginal loss on Cortex-A9. -  while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}  open STDOUT,">$output"; @@ -67,40 +52,16 @@ $_n0="$num,#14*4";  $_num="$num,#15*4";	$_bpend=$_num;  $code=<<___; -#include "arm_arch.h" -  .text -.code	32 - -#if __ARM_ARCH__>=7 -.align	5 -.LOPENSSL_armcap: -.word	OPENSSL_armcap_P-bn_mul_mont -#endif  .global	bn_mul_mont  .type	bn_mul_mont,%function -.align	5 +.align	2  bn_mul_mont: -	ldr	ip,[sp,#4]		@ load num  	stmdb	sp!,{r0,r2}		@ sp points at argument block -#if __ARM_ARCH__>=7 -	tst	ip,#7 -	bne	.Lialu -	adr	r0,bn_mul_mont -	ldr	r2,.LOPENSSL_armcap -	ldr	r0,[r0,r2] -	tst	r0,#1			@ NEON available? -	ldmia	sp, {r0,r2} -	beq	.Lialu -	add	sp,sp,#8 -	b	bn_mul8x_mont_neon -.align	4 -.Lialu: -#endif -	cmp	ip,#2 -	mov	$num,ip			@ load num +	ldr	$num,[sp,#3*4]		@ load num +	cmp	$num,#2  	movlt	r0,#0  	addlt	sp,sp,#2*4  	blt	.Labrt @@ -230,446 +191,14 @@ bn_mul_mont:  	ldmia	sp!,{r4-r12,lr}		@ restore registers  	add	sp,sp,#2*4		@ skip over {r0,r2}  	mov	r0,#1 -.Labrt: -#if __ARM_ARCH__>=5 -	ret				@ bx lr -#else -	tst	lr,#1 +.Labrt:	tst	lr,#1  	moveq	pc,lr			@ be binary compatible with V4, yet  	bx	lr			@ interoperable with Thumb ISA:-) -#endif  .size	bn_mul_mont,.-bn_mul_mont -___ -{ -sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     } -sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   } - -my ($A0,$A1,$A2,$A3)=map("d$_",(0..3)); -my ($N0,$N1,$N2,$N3)=map("d$_",(4..7)); -my ($Z,$Temp)=("q4","q5"); -my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13)); -my ($Bi,$Ni,$M0)=map("d$_",(28..31)); -my $zero=&Dlo($Z); -my $temp=&Dlo($Temp); - -my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5)); -my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9)); - -$code.=<<___; -#if __ARM_ARCH__>=7 -.fpu	neon - -.type	bn_mul8x_mont_neon,%function -.align	5 -bn_mul8x_mont_neon: -	mov	ip,sp -	stmdb	sp!,{r4-r11} -	vstmdb	sp!,{d8-d15}		@ ABI specification says so -	ldmia	ip,{r4-r5}		@ load rest of parameter block - -	sub		$toutptr,sp,#16 -	vld1.32		{${Bi}[0]}, [$bptr,:32]! -	sub		$toutptr,$toutptr,$num,lsl#4 -	vld1.32		{$A0-$A3},  [$aptr]!		@ can't specify :32 :-( -	and		$toutptr,$toutptr,#-64 -	vld1.32		{${M0}[0]}, [$n0,:32] -	mov		sp,$toutptr			@ alloca -	veor		$zero,$zero,$zero -	subs		$inner,$num,#8 -	vzip.16		$Bi,$zero - -	vmull.u32	$A0xB,$Bi,${A0}[0] -	vmull.u32	$A1xB,$Bi,${A0}[1] -	vmull.u32	$A2xB,$Bi,${A1}[0] -	vshl.i64	$temp,`&Dhi("$A0xB")`,#16 -	vmull.u32	$A3xB,$Bi,${A1}[1] - -	vadd.u64	$temp,$temp,`&Dlo("$A0xB")` -	veor		$zero,$zero,$zero -	vmul.u32	$Ni,$temp,$M0 - -	vmull.u32	$A4xB,$Bi,${A2}[0] -	 vld1.32	{$N0-$N3}, [$nptr]! -	vmull.u32	$A5xB,$Bi,${A2}[1] -	vmull.u32	$A6xB,$Bi,${A3}[0] -	vzip.16		$Ni,$zero -	vmull.u32	$A7xB,$Bi,${A3}[1] - -	bne	.LNEON_1st - -	@ special case for num=8, everything is in register bank... 
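The vmul.u32 $Ni,$temp,$M0 step in the NEON code above is the heart of Montgomery reduction: with n0 = -N^{-1} mod 2^32 (the precomputed $M0), multiplying the running low word by n0 yields a factor m such that t + m*N is divisible by 2^32, so one word can be retired per iteration. A word-level sketch (function name is mine):

#include <stdint.h>

static uint64_t mont_step(uint64_t t, uint32_t n_word, uint32_t n0)
{
    uint32_t m = (uint32_t)t * n0;   /* m = t * (-N^-1) mod 2^32 */
    t += (uint64_t)m * n_word;       /* low 32 bits are now zero */
    return t >> 32;                  /* exact division by the word base */
}

The real loop carries this across all the words of N; the sketch shows the single-word invariant only.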
- -	vmlal.u32	$A0xB,$Ni,${N0}[0] -	sub		$outer,$num,#1 -	vmlal.u32	$A1xB,$Ni,${N0}[1] -	vmlal.u32	$A2xB,$Ni,${N1}[0] -	vmlal.u32	$A3xB,$Ni,${N1}[1] - -	vmlal.u32	$A4xB,$Ni,${N2}[0] -	vmov		$Temp,$A0xB -	vmlal.u32	$A5xB,$Ni,${N2}[1] -	vmov		$A0xB,$A1xB -	vmlal.u32	$A6xB,$Ni,${N3}[0] -	vmov		$A1xB,$A2xB -	vmlal.u32	$A7xB,$Ni,${N3}[1] -	vmov		$A2xB,$A3xB -	vmov		$A3xB,$A4xB -	vshr.u64	$temp,$temp,#16 -	vmov		$A4xB,$A5xB -	vmov		$A5xB,$A6xB -	vadd.u64	$temp,$temp,`&Dhi("$Temp")` -	vmov		$A6xB,$A7xB -	veor		$A7xB,$A7xB -	vshr.u64	$temp,$temp,#16 - -	b	.LNEON_outer8 - -.align	4 -.LNEON_outer8: -	vld1.32		{${Bi}[0]}, [$bptr,:32]! -	veor		$zero,$zero,$zero -	vzip.16		$Bi,$zero -	vadd.u64	`&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp - -	vmlal.u32	$A0xB,$Bi,${A0}[0] -	vmlal.u32	$A1xB,$Bi,${A0}[1] -	vmlal.u32	$A2xB,$Bi,${A1}[0] -	vshl.i64	$temp,`&Dhi("$A0xB")`,#16 -	vmlal.u32	$A3xB,$Bi,${A1}[1] - -	vadd.u64	$temp,$temp,`&Dlo("$A0xB")` -	veor		$zero,$zero,$zero -	subs		$outer,$outer,#1 -	vmul.u32	$Ni,$temp,$M0 - -	vmlal.u32	$A4xB,$Bi,${A2}[0] -	vmlal.u32	$A5xB,$Bi,${A2}[1] -	vmlal.u32	$A6xB,$Bi,${A3}[0] -	vzip.16		$Ni,$zero -	vmlal.u32	$A7xB,$Bi,${A3}[1] - -	vmlal.u32	$A0xB,$Ni,${N0}[0] -	vmlal.u32	$A1xB,$Ni,${N0}[1] -	vmlal.u32	$A2xB,$Ni,${N1}[0] -	vmlal.u32	$A3xB,$Ni,${N1}[1] - -	vmlal.u32	$A4xB,$Ni,${N2}[0] -	vmov		$Temp,$A0xB -	vmlal.u32	$A5xB,$Ni,${N2}[1] -	vmov		$A0xB,$A1xB -	vmlal.u32	$A6xB,$Ni,${N3}[0] -	vmov		$A1xB,$A2xB -	vmlal.u32	$A7xB,$Ni,${N3}[1] -	vmov		$A2xB,$A3xB -	vmov		$A3xB,$A4xB -	vshr.u64	$temp,$temp,#16 -	vmov		$A4xB,$A5xB -	vmov		$A5xB,$A6xB -	vadd.u64	$temp,$temp,`&Dhi("$Temp")` -	vmov		$A6xB,$A7xB -	veor		$A7xB,$A7xB -	vshr.u64	$temp,$temp,#16 - -	bne	.LNEON_outer8 - -	vadd.u64	`&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp -	mov		$toutptr,sp -	vshr.u64	$temp,`&Dlo("$A0xB")`,#16 -	mov		$inner,$num -	vadd.u64	`&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp -	add		$tinptr,sp,#16 -	vshr.u64	$temp,`&Dhi("$A0xB")`,#16 -	vzip.16		`&Dlo("$A0xB")`,`&Dhi("$A0xB")` - -	b	.LNEON_tail2 - -.align	4 -.LNEON_1st: -	vmlal.u32	$A0xB,$Ni,${N0}[0] -	 vld1.32	{$A0-$A3}, [$aptr]! -	vmlal.u32	$A1xB,$Ni,${N0}[1] -	subs		$inner,$inner,#8 -	vmlal.u32	$A2xB,$Ni,${N1}[0] -	vmlal.u32	$A3xB,$Ni,${N1}[1] - -	vmlal.u32	$A4xB,$Ni,${N2}[0] -	 vld1.32	{$N0-$N1}, [$nptr]! -	vmlal.u32	$A5xB,$Ni,${N2}[1] -	 vst1.64	{$A0xB-$A1xB}, [$toutptr,:256]! -	vmlal.u32	$A6xB,$Ni,${N3}[0] -	vmlal.u32	$A7xB,$Ni,${N3}[1] -	 vst1.64	{$A2xB-$A3xB}, [$toutptr,:256]! - -	vmull.u32	$A0xB,$Bi,${A0}[0] -	 vld1.32	{$N2-$N3}, [$nptr]! -	vmull.u32	$A1xB,$Bi,${A0}[1] -	 vst1.64	{$A4xB-$A5xB}, [$toutptr,:256]! -	vmull.u32	$A2xB,$Bi,${A1}[0] -	vmull.u32	$A3xB,$Bi,${A1}[1] -	 vst1.64	{$A6xB-$A7xB}, [$toutptr,:256]! - -	vmull.u32	$A4xB,$Bi,${A2}[0] -	vmull.u32	$A5xB,$Bi,${A2}[1] -	vmull.u32	$A6xB,$Bi,${A3}[0] -	vmull.u32	$A7xB,$Bi,${A3}[1] - -	bne	.LNEON_1st - -	vmlal.u32	$A0xB,$Ni,${N0}[0] -	add		$tinptr,sp,#16 -	vmlal.u32	$A1xB,$Ni,${N0}[1] -	sub		$aptr,$aptr,$num,lsl#2		@ rewind $aptr -	vmlal.u32	$A2xB,$Ni,${N1}[0] -	 vld1.64	{$Temp}, [sp,:128] -	vmlal.u32	$A3xB,$Ni,${N1}[1] -	sub		$outer,$num,#1 - -	vmlal.u32	$A4xB,$Ni,${N2}[0] -	vst1.64		{$A0xB-$A1xB}, [$toutptr,:256]! -	vmlal.u32	$A5xB,$Ni,${N2}[1] -	vshr.u64	$temp,$temp,#16 -	 vld1.64	{$A0xB},       [$tinptr, :128]! -	vmlal.u32	$A6xB,$Ni,${N3}[0] -	vst1.64		{$A2xB-$A3xB}, [$toutptr,:256]! -	vmlal.u32	$A7xB,$Ni,${N3}[1] - -	vst1.64		{$A4xB-$A5xB}, [$toutptr,:256]! -	vadd.u64	$temp,$temp,`&Dhi("$Temp")` -	veor		$Z,$Z,$Z -	vst1.64		{$A6xB-$A7xB}, [$toutptr,:256]! -	 vld1.64	{$A1xB-$A2xB}, [$tinptr, :256]! 
-	vst1.64		{$Z},          [$toutptr,:128] -	vshr.u64	$temp,$temp,#16 - -	b		.LNEON_outer - -.align	4 -.LNEON_outer: -	vld1.32		{${Bi}[0]}, [$bptr,:32]! -	sub		$nptr,$nptr,$num,lsl#2		@ rewind $nptr -	vld1.32		{$A0-$A3},  [$aptr]! -	veor		$zero,$zero,$zero -	mov		$toutptr,sp -	vzip.16		$Bi,$zero -	sub		$inner,$num,#8 -	vadd.u64	`&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp - -	vmlal.u32	$A0xB,$Bi,${A0}[0] -	 vld1.64	{$A3xB-$A4xB},[$tinptr,:256]! -	vmlal.u32	$A1xB,$Bi,${A0}[1] -	vmlal.u32	$A2xB,$Bi,${A1}[0] -	 vld1.64	{$A5xB-$A6xB},[$tinptr,:256]! -	vmlal.u32	$A3xB,$Bi,${A1}[1] - -	vshl.i64	$temp,`&Dhi("$A0xB")`,#16 -	veor		$zero,$zero,$zero -	vadd.u64	$temp,$temp,`&Dlo("$A0xB")` -	 vld1.64	{$A7xB},[$tinptr,:128]! -	vmul.u32	$Ni,$temp,$M0 - -	vmlal.u32	$A4xB,$Bi,${A2}[0] -	 vld1.32	{$N0-$N3}, [$nptr]! -	vmlal.u32	$A5xB,$Bi,${A2}[1] -	vmlal.u32	$A6xB,$Bi,${A3}[0] -	vzip.16		$Ni,$zero -	vmlal.u32	$A7xB,$Bi,${A3}[1] - -.LNEON_inner: -	vmlal.u32	$A0xB,$Ni,${N0}[0] -	 vld1.32	{$A0-$A3}, [$aptr]! -	vmlal.u32	$A1xB,$Ni,${N0}[1] -	 subs		$inner,$inner,#8 -	vmlal.u32	$A2xB,$Ni,${N1}[0] -	vmlal.u32	$A3xB,$Ni,${N1}[1] -	vst1.64		{$A0xB-$A1xB}, [$toutptr,:256]! - -	vmlal.u32	$A4xB,$Ni,${N2}[0] -	 vld1.64	{$A0xB},       [$tinptr, :128]! -	vmlal.u32	$A5xB,$Ni,${N2}[1] -	vst1.64		{$A2xB-$A3xB}, [$toutptr,:256]! -	vmlal.u32	$A6xB,$Ni,${N3}[0] -	 vld1.64	{$A1xB-$A2xB}, [$tinptr, :256]! -	vmlal.u32	$A7xB,$Ni,${N3}[1] -	vst1.64		{$A4xB-$A5xB}, [$toutptr,:256]! - -	vmlal.u32	$A0xB,$Bi,${A0}[0] -	 vld1.64	{$A3xB-$A4xB}, [$tinptr, :256]! -	vmlal.u32	$A1xB,$Bi,${A0}[1] -	vst1.64		{$A6xB-$A7xB}, [$toutptr,:256]! -	vmlal.u32	$A2xB,$Bi,${A1}[0] -	 vld1.64	{$A5xB-$A6xB}, [$tinptr, :256]! -	vmlal.u32	$A3xB,$Bi,${A1}[1] -	 vld1.32	{$N0-$N3}, [$nptr]! - -	vmlal.u32	$A4xB,$Bi,${A2}[0] -	 vld1.64	{$A7xB},       [$tinptr, :128]! -	vmlal.u32	$A5xB,$Bi,${A2}[1] -	vmlal.u32	$A6xB,$Bi,${A3}[0] -	vmlal.u32	$A7xB,$Bi,${A3}[1] - -	bne	.LNEON_inner - -	vmlal.u32	$A0xB,$Ni,${N0}[0] -	add		$tinptr,sp,#16 -	vmlal.u32	$A1xB,$Ni,${N0}[1] -	sub		$aptr,$aptr,$num,lsl#2		@ rewind $aptr -	vmlal.u32	$A2xB,$Ni,${N1}[0] -	 vld1.64	{$Temp}, [sp,:128] -	vmlal.u32	$A3xB,$Ni,${N1}[1] -	subs		$outer,$outer,#1 - -	vmlal.u32	$A4xB,$Ni,${N2}[0] -	vst1.64		{$A0xB-$A1xB}, [$toutptr,:256]! -	vmlal.u32	$A5xB,$Ni,${N2}[1] -	 vld1.64	{$A0xB},       [$tinptr, :128]! -	vshr.u64	$temp,$temp,#16 -	vst1.64		{$A2xB-$A3xB}, [$toutptr,:256]! -	vmlal.u32	$A6xB,$Ni,${N3}[0] -	 vld1.64	{$A1xB-$A2xB}, [$tinptr, :256]! -	vmlal.u32	$A7xB,$Ni,${N3}[1] - -	vst1.64		{$A4xB-$A5xB}, [$toutptr,:256]! -	vadd.u64	$temp,$temp,`&Dhi("$Temp")` -	vst1.64		{$A6xB-$A7xB}, [$toutptr,:256]! -	vshr.u64	$temp,$temp,#16 - -	bne	.LNEON_outer - -	mov		$toutptr,sp -	mov		$inner,$num - -.LNEON_tail: -	vadd.u64	`&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp -	vld1.64		{$A3xB-$A4xB}, [$tinptr, :256]! -	vshr.u64	$temp,`&Dlo("$A0xB")`,#16 -	vadd.u64	`&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp -	vld1.64		{$A5xB-$A6xB}, [$tinptr, :256]! -	vshr.u64	$temp,`&Dhi("$A0xB")`,#16 -	vld1.64		{$A7xB},       [$tinptr, :128]! -	vzip.16		`&Dlo("$A0xB")`,`&Dhi("$A0xB")` - -.LNEON_tail2: -	vadd.u64	`&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp -	vst1.32		{`&Dlo("$A0xB")`[0]}, [$toutptr, :32]! -	vshr.u64	$temp,`&Dlo("$A1xB")`,#16 -	vadd.u64	`&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp -	vshr.u64	$temp,`&Dhi("$A1xB")`,#16 -	vzip.16		`&Dlo("$A1xB")`,`&Dhi("$A1xB")` - -	vadd.u64	`&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp -	vst1.32		{`&Dlo("$A1xB")`[0]}, [$toutptr, :32]! 
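
For orientation amid the long NEON listing: everything from bn_mul_mont down through .LNEON_copy_n_zap computes a word-serial Montgomery multiplication, rp = ap*bp*R^-1 mod np with R = 2^(32*num) and n0 = -np[0]^-1 mod 2^32. The following C sketch of that algorithm uses 32-bit limbs and illustrative names; it is a reading aid for the assembly, not OpenSSL's implementation:

	#include <stdint.h>
	#include <string.h>

	static int mont_mul_sketch(uint32_t *rp, const uint32_t *ap,
	                           const uint32_t *bp, const uint32_t *np,
	                           uint32_t n0, int num)
	{
	    uint32_t t[num + 2];                /* accumulator + 2 spare words */
	    memset(t, 0, sizeof(t));

	    for (int i = 0; i < num; i++) {
	        uint64_t c = 0;
	        for (int j = 0; j < num; j++) { /* t += ap * bp[i] */
	            c += t[j] + (uint64_t)ap[j] * bp[i];
	            t[j] = (uint32_t)c; c >>= 32;
	        }
	        c += t[num];
	        t[num] = (uint32_t)c;
	        t[num + 1] = (uint32_t)(c >> 32);

	        uint32_t m = t[0] * n0;         /* chosen so t[0] cancels */
	        c = (t[0] + (uint64_t)m * np[0]) >> 32;
	        for (int j = 1; j < num; j++) { /* t = (t + m*np) >> 32 */
	            c += t[j] + (uint64_t)m * np[j];
	            t[j - 1] = (uint32_t)c; c >>= 32;
	        }
	        c += t[num];
	        t[num - 1] = (uint32_t)c;
	        t[num] = t[num + 1] + (uint32_t)(c >> 32);
	    }

	    uint64_t b = 0;                     /* conditional final subtraction */
	    for (int j = 0; j < num; j++) {
	        b = (uint64_t)t[j] - np[j] - b;
	        rp[j] = (uint32_t)b; b = (b >> 32) & 1;
	    }
	    if (t[num] < b)                     /* t < np: keep unsubtracted t */
	        memcpy(rp, t, num * sizeof(uint32_t));
	    return 1;
	}

The .LNEON_sub/.LNEON_copy_n_zap tail below is that same final subtraction, performed with conditional moves and with the scratch frame wiped, so the branch-free choice between t and t-np leaves nothing behind on the stack.
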
-	vshr.u64	$temp,`&Dlo("$A2xB")`,#16 -	vadd.u64	`&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp -	vshr.u64	$temp,`&Dhi("$A2xB")`,#16 -	vzip.16		`&Dlo("$A2xB")`,`&Dhi("$A2xB")` - -	vadd.u64	`&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp -	vst1.32		{`&Dlo("$A2xB")`[0]}, [$toutptr, :32]! -	vshr.u64	$temp,`&Dlo("$A3xB")`,#16 -	vadd.u64	`&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp -	vshr.u64	$temp,`&Dhi("$A3xB")`,#16 -	vzip.16		`&Dlo("$A3xB")`,`&Dhi("$A3xB")` - -	vadd.u64	`&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp -	vst1.32		{`&Dlo("$A3xB")`[0]}, [$toutptr, :32]! -	vshr.u64	$temp,`&Dlo("$A4xB")`,#16 -	vadd.u64	`&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp -	vshr.u64	$temp,`&Dhi("$A4xB")`,#16 -	vzip.16		`&Dlo("$A4xB")`,`&Dhi("$A4xB")` - -	vadd.u64	`&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp -	vst1.32		{`&Dlo("$A4xB")`[0]}, [$toutptr, :32]! -	vshr.u64	$temp,`&Dlo("$A5xB")`,#16 -	vadd.u64	`&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp -	vshr.u64	$temp,`&Dhi("$A5xB")`,#16 -	vzip.16		`&Dlo("$A5xB")`,`&Dhi("$A5xB")` - -	vadd.u64	`&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp -	vst1.32		{`&Dlo("$A5xB")`[0]}, [$toutptr, :32]! -	vshr.u64	$temp,`&Dlo("$A6xB")`,#16 -	vadd.u64	`&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp -	vld1.64		{$A0xB}, [$tinptr, :128]! -	vshr.u64	$temp,`&Dhi("$A6xB")`,#16 -	vzip.16		`&Dlo("$A6xB")`,`&Dhi("$A6xB")` - -	vadd.u64	`&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp -	vst1.32		{`&Dlo("$A6xB")`[0]}, [$toutptr, :32]! -	vshr.u64	$temp,`&Dlo("$A7xB")`,#16 -	vadd.u64	`&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp -	vld1.64		{$A1xB-$A2xB},	[$tinptr, :256]! -	vshr.u64	$temp,`&Dhi("$A7xB")`,#16 -	vzip.16		`&Dlo("$A7xB")`,`&Dhi("$A7xB")` -	subs		$inner,$inner,#8 -	vst1.32		{`&Dlo("$A7xB")`[0]}, [$toutptr, :32]! - -	bne	.LNEON_tail - -	vst1.32	{${temp}[0]}, [$toutptr, :32]		@ top-most bit -	sub	$nptr,$nptr,$num,lsl#2			@ rewind $nptr -	subs	$aptr,sp,#0				@ clear carry flag -	add	$bptr,sp,$num,lsl#2 - -.LNEON_sub: -	ldmia	$aptr!, {r4-r7} -	ldmia	$nptr!, {r8-r11} -	sbcs	r8, r4,r8 -	sbcs	r9, r5,r9 -	sbcs	r10,r6,r10 -	sbcs	r11,r7,r11 -	teq	$aptr,$bptr				@ preserves carry -	stmia	$rptr!, {r8-r11} -	bne	.LNEON_sub - -	ldr	r10, [$aptr]				@ load top-most bit -	veor	q0,q0,q0 -	sub	r11,$bptr,sp				@ this is num*4 -	veor	q1,q1,q1 -	mov	$aptr,sp -	sub	$rptr,$rptr,r11				@ rewind $rptr -	mov	$nptr,$bptr				@ second 3/4th of frame -	sbcs	r10,r10,#0				@ result is carry flag - -.LNEON_copy_n_zap: -	ldmia	$aptr!, {r4-r7} -	ldmia	$rptr,  {r8-r11} -	movcc	r8, r4 -	vst1.64	{q0-q1}, [$nptr,:256]!			@ wipe -	movcc	r9, r5 -	movcc	r10,r6 -	vst1.64	{q0-q1}, [$nptr,:256]!			@ wipe -	movcc	r11,r7 -	ldmia	$aptr, {r4-r7} -	stmia	$rptr!, {r8-r11} -	sub	$aptr,$aptr,#16 -	ldmia	$rptr, {r8-r11} -	movcc	r8, r4 -	vst1.64	{q0-q1}, [$aptr,:256]!			@ wipe -	movcc	r9, r5 -	movcc	r10,r6 -	vst1.64	{q0-q1}, [$nptr,:256]!			
@ wipe -	movcc	r11,r7 -	teq	$aptr,$bptr				@ preserves carry -	stmia	$rptr!, {r8-r11} -	bne	.LNEON_copy_n_zap - -	sub	sp,ip,#96 -        vldmia  sp!,{d8-d15} -        ldmia   sp!,{r4-r11} -	ret						@ bx lr -.size	bn_mul8x_mont_neon,.-bn_mul8x_mont_neon -#endif -___ -} -$code.=<<___; -.asciz	"Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" +.asciz	"Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"  .align	2 -#if __ARM_ARCH__>=7 -.comm	OPENSSL_armcap_P,4,4 -#endif  ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem;  $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4 -$code =~ s/\bret\b/bx	lr/gm;  print $code;  close STDOUT; diff --git a/app/openssl/crypto/bn/asm/armv4-mont.s b/app/openssl/crypto/bn/asm/armv4-mont.s index fecae15e..64c220b5 100644 --- a/app/openssl/crypto/bn/asm/armv4-mont.s +++ b/app/openssl/crypto/bn/asm/armv4-mont.s @@ -1,37 +1,13 @@ -#include "arm_arch.h" -  .text -.code	32 - -#if __ARM_ARCH__>=7 -.align	5 -.LOPENSSL_armcap: -.word	OPENSSL_armcap_P-bn_mul_mont -#endif  .global	bn_mul_mont  .type	bn_mul_mont,%function -.align	5 +.align	2  bn_mul_mont: -	ldr	ip,[sp,#4]		@ load num  	stmdb	sp!,{r0,r2}		@ sp points at argument block -#if __ARM_ARCH__>=7 -	tst	ip,#7 -	bne	.Lialu -	adr	r0,bn_mul_mont -	ldr	r2,.LOPENSSL_armcap -	ldr	r0,[r0,r2] -	tst	r0,#1			@ NEON available? -	ldmia	sp, {r0,r2} -	beq	.Lialu -	add	sp,sp,#8 -	b	bn_mul8x_mont_neon -.align	4 -.Lialu: -#endif -	cmp	ip,#2 -	mov	r0,ip			@ load num +	ldr	r0,[sp,#3*4]		@ load num +	cmp	r0,#2  	movlt	r0,#0  	addlt	sp,sp,#2*4  	blt	.Labrt @@ -161,419 +137,9 @@ bn_mul_mont:  	ldmia	sp!,{r4-r12,lr}		@ restore registers  	add	sp,sp,#2*4		@ skip over {r0,r2}  	mov	r0,#1 -.Labrt: -#if __ARM_ARCH__>=5 -	bx	lr				@ .word	0xe12fff1e -#else -	tst	lr,#1 +.Labrt:	tst	lr,#1  	moveq	pc,lr			@ be binary compatible with V4, yet  	.word	0xe12fff1e			@ interoperable with Thumb ISA:-) -#endif  .size	bn_mul_mont,.-bn_mul_mont -#if __ARM_ARCH__>=7 -.fpu	neon - -.type	bn_mul8x_mont_neon,%function -.align	5 -bn_mul8x_mont_neon: -	mov	ip,sp -	stmdb	sp!,{r4-r11} -	vstmdb	sp!,{d8-d15}		@ ABI specification says so -	ldmia	ip,{r4-r5}		@ load rest of parameter block - -	sub		r7,sp,#16 -	vld1.32		{d28[0]}, [r2,:32]! -	sub		r7,r7,r5,lsl#4 -	vld1.32		{d0-d3},  [r1]!		@ can't specify :32 :-( -	and		r7,r7,#-64 -	vld1.32		{d30[0]}, [r4,:32] -	mov		sp,r7			@ alloca -	veor		d8,d8,d8 -	subs		r8,r5,#8 -	vzip.16		d28,d8 - -	vmull.u32	q6,d28,d0[0] -	vmull.u32	q7,d28,d0[1] -	vmull.u32	q8,d28,d1[0] -	vshl.i64	d10,d13,#16 -	vmull.u32	q9,d28,d1[1] - -	vadd.u64	d10,d10,d12 -	veor		d8,d8,d8 -	vmul.u32	d29,d10,d30 - -	vmull.u32	q10,d28,d2[0] -	 vld1.32	{d4-d7}, [r3]! -	vmull.u32	q11,d28,d2[1] -	vmull.u32	q12,d28,d3[0] -	vzip.16		d29,d8 -	vmull.u32	q13,d28,d3[1] - -	bne	.LNEON_1st - -	@ special case for num=8, everything is in register bank... - -	vmlal.u32	q6,d29,d4[0] -	sub		r9,r5,#1 -	vmlal.u32	q7,d29,d4[1] -	vmlal.u32	q8,d29,d5[0] -	vmlal.u32	q9,d29,d5[1] - -	vmlal.u32	q10,d29,d6[0] -	vmov		q5,q6 -	vmlal.u32	q11,d29,d6[1] -	vmov		q6,q7 -	vmlal.u32	q12,d29,d7[0] -	vmov		q7,q8 -	vmlal.u32	q13,d29,d7[1] -	vmov		q8,q9 -	vmov		q9,q10 -	vshr.u64	d10,d10,#16 -	vmov		q10,q11 -	vmov		q11,q12 -	vadd.u64	d10,d10,d11 -	vmov		q12,q13 -	veor		q13,q13 -	vshr.u64	d10,d10,#16 - -	b	.LNEON_outer8 - -.align	4 -.LNEON_outer8: -	vld1.32		{d28[0]}, [r2,:32]! 
-	veor		d8,d8,d8 -	vzip.16		d28,d8 -	vadd.u64	d12,d12,d10 - -	vmlal.u32	q6,d28,d0[0] -	vmlal.u32	q7,d28,d0[1] -	vmlal.u32	q8,d28,d1[0] -	vshl.i64	d10,d13,#16 -	vmlal.u32	q9,d28,d1[1] - -	vadd.u64	d10,d10,d12 -	veor		d8,d8,d8 -	subs		r9,r9,#1 -	vmul.u32	d29,d10,d30 - -	vmlal.u32	q10,d28,d2[0] -	vmlal.u32	q11,d28,d2[1] -	vmlal.u32	q12,d28,d3[0] -	vzip.16		d29,d8 -	vmlal.u32	q13,d28,d3[1] - -	vmlal.u32	q6,d29,d4[0] -	vmlal.u32	q7,d29,d4[1] -	vmlal.u32	q8,d29,d5[0] -	vmlal.u32	q9,d29,d5[1] - -	vmlal.u32	q10,d29,d6[0] -	vmov		q5,q6 -	vmlal.u32	q11,d29,d6[1] -	vmov		q6,q7 -	vmlal.u32	q12,d29,d7[0] -	vmov		q7,q8 -	vmlal.u32	q13,d29,d7[1] -	vmov		q8,q9 -	vmov		q9,q10 -	vshr.u64	d10,d10,#16 -	vmov		q10,q11 -	vmov		q11,q12 -	vadd.u64	d10,d10,d11 -	vmov		q12,q13 -	veor		q13,q13 -	vshr.u64	d10,d10,#16 - -	bne	.LNEON_outer8 - -	vadd.u64	d12,d12,d10 -	mov		r7,sp -	vshr.u64	d10,d12,#16 -	mov		r8,r5 -	vadd.u64	d13,d13,d10 -	add		r6,sp,#16 -	vshr.u64	d10,d13,#16 -	vzip.16		d12,d13 - -	b	.LNEON_tail2 - -.align	4 -.LNEON_1st: -	vmlal.u32	q6,d29,d4[0] -	 vld1.32	{d0-d3}, [r1]! -	vmlal.u32	q7,d29,d4[1] -	subs		r8,r8,#8 -	vmlal.u32	q8,d29,d5[0] -	vmlal.u32	q9,d29,d5[1] - -	vmlal.u32	q10,d29,d6[0] -	 vld1.32	{d4-d5}, [r3]! -	vmlal.u32	q11,d29,d6[1] -	 vst1.64	{q6-q7}, [r7,:256]! -	vmlal.u32	q12,d29,d7[0] -	vmlal.u32	q13,d29,d7[1] -	 vst1.64	{q8-q9}, [r7,:256]! - -	vmull.u32	q6,d28,d0[0] -	 vld1.32	{d6-d7}, [r3]! -	vmull.u32	q7,d28,d0[1] -	 vst1.64	{q10-q11}, [r7,:256]! -	vmull.u32	q8,d28,d1[0] -	vmull.u32	q9,d28,d1[1] -	 vst1.64	{q12-q13}, [r7,:256]! - -	vmull.u32	q10,d28,d2[0] -	vmull.u32	q11,d28,d2[1] -	vmull.u32	q12,d28,d3[0] -	vmull.u32	q13,d28,d3[1] - -	bne	.LNEON_1st - -	vmlal.u32	q6,d29,d4[0] -	add		r6,sp,#16 -	vmlal.u32	q7,d29,d4[1] -	sub		r1,r1,r5,lsl#2		@ rewind r1 -	vmlal.u32	q8,d29,d5[0] -	 vld1.64	{q5}, [sp,:128] -	vmlal.u32	q9,d29,d5[1] -	sub		r9,r5,#1 - -	vmlal.u32	q10,d29,d6[0] -	vst1.64		{q6-q7}, [r7,:256]! -	vmlal.u32	q11,d29,d6[1] -	vshr.u64	d10,d10,#16 -	 vld1.64	{q6},       [r6, :128]! -	vmlal.u32	q12,d29,d7[0] -	vst1.64		{q8-q9}, [r7,:256]! -	vmlal.u32	q13,d29,d7[1] - -	vst1.64		{q10-q11}, [r7,:256]! -	vadd.u64	d10,d10,d11 -	veor		q4,q4,q4 -	vst1.64		{q12-q13}, [r7,:256]! -	 vld1.64	{q7-q8}, [r6, :256]! -	vst1.64		{q4},          [r7,:128] -	vshr.u64	d10,d10,#16 - -	b		.LNEON_outer - -.align	4 -.LNEON_outer: -	vld1.32		{d28[0]}, [r2,:32]! -	sub		r3,r3,r5,lsl#2		@ rewind r3 -	vld1.32		{d0-d3},  [r1]! -	veor		d8,d8,d8 -	mov		r7,sp -	vzip.16		d28,d8 -	sub		r8,r5,#8 -	vadd.u64	d12,d12,d10 - -	vmlal.u32	q6,d28,d0[0] -	 vld1.64	{q9-q10},[r6,:256]! -	vmlal.u32	q7,d28,d0[1] -	vmlal.u32	q8,d28,d1[0] -	 vld1.64	{q11-q12},[r6,:256]! -	vmlal.u32	q9,d28,d1[1] - -	vshl.i64	d10,d13,#16 -	veor		d8,d8,d8 -	vadd.u64	d10,d10,d12 -	 vld1.64	{q13},[r6,:128]! -	vmul.u32	d29,d10,d30 - -	vmlal.u32	q10,d28,d2[0] -	 vld1.32	{d4-d7}, [r3]! -	vmlal.u32	q11,d28,d2[1] -	vmlal.u32	q12,d28,d3[0] -	vzip.16		d29,d8 -	vmlal.u32	q13,d28,d3[1] - -.LNEON_inner: -	vmlal.u32	q6,d29,d4[0] -	 vld1.32	{d0-d3}, [r1]! -	vmlal.u32	q7,d29,d4[1] -	 subs		r8,r8,#8 -	vmlal.u32	q8,d29,d5[0] -	vmlal.u32	q9,d29,d5[1] -	vst1.64		{q6-q7}, [r7,:256]! - -	vmlal.u32	q10,d29,d6[0] -	 vld1.64	{q6},       [r6, :128]! -	vmlal.u32	q11,d29,d6[1] -	vst1.64		{q8-q9}, [r7,:256]! -	vmlal.u32	q12,d29,d7[0] -	 vld1.64	{q7-q8}, [r6, :256]! -	vmlal.u32	q13,d29,d7[1] -	vst1.64		{q10-q11}, [r7,:256]! - -	vmlal.u32	q6,d28,d0[0] -	 vld1.64	{q9-q10}, [r6, :256]! -	vmlal.u32	q7,d28,d0[1] -	vst1.64		{q12-q13}, [r7,:256]! 
-	vmlal.u32	q8,d28,d1[0] -	 vld1.64	{q11-q12}, [r6, :256]! -	vmlal.u32	q9,d28,d1[1] -	 vld1.32	{d4-d7}, [r3]! - -	vmlal.u32	q10,d28,d2[0] -	 vld1.64	{q13},       [r6, :128]! -	vmlal.u32	q11,d28,d2[1] -	vmlal.u32	q12,d28,d3[0] -	vmlal.u32	q13,d28,d3[1] - -	bne	.LNEON_inner - -	vmlal.u32	q6,d29,d4[0] -	add		r6,sp,#16 -	vmlal.u32	q7,d29,d4[1] -	sub		r1,r1,r5,lsl#2		@ rewind r1 -	vmlal.u32	q8,d29,d5[0] -	 vld1.64	{q5}, [sp,:128] -	vmlal.u32	q9,d29,d5[1] -	subs		r9,r9,#1 - -	vmlal.u32	q10,d29,d6[0] -	vst1.64		{q6-q7}, [r7,:256]! -	vmlal.u32	q11,d29,d6[1] -	 vld1.64	{q6},       [r6, :128]! -	vshr.u64	d10,d10,#16 -	vst1.64		{q8-q9}, [r7,:256]! -	vmlal.u32	q12,d29,d7[0] -	 vld1.64	{q7-q8}, [r6, :256]! -	vmlal.u32	q13,d29,d7[1] - -	vst1.64		{q10-q11}, [r7,:256]! -	vadd.u64	d10,d10,d11 -	vst1.64		{q12-q13}, [r7,:256]! -	vshr.u64	d10,d10,#16 - -	bne	.LNEON_outer - -	mov		r7,sp -	mov		r8,r5 - -.LNEON_tail: -	vadd.u64	d12,d12,d10 -	vld1.64		{q9-q10}, [r6, :256]! -	vshr.u64	d10,d12,#16 -	vadd.u64	d13,d13,d10 -	vld1.64		{q11-q12}, [r6, :256]! -	vshr.u64	d10,d13,#16 -	vld1.64		{q13},       [r6, :128]! -	vzip.16		d12,d13 - -.LNEON_tail2: -	vadd.u64	d14,d14,d10 -	vst1.32		{d12[0]}, [r7, :32]! -	vshr.u64	d10,d14,#16 -	vadd.u64	d15,d15,d10 -	vshr.u64	d10,d15,#16 -	vzip.16		d14,d15 - -	vadd.u64	d16,d16,d10 -	vst1.32		{d14[0]}, [r7, :32]! -	vshr.u64	d10,d16,#16 -	vadd.u64	d17,d17,d10 -	vshr.u64	d10,d17,#16 -	vzip.16		d16,d17 - -	vadd.u64	d18,d18,d10 -	vst1.32		{d16[0]}, [r7, :32]! -	vshr.u64	d10,d18,#16 -	vadd.u64	d19,d19,d10 -	vshr.u64	d10,d19,#16 -	vzip.16		d18,d19 - -	vadd.u64	d20,d20,d10 -	vst1.32		{d18[0]}, [r7, :32]! -	vshr.u64	d10,d20,#16 -	vadd.u64	d21,d21,d10 -	vshr.u64	d10,d21,#16 -	vzip.16		d20,d21 - -	vadd.u64	d22,d22,d10 -	vst1.32		{d20[0]}, [r7, :32]! -	vshr.u64	d10,d22,#16 -	vadd.u64	d23,d23,d10 -	vshr.u64	d10,d23,#16 -	vzip.16		d22,d23 - -	vadd.u64	d24,d24,d10 -	vst1.32		{d22[0]}, [r7, :32]! -	vshr.u64	d10,d24,#16 -	vadd.u64	d25,d25,d10 -	vld1.64		{q6}, [r6, :128]! -	vshr.u64	d10,d25,#16 -	vzip.16		d24,d25 - -	vadd.u64	d26,d26,d10 -	vst1.32		{d24[0]}, [r7, :32]! -	vshr.u64	d10,d26,#16 -	vadd.u64	d27,d27,d10 -	vld1.64		{q7-q8},	[r6, :256]! -	vshr.u64	d10,d27,#16 -	vzip.16		d26,d27 -	subs		r8,r8,#8 -	vst1.32		{d26[0]}, [r7, :32]! - -	bne	.LNEON_tail - -	vst1.32	{d10[0]}, [r7, :32]		@ top-most bit -	sub	r3,r3,r5,lsl#2			@ rewind r3 -	subs	r1,sp,#0				@ clear carry flag -	add	r2,sp,r5,lsl#2 - -.LNEON_sub: -	ldmia	r1!, {r4-r7} -	ldmia	r3!, {r8-r11} -	sbcs	r8, r4,r8 -	sbcs	r9, r5,r9 -	sbcs	r10,r6,r10 -	sbcs	r11,r7,r11 -	teq	r1,r2				@ preserves carry -	stmia	r0!, {r8-r11} -	bne	.LNEON_sub - -	ldr	r10, [r1]				@ load top-most bit -	veor	q0,q0,q0 -	sub	r11,r2,sp				@ this is num*4 -	veor	q1,q1,q1 -	mov	r1,sp -	sub	r0,r0,r11				@ rewind r0 -	mov	r3,r2				@ second 3/4th of frame -	sbcs	r10,r10,#0				@ result is carry flag - -.LNEON_copy_n_zap: -	ldmia	r1!, {r4-r7} -	ldmia	r0,  {r8-r11} -	movcc	r8, r4 -	vst1.64	{q0-q1}, [r3,:256]!			@ wipe -	movcc	r9, r5 -	movcc	r10,r6 -	vst1.64	{q0-q1}, [r3,:256]!			@ wipe -	movcc	r11,r7 -	ldmia	r1, {r4-r7} -	stmia	r0!, {r8-r11} -	sub	r1,r1,#16 -	ldmia	r0, {r8-r11} -	movcc	r8, r4 -	vst1.64	{q0-q1}, [r1,:256]!			@ wipe -	movcc	r9, r5 -	movcc	r10,r6 -	vst1.64	{q0-q1}, [r3,:256]!			
@ wipe -	movcc	r11,r7 -	teq	r1,r2				@ preserves carry -	stmia	r0!, {r8-r11} -	bne	.LNEON_copy_n_zap - -	sub	sp,ip,#96 -        vldmia  sp!,{d8-d15} -        ldmia   sp!,{r4-r11} -	bx	lr						@ .word	0xe12fff1e -.size	bn_mul8x_mont_neon,.-bn_mul8x_mont_neon -#endif -.asciz	"Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>" +.asciz	"Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro@openssl.org>"  .align	2 -#if __ARM_ARCH__>=7 -.comm	OPENSSL_armcap_P,4,4 -#endif diff --git a/app/openssl/crypto/bn/asm/mips3.S b/app/openssl/crypto/bn/asm/mips3.S deleted file mode 100644 index dca4105c..00000000 --- a/app/openssl/crypto/bn/asm/mips3.S +++ /dev/null @@ -1,2201 +0,0 @@ -.rdata -.asciiz	"mips3.s, Version 1.1" -.asciiz	"MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" - -/* - * ==================================================================== - * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL - * project. - * - * Rights for redistribution and usage in source and binary forms are - * granted according to the OpenSSL license. Warranty of any kind is - * disclaimed. - * ==================================================================== - */ - -/* - * This is my modest contributon to the OpenSSL project (see - * http://www.openssl.org/ for more information about it) and is - * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c - * module. For updates see http://fy.chalmers.se/~appro/hpe/. - * - * The module is designed to work with either of the "new" MIPS ABI(5), - * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under - * IRIX 5.x not only because it doesn't support new ABIs but also - * because 5.x kernels put R4x00 CPU into 32-bit mode and all those - * 64-bit instructions (daddu, dmultu, etc.) found below gonna only - * cause illegal instruction exception:-( - * - * In addition the code depends on preprocessor flags set up by MIPSpro - * compiler driver (either as or cc) and therefore (probably?) can't be - * compiled by the GNU assembler. GNU C driver manages fine though... - * I mean as long as -mmips-as is specified or is the default option, - * because then it simply invokes /usr/bin/as which in turn takes - * perfect care of the preprocessor definitions. Another neat feature - * offered by the MIPSpro assembler is an optimization pass. This gave - * me the opportunity to have the code looking more regular as all those - * architecture dependent instruction rescheduling details were left to - * the assembler. Cool, huh? - * - * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' - * goes way over 3 times faster! - * - *					<appro@fy.chalmers.se> - */ -#include <asm.h> -#include <regdef.h> - -#if _MIPS_ISA>=4 -#define	MOVNZ(cond,dst,src)	\ -	movn	dst,src,cond -#else -#define	MOVNZ(cond,dst,src)	\ -	.set	noreorder;	\ -	bnezl	cond,.+8;	\ -	move	dst,src;	\ -	.set	reorder -#endif - -.text - -.set	noat -.set	reorder - -#define	MINUS4	v1 - -.align	5 -LEAF(bn_mul_add_words) -	.set	noreorder -	bgtzl	a2,.L_bn_mul_add_words_proceed -	ld	t0,0(a1) -	jr	ra -	move	v0,zero -	.set	reorder - -.L_bn_mul_add_words_proceed: -	li	MINUS4,-4 -	and	ta0,a2,MINUS4 -	move	v0,zero -	beqz	ta0,.L_bn_mul_add_words_tail - -.L_bn_mul_add_words_loop: -	dmultu	t0,a3 -	ld	t1,0(a0) -	ld	t2,8(a1) -	ld	t3,8(a0) -	ld	ta0,16(a1) -	ld	ta1,16(a0) -	daddu	t1,v0 -	sltu	v0,t1,v0	/* All manuals say it "compares 32-bit -				 * values", but it seems to work fine -				 * even on 64-bit registers. 
*/ -	mflo	AT -	mfhi	t0 -	daddu	t1,AT -	daddu	v0,t0 -	sltu	AT,t1,AT -	sd	t1,0(a0) -	daddu	v0,AT - -	dmultu	t2,a3 -	ld	ta2,24(a1) -	ld	ta3,24(a0) -	daddu	t3,v0 -	sltu	v0,t3,v0 -	mflo	AT -	mfhi	t2 -	daddu	t3,AT -	daddu	v0,t2 -	sltu	AT,t3,AT -	sd	t3,8(a0) -	daddu	v0,AT - -	dmultu	ta0,a3 -	subu	a2,4 -	PTR_ADD	a0,32 -	PTR_ADD	a1,32 -	daddu	ta1,v0 -	sltu	v0,ta1,v0 -	mflo	AT -	mfhi	ta0 -	daddu	ta1,AT -	daddu	v0,ta0 -	sltu	AT,ta1,AT -	sd	ta1,-16(a0) -	daddu	v0,AT - - -	dmultu	ta2,a3 -	and	ta0,a2,MINUS4 -	daddu	ta3,v0 -	sltu	v0,ta3,v0 -	mflo	AT -	mfhi	ta2 -	daddu	ta3,AT -	daddu	v0,ta2 -	sltu	AT,ta3,AT -	sd	ta3,-8(a0) -	daddu	v0,AT -	.set	noreorder -	bgtzl	ta0,.L_bn_mul_add_words_loop -	ld	t0,0(a1) - -	bnezl	a2,.L_bn_mul_add_words_tail -	ld	t0,0(a1) -	.set	reorder - -.L_bn_mul_add_words_return: -	jr	ra - -.L_bn_mul_add_words_tail: -	dmultu	t0,a3 -	ld	t1,0(a0) -	subu	a2,1 -	daddu	t1,v0 -	sltu	v0,t1,v0 -	mflo	AT -	mfhi	t0 -	daddu	t1,AT -	daddu	v0,t0 -	sltu	AT,t1,AT -	sd	t1,0(a0) -	daddu	v0,AT -	beqz	a2,.L_bn_mul_add_words_return - -	ld	t0,8(a1) -	dmultu	t0,a3 -	ld	t1,8(a0) -	subu	a2,1 -	daddu	t1,v0 -	sltu	v0,t1,v0 -	mflo	AT -	mfhi	t0 -	daddu	t1,AT -	daddu	v0,t0 -	sltu	AT,t1,AT -	sd	t1,8(a0) -	daddu	v0,AT -	beqz	a2,.L_bn_mul_add_words_return - -	ld	t0,16(a1) -	dmultu	t0,a3 -	ld	t1,16(a0) -	daddu	t1,v0 -	sltu	v0,t1,v0 -	mflo	AT -	mfhi	t0 -	daddu	t1,AT -	daddu	v0,t0 -	sltu	AT,t1,AT -	sd	t1,16(a0) -	daddu	v0,AT -	jr	ra -END(bn_mul_add_words) - -.align	5 -LEAF(bn_mul_words) -	.set	noreorder -	bgtzl	a2,.L_bn_mul_words_proceed -	ld	t0,0(a1) -	jr	ra -	move	v0,zero -	.set	reorder - -.L_bn_mul_words_proceed: -	li	MINUS4,-4 -	and	ta0,a2,MINUS4 -	move	v0,zero -	beqz	ta0,.L_bn_mul_words_tail - -.L_bn_mul_words_loop: -	dmultu	t0,a3 -	ld	t2,8(a1) -	ld	ta0,16(a1) -	ld	ta2,24(a1) -	mflo	AT -	mfhi	t0 -	daddu	v0,AT -	sltu	t1,v0,AT -	sd	v0,0(a0) -	daddu	v0,t1,t0 - -	dmultu	t2,a3 -	subu	a2,4 -	PTR_ADD	a0,32 -	PTR_ADD	a1,32 -	mflo	AT -	mfhi	t2 -	daddu	v0,AT -	sltu	t3,v0,AT -	sd	v0,-24(a0) -	daddu	v0,t3,t2 - -	dmultu	ta0,a3 -	mflo	AT -	mfhi	ta0 -	daddu	v0,AT -	sltu	ta1,v0,AT -	sd	v0,-16(a0) -	daddu	v0,ta1,ta0 - - -	dmultu	ta2,a3 -	and	ta0,a2,MINUS4 -	mflo	AT -	mfhi	ta2 -	daddu	v0,AT -	sltu	ta3,v0,AT -	sd	v0,-8(a0) -	daddu	v0,ta3,ta2 -	.set	noreorder -	bgtzl	ta0,.L_bn_mul_words_loop -	ld	t0,0(a1) - -	bnezl	a2,.L_bn_mul_words_tail -	ld	t0,0(a1) -	.set	reorder - -.L_bn_mul_words_return: -	jr	ra - -.L_bn_mul_words_tail: -	dmultu	t0,a3 -	subu	a2,1 -	mflo	AT -	mfhi	t0 -	daddu	v0,AT -	sltu	t1,v0,AT -	sd	v0,0(a0) -	daddu	v0,t1,t0 -	beqz	a2,.L_bn_mul_words_return - -	ld	t0,8(a1) -	dmultu	t0,a3 -	subu	a2,1 -	mflo	AT -	mfhi	t0 -	daddu	v0,AT -	sltu	t1,v0,AT -	sd	v0,8(a0) -	daddu	v0,t1,t0 -	beqz	a2,.L_bn_mul_words_return - -	ld	t0,16(a1) -	dmultu	t0,a3 -	mflo	AT -	mfhi	t0 -	daddu	v0,AT -	sltu	t1,v0,AT -	sd	v0,16(a0) -	daddu	v0,t1,t0 -	jr	ra -END(bn_mul_words) - -.align	5 -LEAF(bn_sqr_words) -	.set	noreorder -	bgtzl	a2,.L_bn_sqr_words_proceed -	ld	t0,0(a1) -	jr	ra -	move	v0,zero -	.set	reorder - -.L_bn_sqr_words_proceed: -	li	MINUS4,-4 -	and	ta0,a2,MINUS4 -	move	v0,zero -	beqz	ta0,.L_bn_sqr_words_tail - -.L_bn_sqr_words_loop: -	dmultu	t0,t0 -	ld	t2,8(a1) -	ld	ta0,16(a1) -	ld	ta2,24(a1) -	mflo	t1 -	mfhi	t0 -	sd	t1,0(a0) -	sd	t0,8(a0) - -	dmultu	t2,t2 -	subu	a2,4 -	PTR_ADD	a0,64 -	PTR_ADD	a1,32 -	mflo	t3 -	mfhi	t2 -	sd	t3,-48(a0) -	sd	t2,-40(a0) - -	dmultu	ta0,ta0 -	mflo	ta1 -	mfhi	ta0 -	sd	ta1,-32(a0) -	sd	ta0,-24(a0) - - -	dmultu	ta2,ta2 -	and	ta0,a2,MINUS4 -	mflo	ta3 -	mfhi	ta2 -	sd	ta3,-16(a0) -	sd	ta2,-8(a0) - -	.set	noreorder -	bgtzl	
ta0,.L_bn_sqr_words_loop -	ld	t0,0(a1) - -	bnezl	a2,.L_bn_sqr_words_tail -	ld	t0,0(a1) -	.set	reorder - -.L_bn_sqr_words_return: -	move	v0,zero -	jr	ra - -.L_bn_sqr_words_tail: -	dmultu	t0,t0 -	subu	a2,1 -	mflo	t1 -	mfhi	t0 -	sd	t1,0(a0) -	sd	t0,8(a0) -	beqz	a2,.L_bn_sqr_words_return - -	ld	t0,8(a1) -	dmultu	t0,t0 -	subu	a2,1 -	mflo	t1 -	mfhi	t0 -	sd	t1,16(a0) -	sd	t0,24(a0) -	beqz	a2,.L_bn_sqr_words_return - -	ld	t0,16(a1) -	dmultu	t0,t0 -	mflo	t1 -	mfhi	t0 -	sd	t1,32(a0) -	sd	t0,40(a0) -	jr	ra -END(bn_sqr_words) - -.align	5 -LEAF(bn_add_words) -	.set	noreorder -	bgtzl	a3,.L_bn_add_words_proceed -	ld	t0,0(a1) -	jr	ra -	move	v0,zero -	.set	reorder - -.L_bn_add_words_proceed: -	li	MINUS4,-4 -	and	AT,a3,MINUS4 -	move	v0,zero -	beqz	AT,.L_bn_add_words_tail - -.L_bn_add_words_loop: -	ld	ta0,0(a2) -	subu	a3,4 -	ld	t1,8(a1) -	and	AT,a3,MINUS4 -	ld	t2,16(a1) -	PTR_ADD	a2,32 -	ld	t3,24(a1) -	PTR_ADD	a0,32 -	ld	ta1,-24(a2) -	PTR_ADD	a1,32 -	ld	ta2,-16(a2) -	ld	ta3,-8(a2) -	daddu	ta0,t0 -	sltu	t8,ta0,t0 -	daddu	t0,ta0,v0 -	sltu	v0,t0,ta0 -	sd	t0,-32(a0) -	daddu	v0,t8 - -	daddu	ta1,t1 -	sltu	t9,ta1,t1 -	daddu	t1,ta1,v0 -	sltu	v0,t1,ta1 -	sd	t1,-24(a0) -	daddu	v0,t9 - -	daddu	ta2,t2 -	sltu	t8,ta2,t2 -	daddu	t2,ta2,v0 -	sltu	v0,t2,ta2 -	sd	t2,-16(a0) -	daddu	v0,t8 -	 -	daddu	ta3,t3 -	sltu	t9,ta3,t3 -	daddu	t3,ta3,v0 -	sltu	v0,t3,ta3 -	sd	t3,-8(a0) -	daddu	v0,t9 -	 -	.set	noreorder -	bgtzl	AT,.L_bn_add_words_loop -	ld	t0,0(a1) - -	bnezl	a3,.L_bn_add_words_tail -	ld	t0,0(a1) -	.set	reorder - -.L_bn_add_words_return: -	jr	ra - -.L_bn_add_words_tail: -	ld	ta0,0(a2) -	daddu	ta0,t0 -	subu	a3,1 -	sltu	t8,ta0,t0 -	daddu	t0,ta0,v0 -	sltu	v0,t0,ta0 -	sd	t0,0(a0) -	daddu	v0,t8 -	beqz	a3,.L_bn_add_words_return - -	ld	t1,8(a1) -	ld	ta1,8(a2) -	daddu	ta1,t1 -	subu	a3,1 -	sltu	t9,ta1,t1 -	daddu	t1,ta1,v0 -	sltu	v0,t1,ta1 -	sd	t1,8(a0) -	daddu	v0,t9 -	beqz	a3,.L_bn_add_words_return - -	ld	t2,16(a1) -	ld	ta2,16(a2) -	daddu	ta2,t2 -	sltu	t8,ta2,t2 -	daddu	t2,ta2,v0 -	sltu	v0,t2,ta2 -	sd	t2,16(a0) -	daddu	v0,t8 -	jr	ra -END(bn_add_words) - -.align	5 -LEAF(bn_sub_words) -	.set	noreorder -	bgtzl	a3,.L_bn_sub_words_proceed -	ld	t0,0(a1) -	jr	ra -	move	v0,zero -	.set	reorder - -.L_bn_sub_words_proceed: -	li	MINUS4,-4 -	and	AT,a3,MINUS4 -	move	v0,zero -	beqz	AT,.L_bn_sub_words_tail - -.L_bn_sub_words_loop: -	ld	ta0,0(a2) -	subu	a3,4 -	ld	t1,8(a1) -	and	AT,a3,MINUS4 -	ld	t2,16(a1) -	PTR_ADD	a2,32 -	ld	t3,24(a1) -	PTR_ADD	a0,32 -	ld	ta1,-24(a2) -	PTR_ADD	a1,32 -	ld	ta2,-16(a2) -	ld	ta3,-8(a2) -	sltu	t8,t0,ta0 -	dsubu	t0,ta0 -	dsubu	ta0,t0,v0 -	sd	ta0,-32(a0) -	MOVNZ	(t0,v0,t8) - -	sltu	t9,t1,ta1 -	dsubu	t1,ta1 -	dsubu	ta1,t1,v0 -	sd	ta1,-24(a0) -	MOVNZ	(t1,v0,t9) - - -	sltu	t8,t2,ta2 -	dsubu	t2,ta2 -	dsubu	ta2,t2,v0 -	sd	ta2,-16(a0) -	MOVNZ	(t2,v0,t8) - -	sltu	t9,t3,ta3 -	dsubu	t3,ta3 -	dsubu	ta3,t3,v0 -	sd	ta3,-8(a0) -	MOVNZ	(t3,v0,t9) - -	.set	noreorder -	bgtzl	AT,.L_bn_sub_words_loop -	ld	t0,0(a1) - -	bnezl	a3,.L_bn_sub_words_tail -	ld	t0,0(a1) -	.set	reorder - -.L_bn_sub_words_return: -	jr	ra - -.L_bn_sub_words_tail: -	ld	ta0,0(a2) -	subu	a3,1 -	sltu	t8,t0,ta0 -	dsubu	t0,ta0 -	dsubu	ta0,t0,v0 -	MOVNZ	(t0,v0,t8) -	sd	ta0,0(a0) -	beqz	a3,.L_bn_sub_words_return - -	ld	t1,8(a1) -	subu	a3,1 -	ld	ta1,8(a2) -	sltu	t9,t1,ta1 -	dsubu	t1,ta1 -	dsubu	ta1,t1,v0 -	MOVNZ	(t1,v0,t9) -	sd	ta1,8(a0) -	beqz	a3,.L_bn_sub_words_return - -	ld	t2,16(a1) -	ld	ta2,16(a2) -	sltu	t8,t2,ta2 -	dsubu	t2,ta2 -	dsubu	ta2,t2,v0 -	MOVNZ	(t2,v0,t8) -	sd	ta2,16(a0) -	jr	ra -END(bn_sub_words) - -#undef	MINUS4 - -.align 5 -LEAF(bn_div_3_words) -	.set	reorder -	
move	a3,a0		/* we know that bn_div_words doesn't -				 * touch a3, ta2, ta3 and preserves a2 -				 * so that we can save two arguments -				 * and return address in registers -				 * instead of stack:-) -				 */ -	ld	a0,(a3) -	move	ta2,a1 -	ld	a1,-8(a3) -	bne	a0,a2,.L_bn_div_3_words_proceed -	li	v0,-1 -	jr	ra -.L_bn_div_3_words_proceed: -	move	ta3,ra -	bal	bn_div_words -	move	ra,ta3 -	dmultu	ta2,v0 -	ld	t2,-16(a3) -	move	ta0,zero -	mfhi	t1 -	mflo	t0 -	sltu	t8,t1,v1 -.L_bn_div_3_words_inner_loop: -	bnez	t8,.L_bn_div_3_words_inner_loop_done -	sgeu	AT,t2,t0 -	seq	t9,t1,v1 -	and	AT,t9 -	sltu	t3,t0,ta2 -	daddu	v1,a2 -	dsubu	t1,t3 -	dsubu	t0,ta2 -	sltu	t8,t1,v1 -	sltu	ta0,v1,a2 -	or	t8,ta0 -	.set	noreorder -	beqzl	AT,.L_bn_div_3_words_inner_loop -	dsubu	v0,1 -	.set	reorder -.L_bn_div_3_words_inner_loop_done: -	jr	ra -END(bn_div_3_words) - -.align	5 -LEAF(bn_div_words) -	.set	noreorder -	bnezl	a2,.L_bn_div_words_proceed -	move	v1,zero -	jr	ra -	li	v0,-1		/* I'd rather signal div-by-zero -				 * which can be done with 'break 7' */ - -.L_bn_div_words_proceed: -	bltz	a2,.L_bn_div_words_body -	move	t9,v1 -	dsll	a2,1 -	bgtz	a2,.-4 -	addu	t9,1 - -	.set	reorder -	negu	t1,t9 -	li	t2,-1 -	dsll	t2,t1 -	and	t2,a0 -	dsrl	AT,a1,t1 -	.set	noreorder -	bnezl	t2,.+8 -	break	6		/* signal overflow */ -	.set	reorder -	dsll	a0,t9 -	dsll	a1,t9 -	or	a0,AT - -#define	QT	ta0 -#define	HH	ta1 -#define	DH	v1 -.L_bn_div_words_body: -	dsrl	DH,a2,32 -	sgeu	AT,a0,a2 -	.set	noreorder -	bnezl	AT,.+8 -	dsubu	a0,a2 -	.set	reorder - -	li	QT,-1 -	dsrl	HH,a0,32 -	dsrl	QT,32	/* q=0xffffffff */ -	beq	DH,HH,.L_bn_div_words_skip_div1 -	ddivu	zero,a0,DH -	mflo	QT -.L_bn_div_words_skip_div1: -	dmultu	a2,QT -	dsll	t3,a0,32 -	dsrl	AT,a1,32 -	or	t3,AT -	mflo	t0 -	mfhi	t1 -.L_bn_div_words_inner_loop1: -	sltu	t2,t3,t0 -	seq	t8,HH,t1 -	sltu	AT,HH,t1 -	and	t2,t8 -	sltu	v0,t0,a2 -	or	AT,t2 -	.set	noreorder -	beqz	AT,.L_bn_div_words_inner_loop1_done -	dsubu	t1,v0 -	dsubu	t0,a2 -	b	.L_bn_div_words_inner_loop1 -	dsubu	QT,1 -	.set	reorder -.L_bn_div_words_inner_loop1_done: - -	dsll	a1,32 -	dsubu	a0,t3,t0 -	dsll	v0,QT,32 - -	li	QT,-1 -	dsrl	HH,a0,32 -	dsrl	QT,32	/* q=0xffffffff */ -	beq	DH,HH,.L_bn_div_words_skip_div2 -	ddivu	zero,a0,DH -	mflo	QT -.L_bn_div_words_skip_div2: -#undef	DH -	dmultu	a2,QT -	dsll	t3,a0,32 -	dsrl	AT,a1,32 -	or	t3,AT -	mflo	t0 -	mfhi	t1 -.L_bn_div_words_inner_loop2: -	sltu	t2,t3,t0 -	seq	t8,HH,t1 -	sltu	AT,HH,t1 -	and	t2,t8 -	sltu	v1,t0,a2 -	or	AT,t2 -	.set	noreorder -	beqz	AT,.L_bn_div_words_inner_loop2_done -	dsubu	t1,v1 -	dsubu	t0,a2 -	b	.L_bn_div_words_inner_loop2 -	dsubu	QT,1 -	.set	reorder -.L_bn_div_words_inner_loop2_done:	 -#undef	HH - -	dsubu	a0,t3,t0 -	or	v0,QT -	dsrl	v1,a0,t9	/* v1 contains remainder if anybody wants it */ -	dsrl	a2,t9		/* restore a2 */ -	jr	ra -#undef	QT -END(bn_div_words) - -#define	a_0	t0 -#define	a_1	t1 -#define	a_2	t2 -#define	a_3	t3 -#define	b_0	ta0 -#define	b_1	ta1 -#define	b_2	ta2 -#define	b_3	ta3 - -#define	a_4	s0 -#define	a_5	s2 -#define	a_6	s4 -#define	a_7	a1	/* once we load a[7] we don't need a anymore */ -#define	b_4	s1 -#define	b_5	s3 -#define	b_6	s5 -#define	b_7	a2	/* once we load b[7] we don't need b anymore */ - -#define	t_1	t8 -#define	t_2	t9 - -#define	c_1	v0 -#define	c_2	v1 -#define	c_3	a3 - -#define	FRAME_SIZE	48 - -.align	5 -LEAF(bn_mul_comba8) -	.set	noreorder -	PTR_SUB	sp,FRAME_SIZE -	.frame	sp,64,ra -	.set	reorder -	ld	a_0,0(a1)	/* If compiled with -mips3 option on -				 * R5000 box assembler barks on this -				 * line with "shouldn't have mult/div -				 * as last instruction 
in bb (R10K -				 * bug)" warning. If anybody out there -				 * has a clue about how to circumvent -				 * this do send me a note. -				 *		<appro@fy.chalmers.se> -				 */ -	ld	b_0,0(a2) -	ld	a_1,8(a1) -	ld	a_2,16(a1) -	ld	a_3,24(a1) -	ld	b_1,8(a2) -	ld	b_2,16(a2) -	ld	b_3,24(a2) -	dmultu	a_0,b_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */ -	sd	s0,0(sp) -	sd	s1,8(sp) -	sd	s2,16(sp) -	sd	s3,24(sp) -	sd	s4,32(sp) -	sd	s5,40(sp) -	mflo	c_1 -	mfhi	c_2 - -	dmultu	a_0,b_1		/* mul_add_c(a[0],b[1],c2,c3,c1); */ -	ld	a_4,32(a1) -	ld	a_5,40(a1) -	ld	a_6,48(a1) -	ld	a_7,56(a1) -	ld	b_4,32(a2) -	ld	b_5,40(a2) -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	c_3,t_2,AT -	dmultu	a_1,b_0		/* mul_add_c(a[1],b[0],c2,c3,c1); */ -	ld	b_6,48(a2) -	ld	b_7,56(a2) -	sd	c_1,0(a0)	/* r[0]=c1; */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	c_1,c_3,t_2 -	sd	c_2,8(a0)	/* r[1]=c2; */ - -	dmultu	a_2,b_0		/* mul_add_c(a[2],b[0],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	dmultu	a_1,b_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	c_2,c_1,t_2 -	dmultu	a_0,b_2		/* mul_add_c(a[0],b[2],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,16(a0)	/* r[2]=c3; */ - -	dmultu	a_0,b_3		/* mul_add_c(a[0],b[3],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	c_3,c_2,t_2 -	dmultu	a_1,b_2		/* mul_add_c(a[1],b[2],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_2,b_1		/* mul_add_c(a[2],b[1],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_3,b_0		/* mul_add_c(a[3],b[0],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	sd	c_1,24(a0)	/* r[3]=c1; */ - -	dmultu	a_4,b_0		/* mul_add_c(a[4],b[0],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	c_1,c_3,t_2 -	dmultu	a_3,b_1		/* mul_add_c(a[3],b[1],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_2,b_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_1,b_3		/* mul_add_c(a[1],b[3],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_0,b_4		/* mul_add_c(a[0],b[4],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	sd	c_2,32(a0)	/* r[4]=c2; */ - -	dmultu	a_0,b_5		/* mul_add_c(a[0],b[5],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	c_2,c_1,t_2 -	dmultu	a_1,b_4		/* mul_add_c(a[1],b[4],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_2,b_3		/* mul_add_c(a[2],b[3],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	
daddu	c_2,AT -	dmultu	a_3,b_2		/* mul_add_c(a[3],b[2],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_4,b_1		/* mul_add_c(a[4],b[1],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_5,b_0		/* mul_add_c(a[5],b[0],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,40(a0)	/* r[5]=c3; */ - -	dmultu	a_6,b_0		/* mul_add_c(a[6],b[0],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	c_3,c_2,t_2 -	dmultu	a_5,b_1		/* mul_add_c(a[5],b[1],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_4,b_2		/* mul_add_c(a[4],b[2],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_3,b_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_2,b_4		/* mul_add_c(a[2],b[4],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_1,b_5		/* mul_add_c(a[1],b[5],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_0,b_6		/* mul_add_c(a[0],b[6],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	sd	c_1,48(a0)	/* r[6]=c1; */ - -	dmultu	a_0,b_7		/* mul_add_c(a[0],b[7],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	c_1,c_3,t_2 -	dmultu	a_1,b_6		/* mul_add_c(a[1],b[6],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_2,b_5		/* mul_add_c(a[2],b[5],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_3,b_4		/* mul_add_c(a[3],b[4],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_4,b_3		/* mul_add_c(a[4],b[3],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_5,b_2		/* mul_add_c(a[5],b[2],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_6,b_1		/* mul_add_c(a[6],b[1],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_7,b_0		/* mul_add_c(a[7],b[0],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	sd	c_2,56(a0)	/* r[7]=c2; */ - -	dmultu	a_7,b_1		/* mul_add_c(a[7],b[1],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	c_2,c_1,t_2 -	dmultu	a_6,b_2		/* mul_add_c(a[6],b[2],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 
-	daddu	c_2,AT -	dmultu	a_5,b_3		/* mul_add_c(a[5],b[3],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_4,b_4		/* mul_add_c(a[4],b[4],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_3,b_5		/* mul_add_c(a[3],b[5],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_2,b_6		/* mul_add_c(a[2],b[6],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_1,b_7		/* mul_add_c(a[1],b[7],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,64(a0)	/* r[8]=c3; */ - -	dmultu	a_2,b_7		/* mul_add_c(a[2],b[7],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	c_3,c_2,t_2 -	dmultu	a_3,b_6		/* mul_add_c(a[3],b[6],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_4,b_5		/* mul_add_c(a[4],b[5],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_5,b_4		/* mul_add_c(a[5],b[4],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_6,b_3		/* mul_add_c(a[6],b[3],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_7,b_2		/* mul_add_c(a[7],b[2],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	sd	c_1,72(a0)	/* r[9]=c1; */ - -	dmultu	a_7,b_3		/* mul_add_c(a[7],b[3],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	c_1,c_3,t_2 -	dmultu	a_6,b_4		/* mul_add_c(a[6],b[4],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_5,b_5		/* mul_add_c(a[5],b[5],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_4,b_6		/* mul_add_c(a[4],b[6],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_3,b_7		/* mul_add_c(a[3],b[7],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	sd	c_2,80(a0)	/* r[10]=c2; */ - -	dmultu	a_4,b_7		/* mul_add_c(a[4],b[7],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	c_2,c_1,t_2 -	dmultu	a_5,b_6		/* mul_add_c(a[5],b[6],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_6,b_5		/* mul_add_c(a[6],b[5],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_7,b_4		/* mul_add_c(a[7],b[4],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	
AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,88(a0)	/* r[11]=c3; */ - -	dmultu	a_7,b_5		/* mul_add_c(a[7],b[5],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	c_3,c_2,t_2 -	dmultu	a_6,b_6		/* mul_add_c(a[6],b[6],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_5,b_7		/* mul_add_c(a[5],b[7],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	sd	c_1,96(a0)	/* r[12]=c1; */ - -	dmultu	a_6,b_7		/* mul_add_c(a[6],b[7],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	c_1,c_3,t_2 -	dmultu	a_7,b_6		/* mul_add_c(a[7],b[6],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	sd	c_2,104(a0)	/* r[13]=c2; */ - -	dmultu	a_7,b_7		/* mul_add_c(a[7],b[7],c3,c1,c2); */ -	ld	s0,0(sp) -	ld	s1,8(sp) -	ld	s2,16(sp) -	ld	s3,24(sp) -	ld	s4,32(sp) -	ld	s5,40(sp) -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sd	c_3,112(a0)	/* r[14]=c3; */ -	sd	c_1,120(a0)	/* r[15]=c1; */ - -	PTR_ADD	sp,FRAME_SIZE - -	jr	ra -END(bn_mul_comba8) - -.align	5 -LEAF(bn_mul_comba4) -	.set	reorder -	ld	a_0,0(a1) -	ld	b_0,0(a2) -	ld	a_1,8(a1) -	ld	a_2,16(a1) -	dmultu	a_0,b_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */ -	ld	a_3,24(a1) -	ld	b_1,8(a2) -	ld	b_2,16(a2) -	ld	b_3,24(a2) -	mflo	c_1 -	mfhi	c_2 -	sd	c_1,0(a0) - -	dmultu	a_0,b_1		/* mul_add_c(a[0],b[1],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	c_3,t_2,AT -	dmultu	a_1,b_0		/* mul_add_c(a[1],b[0],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	c_1,c_3,t_2 -	sd	c_2,8(a0) - -	dmultu	a_2,b_0		/* mul_add_c(a[2],b[0],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	dmultu	a_1,b_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	c_2,c_1,t_2 -	dmultu	a_0,b_2		/* mul_add_c(a[0],b[2],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,16(a0) - -	dmultu	a_0,b_3		/* mul_add_c(a[0],b[3],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	c_3,c_2,t_2 -	dmultu	a_1,b_2		/* mul_add_c(a[1],b[2],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_2,b_1		/* mul_add_c(a[2],b[1],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_3,b_0		/* mul_add_c(a[3],b[0],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	sd	c_1,24(a0) - -	dmultu	a_3,b_1		/* mul_add_c(a[3],b[1],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	c_1,c_3,t_2 -	dmultu	a_2,b_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_1,b_3		/* mul_add_c(a[1],b[3],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	
daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	sd	c_2,32(a0) - -	dmultu	a_2,b_3		/* mul_add_c(a[2],b[3],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	c_2,c_1,t_2 -	dmultu	a_3,b_2		/* mul_add_c(a[3],b[2],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,40(a0) - -	dmultu	a_3,b_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sd	c_1,48(a0) -	sd	c_2,56(a0) - -	jr	ra -END(bn_mul_comba4) - -#undef	a_4 -#undef	a_5 -#undef	a_6 -#undef	a_7 -#define	a_4	b_0 -#define	a_5	b_1 -#define	a_6	b_2 -#define	a_7	b_3 - -.align	5 -LEAF(bn_sqr_comba8) -	.set	reorder -	ld	a_0,0(a1) -	ld	a_1,8(a1) -	ld	a_2,16(a1) -	ld	a_3,24(a1) - -	dmultu	a_0,a_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */ -	ld	a_4,32(a1) -	ld	a_5,40(a1) -	ld	a_6,48(a1) -	ld	a_7,56(a1) -	mflo	c_1 -	mfhi	c_2 -	sd	c_1,0(a0) - -	dmultu	a_0,a_1		/* mul_add_c2(a[0],b[1],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_1,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	c_3,t_2,AT -	sd	c_2,8(a0) - -	dmultu	a_2,a_0		/* mul_add_c2(a[2],b[0],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_2,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_1,a_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,16(a0) - -	dmultu	a_0,a_3		/* mul_add_c2(a[0],b[3],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_3,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_1,a_2		/* mul_add_c2(a[1],b[2],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_3,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	sd	c_1,24(a0) - -	dmultu	a_4,a_0		/* mul_add_c2(a[4],b[0],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_1,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_3,a_1		/* mul_add_c2(a[3],b[1],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_1,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_2,a_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	sd	c_2,32(a0) - -	dmultu	a_0,a_5		/* mul_add_c2(a[0],b[5],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_2,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_1,a_4		/* mul_add_c2(a[1],b[4],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_2,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_2,a_3		/* mul_add_c2(a[2],b[3],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 
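
Each dmultu/mflo/mfhi group followed by a daddu/sltu chain in these comba routines is one expansion of the mul_add_c column primitive named by the /* mul_add_c(...) */ comments (cf. the C reference in crypto/bn/bn_asm.c, for which this file is a drop-in replacement): add the double-width product a*b into the three-word column accumulator c2:c1:c0, rippling carries upward. A sketch with 32-bit limbs for brevity, where the MIPS code uses 64-bit limbs and dmultu:

	#include <stdint.h>

	typedef uint32_t BN_ULONG;   /* 32-bit here; the code above is 64-bit */
	typedef uint64_t BN_ULLONG;

	/* c2:c1:c0 += a*b -- one comba column step */
	#define mul_add_c(a, b, c0, c1, c2)             \
	    do {                                        \
	        BN_ULLONG t = (BN_ULLONG)(a) * (b);     \
	        BN_ULONG lo = (BN_ULONG)t;              \
	        BN_ULONG hi = (BN_ULONG)(t >> 32);      \
	        (c0) += lo; hi += ((c0) < lo);          \
	        (c1) += hi; (c2) += ((c1) < hi);        \
	    } while (0)

mul_add_c2, which the squaring routines use for the a[i]*a[j] (i != j) cross terms, adds the same product twice; in the assembly that doubling is the slt ...,zero / dsll ...,1 pairs, which shift hi:lo left one bit and catch the bit shifted out of each half.
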
-	slt	AT,t_2,zero -	daddu	c_2,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,40(a0) - -	dmultu	a_6,a_0		/* mul_add_c2(a[6],b[0],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_3,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_5,a_1		/* mul_add_c2(a[5],b[1],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_3,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_4,a_2		/* mul_add_c2(a[4],b[2],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_3,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_3,a_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	sd	c_1,48(a0) - -	dmultu	a_0,a_7		/* mul_add_c2(a[0],b[7],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_1,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_1,a_6		/* mul_add_c2(a[1],b[6],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_1,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_2,a_5		/* mul_add_c2(a[2],b[5],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_1,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_3,a_4		/* mul_add_c2(a[3],b[4],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_1,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	sd	c_2,56(a0) - -	dmultu	a_7,a_1		/* mul_add_c2(a[7],b[1],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_2,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_6,a_2		/* mul_add_c2(a[6],b[2],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_2,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_5,a_3		/* mul_add_c2(a[5],b[3],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_2,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_4,a_4		/* mul_add_c(a[4],b[4],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,64(a0) - -	dmultu	a_2,a_7		/* mul_add_c2(a[2],b[7],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_3,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_3,a_6		/* 
mul_add_c2(a[3],b[6],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_3,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_4,a_5		/* mul_add_c2(a[4],b[5],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_3,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	sd	c_1,72(a0) - -	dmultu	a_7,a_3		/* mul_add_c2(a[7],b[3],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_1,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_6,a_4		/* mul_add_c2(a[6],b[4],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_1,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_5,a_5		/* mul_add_c(a[5],b[5],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	sd	c_2,80(a0) - -	dmultu	a_4,a_7		/* mul_add_c2(a[4],b[7],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_2,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_5,a_6		/* mul_add_c2(a[5],b[6],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_2,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,88(a0) - -	dmultu	a_7,a_5		/* mul_add_c2(a[7],b[5],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_3,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_6,a_6		/* mul_add_c(a[6],b[6],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	sd	c_1,96(a0) - -	dmultu	a_6,a_7		/* mul_add_c2(a[6],b[7],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_1,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	sd	c_2,104(a0) - -	dmultu	a_7,a_7		/* mul_add_c(a[7],b[7],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sd	c_3,112(a0) -	sd	c_1,120(a0) - -	jr	ra -END(bn_sqr_comba8) - -.align	5 -LEAF(bn_sqr_comba4) -	.set	reorder -	ld	a_0,0(a1) -	ld	a_1,8(a1) -	ld	a_2,16(a1) -	ld	a_3,24(a1) -	dmultu	a_0,a_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */ -	mflo	c_1 -	mfhi	c_2 -	sd	c_1,0(a0) - -	dmultu	a_0,a_1		/* mul_add_c2(a[0],b[1],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_1,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	c_3,t_2,AT -	sd	c_2,8(a0) - -	dmultu	a_2,a_0		/* mul_add_c2(a[2],b[0],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_2,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	dmultu	a_1,a_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	
AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,16(a0) - -	dmultu	a_0,a_3		/* mul_add_c2(a[0],b[3],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_3,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	dmultu	a_1,a_2		/* mul_add_c(a2[1],b[2],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	slt	AT,t_2,zero -	daddu	c_3,AT -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sltu	AT,c_2,t_2 -	daddu	c_3,AT -	sd	c_1,24(a0) - -	dmultu	a_3,a_1		/* mul_add_c2(a[3],b[1],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_1,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	dmultu	a_2,a_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_2,t_1 -	sltu	AT,c_2,t_1 -	daddu	t_2,AT -	daddu	c_3,t_2 -	sltu	AT,c_3,t_2 -	daddu	c_1,AT -	sd	c_2,32(a0) - -	dmultu	a_2,a_3		/* mul_add_c2(a[2],b[3],c3,c1,c2); */ -	mflo	t_1 -	mfhi	t_2 -	slt	c_2,t_2,zero -	dsll	t_2,1 -	slt	a2,t_1,zero -	daddu	t_2,a2 -	dsll	t_1,1 -	daddu	c_3,t_1 -	sltu	AT,c_3,t_1 -	daddu	t_2,AT -	daddu	c_1,t_2 -	sltu	AT,c_1,t_2 -	daddu	c_2,AT -	sd	c_3,40(a0) - -	dmultu	a_3,a_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */ -	mflo	t_1 -	mfhi	t_2 -	daddu	c_1,t_1 -	sltu	AT,c_1,t_1 -	daddu	t_2,AT -	daddu	c_2,t_2 -	sd	c_1,48(a0) -	sd	c_2,56(a0) - -	jr	ra -END(bn_sqr_comba4) diff --git a/app/openssl/crypto/bn/asm/pa-risc2.S b/app/openssl/crypto/bn/asm/pa-risc2.S deleted file mode 100644 index f3b16290..00000000 --- a/app/openssl/crypto/bn/asm/pa-risc2.S +++ /dev/null @@ -1,1618 +0,0 @@ -; -; PA-RISC 2.0 implementation of bn_asm code, based on the -; 64-bit version of the code.  This code is effectively the -; same as the 64-bit version except the register model is -; slightly different given all values must be 32-bit between -; function calls.  Thus the 64-bit return values are returned -; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit -; -; -; This code is approximately 2x faster than the C version -; for RSA/DSA. -; -; See http://devresource.hp.com/  for more details on the PA-RISC -; architecture.  Also see the book "PA-RISC 2.0 Architecture" -; by Gerry Kane for information on the instruction set architecture. -; -; Code written by Chris Ruemmler (with some help from the HP C -; compiler). -; -; The code compiles with HP's assembler -; - -	.level	2.0N -	.space	$TEXT$ -	.subspa	$CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY - -; -; Global Register definitions used for the routines. -; -; Some information about HP's runtime architecture for 32-bits. -; -; "Caller save" means the calling function must save the register -; if it wants the register to be preserved. -; "Callee save" means if a function uses the register, it must save -; the value before using it. 
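The comba routines deleted above are long unrolled sequences of just two primitives, which the inline comments name mul_add_c (add one product a[i]*b[j] into a three-word column accumulator) and mul_add_c2 (add a cross product twice, as required for the off-diagonal terms of a square). A rough C sketch of what each annotated block computes, assuming a compiler with the unsigned __int128 extension; the _ref names are illustrative, not OpenSSL's actual macros:

    #include <stdint.h>

    /* (c3:c2:c1) is the three-word column accumulator the comba code keeps
     * in registers; the carries ripple upward exactly as the daddu/sltu
     * pairs do in the MIPS code above. */
    static void mul_add_c_ref(uint64_t *c1, uint64_t *c2, uint64_t *c3,
                              uint64_t a, uint64_t b)
    {
        unsigned __int128 t = (unsigned __int128)a * b;
        uint64_t lo = (uint64_t)t, hi = (uint64_t)(t >> 64);

        *c1 += lo;              /* daddu c_1,t_1                  */
        hi  += (*c1 < lo);      /* sltu AT,c_1,t_1 ; daddu t_2,AT */
        *c2 += hi;              /* daddu c_2,t_2                  */
        *c3 += (*c2 < hi);      /* sltu AT,c_2,t_2 ; daddu c_3,AT */
    }

    /* mul_add_c2 doubles the product first (the slt ...,zero / dsll ...,1
     * pairs above, which catch the shifted-out top bits); adding the
     * product twice is an equivalent rendering. */
    static void mul_add_c2_ref(uint64_t *c1, uint64_t *c2, uint64_t *c3,
                               uint64_t a, uint64_t b)
    {
        mul_add_c_ref(c1, c2, c3, a, b);
        mul_add_c_ref(c1, c2, c3, a, b);
    }

bn_sqr_comba8 and bn_sqr_comba4 then simply walk the output columns, using the doubled form for every a[i]*a[j] with i != j and the single form for the diagonal terms, storing one result word per column (the sd c_x,N(a0) instructions).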
-; -; For the floating point registers  -; -;    "caller save" registers: fr4-fr11, fr22-fr31 -;    "callee save" registers: fr12-fr21 -;    "special" registers: fr0-fr3 (status and exception registers) -; -; For the integer registers -;     value zero             :  r0 -;     "caller save" registers: r1,r19-r26 -;     "callee save" registers: r3-r18 -;     return register        :  r2  (rp) -;     return values          ; r28,r29  (ret0,ret1) -;     Stack pointer          ; r30  (sp)  -;     millicode return ptr   ; r31  (also a caller save register) - - -; -; Arguments to the routines -; -r_ptr       .reg %r26 -a_ptr       .reg %r25 -b_ptr       .reg %r24 -num         .reg %r24 -n           .reg %r23 - -; -; Note that the "w" argument for bn_mul_add_words and bn_mul_words -; is passed on the stack at a delta of -56 from the top of stack -; as the routine is entered. -; - -; -; Globals used in some routines -; - -top_overflow .reg %r23 -high_mask    .reg %r22    ; value 0xffffffff80000000L - - -;------------------------------------------------------------------------------ -; -; bn_mul_add_words -; -;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,  -;								int num, BN_ULONG w) -; -; arg0 = r_ptr -; arg1 = a_ptr -; arg3 = num -; -56(sp) =  w -; -; Local register definitions -; - -fm1          .reg %fr22 -fm           .reg %fr23 -ht_temp      .reg %fr24 -ht_temp_1    .reg %fr25 -lt_temp      .reg %fr26 -lt_temp_1    .reg %fr27 -fm1_1        .reg %fr28 -fm_1         .reg %fr29 - -fw_h         .reg %fr7L -fw_l         .reg %fr7R -fw           .reg %fr7 - -fht_0        .reg %fr8L -flt_0        .reg %fr8R -t_float_0    .reg %fr8 - -fht_1        .reg %fr9L -flt_1        .reg %fr9R -t_float_1    .reg %fr9 - -tmp_0        .reg %r31 -tmp_1        .reg %r21 -m_0          .reg %r20  -m_1          .reg %r19  -ht_0         .reg %r1   -ht_1         .reg %r3 -lt_0         .reg %r4 -lt_1         .reg %r5 -m1_0         .reg %r6  -m1_1         .reg %r7  -rp_val       .reg %r8 -rp_val_1     .reg %r9 - -bn_mul_add_words -	.export	bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN -	.proc -	.callinfo frame=128 -    .entry -	.align 64 - -    STD     %r3,0(%sp)          ; save r3   -    STD     %r4,8(%sp)          ; save r4   -	NOP                         ; Needed to make the loop 16-byte aligned -	NOP                         ; needed to make the loop 16-byte aligned - -    STD     %r5,16(%sp)         ; save r5   -	NOP -    STD     %r6,24(%sp)         ; save r6   -    STD     %r7,32(%sp)         ; save r7   - -    STD     %r8,40(%sp)         ; save r8   -    STD     %r9,48(%sp)         ; save r9   -    COPY    %r0,%ret1           ; return 0 by default -    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32     - -    CMPIB,>= 0,num,bn_mul_add_words_exit  ; if (num <= 0) then exit -	LDO     128(%sp),%sp        ; bump stack - -	; -	; The loop is unrolled twice, so if there is only 1 number -    ; then go straight to the cleanup code. -	; -	CMPIB,= 1,num,bn_mul_add_words_single_top -	FLDD    -184(%sp),fw        ; (-56-128) load up w into fw (fw_h/fw_l) - -	; -	; This loop is unrolled 2 times (64-byte aligned as well) -	; -	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus -    ; two 32-bit mutiplies can be issued per cycle. 
-    ;  -bn_mul_add_words_unroll2 - -    FLDD    0(a_ptr),t_float_0       ; load up 64-bit value (fr8L) ht(L)/lt(R) -    FLDD    8(a_ptr),t_float_1       ; load up 64-bit value (fr8L) ht(L)/lt(R) -    LDD     0(r_ptr),rp_val          ; rp[0] -    LDD     8(r_ptr),rp_val_1        ; rp[1] - -    XMPYU   fht_0,fw_l,fm1           ; m1[0] = fht_0*fw_l -    XMPYU   fht_1,fw_l,fm1_1         ; m1[1] = fht_1*fw_l -    FSTD    fm1,-16(%sp)             ; -16(sp) = m1[0] -    FSTD    fm1_1,-48(%sp)           ; -48(sp) = m1[1] - -    XMPYU   flt_0,fw_h,fm            ; m[0] = flt_0*fw_h -    XMPYU   flt_1,fw_h,fm_1          ; m[1] = flt_1*fw_h -    FSTD    fm,-8(%sp)               ; -8(sp) = m[0] -    FSTD    fm_1,-40(%sp)            ; -40(sp) = m[1] - -    XMPYU   fht_0,fw_h,ht_temp       ; ht_temp   = fht_0*fw_h -    XMPYU   fht_1,fw_h,ht_temp_1     ; ht_temp_1 = fht_1*fw_h -    FSTD    ht_temp,-24(%sp)         ; -24(sp)   = ht_temp -    FSTD    ht_temp_1,-56(%sp)       ; -56(sp)   = ht_temp_1 - -    XMPYU   flt_0,fw_l,lt_temp       ; lt_temp = lt*fw_l -    XMPYU   flt_1,fw_l,lt_temp_1     ; lt_temp = lt*fw_l -    FSTD    lt_temp,-32(%sp)         ; -32(sp) = lt_temp  -    FSTD    lt_temp_1,-64(%sp)       ; -64(sp) = lt_temp_1  - -    LDD     -8(%sp),m_0              ; m[0]  -    LDD     -40(%sp),m_1             ; m[1] -    LDD     -16(%sp),m1_0            ; m1[0] -    LDD     -48(%sp),m1_1            ; m1[1] - -    LDD     -24(%sp),ht_0            ; ht[0] -    LDD     -56(%sp),ht_1            ; ht[1] -    ADD,L   m1_0,m_0,tmp_0           ; tmp_0 = m[0] + m1[0];  -    ADD,L   m1_1,m_1,tmp_1           ; tmp_1 = m[1] + m1[1];  - -    LDD     -32(%sp),lt_0             -    LDD     -64(%sp),lt_1             -    CMPCLR,*>>= tmp_0,m1_0, %r0      ; if (m[0] < m1[0]) -    ADD,L   ht_0,top_overflow,ht_0   ; ht[0] += (1<<32) - -    CMPCLR,*>>= tmp_1,m1_1,%r0       ; if (m[1] < m1[1]) -    ADD,L   ht_1,top_overflow,ht_1   ; ht[1] += (1<<32) -    EXTRD,U tmp_0,31,32,m_0          ; m[0]>>32   -    DEPD,Z  tmp_0,31,32,m1_0         ; m1[0] = m[0]<<32  - -    EXTRD,U tmp_1,31,32,m_1          ; m[1]>>32   -    DEPD,Z  tmp_1,31,32,m1_1         ; m1[1] = m[1]<<32  -    ADD,L   ht_0,m_0,ht_0            ; ht[0]+= (m[0]>>32) -    ADD,L   ht_1,m_1,ht_1            ; ht[1]+= (m[1]>>32) - -    ADD     lt_0,m1_0,lt_0           ; lt[0] = lt[0]+m1[0]; -	ADD,DC  ht_0,%r0,ht_0            ; ht[0]++ -    ADD     lt_1,m1_1,lt_1           ; lt[1] = lt[1]+m1[1]; -    ADD,DC  ht_1,%r0,ht_1            ; ht[1]++ - -    ADD    %ret1,lt_0,lt_0           ; lt[0] = lt[0] + c; -	ADD,DC  ht_0,%r0,ht_0            ; ht[0]++ -    ADD     lt_0,rp_val,lt_0         ; lt[0] = lt[0]+rp[0] -    ADD,DC  ht_0,%r0,ht_0            ; ht[0]++ - -	LDO    -2(num),num               ; num = num - 2; -    ADD     ht_0,lt_1,lt_1           ; lt[1] = lt[1] + ht_0 (c); -    ADD,DC  ht_1,%r0,ht_1            ; ht[1]++ -    STD     lt_0,0(r_ptr)            ; rp[0] = lt[0] - -    ADD     lt_1,rp_val_1,lt_1       ; lt[1] = lt[1]+rp[1] -    ADD,DC  ht_1,%r0,%ret1           ; ht[1]++ -    LDO     16(a_ptr),a_ptr          ; a_ptr += 2 - -    STD     lt_1,8(r_ptr)            ; rp[1] = lt[1] -	CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do -    LDO     16(r_ptr),r_ptr          ; r_ptr += 2 - -    CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one - -	; -	; Top of loop aligned on 64-byte boundary -	; -bn_mul_add_words_single_top -    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R) -    LDD     0(r_ptr),rp_val     
      ; rp[0] -    LDO     8(a_ptr),a_ptr            ; a_ptr++ -    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l -    FSTD    fm1,-16(%sp)              ; -16(sp) = m1 -    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h -    FSTD    fm,-8(%sp)                ; -8(sp) = m -    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h -    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht -    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l -    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt  - -    LDD     -8(%sp),m_0                -    LDD    -16(%sp),m1_0              ; m1 = temp1  -    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1;  -    LDD     -24(%sp),ht_0              -    LDD     -32(%sp),lt_0              - -    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (m < m1) -    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32) - -    EXTRD,U tmp_0,31,32,m_0           ; m>>32   -    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32  - -    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32) -    ADD     lt_0,m1_0,tmp_0           ; tmp_0 = lt+m1; -    ADD,DC  ht_0,%r0,ht_0             ; ht++ -    ADD     %ret1,tmp_0,lt_0          ; lt = lt + c; -    ADD,DC  ht_0,%r0,ht_0             ; ht++ -    ADD     lt_0,rp_val,lt_0          ; lt = lt+rp[0] -    ADD,DC  ht_0,%r0,%ret1            ; ht++ -    STD     lt_0,0(r_ptr)             ; rp[0] = lt - -bn_mul_add_words_exit -    .EXIT -	 -    EXTRD,U %ret1,31,32,%ret0         ; for 32-bit, return in ret0/ret1 -    LDD     -80(%sp),%r9              ; restore r9   -    LDD     -88(%sp),%r8              ; restore r8   -    LDD     -96(%sp),%r7              ; restore r7   -    LDD     -104(%sp),%r6             ; restore r6   -    LDD     -112(%sp),%r5             ; restore r5   -    LDD     -120(%sp),%r4             ; restore r4   -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3             ; restore r3 -	.PROCEND	;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -; -; arg0 = rp -; arg1 = ap -; arg3 = num -; w on stack at -56(sp) - -bn_mul_words -	.proc -	.callinfo frame=128 -    .entry -	.EXPORT	bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -	.align 64 - -    STD     %r3,0(%sp)          ; save r3   -    STD     %r4,8(%sp)          ; save r4   -	NOP -    STD     %r5,16(%sp)         ; save r5   - -    STD     %r6,24(%sp)         ; save r6   -    STD     %r7,32(%sp)         ; save r7   -    COPY    %r0,%ret1           ; return 0 by default -    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32     - -    CMPIB,>= 0,num,bn_mul_words_exit -	LDO     128(%sp),%sp    ; bump stack - -	; -	; See if only 1 word to do, thus just do cleanup -	; -	CMPIB,= 1,num,bn_mul_words_single_top -	FLDD    -184(%sp),fw        ; (-56-128) load up w into fw (fw_h/fw_l) - -	; -	; This loop is unrolled 2 times (64-byte aligned as well) -	; -	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus -    ; two 32-bit mutiplies can be issued per cycle. 
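Stripped of the scheduling, the loop that follows implements one multiply-and-carry pass over the input words. A minimal C reference, taking BN_ULONG as the 64-bit word the STD/LDD instructions move and using unsigned __int128 in place of the XMPYU partial products (the _ref name is illustrative):

    #include <stdint.h>

    /* rp[i] = low word of ap[i]*w + carry; the final carry is the return
     * value, which this 32-bit-ABI file hands back split across
     * %ret0/%ret1. */
    uint64_t bn_mul_words_ref(uint64_t *rp, const uint64_t *ap,
                              int num, uint64_t w)
    {
        uint64_t carry = 0;
        for (int i = 0; i < num; i++) {
            unsigned __int128 t = (unsigned __int128)ap[i] * w + carry;
            rp[i] = (uint64_t)t;
            carry = (uint64_t)(t >> 64);
        }
        return carry;
    }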
-    ;  -bn_mul_words_unroll2 - -    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R) -    FLDD    8(a_ptr),t_float_1        ; load up 64-bit value (fr8L) ht(L)/lt(R) -    XMPYU   fht_0,fw_l,fm1            ; m1[0] = fht_0*fw_l -    XMPYU   fht_1,fw_l,fm1_1          ; m1[1] = ht*fw_l - -    FSTD    fm1,-16(%sp)              ; -16(sp) = m1 -    FSTD    fm1_1,-48(%sp)            ; -48(sp) = m1 -    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h -    XMPYU   flt_1,fw_h,fm_1           ; m = lt*fw_h - -    FSTD    fm,-8(%sp)                ; -8(sp) = m -    FSTD    fm_1,-40(%sp)             ; -40(sp) = m -    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = fht_0*fw_h -    XMPYU   fht_1,fw_h,ht_temp_1      ; ht_temp = ht*fw_h - -    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht -    FSTD    ht_temp_1,-56(%sp)        ; -56(sp) = ht -    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l -    XMPYU   flt_1,fw_l,lt_temp_1      ; lt_temp = lt*fw_l - -    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt  -    FSTD    lt_temp_1,-64(%sp)        ; -64(sp) = lt  -    LDD     -8(%sp),m_0                -    LDD     -40(%sp),m_1               - -    LDD    -16(%sp),m1_0               -    LDD    -48(%sp),m1_1               -    LDD     -24(%sp),ht_0              -    LDD     -56(%sp),ht_1              - -    ADD,L   m1_0,m_0,tmp_0            ; tmp_0 = m + m1;  -    ADD,L   m1_1,m_1,tmp_1            ; tmp_1 = m + m1;  -    LDD     -32(%sp),lt_0              -    LDD     -64(%sp),lt_1              - -    CMPCLR,*>>= tmp_0,m1_0, %r0       ; if (m < m1) -    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32) -    CMPCLR,*>>= tmp_1,m1_1,%r0        ; if (m < m1) -    ADD,L   ht_1,top_overflow,ht_1    ; ht += (1<<32) - -    EXTRD,U tmp_0,31,32,m_0           ; m>>32   -    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32  -    EXTRD,U tmp_1,31,32,m_1           ; m>>32   -    DEPD,Z  tmp_1,31,32,m1_1          ; m1 = m<<32  - -    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32) -    ADD,L   ht_1,m_1,ht_1             ; ht+= (m>>32) -    ADD     lt_0,m1_0,lt_0            ; lt = lt+m1; -	ADD,DC  ht_0,%r0,ht_0             ; ht++ - -    ADD     lt_1,m1_1,lt_1            ; lt = lt+m1; -    ADD,DC  ht_1,%r0,ht_1             ; ht++ -    ADD    %ret1,lt_0,lt_0            ; lt = lt + c (ret1); -	ADD,DC  ht_0,%r0,ht_0             ; ht++ - -    ADD     ht_0,lt_1,lt_1            ; lt = lt + c (ht_0) -    ADD,DC  ht_1,%r0,ht_1             ; ht++ -    STD     lt_0,0(r_ptr)             ; rp[0] = lt -    STD     lt_1,8(r_ptr)             ; rp[1] = lt - -	COPY    ht_1,%ret1                ; carry = ht -	LDO    -2(num),num                ; num = num - 2; -    LDO     16(a_ptr),a_ptr           ; ap += 2 -	CMPIB,<= 2,num,bn_mul_words_unroll2 -    LDO     16(r_ptr),r_ptr           ; rp++ - -    CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? 
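Each 64-by-64 product in the loop above is assembled from four 32-bit XMPYU multiplies plus a carry fix-up for the middle terms. The same computation in portable C, as a sketch (mul64 is an illustrative name; the comments map back to the instructions above):

    #include <stdint.h>

    static void mul64(uint64_t a, uint64_t w, uint64_t *lo, uint64_t *hi)
    {
        uint64_t ah = a >> 32, al = a & 0xffffffffu;
        uint64_t wh = w >> 32, wl = w & 0xffffffffu;

        uint64_t m1 = ah * wl;             /* XMPYU fht_0,fw_l,fm1      */
        uint64_t m  = al * wh;             /* XMPYU flt_0,fw_h,fm       */
        uint64_t ht = ah * wh;             /* XMPYU fht_0,fw_h,ht_temp  */
        uint64_t lt = al * wl;             /* XMPYU flt_0,fw_l,lt_temp  */

        uint64_t mid = m + m1;             /* ADD,L m1_0,m_0,tmp_0      */
        if (mid < m1)                      /* CMPCLR,*>>= tmp_0,m1_0    */
            ht += (uint64_t)1 << 32;       /* ADD,L ht_0,top_overflow   */

        ht += mid >> 32;                   /* ADD,L ht_0,m_0,ht_0       */
        uint64_t lo64 = lt + (mid << 32);  /* ADD lt_0,m1_0,lt_0        */
        ht += (lo64 < lt);                 /* ADD,DC ht_0,%r0,ht_0      */

        *lo = lo64;
        *hi = ht;
    }

The CMPCLR,*>>= / ADD,L pair is the branch-free form of the if: the compare nullifies the following add unless the sum of the two middle products wrapped, in which case the lost 2^64 reappears as top_overflow (1 << 32) in the high word.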
- -	; -	; Top of loop aligned on 64-byte boundary -	; -bn_mul_words_single_top -    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R) - -    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l -    FSTD    fm1,-16(%sp)              ; -16(sp) = m1 -    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h -    FSTD    fm,-8(%sp)                ; -8(sp) = m -    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h -    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht -    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l -    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt  - -    LDD     -8(%sp),m_0                -    LDD    -16(%sp),m1_0               -    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1;  -    LDD     -24(%sp),ht_0              -    LDD     -32(%sp),lt_0              - -    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (m < m1) -    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32) - -    EXTRD,U tmp_0,31,32,m_0           ; m>>32   -    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32  - -    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32) -    ADD     lt_0,m1_0,lt_0            ; lt= lt+m1; -    ADD,DC  ht_0,%r0,ht_0             ; ht++ - -    ADD     %ret1,lt_0,lt_0           ; lt = lt + c; -    ADD,DC  ht_0,%r0,ht_0             ; ht++ - -    COPY    ht_0,%ret1                ; copy carry -    STD     lt_0,0(r_ptr)             ; rp[0] = lt - -bn_mul_words_exit -    .EXIT -    EXTRD,U %ret1,31,32,%ret0           ; for 32-bit, return in ret0/ret1 -    LDD     -96(%sp),%r7              ; restore r7   -    LDD     -104(%sp),%r6             ; restore r6   -    LDD     -112(%sp),%r5             ; restore r5   -    LDD     -120(%sp),%r4             ; restore r4   -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3             ; restore r3 -	.PROCEND	 - -;---------------------------------------------------------------------------- -; -;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; - -bn_sqr_words -	.proc -	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .entry -	.align 64 - -    STD     %r3,0(%sp)          ; save r3   -    STD     %r4,8(%sp)          ; save r4   -	NOP -    STD     %r5,16(%sp)         ; save r5   - -    CMPIB,>= 0,num,bn_sqr_words_exit -	LDO     128(%sp),%sp       ; bump stack - -	; -	; If only 1, the goto straight to cleanup -	; -	CMPIB,= 1,num,bn_sqr_words_single_top -    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L - -	; -	; This loop is unrolled 2 times (64-byte aligned as well) -	; - -bn_sqr_words_unroll2 -    FLDD    0(a_ptr),t_float_0        ; a[0] -    FLDD    8(a_ptr),t_float_1        ; a[1] -    XMPYU   fht_0,flt_0,fm            ; m[0] -    XMPYU   fht_1,flt_1,fm_1          ; m[1] - -    FSTD    fm,-24(%sp)               ; store m[0] -    FSTD    fm_1,-56(%sp)             ; store m[1] -    XMPYU   flt_0,flt_0,lt_temp       ; lt[0] -    XMPYU   flt_1,flt_1,lt_temp_1     ; lt[1] - -    FSTD    lt_temp,-16(%sp)          ; store lt[0] -    FSTD    lt_temp_1,-48(%sp)        ; store lt[1] -    XMPYU   fht_0,fht_0,ht_temp       ; ht[0] -    XMPYU   fht_1,fht_1,ht_temp_1     ; ht[1] - -    FSTD    ht_temp,-8(%sp)           ; store ht[0] -    FSTD    ht_temp_1,-40(%sp)        ; store ht[1] -    LDD     -24(%sp),m_0              -    LDD     -56(%sp),m_1               - -    AND     m_0,high_mask,tmp_0       ; m[0] & Mask -    AND     m_1,high_mask,tmp_1       ; m[1] & Mask -    DEPD,Z  m_0,30,31,m_0 
            ; m[0] << 32+1 -    DEPD,Z  m_1,30,31,m_1             ; m[1] << 32+1 - -    LDD     -16(%sp),lt_0         -    LDD     -48(%sp),lt_1         -    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m[0]&Mask >> 32-1 -    EXTRD,U tmp_1,32,33,tmp_1         ; tmp_1 = m[1]&Mask >> 32-1 - -    LDD     -8(%sp),ht_0             -    LDD     -40(%sp),ht_1            -    ADD,L   ht_0,tmp_0,ht_0           ; ht[0] += tmp_0 -    ADD,L   ht_1,tmp_1,ht_1           ; ht[1] += tmp_1 - -    ADD     lt_0,m_0,lt_0             ; lt = lt+m -    ADD,DC  ht_0,%r0,ht_0             ; ht[0]++ -    STD     lt_0,0(r_ptr)             ; rp[0] = lt[0] -    STD     ht_0,8(r_ptr)             ; rp[1] = ht[1] - -    ADD     lt_1,m_1,lt_1             ; lt = lt+m -    ADD,DC  ht_1,%r0,ht_1             ; ht[1]++ -    STD     lt_1,16(r_ptr)            ; rp[2] = lt[1] -    STD     ht_1,24(r_ptr)            ; rp[3] = ht[1] - -	LDO    -2(num),num                ; num = num - 2; -    LDO     16(a_ptr),a_ptr           ; ap += 2 -	CMPIB,<= 2,num,bn_sqr_words_unroll2 -    LDO     32(r_ptr),r_ptr           ; rp += 4 - -    CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? - -	; -	; Top of loop aligned on 64-byte boundary -	; -bn_sqr_words_single_top -    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R) - -    XMPYU   fht_0,flt_0,fm            ; m -    FSTD    fm,-24(%sp)               ; store m - -    XMPYU   flt_0,flt_0,lt_temp       ; lt -    FSTD    lt_temp,-16(%sp)          ; store lt - -    XMPYU   fht_0,fht_0,ht_temp       ; ht -    FSTD    ht_temp,-8(%sp)           ; store ht - -    LDD     -24(%sp),m_0              ; load m -    AND     m_0,high_mask,tmp_0       ; m & Mask -    DEPD,Z  m_0,30,31,m_0             ; m << 32+1 -    LDD     -16(%sp),lt_0             ; lt - -    LDD     -8(%sp),ht_0              ; ht -    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m&Mask >> 32-1 -    ADD     m_0,lt_0,lt_0             ; lt = lt+m -    ADD,L   ht_0,tmp_0,ht_0           ; ht += tmp_0 -    ADD,DC  ht_0,%r0,ht_0             ; ht++ - -    STD     lt_0,0(r_ptr)             ; rp[0] = lt -    STD     ht_0,8(r_ptr)             ; rp[1] = ht - -bn_sqr_words_exit -    .EXIT -    LDD     -112(%sp),%r5       ; restore r5   -    LDD     -120(%sp),%r4       ; restore r4   -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3  -	.PROCEND	;in=23,24,25,26,29;out=28; - - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp  -; arg1 = ap -; arg2 = bp  -; arg3 = n - -t  .reg %r22 -b  .reg %r21 -l  .reg %r20 - -bn_add_words -	.proc -    .entry -	.callinfo -	.EXPORT	bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -	.align 64 - -    CMPIB,>= 0,n,bn_add_words_exit -    COPY    %r0,%ret1           ; return 0 by default - -	; -	; If 2 or more numbers do the loop -	; -	CMPIB,= 1,n,bn_add_words_single_top -	NOP - -	; -	; This loop is unrolled 2 times (64-byte aligned as well) -	; -bn_add_words_unroll2 -	LDD     0(a_ptr),t -	LDD     0(b_ptr),b -	ADD     t,%ret1,t                    ; t = t+c; -	ADD,DC  %r0,%r0,%ret1                ; set c to carry -	ADD     t,b,l                        ; l = t + b[0] -	ADD,DC  %ret1,%r0,%ret1              ; c+= carry -	STD     l,0(r_ptr) - -	LDD     8(a_ptr),t -	LDD     8(b_ptr),b -	ADD     t,%ret1,t                     ; t = t+c; -	ADD,DC  %r0,%r0,%ret1                 ; set c to carry -	ADD     t,b,l                         ; l = t + b[0] -	ADD,DC  %ret1,%r0,%ret1               ; c+= 
carry -	STD     l,8(r_ptr) - -	LDO     -2(n),n -	LDO     16(a_ptr),a_ptr -	LDO     16(b_ptr),b_ptr - -	CMPIB,<= 2,n,bn_add_words_unroll2 -	LDO     16(r_ptr),r_ptr - -    CMPIB,=,N 0,n,bn_add_words_exit ; are we done? - -bn_add_words_single_top -	LDD     0(a_ptr),t -	LDD     0(b_ptr),b - -	ADD     t,%ret1,t                 ; t = t+c; -	ADD,DC  %r0,%r0,%ret1             ; set c to carry (could use CMPCLR??) -	ADD     t,b,l                     ; l = t + b[0] -	ADD,DC  %ret1,%r0,%ret1           ; c+= carry -	STD     l,0(r_ptr) - -bn_add_words_exit -    .EXIT -    BVE     (%rp) -    EXTRD,U %ret1,31,32,%ret0           ; for 32-bit, return in ret0/ret1 -	.PROCEND	;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp  -; arg1 = ap -; arg2 = bp  -; arg3 = n - -t1       .reg %r22 -t2       .reg %r21 -sub_tmp1 .reg %r20 -sub_tmp2 .reg %r19 - - -bn_sub_words -	.proc -	.callinfo  -	.EXPORT	bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .entry -	.align 64 - -    CMPIB,>=  0,n,bn_sub_words_exit -    COPY    %r0,%ret1           ; return 0 by default - -	; -	; If 2 or more numbers do the loop -	; -	CMPIB,= 1,n,bn_sub_words_single_top -	NOP - -	; -	; This loop is unrolled 2 times (64-byte aligned as well) -	; -bn_sub_words_unroll2 -	LDD     0(a_ptr),t1 -	LDD     0(b_ptr),t2 -	SUB     t1,t2,sub_tmp1           ; t3 = t1-t2;  -	SUB     sub_tmp1,%ret1,sub_tmp1  ; t3 = t3- c;  - -	CMPCLR,*>> t1,t2,sub_tmp2        ; clear if t1 > t2 -	LDO      1(%r0),sub_tmp2 -	 -	CMPCLR,*= t1,t2,%r0 -	COPY    sub_tmp2,%ret1 -	STD     sub_tmp1,0(r_ptr) - -	LDD     8(a_ptr),t1 -	LDD     8(b_ptr),t2 -	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2;  -	SUB     sub_tmp1,%ret1,sub_tmp1   ; t3 = t3- c;  -	CMPCLR,*>> t1,t2,sub_tmp2         ; clear if t1 > t2 -	LDO      1(%r0),sub_tmp2 -	 -	CMPCLR,*= t1,t2,%r0 -	COPY    sub_tmp2,%ret1 -	STD     sub_tmp1,8(r_ptr) - -	LDO     -2(n),n -	LDO     16(a_ptr),a_ptr -	LDO     16(b_ptr),b_ptr - -	CMPIB,<= 2,n,bn_sub_words_unroll2 -	LDO     16(r_ptr),r_ptr - -    CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? - -bn_sub_words_single_top -	LDD     0(a_ptr),t1 -	LDD     0(b_ptr),t2 -	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2;  -	SUB     sub_tmp1,%ret1,sub_tmp1   ; t3 = t3- c;  -	CMPCLR,*>> t1,t2,sub_tmp2         ; clear if t1 > t2 -	LDO      1(%r0),sub_tmp2 -	 -	CMPCLR,*= t1,t2,%r0 -	COPY    sub_tmp2,%ret1 - -	STD     sub_tmp1,0(r_ptr) - -bn_sub_words_exit -    .EXIT -    BVE     (%rp) -    EXTRD,U %ret1,31,32,%ret0           ; for 32-bit, return in ret0/ret1 -	.PROCEND	;in=23,24,25,26,29;out=28; - -;------------------------------------------------------------------------------ -; -; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) -; -; arg0 = h -; arg1 = l -; arg2 = d -; -; This is mainly just output from the HP C compiler.   
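bn_add_words and bn_sub_words above, and the compiler-generated division below, have compact C descriptions. A sketch under the same assumptions as before (64-bit words, unsigned __int128 available; the assert mirrors the overflow abort whose message string survives at the end of this file):

    #include <stdint.h>
    #include <assert.h>

    /* r[i] = a[i] + b[i] + carry; returns the final carry bit (%ret1). */
    uint64_t bn_add_words_ref(uint64_t *r, const uint64_t *a,
                              const uint64_t *b, int n)
    {
        uint64_t carry = 0;
        for (int i = 0; i < n; i++) {
            uint64_t t = a[i] + carry;
            uint64_t c = (t < carry);   /* ADD,DC: carry out of a[i]+c */
            r[i] = t + b[i];
            carry = c + (r[i] < t);     /* carry out of t+b[i]         */
        }
        return carry;
    }

    /* r[i] = a[i] - b[i] - borrow; returns the final borrow bit.  The
     * CMPCLR chain in the assembly keeps the old borrow when the words
     * are equal, which is what the br | (t < borrow) term reproduces. */
    uint64_t bn_sub_words_ref(uint64_t *r, const uint64_t *a,
                              const uint64_t *b, int n)
    {
        uint64_t borrow = 0;
        for (int i = 0; i < n; i++) {
            uint64_t t  = a[i] - b[i];
            uint64_t br = (a[i] < b[i]);
            r[i] = t - borrow;
            borrow = br | (t < borrow);
        }
        return borrow;
    }

    /* Quotient of the double word h:l by d; h < d must hold or the
     * quotient cannot fit in one word (the original code aborts with
     * "Division would overflow"). */
    uint64_t bn_div_words_ref(uint64_t h, uint64_t l, uint64_t d)
    {
        assert(h < d);
        return (uint64_t)((((unsigned __int128)h << 64) | l) / d);
    }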
-; -;------------------------------------------------------------------------------ -bn_div_words -	.PROC -	.EXPORT	bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN -	.IMPORT	BN_num_bits_word,CODE -	;--- not PIC	.IMPORT	__iob,DATA -	;--- not PIC	.IMPORT	fprintf,CODE -	.IMPORT	abort,CODE -	.IMPORT	$$div2U,MILLICODE -	.CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE -        .ENTRY -        STW     %r2,-20(%r30)   ;offset 0x8ec -        STW,MA  %r3,192(%r30)   ;offset 0x8f0 -        STW     %r4,-188(%r30)  ;offset 0x8f4 -        DEPD    %r5,31,32,%r6   ;offset 0x8f8 -        STD     %r6,-184(%r30)  ;offset 0x8fc -        DEPD    %r7,31,32,%r8   ;offset 0x900 -        STD     %r8,-176(%r30)  ;offset 0x904 -        STW     %r9,-168(%r30)  ;offset 0x908 -        LDD     -248(%r30),%r3  ;offset 0x90c -        COPY    %r26,%r4        ;offset 0x910 -        COPY    %r24,%r5        ;offset 0x914 -        DEPD    %r25,31,32,%r4  ;offset 0x918 -        CMPB,*<>        %r3,%r0,$0006000C       ;offset 0x91c -        DEPD    %r23,31,32,%r5  ;offset 0x920 -        MOVIB,TR        -1,%r29,$00060002       ;offset 0x924 -        EXTRD,U %r29,31,32,%r28 ;offset 0x928 -$0006002A -        LDO     -1(%r29),%r29   ;offset 0x92c -        SUB     %r23,%r7,%r23   ;offset 0x930 -$00060024 -        SUB     %r4,%r31,%r25   ;offset 0x934 -        AND     %r25,%r19,%r26  ;offset 0x938 -        CMPB,*<>,N      %r0,%r26,$00060046      ;offset 0x93c -        DEPD,Z  %r25,31,32,%r20 ;offset 0x940 -        OR      %r20,%r24,%r21  ;offset 0x944 -        CMPB,*<<,N      %r21,%r23,$0006002A     ;offset 0x948 -        SUB     %r31,%r2,%r31   ;offset 0x94c -$00060046 -$0006002E -        DEPD,Z  %r23,31,32,%r25 ;offset 0x950 -        EXTRD,U %r23,31,32,%r26 ;offset 0x954 -        AND     %r25,%r19,%r24  ;offset 0x958 -        ADD,L   %r31,%r26,%r31  ;offset 0x95c -        CMPCLR,*>>=     %r5,%r24,%r0    ;offset 0x960 -        LDO     1(%r31),%r31    ;offset 0x964 -$00060032 -        CMPB,*<<=,N     %r31,%r4,$00060036      ;offset 0x968 -        LDO     -1(%r29),%r29   ;offset 0x96c -        ADD,L   %r4,%r3,%r4     ;offset 0x970 -$00060036 -        ADDIB,=,N       -1,%r8,$D0      ;offset 0x974 -        SUB     %r5,%r24,%r28   ;offset 0x978 -$0006003A -        SUB     %r4,%r31,%r24   ;offset 0x97c -        SHRPD   %r24,%r28,32,%r4        ;offset 0x980 -        DEPD,Z  %r29,31,32,%r9  ;offset 0x984 -        DEPD,Z  %r28,31,32,%r5  ;offset 0x988 -$0006001C -        EXTRD,U %r4,31,32,%r31  ;offset 0x98c -        CMPB,*<>,N      %r31,%r2,$00060020      ;offset 0x990 -        MOVB,TR %r6,%r29,$D1    ;offset 0x994 -        STD     %r29,-152(%r30) ;offset 0x998 -$0006000C -        EXTRD,U %r3,31,32,%r25  ;offset 0x99c -        COPY    %r3,%r26        ;offset 0x9a0 -        EXTRD,U %r3,31,32,%r9   ;offset 0x9a4 -        EXTRD,U %r4,31,32,%r8   ;offset 0x9a8 -        .CALL   ARGW0=GR,ARGW1=GR,RTNVAL=GR     ;in=25,26;out=28; -        B,L     BN_num_bits_word,%r2    ;offset 0x9ac -        EXTRD,U %r5,31,32,%r7   ;offset 0x9b0 -        LDI     64,%r20 ;offset 0x9b4 -        DEPD    %r7,31,32,%r5   ;offset 0x9b8 -        DEPD    %r8,31,32,%r4   ;offset 0x9bc -        DEPD    %r9,31,32,%r3   ;offset 0x9c0 -        CMPB,=  %r28,%r20,$00060012     ;offset 0x9c4 -        COPY    %r28,%r24       ;offset 0x9c8 -        MTSARCM %r24    ;offset 0x9cc -        DEPDI,Z -1,%sar,1,%r19  ;offset 0x9d0 -        CMPB,*>>,N      %r4,%r19,$D2    ;offset 0x9d4 -$00060012 -        SUBI    64,%r24,%r31 
   ;offset 0x9d8 -        CMPCLR,*<<      %r4,%r3,%r0     ;offset 0x9dc -        SUB     %r4,%r3,%r4     ;offset 0x9e0 -$00060016 -        CMPB,=  %r31,%r0,$0006001A      ;offset 0x9e4 -        COPY    %r0,%r9 ;offset 0x9e8 -        MTSARCM %r31    ;offset 0x9ec -        DEPD,Z  %r3,%sar,64,%r3 ;offset 0x9f0 -        SUBI    64,%r31,%r26    ;offset 0x9f4 -        MTSAR   %r26    ;offset 0x9f8 -        SHRPD   %r4,%r5,%sar,%r4        ;offset 0x9fc -        MTSARCM %r31    ;offset 0xa00 -        DEPD,Z  %r5,%sar,64,%r5 ;offset 0xa04 -$0006001A -        DEPDI,Z -1,31,32,%r19   ;offset 0xa08 -        AND     %r3,%r19,%r29   ;offset 0xa0c -        EXTRD,U %r29,31,32,%r2  ;offset 0xa10 -        DEPDI,Z -1,63,32,%r6    ;offset 0xa14 -        MOVIB,TR        2,%r8,$0006001C ;offset 0xa18 -        EXTRD,U %r3,63,32,%r7   ;offset 0xa1c -$D2 -        ;--- not PIC	ADDIL   LR'__iob-$global$,%r27,%r1      ;offset 0xa20 -        ;--- not PIC	LDIL    LR'C$7,%r21     ;offset 0xa24 -        ;--- not PIC	LDO     RR'__iob-$global$+32(%r1),%r26  ;offset 0xa28 -        ;--- not PIC	.CALL   ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR    ;in=24,25,26;out=28; -        ;--- not PIC	B,L     fprintf,%r2     ;offset 0xa2c -        ;--- not PIC	LDO     RR'C$7(%r21),%r25       ;offset 0xa30 -        .CALL           ; -        B,L     abort,%r2       ;offset 0xa34 -        NOP             ;offset 0xa38 -        B       $D3     ;offset 0xa3c -        LDW     -212(%r30),%r2  ;offset 0xa40 -$00060020 -        COPY    %r4,%r26        ;offset 0xa44 -        EXTRD,U %r4,31,32,%r25  ;offset 0xa48 -        COPY    %r2,%r24        ;offset 0xa4c -        .CALL   ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) -        B,L     $$div2U,%r31    ;offset 0xa50 -        EXTRD,U %r2,31,32,%r23  ;offset 0xa54 -        DEPD    %r28,31,32,%r29 ;offset 0xa58 -$00060022 -        STD     %r29,-152(%r30) ;offset 0xa5c -$D1 -        AND     %r5,%r19,%r24   ;offset 0xa60 -        EXTRD,U %r24,31,32,%r24 ;offset 0xa64 -        STW     %r2,-160(%r30)  ;offset 0xa68 -        STW     %r7,-128(%r30)  ;offset 0xa6c -        FLDD    -152(%r30),%fr4 ;offset 0xa70 -        FLDD    -152(%r30),%fr7 ;offset 0xa74 -        FLDW    -160(%r30),%fr8L        ;offset 0xa78 -        FLDW    -128(%r30),%fr5L        ;offset 0xa7c -        XMPYU   %fr8L,%fr7L,%fr10       ;offset 0xa80 -        FSTD    %fr10,-136(%r30)        ;offset 0xa84 -        XMPYU   %fr8L,%fr7R,%fr22       ;offset 0xa88 -        FSTD    %fr22,-144(%r30)        ;offset 0xa8c -        XMPYU   %fr5L,%fr4L,%fr11       ;offset 0xa90 -        XMPYU   %fr5L,%fr4R,%fr23       ;offset 0xa94 -        FSTD    %fr11,-112(%r30)        ;offset 0xa98 -        FSTD    %fr23,-120(%r30)        ;offset 0xa9c -        LDD     -136(%r30),%r28 ;offset 0xaa0 -        DEPD,Z  %r28,31,32,%r31 ;offset 0xaa4 -        LDD     -144(%r30),%r20 ;offset 0xaa8 -        ADD,L   %r20,%r31,%r31  ;offset 0xaac -        LDD     -112(%r30),%r22 ;offset 0xab0 -        DEPD,Z  %r22,31,32,%r22 ;offset 0xab4 -        LDD     -120(%r30),%r21 ;offset 0xab8 -        B       $00060024       ;offset 0xabc -        ADD,L   %r21,%r22,%r23  ;offset 0xac0 -$D0 -        OR      %r9,%r29,%r29   ;offset 0xac4 -$00060040 -        EXTRD,U %r29,31,32,%r28 ;offset 0xac8 -$00060002 -$L2 -        LDW     -212(%r30),%r2  ;offset 0xacc -$D3 -        LDW     -168(%r30),%r9  ;offset 0xad0 -        LDD     -176(%r30),%r8  ;offset 0xad4 -        EXTRD,U %r8,31,32,%r7   ;offset 0xad8 -        LDD     -184(%r30),%r6  ;offset 0xadc -        EXTRD,U %r6,31,32,%r5   
;offset 0xae0 -        LDW     -188(%r30),%r4  ;offset 0xae4 -        BVE     (%r2)   ;offset 0xae8 -        .EXIT -        LDW,MB  -192(%r30),%r3  ;offset 0xaec -	.PROCEND	;in=23,25;out=28,29;fpin=105,107; - - - - -;---------------------------------------------------------------------------- -; -; Registers to hold 64-bit values to manipulate.  The "L" part -; of the register corresponds to the upper 32-bits, while the "R" -; part corresponds to the lower 32-bits -;  -; Note, that when using b6 and b7, the code must save these before -; using them because they are callee save registers  -;  -; -; Floating point registers to use to save values that -; are manipulated.  These don't collide with ftemp1-6 and -; are all caller save registers -; -a0        .reg %fr22 -a0L       .reg %fr22L -a0R       .reg %fr22R - -a1        .reg %fr23 -a1L       .reg %fr23L -a1R       .reg %fr23R - -a2        .reg %fr24 -a2L       .reg %fr24L -a2R       .reg %fr24R - -a3        .reg %fr25 -a3L       .reg %fr25L -a3R       .reg %fr25R - -a4        .reg %fr26 -a4L       .reg %fr26L -a4R       .reg %fr26R - -a5        .reg %fr27 -a5L       .reg %fr27L -a5R       .reg %fr27R - -a6        .reg %fr28 -a6L       .reg %fr28L -a6R       .reg %fr28R - -a7        .reg %fr29 -a7L       .reg %fr29L -a7R       .reg %fr29R - -b0        .reg %fr30 -b0L       .reg %fr30L -b0R       .reg %fr30R - -b1        .reg %fr31 -b1L       .reg %fr31L -b1R       .reg %fr31R - -; -; Temporary floating point variables, these are all caller save -; registers -; -ftemp1    .reg %fr4 -ftemp2    .reg %fr5 -ftemp3    .reg %fr6 -ftemp4    .reg %fr7 - -; -; The B set of registers when used. -; - -b2        .reg %fr8 -b2L       .reg %fr8L -b2R       .reg %fr8R - -b3        .reg %fr9 -b3L       .reg %fr9L -b3R       .reg %fr9R - -b4        .reg %fr10 -b4L       .reg %fr10L -b4R       .reg %fr10R - -b5        .reg %fr11 -b5L       .reg %fr11L -b5R       .reg %fr11R - -b6        .reg %fr12 -b6L       .reg %fr12L -b6R       .reg %fr12R - -b7        .reg %fr13 -b7L       .reg %fr13L -b7R       .reg %fr13R - -c1           .reg %r21   ; only reg -temp1        .reg %r20   ; only reg -temp2        .reg %r19   ; only reg -temp3        .reg %r31   ; only reg - -m1           .reg %r28    -c2           .reg %r23    -high_one     .reg %r1 -ht           .reg %r6 -lt           .reg %r5 -m            .reg %r4 -c3           .reg %r3 - -SQR_ADD_C  .macro  A0L,A0R,C1,C2,C3 -    XMPYU   A0L,A0R,ftemp1       ; m -    FSTD    ftemp1,-24(%sp)      ; store m - -    XMPYU   A0R,A0R,ftemp2       ; lt -    FSTD    ftemp2,-16(%sp)      ; store lt - -    XMPYU   A0L,A0L,ftemp3       ; ht -    FSTD    ftemp3,-8(%sp)       ; store ht - -    LDD     -24(%sp),m           ; load m -    AND     m,high_mask,temp2    ; m & Mask -    DEPD,Z  m,30,31,temp3        ; m << 32+1 -    LDD     -16(%sp),lt          ; lt - -    LDD     -8(%sp),ht           ; ht -    EXTRD,U temp2,32,33,temp1    ; temp1 = m&Mask >> 32-1 -    ADD     temp3,lt,lt          ; lt = lt+m -    ADD,L   ht,temp1,ht          ; ht += temp1 -    ADD,DC  ht,%r0,ht            ; ht++ - -    ADD     C1,lt,C1             ; c1=c1+lt -    ADD,DC  ht,%r0,ht            ; ht++ - -    ADD     C2,ht,C2             ; c2=c2+ht -    ADD,DC  C3,%r0,C3            ; c3++ -.endm - -SQR_ADD_C2 .macro  A0L,A0R,A1L,A1R,C1,C2,C3 -    XMPYU   A0L,A1R,ftemp1          ; m1 = bl*ht -    FSTD    ftemp1,-16(%sp)         ; -    XMPYU   A0R,A1L,ftemp2          ; m = bh*lt -    FSTD    ftemp2,-8(%sp)          ; -    XMPYU   A0R,A1R,ftemp3          ; lt = 
bl*lt -    FSTD    ftemp3,-32(%sp) -    XMPYU   A0L,A1L,ftemp4          ; ht = bh*ht -    FSTD    ftemp4,-24(%sp)         ; - -    LDD     -8(%sp),m               ; r21 = m -    LDD     -16(%sp),m1             ; r19 = m1 -    ADD,L   m,m1,m                  ; m+m1 - -    DEPD,Z  m,31,32,temp3           ; (m+m1<<32) -    LDD     -24(%sp),ht             ; r24 = ht - -    CMPCLR,*>>= m,m1,%r0            ; if (m < m1) -    ADD,L   ht,high_one,ht          ; ht+=high_one - -    EXTRD,U m,31,32,temp1           ; m >> 32 -    LDD     -32(%sp),lt             ; lt -    ADD,L   ht,temp1,ht             ; ht+= m>>32 -    ADD     lt,temp3,lt             ; lt = lt+m1 -    ADD,DC  ht,%r0,ht               ; ht++ - -    ADD     ht,ht,ht                ; ht=ht+ht; -    ADD,DC  C3,%r0,C3               ; add in carry (c3++) - -    ADD     lt,lt,lt                ; lt=lt+lt; -    ADD,DC  ht,%r0,ht               ; add in carry (ht++) - -    ADD     C1,lt,C1                ; c1=c1+lt -    ADD,DC,*NUV ht,%r0,ht           ; add in carry (ht++) -    LDO     1(C3),C3              ; bump c3 if overflow,nullify otherwise - -    ADD     C2,ht,C2                ; c2 = c2 + ht -    ADD,DC  C3,%r0,C3             ; add in carry (c3++) -.endm - -; -;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba8 -	.PROC -	.CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .ENTRY -	.align 64 - -    STD     %r3,0(%sp)          ; save r3 -    STD     %r4,8(%sp)          ; save r4 -    STD     %r5,16(%sp)         ; save r5 -    STD     %r6,24(%sp)         ; save r6 - -	; -	; Zero out carries -	; -	COPY     %r0,c1 -	COPY     %r0,c2 -	COPY     %r0,c3 - -	LDO      128(%sp),%sp       ; bump stack -    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L -    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32 - -	; -	; Load up all of the values we are going to use -	; -    FLDD     0(a_ptr),a0        -    FLDD     8(a_ptr),a1        -    FLDD    16(a_ptr),a2        -    FLDD    24(a_ptr),a3        -    FLDD    32(a_ptr),a4        -    FLDD    40(a_ptr),a5        -    FLDD    48(a_ptr),a6        -    FLDD    56(a_ptr),a7        - -	SQR_ADD_C a0L,a0R,c1,c2,c3 -	STD     c1,0(r_ptr)          ; r[0] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 -	STD     c2,8(r_ptr)          ; r[1] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C a1L,a1R,c3,c1,c2 -	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 -	STD     c3,16(r_ptr)            ; r[2] = c3; -	COPY    %r0,c3 - -	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 -	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 -	STD     c1,24(r_ptr)           ; r[3] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C a2L,a2R,c2,c3,c1 -	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 -	SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 -	STD     c2,32(r_ptr)          ; r[4] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 -	SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 -	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 -	STD     c3,40(r_ptr)          ; r[5] = c3; -	COPY    %r0,c3 - -	SQR_ADD_C a3L,a3R,c1,c2,c3 -	SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 -	SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 -	SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 -	STD     c1,48(r_ptr)          ; r[6] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 -	SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 -	SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 -	SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 -	STD     c2,56(r_ptr)          ; r[7] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C a4L,a4R,c3,c1,c2 -	SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 -	
SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 -	SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 -	STD     c3,64(r_ptr)          ; r[8] = c3; -	COPY    %r0,c3 - -	SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 -	SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 -	SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 -	STD     c1,72(r_ptr)          ; r[9] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C a5L,a5R,c2,c3,c1 -	SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 -	SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 -	STD     c2,80(r_ptr)          ; r[10] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 -	SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 -	STD     c3,88(r_ptr)          ; r[11] = c3; -	COPY    %r0,c3 -	 -	SQR_ADD_C a6L,a6R,c1,c2,c3 -	SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 -	STD     c1,96(r_ptr)          ; r[12] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 -	STD     c2,104(r_ptr)         ; r[13] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C a7L,a7R,c3,c1,c2 -	STD     c3, 112(r_ptr)       ; r[14] = c3 -	STD     c1, 120(r_ptr)       ; r[15] = c1 - -    .EXIT -    LDD     -104(%sp),%r6        ; restore r6 -    LDD     -112(%sp),%r5        ; restore r5 -    LDD     -120(%sp),%r4        ; restore r4 -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3 - -	.PROCEND	 - -;----------------------------------------------------------------------------- -; -;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba4 -	.proc -	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .entry -	.align 64 -    STD     %r3,0(%sp)          ; save r3 -    STD     %r4,8(%sp)          ; save r4 -    STD     %r5,16(%sp)         ; save r5 -    STD     %r6,24(%sp)         ; save r6 - -	; -	; Zero out carries -	; -	COPY     %r0,c1 -	COPY     %r0,c2 -	COPY     %r0,c3 - -	LDO      128(%sp),%sp       ; bump stack -    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L -    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32 - -	; -	; Load up all of the values we are going to use -	; -    FLDD     0(a_ptr),a0        -    FLDD     8(a_ptr),a1        -    FLDD    16(a_ptr),a2        -    FLDD    24(a_ptr),a3        -    FLDD    32(a_ptr),a4        -    FLDD    40(a_ptr),a5        -    FLDD    48(a_ptr),a6        -    FLDD    56(a_ptr),a7        - -	SQR_ADD_C a0L,a0R,c1,c2,c3 - -	STD     c1,0(r_ptr)          ; r[0] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - -	STD     c2,8(r_ptr)          ; r[1] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C a1L,a1R,c3,c1,c2 -	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - -	STD     c3,16(r_ptr)            ; r[2] = c3; -	COPY    %r0,c3 - -	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 -	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - -	STD     c1,24(r_ptr)           ; r[3] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C a2L,a2R,c2,c3,c1 -	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - -	STD     c2,32(r_ptr)           ; r[4] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 -	STD     c3,40(r_ptr)           ; r[5] = c3; -	COPY    %r0,c3 - -	SQR_ADD_C a3L,a3R,c1,c2,c3 -	STD     c1,48(r_ptr)           ; r[6] = c1; -	STD     c2,56(r_ptr)           ; r[7] = c2; - -    .EXIT -    LDD     -104(%sp),%r6        ; restore r6 -    LDD     -112(%sp),%r5        ; restore r5 -    LDD     -120(%sp),%r4        ; restore r4 -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3 - -	.PROCEND	 - - -;--------------------------------------------------------------------------- - -MUL_ADD_C  .macro  A0L,A0R,B0L,B0R,C1,C2,C3 -    XMPYU   A0L,B0R,ftemp1        ; m1 = bl*ht -    FSTD    ftemp1,-16(%sp)       ; - 
   XMPYU   A0R,B0L,ftemp2        ; m = bh*lt -    FSTD    ftemp2,-8(%sp)        ; -    XMPYU   A0R,B0R,ftemp3        ; lt = bl*lt -    FSTD    ftemp3,-32(%sp) -    XMPYU   A0L,B0L,ftemp4        ; ht = bh*ht -    FSTD    ftemp4,-24(%sp)       ; - -    LDD     -8(%sp),m             ; r21 = m -    LDD     -16(%sp),m1           ; r19 = m1 -    ADD,L   m,m1,m                ; m+m1 - -    DEPD,Z  m,31,32,temp3         ; (m+m1<<32) -    LDD     -24(%sp),ht           ; r24 = ht - -    CMPCLR,*>>= m,m1,%r0          ; if (m < m1) -    ADD,L   ht,high_one,ht        ; ht+=high_one - -    EXTRD,U m,31,32,temp1         ; m >> 32 -    LDD     -32(%sp),lt           ; lt -    ADD,L   ht,temp1,ht           ; ht+= m>>32 -    ADD     lt,temp3,lt           ; lt = lt+m1 -    ADD,DC  ht,%r0,ht             ; ht++ - -    ADD     C1,lt,C1              ; c1=c1+lt -    ADD,DC  ht,%r0,ht             ; bump c3 if overflow,nullify otherwise - -    ADD     C2,ht,C2              ; c2 = c2 + ht -    ADD,DC  C3,%r0,C3             ; add in carry (c3++) -.endm - - -; -;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba8 -	.proc -	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .entry -	.align 64 - -    STD     %r3,0(%sp)          ; save r3 -    STD     %r4,8(%sp)          ; save r4 -    STD     %r5,16(%sp)         ; save r5 -    STD     %r6,24(%sp)         ; save r6 -    FSTD    %fr12,32(%sp)       ; save r6 -    FSTD    %fr13,40(%sp)       ; save r7 - -	; -	; Zero out carries -	; -	COPY     %r0,c1 -	COPY     %r0,c2 -	COPY     %r0,c3 - -	LDO      128(%sp),%sp       ; bump stack -    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32 - -	; -	; Load up all of the values we are going to use -	; -    FLDD      0(a_ptr),a0        -    FLDD      8(a_ptr),a1        -    FLDD     16(a_ptr),a2        -    FLDD     24(a_ptr),a3        -    FLDD     32(a_ptr),a4        -    FLDD     40(a_ptr),a5        -    FLDD     48(a_ptr),a6        -    FLDD     56(a_ptr),a7        - -    FLDD      0(b_ptr),b0        -    FLDD      8(b_ptr),b1        -    FLDD     16(b_ptr),b2        -    FLDD     24(b_ptr),b3        -    FLDD     32(b_ptr),b4        -    FLDD     40(b_ptr),b5        -    FLDD     48(b_ptr),b6        -    FLDD     56(b_ptr),b7        - -	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 -	STD       c1,0(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 -	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 -	STD       c2,8(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 -	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 -	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 -	STD       c3,16(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 -	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 -	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 -	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 -	STD       c1,24(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 -	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 -	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 -	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 -	MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 -	STD       c2,32(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 -	MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 -	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 -	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 -	MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 -	MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 -	STD       c3,40(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 -	MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 -	MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 -	
MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 -	MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 -	MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 -	MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 -	STD       c1,48(r_ptr) -	COPY      %r0,c1 -	 -	MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 -	MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 -	MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 -	MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 -	MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 -	MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 -	MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 -	MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 -	STD       c2,56(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 -	MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 -	MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 -	MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 -	MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 -	MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 -	MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 -	STD       c3,64(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 -	MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 -	MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 -	MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 -	MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 -	MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 -	STD       c1,72(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 -	MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 -	MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 -	MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 -	MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 -	STD       c2,80(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 -	MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 -	MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 -	MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 -	STD       c3,88(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 -	MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 -	MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 -	STD       c1,96(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 -	MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 -	STD       c2,104(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 -	STD       c3,112(r_ptr) -	STD       c1,120(r_ptr) - -    .EXIT -    FLDD    -88(%sp),%fr13  -    FLDD    -96(%sp),%fr12  -    LDD     -104(%sp),%r6        ; restore r6 -    LDD     -112(%sp),%r5        ; restore r5 -    LDD     -120(%sp),%r4        ; restore r4 -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3 - -	.PROCEND	 - -;----------------------------------------------------------------------------- -; -;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba4 -	.proc -	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .entry -	.align 64 - -    STD     %r3,0(%sp)          ; save r3 -    STD     %r4,8(%sp)          ; save r4 -    STD     %r5,16(%sp)         ; save r5 -    STD     %r6,24(%sp)         ; save r6 -    FSTD    %fr12,32(%sp)       ; save r6 -    FSTD    %fr13,40(%sp)       ; save r7 - -	; -	; Zero out carries -	; -	COPY     %r0,c1 -	COPY     %r0,c2 -	COPY     %r0,c3 - -	LDO      128(%sp),%sp       ; bump stack -    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32 - -	; -	; Load up all of the values we are going to use -	; -    FLDD      0(a_ptr),a0        -    FLDD      8(a_ptr),a1        -    FLDD     16(a_ptr),a2        -    FLDD     24(a_ptr),a3        - -    FLDD      0(b_ptr),b0        -    FLDD      8(b_ptr),b1        -    FLDD     16(b_ptr),b2        -    FLDD     24(b_ptr),b3        - -	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 -	STD       c1,0(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 -	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 -	STD       c2,8(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C 
a2L,a2R,b0L,b0R,c3,c1,c2 -	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 -	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 -	STD       c3,16(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 -	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 -	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 -	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 -	STD       c1,24(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 -	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 -	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 -	STD       c2,32(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 -	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 -	STD       c3,40(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 -	STD       c1,48(r_ptr) -	STD       c2,56(r_ptr) - -    .EXIT -    FLDD    -88(%sp),%fr13  -    FLDD    -96(%sp),%fr12  -    LDD     -104(%sp),%r6        ; restore r6 -    LDD     -112(%sp),%r5        ; restore r5 -    LDD     -120(%sp),%r4        ; restore r4 -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3 - -	.PROCEND	 - - -;--- not PIC	.SPACE	$TEXT$ -;--- not PIC	.SUBSPA	$CODE$ -;--- not PIC	.SPACE	$PRIVATE$,SORT=16 -;--- not PIC	.IMPORT	$global$,DATA -;--- not PIC	.SPACE	$TEXT$ -;--- not PIC	.SUBSPA	$CODE$ -;--- not PIC	.SUBSPA	$LIT$,ACCESS=0x2c -;--- not PIC	C$7 -;--- not PIC	.ALIGN	8 -;--- not PIC	.STRINGZ	"Division would overflow (%d)\n" -	.END diff --git a/app/openssl/crypto/bn/asm/pa-risc2W.S b/app/openssl/crypto/bn/asm/pa-risc2W.S deleted file mode 100644 index a9954575..00000000 --- a/app/openssl/crypto/bn/asm/pa-risc2W.S +++ /dev/null @@ -1,1605 +0,0 @@ -; -; PA-RISC 64-bit implementation of bn_asm code -; -; This code is approximately 2x faster than the C version -; for RSA/DSA. -; -; See http://devresource.hp.com/  for more details on the PA-RISC -; architecture.  Also see the book "PA-RISC 2.0 Architecture" -; by Gerry Kane for information on the instruction set architecture. -; -; Code written by Chris Ruemmler (with some help from the HP C -; compiler). -; -; The code compiles with HP's assembler -; - -	.level	2.0W -	.space	$TEXT$ -	.subspa	$CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY - -; -; Global Register definitions used for the routines. -; -; Some information about HP's runtime architecture for 64-bits. -; -; "Caller save" means the calling function must save the register -; if it wants the register to be preserved. -; "Callee save" means if a function uses the register, it must save -; the value before using it. 
-; -; For the floating point registers  -; -;    "caller save" registers: fr4-fr11, fr22-fr31 -;    "callee save" registers: fr12-fr21 -;    "special" registers: fr0-fr3 (status and exception registers) -; -; For the integer registers -;     value zero             :  r0 -;     "caller save" registers: r1,r19-r26 -;     "callee save" registers: r3-r18 -;     return register        :  r2  (rp) -;     return values          ; r28  (ret0,ret1) -;     Stack pointer          ; r30  (sp)  -;     global data pointer    ; r27  (dp) -;     argument pointer       ; r29  (ap) -;     millicode return ptr   ; r31  (also a caller save register) - - -; -; Arguments to the routines -; -r_ptr       .reg %r26 -a_ptr       .reg %r25 -b_ptr       .reg %r24 -num         .reg %r24 -w           .reg %r23 -n           .reg %r23 - - -; -; Globals used in some routines -; - -top_overflow .reg %r29 -high_mask    .reg %r22    ; value 0xffffffff80000000L - - -;------------------------------------------------------------------------------ -; -; bn_mul_add_words -; -;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,  -;								int num, BN_ULONG w) -; -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = num -; arg3 = w -; -; Local register definitions -; - -fm1          .reg %fr22 -fm           .reg %fr23 -ht_temp      .reg %fr24 -ht_temp_1    .reg %fr25 -lt_temp      .reg %fr26 -lt_temp_1    .reg %fr27 -fm1_1        .reg %fr28 -fm_1         .reg %fr29 - -fw_h         .reg %fr7L -fw_l         .reg %fr7R -fw           .reg %fr7 - -fht_0        .reg %fr8L -flt_0        .reg %fr8R -t_float_0    .reg %fr8 - -fht_1        .reg %fr9L -flt_1        .reg %fr9R -t_float_1    .reg %fr9 - -tmp_0        .reg %r31 -tmp_1        .reg %r21 -m_0          .reg %r20  -m_1          .reg %r19  -ht_0         .reg %r1   -ht_1         .reg %r3 -lt_0         .reg %r4 -lt_1         .reg %r5 -m1_0         .reg %r6  -m1_1         .reg %r7  -rp_val       .reg %r8 -rp_val_1     .reg %r9 - -bn_mul_add_words -	.export	bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN -	.proc -	.callinfo frame=128 -    .entry -	.align 64 - -    STD     %r3,0(%sp)          ; save r3   -    STD     %r4,8(%sp)          ; save r4   -	NOP                         ; Needed to make the loop 16-byte aligned -	NOP                         ; Needed to make the loop 16-byte aligned - -    STD     %r5,16(%sp)         ; save r5   -    STD     %r6,24(%sp)         ; save r6   -    STD     %r7,32(%sp)         ; save r7   -    STD     %r8,40(%sp)         ; save r8   - -    STD     %r9,48(%sp)         ; save r9   -    COPY    %r0,%ret0           ; return 0 by default -    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32     -	STD     w,56(%sp)           ; store w on stack - -    CMPIB,>= 0,num,bn_mul_add_words_exit  ; if (num <= 0) then exit -	LDO     128(%sp),%sp       ; bump stack - -	; -	; The loop is unrolled twice, so if there is only 1 number -    ; then go straight to the cleanup code. -	; -	CMPIB,= 1,num,bn_mul_add_words_single_top -	FLDD    -72(%sp),fw     ; load up w into fp register fw (fw_h/fw_l) - -	; -	; This loop is unrolled 2 times (64-byte aligned as well) -	; -	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus -    ; two 32-bit mutiplies can be issued per cycle. 
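As with the 32-bit file, the unrolled loop that follows is easier to read against its scalar meaning. A C sketch (illustrative _ref name; BN_ULONG taken as 64-bit):

    #include <stdint.h>

    /* rp[i] = low word of rp[i] + ap[i]*w + carry, propagating the carry;
     * the final carry word is what the assembly returns in %ret0. */
    uint64_t bn_mul_add_words_ref(uint64_t *rp, const uint64_t *ap,
                                  int num, uint64_t w)
    {
        uint64_t carry = 0;
        for (int i = 0; i < num; i++) {
            unsigned __int128 t =
                (unsigned __int128)ap[i] * w + carry + rp[i];
            rp[i] = (uint64_t)t;
            carry = (uint64_t)(t >> 64);
        }
        return carry;
    }

The 128-bit sum cannot overflow: with a, w, carry and rp[i] all at their maxima it is exactly 2^128 - 1, which is why a single carry word suffices.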
-    ;  -bn_mul_add_words_unroll2 - -    FLDD    0(a_ptr),t_float_0       ; load up 64-bit value (fr8L) ht(L)/lt(R) -    FLDD    8(a_ptr),t_float_1       ; load up 64-bit value (fr8L) ht(L)/lt(R) -    LDD     0(r_ptr),rp_val          ; rp[0] -    LDD     8(r_ptr),rp_val_1        ; rp[1] - -    XMPYU   fht_0,fw_l,fm1           ; m1[0] = fht_0*fw_l -    XMPYU   fht_1,fw_l,fm1_1         ; m1[1] = fht_1*fw_l -    FSTD    fm1,-16(%sp)             ; -16(sp) = m1[0] -    FSTD    fm1_1,-48(%sp)           ; -48(sp) = m1[1] - -    XMPYU   flt_0,fw_h,fm            ; m[0] = flt_0*fw_h -    XMPYU   flt_1,fw_h,fm_1          ; m[1] = flt_1*fw_h -    FSTD    fm,-8(%sp)               ; -8(sp) = m[0] -    FSTD    fm_1,-40(%sp)            ; -40(sp) = m[1] - -    XMPYU   fht_0,fw_h,ht_temp       ; ht_temp   = fht_0*fw_h -    XMPYU   fht_1,fw_h,ht_temp_1     ; ht_temp_1 = fht_1*fw_h -    FSTD    ht_temp,-24(%sp)         ; -24(sp)   = ht_temp -    FSTD    ht_temp_1,-56(%sp)       ; -56(sp)   = ht_temp_1 - -    XMPYU   flt_0,fw_l,lt_temp       ; lt_temp = lt*fw_l -    XMPYU   flt_1,fw_l,lt_temp_1     ; lt_temp = lt*fw_l -    FSTD    lt_temp,-32(%sp)         ; -32(sp) = lt_temp  -    FSTD    lt_temp_1,-64(%sp)       ; -64(sp) = lt_temp_1  - -    LDD     -8(%sp),m_0              ; m[0]  -    LDD     -40(%sp),m_1             ; m[1] -    LDD     -16(%sp),m1_0            ; m1[0] -    LDD     -48(%sp),m1_1            ; m1[1] - -    LDD     -24(%sp),ht_0            ; ht[0] -    LDD     -56(%sp),ht_1            ; ht[1] -    ADD,L   m1_0,m_0,tmp_0           ; tmp_0 = m[0] + m1[0];  -    ADD,L   m1_1,m_1,tmp_1           ; tmp_1 = m[1] + m1[1];  - -    LDD     -32(%sp),lt_0             -    LDD     -64(%sp),lt_1             -    CMPCLR,*>>= tmp_0,m1_0, %r0      ; if (m[0] < m1[0]) -    ADD,L   ht_0,top_overflow,ht_0   ; ht[0] += (1<<32) - -    CMPCLR,*>>= tmp_1,m1_1,%r0       ; if (m[1] < m1[1]) -    ADD,L   ht_1,top_overflow,ht_1   ; ht[1] += (1<<32) -    EXTRD,U tmp_0,31,32,m_0          ; m[0]>>32   -    DEPD,Z  tmp_0,31,32,m1_0         ; m1[0] = m[0]<<32  - -    EXTRD,U tmp_1,31,32,m_1          ; m[1]>>32   -    DEPD,Z  tmp_1,31,32,m1_1         ; m1[1] = m[1]<<32  -    ADD,L   ht_0,m_0,ht_0            ; ht[0]+= (m[0]>>32) -    ADD,L   ht_1,m_1,ht_1            ; ht[1]+= (m[1]>>32) - -    ADD     lt_0,m1_0,lt_0           ; lt[0] = lt[0]+m1[0]; -	ADD,DC  ht_0,%r0,ht_0            ; ht[0]++ -    ADD     lt_1,m1_1,lt_1           ; lt[1] = lt[1]+m1[1]; -    ADD,DC  ht_1,%r0,ht_1            ; ht[1]++ - -    ADD    %ret0,lt_0,lt_0           ; lt[0] = lt[0] + c; -	ADD,DC  ht_0,%r0,ht_0            ; ht[0]++ -    ADD     lt_0,rp_val,lt_0         ; lt[0] = lt[0]+rp[0] -    ADD,DC  ht_0,%r0,ht_0            ; ht[0]++ - -	LDO    -2(num),num               ; num = num - 2; -    ADD     ht_0,lt_1,lt_1           ; lt[1] = lt[1] + ht_0 (c); -    ADD,DC  ht_1,%r0,ht_1            ; ht[1]++ -    STD     lt_0,0(r_ptr)            ; rp[0] = lt[0] - -    ADD     lt_1,rp_val_1,lt_1       ; lt[1] = lt[1]+rp[1] -    ADD,DC  ht_1,%r0,%ret0           ; ht[1]++ -    LDO     16(a_ptr),a_ptr          ; a_ptr += 2 - -    STD     lt_1,8(r_ptr)            ; rp[1] = lt[1] -	CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do -    LDO     16(r_ptr),r_ptr          ; r_ptr += 2 - -    CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one - -	; -	; Top of loop aligned on 64-byte boundary -	; -bn_mul_add_words_single_top -    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R) -    LDD     0(r_ptr),rp_val     
      ; rp[0] -    LDO     8(a_ptr),a_ptr            ; a_ptr++ -    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l -    FSTD    fm1,-16(%sp)              ; -16(sp) = m1 -    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h -    FSTD    fm,-8(%sp)                ; -8(sp) = m -    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h -    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht -    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l -    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt  - -    LDD     -8(%sp),m_0                -    LDD    -16(%sp),m1_0              ; m1 = temp1  -    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1;  -    LDD     -24(%sp),ht_0              -    LDD     -32(%sp),lt_0              - -    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (m < m1) -    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32) - -    EXTRD,U tmp_0,31,32,m_0           ; m>>32   -    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32  - -    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32) -    ADD     lt_0,m1_0,tmp_0           ; tmp_0 = lt+m1; -    ADD,DC  ht_0,%r0,ht_0             ; ht++ -    ADD     %ret0,tmp_0,lt_0          ; lt = lt + c; -    ADD,DC  ht_0,%r0,ht_0             ; ht++ -    ADD     lt_0,rp_val,lt_0          ; lt = lt+rp[0] -    ADD,DC  ht_0,%r0,%ret0            ; ht++ -    STD     lt_0,0(r_ptr)             ; rp[0] = lt - -bn_mul_add_words_exit -    .EXIT -    LDD     -80(%sp),%r9              ; restore r9   -    LDD     -88(%sp),%r8              ; restore r8   -    LDD     -96(%sp),%r7              ; restore r7   -    LDD     -104(%sp),%r6             ; restore r6   -    LDD     -112(%sp),%r5             ; restore r5   -    LDD     -120(%sp),%r4             ; restore r4   -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3             ; restore r3 -	.PROCEND	;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; arg3 = w - -bn_mul_words -	.proc -	.callinfo frame=128 -    .entry -	.EXPORT	bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -	.align 64 - -    STD     %r3,0(%sp)          ; save r3   -    STD     %r4,8(%sp)          ; save r4   -    STD     %r5,16(%sp)         ; save r5   -    STD     %r6,24(%sp)         ; save r6   - -    STD     %r7,32(%sp)         ; save r7   -    COPY    %r0,%ret0           ; return 0 by default -    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32     -	STD     w,56(%sp)           ; w on stack - -    CMPIB,>= 0,num,bn_mul_words_exit -	LDO     128(%sp),%sp       ; bump stack - -	; -	; See if only 1 word to do, thus just do cleanup -	; -	CMPIB,= 1,num,bn_mul_words_single_top -	FLDD    -72(%sp),fw     ; load up w into fp register fw (fw_h/fw_l) - -	; -	; This loop is unrolled 2 times (64-byte aligned as well) -	; -	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus -    ; two 32-bit multiplies can be issued per cycle. 
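Each XMPYU is a 32x32->64 multiply issued from the floating-point unit, so the routine assembles every 64x64->128-bit product from four partial products (lt*w_l, lt*w_h, ht*w_l, ht*w_h), bounces them through the stack into integer registers, and recombines them with explicit carry checks. A minimal C sketch of that recombination, assuming 64-bit BN_ULONGs as on PA-RISC 2.0W (the helper name mul64_hi_lo is ours, not OpenSSL's):

    #include <stdint.h>

    /* One 64x64->128-bit product from 32-bit halves, mirroring the
     * XMPYU / ADD,L / ADD,DC sequence in the loop below. */
    uint64_t mul64_hi_lo(uint64_t a, uint64_t w, uint64_t *hi)
    {
        uint64_t al = (uint32_t)a, ah = a >> 32;   /* lt(R) / ht(L) halves */
        uint64_t wl = (uint32_t)w, wh = w >> 32;   /* fw_l  / fw_h  halves */

        uint64_t lt  = al * wl;                    /* low partial product  */
        uint64_t ht  = ah * wh;                    /* high partial product */
        uint64_t m   = al * wh;                    /* cross partials       */
        uint64_t m1  = ah * wl;

        uint64_t mid = m + m1;
        if (mid < m1)                   /* CMPCLR,*>>= : the middle sum wrapped */
            ht += (uint64_t)1 << 32;    /* ADD,L ht,top_overflow,ht             */
        ht += mid >> 32;                /* EXTRD,U tmp,31,32 + ADD,L            */
        uint64_t lo = lt + (mid << 32); /* DEPD,Z tmp,31,32 + ADD               */
        if (lo < lt)
            ht++;                       /* ADD,DC ht,%r0,ht                     */

        *hi = ht;
        return lo;
    }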
-    ;  -bn_mul_words_unroll2 - -    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R) -    FLDD    8(a_ptr),t_float_1        ; load up 64-bit value (fr8L) ht(L)/lt(R) -    XMPYU   fht_0,fw_l,fm1            ; m1[0] = fht_0*fw_l -    XMPYU   fht_1,fw_l,fm1_1          ; m1[1] = ht*fw_l - -    FSTD    fm1,-16(%sp)              ; -16(sp) = m1 -    FSTD    fm1_1,-48(%sp)            ; -48(sp) = m1 -    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h -    XMPYU   flt_1,fw_h,fm_1           ; m = lt*fw_h - -    FSTD    fm,-8(%sp)                ; -8(sp) = m -    FSTD    fm_1,-40(%sp)             ; -40(sp) = m -    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = fht_0*fw_h -    XMPYU   fht_1,fw_h,ht_temp_1      ; ht_temp = ht*fw_h - -    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht -    FSTD    ht_temp_1,-56(%sp)        ; -56(sp) = ht -    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l -    XMPYU   flt_1,fw_l,lt_temp_1      ; lt_temp = lt*fw_l - -    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt  -    FSTD    lt_temp_1,-64(%sp)        ; -64(sp) = lt  -    LDD     -8(%sp),m_0                -    LDD     -40(%sp),m_1               - -    LDD    -16(%sp),m1_0               -    LDD    -48(%sp),m1_1               -    LDD     -24(%sp),ht_0              -    LDD     -56(%sp),ht_1              - -    ADD,L   m1_0,m_0,tmp_0            ; tmp_0 = m + m1;  -    ADD,L   m1_1,m_1,tmp_1            ; tmp_1 = m + m1;  -    LDD     -32(%sp),lt_0              -    LDD     -64(%sp),lt_1              - -    CMPCLR,*>>= tmp_0,m1_0, %r0       ; if (m < m1) -    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32) -    CMPCLR,*>>= tmp_1,m1_1,%r0        ; if (m < m1) -    ADD,L   ht_1,top_overflow,ht_1    ; ht += (1<<32) - -    EXTRD,U tmp_0,31,32,m_0           ; m>>32   -    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32  -    EXTRD,U tmp_1,31,32,m_1           ; m>>32   -    DEPD,Z  tmp_1,31,32,m1_1          ; m1 = m<<32  - -    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32) -    ADD,L   ht_1,m_1,ht_1             ; ht+= (m>>32) -    ADD     lt_0,m1_0,lt_0            ; lt = lt+m1; -	ADD,DC  ht_0,%r0,ht_0             ; ht++ - -    ADD     lt_1,m1_1,lt_1            ; lt = lt+m1; -    ADD,DC  ht_1,%r0,ht_1             ; ht++ -    ADD    %ret0,lt_0,lt_0            ; lt = lt + c (ret0); -	ADD,DC  ht_0,%r0,ht_0             ; ht++ - -    ADD     ht_0,lt_1,lt_1            ; lt = lt + c (ht_0) -    ADD,DC  ht_1,%r0,ht_1             ; ht++ -    STD     lt_0,0(r_ptr)             ; rp[0] = lt -    STD     lt_1,8(r_ptr)             ; rp[1] = lt - -	COPY    ht_1,%ret0                ; carry = ht -	LDO    -2(num),num                ; num = num - 2; -    LDO     16(a_ptr),a_ptr           ; ap += 2 -	CMPIB,<= 2,num,bn_mul_words_unroll2 -    LDO     16(r_ptr),r_ptr           ; rp++ - -    CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? 
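What the two-way unrolling buys is visible in the carry chain: the high word of element 0 (ht_0) is added straight into element 1's low sum, so only one carry value (%ret0) has to survive each trip around the loop. The same flow in C, reusing mul64_hi_lo from the sketch above (again illustrative, not the shipped code):

    #include <stdint.h>

    uint64_t mul64_hi_lo(uint64_t a, uint64_t w, uint64_t *hi); /* sketch above */

    /* rp[i] = low word of ap[i]*w + carry; returns the final carry word,
     * following the shape of bn_mul_words_unroll2 plus the single-word tail. */
    uint64_t bn_mul_words_sketch(uint64_t *rp, const uint64_t *ap,
                                 int num, uint64_t w)
    {
        uint64_t c = 0;
        while (num >= 2) {                      /* bn_mul_words_unroll2    */
            uint64_t hi0, hi1;
            uint64_t lo0 = mul64_hi_lo(ap[0], w, &hi0);
            uint64_t lo1 = mul64_hi_lo(ap[1], w, &hi1);
            lo0 += c;   if (lo0 < c)   hi0++;   /* lt = lt + c (ret0)      */
            lo1 += hi0; if (lo1 < hi0) hi1++;   /* lt = lt + c (ht_0)      */
            rp[0] = lo0;
            rp[1] = lo1;
            c = hi1;                            /* COPY ht_1,%ret0         */
            ap += 2; rp += 2; num -= 2;
        }
        if (num) {                              /* bn_mul_words_single_top */
            uint64_t hi;
            uint64_t lo = mul64_hi_lo(ap[0], w, &hi);
            lo += c;
            if (lo < c)
                hi++;
            rp[0] = lo;
            c = hi;
        }
        return c;
    }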
- -	; -	; Top of loop aligned on 64-byte boundary -	; -bn_mul_words_single_top -    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R) - -    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l -    FSTD    fm1,-16(%sp)              ; -16(sp) = m1 -    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h -    FSTD    fm,-8(%sp)                ; -8(sp) = m -    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h -    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht -    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l -    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt  - -    LDD     -8(%sp),m_0                -    LDD    -16(%sp),m1_0               -    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1;  -    LDD     -24(%sp),ht_0              -    LDD     -32(%sp),lt_0              - -    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (m < m1) -    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32) - -    EXTRD,U tmp_0,31,32,m_0           ; m>>32   -    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32  - -    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32) -    ADD     lt_0,m1_0,lt_0            ; lt= lt+m1; -    ADD,DC  ht_0,%r0,ht_0             ; ht++ - -    ADD     %ret0,lt_0,lt_0           ; lt = lt + c; -    ADD,DC  ht_0,%r0,ht_0             ; ht++ - -    COPY    ht_0,%ret0                ; copy carry -    STD     lt_0,0(r_ptr)             ; rp[0] = lt - -bn_mul_words_exit -    .EXIT -    LDD     -96(%sp),%r7              ; restore r7   -    LDD     -104(%sp),%r6             ; restore r6   -    LDD     -112(%sp),%r5             ; restore r5   -    LDD     -120(%sp),%r4             ; restore r4   -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3             ; restore r3 -	.PROCEND	;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; - -bn_sqr_words -	.proc -	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .entry -	.align 64 - -    STD     %r3,0(%sp)          ; save r3   -    STD     %r4,8(%sp)          ; save r4   -	NOP -    STD     %r5,16(%sp)         ; save r5   - -    CMPIB,>= 0,num,bn_sqr_words_exit -	LDO     128(%sp),%sp       ; bump stack - -	; -	; If only 1, then go straight to cleanup -	; -	CMPIB,= 1,num,bn_sqr_words_single_top -    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L - -	; -	; This loop is unrolled 2 times (64-byte aligned as well) -	; - -bn_sqr_words_unroll2 -    FLDD    0(a_ptr),t_float_0        ; a[0] -    FLDD    8(a_ptr),t_float_1        ; a[1] -    XMPYU   fht_0,flt_0,fm            ; m[0] -    XMPYU   fht_1,flt_1,fm_1          ; m[1] - -    FSTD    fm,-24(%sp)               ; store m[0] -    FSTD    fm_1,-56(%sp)             ; store m[1] -    XMPYU   flt_0,flt_0,lt_temp       ; lt[0] -    XMPYU   flt_1,flt_1,lt_temp_1     ; lt[1] - -    FSTD    lt_temp,-16(%sp)          ; store lt[0] -    FSTD    lt_temp_1,-48(%sp)        ; store lt[1] -    XMPYU   fht_0,fht_0,ht_temp       ; ht[0] -    XMPYU   fht_1,fht_1,ht_temp_1     ; ht[1] - -    FSTD    ht_temp,-8(%sp)           ; store ht[0] -    FSTD    ht_temp_1,-40(%sp)        ; store ht[1] -    LDD     -24(%sp),m_0              -    LDD     -56(%sp),m_1               - -    AND     m_0,high_mask,tmp_0       ; m[0] & Mask -    AND     m_1,high_mask,tmp_1       ; m[1] & Mask -    DEPD,Z  m_0,30,31,m_0             ; m[0] << 32+1 -    DEPD,Z  
m_1,30,31,m_1             ; m[1] << 32+1 - -    LDD     -16(%sp),lt_0         -    LDD     -48(%sp),lt_1         -    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m[0]&Mask >> 32-1 -    EXTRD,U tmp_1,32,33,tmp_1         ; tmp_1 = m[1]&Mask >> 32-1 - -    LDD     -8(%sp),ht_0             -    LDD     -40(%sp),ht_1            -    ADD,L   ht_0,tmp_0,ht_0           ; ht[0] += tmp_0 -    ADD,L   ht_1,tmp_1,ht_1           ; ht[1] += tmp_1 - -    ADD     lt_0,m_0,lt_0             ; lt = lt+m -    ADD,DC  ht_0,%r0,ht_0             ; ht[0]++ -    STD     lt_0,0(r_ptr)             ; rp[0] = lt[0] -    STD     ht_0,8(r_ptr)             ; rp[1] = ht[1] - -    ADD     lt_1,m_1,lt_1             ; lt = lt+m -    ADD,DC  ht_1,%r0,ht_1             ; ht[1]++ -    STD     lt_1,16(r_ptr)            ; rp[2] = lt[1] -    STD     ht_1,24(r_ptr)            ; rp[3] = ht[1] - -	LDO    -2(num),num                ; num = num - 2; -    LDO     16(a_ptr),a_ptr           ; ap += 2 -	CMPIB,<= 2,num,bn_sqr_words_unroll2 -    LDO     32(r_ptr),r_ptr           ; rp += 4 - -    CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? - -	; -	; Top of loop aligned on 64-byte boundary -	; -bn_sqr_words_single_top -    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R) - -    XMPYU   fht_0,flt_0,fm            ; m -    FSTD    fm,-24(%sp)               ; store m - -    XMPYU   flt_0,flt_0,lt_temp       ; lt -    FSTD    lt_temp,-16(%sp)          ; store lt - -    XMPYU   fht_0,fht_0,ht_temp       ; ht -    FSTD    ht_temp,-8(%sp)           ; store ht - -    LDD     -24(%sp),m_0              ; load m -    AND     m_0,high_mask,tmp_0       ; m & Mask -    DEPD,Z  m_0,30,31,m_0             ; m << 32+1 -    LDD     -16(%sp),lt_0             ; lt - -    LDD     -8(%sp),ht_0              ; ht -    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m&Mask >> 32-1 -    ADD     m_0,lt_0,lt_0             ; lt = lt+m -    ADD,L   ht_0,tmp_0,ht_0           ; ht += tmp_0 -    ADD,DC  ht_0,%r0,ht_0             ; ht++ - -    STD     lt_0,0(r_ptr)             ; rp[0] = lt -    STD     ht_0,8(r_ptr)             ; rp[1] = ht - -bn_sqr_words_exit -    .EXIT -    LDD     -112(%sp),%r5       ; restore r5   -    LDD     -120(%sp),%r4       ; restore r4   -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3  -	.PROCEND	;in=23,24,25,26,29;out=28; - - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp  -; arg1 = ap -; arg2 = bp  -; arg3 = n - -t  .reg %r22 -b  .reg %r21 -l  .reg %r20 - -bn_add_words -	.proc -    .entry -	.callinfo -	.EXPORT	bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -	.align 64 - -    CMPIB,>= 0,n,bn_add_words_exit -    COPY    %r0,%ret0           ; return 0 by default - -	; -	; If 2 or more numbers do the loop -	; -	CMPIB,= 1,n,bn_add_words_single_top -	NOP - -	; -	; This loop is unrolled 2 times (64-byte aligned as well) -	; -bn_add_words_unroll2 -	LDD     0(a_ptr),t -	LDD     0(b_ptr),b -	ADD     t,%ret0,t                    ; t = t+c; -	ADD,DC  %r0,%r0,%ret0                ; set c to carry -	ADD     t,b,l                        ; l = t + b[0] -	ADD,DC  %ret0,%r0,%ret0              ; c+= carry -	STD     l,0(r_ptr) - -	LDD     8(a_ptr),t -	LDD     8(b_ptr),b -	ADD     t,%ret0,t                     ; t = t+c; -	ADD,DC  %r0,%r0,%ret0                 ; set c to carry -	ADD     t,b,l                         ; l = t + b[0] -	ADD,DC  %ret0,%r0,%ret0               ; c+= carry -	STD     l,8(r_ptr) - -	LDO     
-2(n),n -	LDO     16(a_ptr),a_ptr -	LDO     16(b_ptr),b_ptr - -	CMPIB,<= 2,n,bn_add_words_unroll2 -	LDO     16(r_ptr),r_ptr - -    CMPIB,=,N 0,n,bn_add_words_exit ; are we done? - -bn_add_words_single_top -	LDD     0(a_ptr),t -	LDD     0(b_ptr),b - -	ADD     t,%ret0,t                 ; t = t+c; -	ADD,DC  %r0,%r0,%ret0             ; set c to carry (could use CMPCLR??) -	ADD     t,b,l                     ; l = t + b[0] -	ADD,DC  %ret0,%r0,%ret0           ; c+= carry -	STD     l,0(r_ptr) - -bn_add_words_exit -    .EXIT -    BVE     (%rp) -	NOP -	.PROCEND	;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp  -; arg1 = ap -; arg2 = bp  -; arg3 = n - -t1       .reg %r22 -t2       .reg %r21 -sub_tmp1 .reg %r20 -sub_tmp2 .reg %r19 - - -bn_sub_words -	.proc -	.callinfo  -	.EXPORT	bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .entry -	.align 64 - -    CMPIB,>=  0,n,bn_sub_words_exit -    COPY    %r0,%ret0           ; return 0 by default - -	; -	; If 2 or more numbers do the loop -	; -	CMPIB,= 1,n,bn_sub_words_single_top -	NOP - -	; -	; This loop is unrolled 2 times (64-byte aligned as well) -	; -bn_sub_words_unroll2 -	LDD     0(a_ptr),t1 -	LDD     0(b_ptr),t2 -	SUB     t1,t2,sub_tmp1           ; t3 = t1-t2;  -	SUB     sub_tmp1,%ret0,sub_tmp1  ; t3 = t3- c;  - -	CMPCLR,*>> t1,t2,sub_tmp2        ; clear if t1 > t2 -	LDO      1(%r0),sub_tmp2 -	 -	CMPCLR,*= t1,t2,%r0 -	COPY    sub_tmp2,%ret0 -	STD     sub_tmp1,0(r_ptr) - -	LDD     8(a_ptr),t1 -	LDD     8(b_ptr),t2 -	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2;  -	SUB     sub_tmp1,%ret0,sub_tmp1   ; t3 = t3- c;  -	CMPCLR,*>> t1,t2,sub_tmp2         ; clear if t1 > t2 -	LDO      1(%r0),sub_tmp2 -	 -	CMPCLR,*= t1,t2,%r0 -	COPY    sub_tmp2,%ret0 -	STD     sub_tmp1,8(r_ptr) - -	LDO     -2(n),n -	LDO     16(a_ptr),a_ptr -	LDO     16(b_ptr),b_ptr - -	CMPIB,<= 2,n,bn_sub_words_unroll2 -	LDO     16(r_ptr),r_ptr - -    CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? - -bn_sub_words_single_top -	LDD     0(a_ptr),t1 -	LDD     0(b_ptr),t2 -	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2;  -	SUB     sub_tmp1,%ret0,sub_tmp1   ; t3 = t3- c;  -	CMPCLR,*>> t1,t2,sub_tmp2         ; clear if t1 > t2 -	LDO      1(%r0),sub_tmp2 -	 -	CMPCLR,*= t1,t2,%r0 -	COPY    sub_tmp2,%ret0 - -	STD     sub_tmp1,0(r_ptr) - -bn_sub_words_exit -    .EXIT -    BVE     (%rp) -	NOP -	.PROCEND	;in=23,24,25,26,29;out=28; - -;------------------------------------------------------------------------------ -; -; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) -; -; arg0 = h -; arg1 = l -; arg2 = d -; -; This is mainly just modified assembly from the compiler, thus the -; lack of variable names. 
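For reference, bn_div_words returns the 64-bit quotient of the double word h:l divided by d; the caller keeps h < d so the quotient fits, the routine returns all-ones when d is zero, and the bn_div_err_case branch further down aborts with "Division would overflow" when h is too large. A compact statement of that contract, assuming a compiler with the unsigned __int128 extension (a sketch of the semantics, not of the algorithm):

    #include <stdint.h>

    /* The PA-RISC code open-codes this with BN_num_bits_word, a normalising
     * shift and up to two $$div2U millicode calls, because the CPU has no
     * 128-by-64-bit divide instruction. */
    uint64_t bn_div_words_ref(uint64_t h, uint64_t l, uint64_t d)
    {
        if (d == 0)
            return (uint64_t)-1;     /* LDO -1(%r0),%ret0: default return  */
        unsigned __int128 n = ((unsigned __int128)h << 64) | l;
        return (uint64_t)(n / d);    /* quotient; the remainder is dropped */
    }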
-; -;------------------------------------------------------------------------------ -bn_div_words -	.proc -	.callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -	.IMPORT	BN_num_bits_word,CODE,NO_RELOCATION -	.IMPORT	__iob,DATA -	.IMPORT	fprintf,CODE,NO_RELOCATION -	.IMPORT	abort,CODE,NO_RELOCATION -	.IMPORT	$$div2U,MILLICODE -    .entry -    STD     %r2,-16(%r30)    -    STD,MA  %r3,352(%r30)    -    STD     %r4,-344(%r30)   -    STD     %r5,-336(%r30)   -    STD     %r6,-328(%r30)   -    STD     %r7,-320(%r30)   -    STD     %r8,-312(%r30)   -    STD     %r9,-304(%r30)   -    STD     %r10,-296(%r30) - -    STD     %r27,-288(%r30)             ; save gp - -    COPY    %r24,%r3           ; save d  -    COPY    %r26,%r4           ; save h (high 64-bits) -    LDO      -1(%r0),%ret0     ; return -1 by default	 - -    CMPB,*=  %r0,%arg2,$D3     ; if (d == 0) -    COPY    %r25,%r5           ; save l (low 64-bits) - -    LDO     -48(%r30),%r29     ; create ap  -    .CALL   ;in=26,29;out=28; -    B,L     BN_num_bits_word,%r2  -    COPY    %r3,%r26         -    LDD     -288(%r30),%r27    ; restore gp  -    LDI     64,%r21  - -    CMPB,=  %r21,%ret0,$00000012   ;if (i == 64) (forward)  -    COPY    %ret0,%r24             ; i    -    MTSARCM %r24     -    DEPDI,Z -1,%sar,1,%r29   -    CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward)  - -$00000012 -    SUBI    64,%r24,%r31                       ; i = 64 - i; -    CMPCLR,*<< %r4,%r3,%r0                     ; if (h >= d) -    SUB     %r4,%r3,%r4                        ; h -= d -    CMPB,=  %r31,%r0,$0000001A                 ; if (i) -    COPY    %r0,%r10                           ; ret = 0 -    MTSARCM %r31                               ; i to shift -    DEPD,Z  %r3,%sar,64,%r3                    ; d <<= i; -    SUBI    64,%r31,%r19                       ; 64 - i; redundant -    MTSAR   %r19                               ; (64 -i) to shift -    SHRPD   %r4,%r5,%sar,%r4                   ; l>> (64-i) -    MTSARCM %r31                               ; i to shift -    DEPD,Z  %r5,%sar,64,%r5                    ; l <<= i; - -$0000001A -    DEPDI,Z -1,31,32,%r19                       -    EXTRD,U %r3,31,32,%r6                      ; dh=(d&0xfff)>>32 -    EXTRD,U %r3,63,32,%r8                      ; dl = d&0xffffff -    LDO     2(%r0),%r9 -    STD    %r3,-280(%r30)                      ; "d" to stack - -$0000001C -    DEPDI,Z -1,63,32,%r29                      ;  -    EXTRD,U %r4,31,32,%r31                     ; h >> 32 -    CMPB,*=,N  %r31,%r6,$D2     	       ; if ((h>>32) != dh)(forward) div -    COPY    %r4,%r26        -    EXTRD,U %r4,31,32,%r25  -    COPY    %r6,%r24       -    .CALL   ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) -    B,L     $$div2U,%r2      -    EXTRD,U %r6,31,32,%r23   -    DEPD    %r28,31,32,%r29  -$D2 -    STD     %r29,-272(%r30)                   ; q -    AND     %r5,%r19,%r24                   ; t & 0xffffffff00000000; -    EXTRD,U %r24,31,32,%r24                 ; ???  
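The $00000012 block above is the classical normalisation step of schoolbook division: shift d left until its top bit is set and shift the dividend h:l by the same amount, which leaves the quotient unchanged while making the per-digit estimates accurate; the $0000001C loop then produces two 32-bit quotient digits (the LDO 2(%r0),%r9 counter), estimating each with $$div2U and correcting after the multiply-back that follows. The normalisation in C (names are ours; d != 0 is guaranteed by the early-out above, and h < d ensures no high bits are lost):

    #include <stdint.h>

    /* i = 64 - BN_num_bits_word(d); d <<= i; h = (h << i) | (l >> (64 - i));
     * l <<= i;  -- the MTSARCM / DEPD,Z / SHRPD sequence above. */
    void normalise(uint64_t *h, uint64_t *l, uint64_t *d)
    {
        int i = 0;
        while (!(*d >> 63)) {   /* how far the divisor must move */
            *d <<= 1;
            i++;
        }
        if (i) {                /* skipped when i == 0, as in the CMPB,= branch;
                                 * a 64-bit shift by 64 would be undefined in C */
            *h = (*h << i) | (*l >> (64 - i));
            *l <<= i;
        }
    }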
-    FLDD    -272(%r30),%fr7                 ; q -    FLDD    -280(%r30),%fr8                 ; d -    XMPYU   %fr8L,%fr7L,%fr10   -    FSTD    %fr10,-256(%r30)    -    XMPYU   %fr8L,%fr7R,%fr22   -    FSTD    %fr22,-264(%r30)    -    XMPYU   %fr8R,%fr7L,%fr11  -    XMPYU   %fr8R,%fr7R,%fr23 -    FSTD    %fr11,-232(%r30) -    FSTD    %fr23,-240(%r30) -    LDD     -256(%r30),%r28 -    DEPD,Z  %r28,31,32,%r2  -    LDD     -264(%r30),%r20 -    ADD,L   %r20,%r2,%r31    -    LDD     -232(%r30),%r22  -    DEPD,Z  %r22,31,32,%r22  -    LDD     -240(%r30),%r21  -    B       $00000024       ; enter loop   -    ADD,L   %r21,%r22,%r23  - -$0000002A -    LDO     -1(%r29),%r29    -    SUB     %r23,%r8,%r23    -$00000024 -    SUB     %r4,%r31,%r25    -    AND     %r25,%r19,%r26   -    CMPB,*<>,N      %r0,%r26,$00000046  ; (forward) -    DEPD,Z  %r25,31,32,%r20  -    OR      %r20,%r24,%r21   -    CMPB,*<<,N  %r21,%r23,$0000002A ;(backward)  -    SUB     %r31,%r6,%r31    -;-------------Break path--------------------- - -$00000046 -    DEPD,Z  %r23,31,32,%r25              ;tl -    EXTRD,U %r23,31,32,%r26              ;t -    AND     %r25,%r19,%r24               ;tl = (tl<<32)&0xfffffff0000000L -    ADD,L   %r31,%r26,%r31               ;th += t;  -    CMPCLR,*>>=     %r5,%r24,%r0         ;if (l<tl) -    LDO     1(%r31),%r31                 ; th++; -    CMPB,*<<=,N     %r31,%r4,$00000036   ;if (n < th) (forward) -    LDO     -1(%r29),%r29                ;q--;  -    ADD,L   %r4,%r3,%r4                  ;h += d; -$00000036 -    ADDIB,=,N       -1,%r9,$D1 ;if (--count == 0) break (forward)  -    SUB     %r5,%r24,%r28                ; l -= tl; -    SUB     %r4,%r31,%r24                ; h -= th; -    SHRPD   %r24,%r28,32,%r4             ; h = ((h<<32)|(l>>32)); -    DEPD,Z  %r29,31,32,%r10              ; ret = q<<32 -    b      $0000001C -    DEPD,Z  %r28,31,32,%r5               ; l = l << 32  - -$D1 -    OR      %r10,%r29,%r28           ; ret |= q -$D3 -    LDD     -368(%r30),%r2   -$D0 -    LDD     -296(%r30),%r10  -    LDD     -304(%r30),%r9   -    LDD     -312(%r30),%r8   -    LDD     -320(%r30),%r7   -    LDD     -328(%r30),%r6   -    LDD     -336(%r30),%r5   -    LDD     -344(%r30),%r4   -    BVE     (%r2)    -        .EXIT -    LDD,MB  -352(%r30),%r3  - -bn_div_err_case -    MFIA    %r6      -    ADDIL   L'bn_div_words-bn_div_err_case,%r6,%r1  -    LDO     R'bn_div_words-bn_div_err_case(%r1),%r6   -    ADDIL   LT'__iob,%r27,%r1        -    LDD     RT'__iob(%r1),%r26       -    ADDIL   L'C$4-bn_div_words,%r6,%r1     -    LDO     R'C$4-bn_div_words(%r1),%r25   -    LDO     64(%r26),%r26    -    .CALL           ;in=24,25,26,29;out=28; -    B,L     fprintf,%r2     -    LDO     -48(%r30),%r29  -    LDD     -288(%r30),%r27 -    .CALL           ;in=29; -    B,L     abort,%r2       -    LDO     -48(%r30),%r29  -    LDD     -288(%r30),%r27 -    B       $D0          -    LDD     -368(%r30),%r2   -	.PROCEND	;in=24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -; Registers to hold 64-bit values to manipulate.  The "L" part -; of the register corresponds to the upper 32-bits, while the "R" -; part corresponds to the lower 32-bits -;  -; Note, that when using b6 and b7, the code must save these before -; using them because they are callee save registers  -;  -; -; Floating point registers to use to save values that -; are manipulated.  
These don't collide with ftemp1-6 and -; are all caller save registers -; -a0        .reg %fr22 -a0L       .reg %fr22L -a0R       .reg %fr22R - -a1        .reg %fr23 -a1L       .reg %fr23L -a1R       .reg %fr23R - -a2        .reg %fr24 -a2L       .reg %fr24L -a2R       .reg %fr24R - -a3        .reg %fr25 -a3L       .reg %fr25L -a3R       .reg %fr25R - -a4        .reg %fr26 -a4L       .reg %fr26L -a4R       .reg %fr26R - -a5        .reg %fr27 -a5L       .reg %fr27L -a5R       .reg %fr27R - -a6        .reg %fr28 -a6L       .reg %fr28L -a6R       .reg %fr28R - -a7        .reg %fr29 -a7L       .reg %fr29L -a7R       .reg %fr29R - -b0        .reg %fr30 -b0L       .reg %fr30L -b0R       .reg %fr30R - -b1        .reg %fr31 -b1L       .reg %fr31L -b1R       .reg %fr31R - -; -; Temporary floating point variables, these are all caller save -; registers -; -ftemp1    .reg %fr4 -ftemp2    .reg %fr5 -ftemp3    .reg %fr6 -ftemp4    .reg %fr7 - -; -; The B set of registers when used. -; - -b2        .reg %fr8 -b2L       .reg %fr8L -b2R       .reg %fr8R - -b3        .reg %fr9 -b3L       .reg %fr9L -b3R       .reg %fr9R - -b4        .reg %fr10 -b4L       .reg %fr10L -b4R       .reg %fr10R - -b5        .reg %fr11 -b5L       .reg %fr11L -b5R       .reg %fr11R - -b6        .reg %fr12 -b6L       .reg %fr12L -b6R       .reg %fr12R - -b7        .reg %fr13 -b7L       .reg %fr13L -b7R       .reg %fr13R - -c1           .reg %r21   ; only reg -temp1        .reg %r20   ; only reg -temp2        .reg %r19   ; only reg -temp3        .reg %r31   ; only reg - -m1           .reg %r28    -c2           .reg %r23    -high_one     .reg %r1 -ht           .reg %r6 -lt           .reg %r5 -m            .reg %r4 -c3           .reg %r3 - -SQR_ADD_C  .macro  A0L,A0R,C1,C2,C3 -    XMPYU   A0L,A0R,ftemp1       ; m -    FSTD    ftemp1,-24(%sp)      ; store m - -    XMPYU   A0R,A0R,ftemp2       ; lt -    FSTD    ftemp2,-16(%sp)      ; store lt - -    XMPYU   A0L,A0L,ftemp3       ; ht -    FSTD    ftemp3,-8(%sp)       ; store ht - -    LDD     -24(%sp),m           ; load m -    AND     m,high_mask,temp2    ; m & Mask -    DEPD,Z  m,30,31,temp3        ; m << 32+1 -    LDD     -16(%sp),lt          ; lt - -    LDD     -8(%sp),ht           ; ht -    EXTRD,U temp2,32,33,temp1    ; temp1 = m&Mask >> 32-1 -    ADD     temp3,lt,lt          ; lt = lt+m -    ADD,L   ht,temp1,ht          ; ht += temp1 -    ADD,DC  ht,%r0,ht            ; ht++ - -    ADD     C1,lt,C1             ; c1=c1+lt -    ADD,DC  ht,%r0,ht            ; ht++ - -    ADD     C2,ht,C2             ; c2=c2+ht -    ADD,DC  C3,%r0,C3            ; c3++ -.endm - -SQR_ADD_C2 .macro  A0L,A0R,A1L,A1R,C1,C2,C3 -    XMPYU   A0L,A1R,ftemp1          ; m1 = bl*ht -    FSTD    ftemp1,-16(%sp)         ; -    XMPYU   A0R,A1L,ftemp2          ; m = bh*lt -    FSTD    ftemp2,-8(%sp)          ; -    XMPYU   A0R,A1R,ftemp3          ; lt = bl*lt -    FSTD    ftemp3,-32(%sp) -    XMPYU   A0L,A1L,ftemp4          ; ht = bh*ht -    FSTD    ftemp4,-24(%sp)         ; - -    LDD     -8(%sp),m               ; r21 = m -    LDD     -16(%sp),m1             ; r19 = m1 -    ADD,L   m,m1,m                  ; m+m1 - -    DEPD,Z  m,31,32,temp3           ; (m+m1<<32) -    LDD     -24(%sp),ht             ; r24 = ht - -    CMPCLR,*>>= m,m1,%r0            ; if (m < m1) -    ADD,L   ht,high_one,ht          ; ht+=high_one - -    EXTRD,U m,31,32,temp1           ; m >> 32 -    LDD     -32(%sp),lt             ; lt -    ADD,L   ht,temp1,ht             ; ht+= m>>32 -    ADD     lt,temp3,lt             ; lt = lt+m1 -    ADD,DC  
ht,%r0,ht               ; ht++ - -    ADD     ht,ht,ht                ; ht=ht+ht; -    ADD,DC  C3,%r0,C3               ; add in carry (c3++) - -    ADD     lt,lt,lt                ; lt=lt+lt; -    ADD,DC  ht,%r0,ht               ; add in carry (ht++) - -    ADD     C1,lt,C1                ; c1=c1+lt -    ADD,DC,*NUV ht,%r0,ht           ; add in carry (ht++) -    LDO     1(C3),C3              ; bump c3 if overflow,nullify otherwise - -    ADD     C2,ht,C2                ; c2 = c2 + ht -    ADD,DC  C3,%r0,C3             ; add in carry (c3++) -.endm - -; -;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba8 -	.PROC -	.CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .ENTRY -	.align 64 - -    STD     %r3,0(%sp)          ; save r3 -    STD     %r4,8(%sp)          ; save r4 -    STD     %r5,16(%sp)         ; save r5 -    STD     %r6,24(%sp)         ; save r6 - -	; -	; Zero out carries -	; -	COPY     %r0,c1 -	COPY     %r0,c2 -	COPY     %r0,c3 - -	LDO      128(%sp),%sp       ; bump stack -    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L -    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32 - -	; -	; Load up all of the values we are going to use -	; -    FLDD     0(a_ptr),a0        -    FLDD     8(a_ptr),a1        -    FLDD    16(a_ptr),a2        -    FLDD    24(a_ptr),a3        -    FLDD    32(a_ptr),a4        -    FLDD    40(a_ptr),a5        -    FLDD    48(a_ptr),a6        -    FLDD    56(a_ptr),a7        - -	SQR_ADD_C a0L,a0R,c1,c2,c3 -	STD     c1,0(r_ptr)          ; r[0] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 -	STD     c2,8(r_ptr)          ; r[1] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C a1L,a1R,c3,c1,c2 -	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 -	STD     c3,16(r_ptr)            ; r[2] = c3; -	COPY    %r0,c3 - -	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 -	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 -	STD     c1,24(r_ptr)           ; r[3] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C a2L,a2R,c2,c3,c1 -	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 -	SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 -	STD     c2,32(r_ptr)          ; r[4] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 -	SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 -	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 -	STD     c3,40(r_ptr)          ; r[5] = c3; -	COPY    %r0,c3 - -	SQR_ADD_C a3L,a3R,c1,c2,c3 -	SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 -	SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 -	SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 -	STD     c1,48(r_ptr)          ; r[6] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 -	SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 -	SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 -	SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 -	STD     c2,56(r_ptr)          ; r[7] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C a4L,a4R,c3,c1,c2 -	SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 -	SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 -	SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 -	STD     c3,64(r_ptr)          ; r[8] = c3; -	COPY    %r0,c3 - -	SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 -	SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 -	SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 -	STD     c1,72(r_ptr)          ; r[9] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C a5L,a5R,c2,c3,c1 -	SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 -	SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 -	STD     c2,80(r_ptr)          ; r[10] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 -	SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 -	STD     c3,88(r_ptr)          ; r[11] = c3; -	COPY    %r0,c3 -	 -	SQR_ADD_C a6L,a6R,c1,c2,c3 -	SQR_ADD_C2 
a7L,a7R,a5L,a5R,c1,c2,c3 -	STD     c1,96(r_ptr)          ; r[12] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 -	STD     c2,104(r_ptr)         ; r[13] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C a7L,a7R,c3,c1,c2 -	STD     c3, 112(r_ptr)       ; r[14] = c3 -	STD     c1, 120(r_ptr)       ; r[15] = c1 - -    .EXIT -    LDD     -104(%sp),%r6        ; restore r6 -    LDD     -112(%sp),%r5        ; restore r5 -    LDD     -120(%sp),%r4        ; restore r4 -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3 - -	.PROCEND	 - -;----------------------------------------------------------------------------- -; -;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba4 -	.proc -	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .entry -	.align 64 -    STD     %r3,0(%sp)          ; save r3 -    STD     %r4,8(%sp)          ; save r4 -    STD     %r5,16(%sp)         ; save r5 -    STD     %r6,24(%sp)         ; save r6 - -	; -	; Zero out carries -	; -	COPY     %r0,c1 -	COPY     %r0,c2 -	COPY     %r0,c3 - -	LDO      128(%sp),%sp       ; bump stack -    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L -    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32 - -	; -	; Load up all of the values we are going to use -	; -    FLDD     0(a_ptr),a0        -    FLDD     8(a_ptr),a1        -    FLDD    16(a_ptr),a2        -    FLDD    24(a_ptr),a3        -    FLDD    32(a_ptr),a4        -    FLDD    40(a_ptr),a5        -    FLDD    48(a_ptr),a6        -    FLDD    56(a_ptr),a7        - -	SQR_ADD_C a0L,a0R,c1,c2,c3 - -	STD     c1,0(r_ptr)          ; r[0] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - -	STD     c2,8(r_ptr)          ; r[1] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C a1L,a1R,c3,c1,c2 -	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - -	STD     c3,16(r_ptr)            ; r[2] = c3; -	COPY    %r0,c3 - -	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 -	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - -	STD     c1,24(r_ptr)           ; r[3] = c1; -	COPY    %r0,c1 - -	SQR_ADD_C a2L,a2R,c2,c3,c1 -	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - -	STD     c2,32(r_ptr)           ; r[4] = c2; -	COPY    %r0,c2 - -	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 -	STD     c3,40(r_ptr)           ; r[5] = c3; -	COPY    %r0,c3 - -	SQR_ADD_C a3L,a3R,c1,c2,c3 -	STD     c1,48(r_ptr)           ; r[6] = c1; -	STD     c2,56(r_ptr)           ; r[7] = c2; - -    .EXIT -    LDD     -104(%sp),%r6        ; restore r6 -    LDD     -112(%sp),%r5        ; restore r5 -    LDD     -120(%sp),%r4        ; restore r4 -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3 - -	.PROCEND	 - - -;--------------------------------------------------------------------------- - -MUL_ADD_C  .macro  A0L,A0R,B0L,B0R,C1,C2,C3 -    XMPYU   A0L,B0R,ftemp1        ; m1 = bl*ht -    FSTD    ftemp1,-16(%sp)       ; -    XMPYU   A0R,B0L,ftemp2        ; m = bh*lt -    FSTD    ftemp2,-8(%sp)        ; -    XMPYU   A0R,B0R,ftemp3        ; lt = bl*lt -    FSTD    ftemp3,-32(%sp) -    XMPYU   A0L,B0L,ftemp4        ; ht = bh*ht -    FSTD    ftemp4,-24(%sp)       ; - -    LDD     -8(%sp),m             ; r21 = m -    LDD     -16(%sp),m1           ; r19 = m1 -    ADD,L   m,m1,m                ; m+m1 - -    DEPD,Z  m,31,32,temp3         ; (m+m1<<32) -    LDD     -24(%sp),ht           ; r24 = ht - -    CMPCLR,*>>= m,m1,%r0          ; if (m < m1) -    ADD,L   ht,high_one,ht        ; ht+=high_one - -    EXTRD,U m,31,32,temp1         ; m >> 32 -    LDD     -32(%sp),lt           ; lt -    
ADD,L   ht,temp1,ht           ; ht+= m>>32 -    ADD     lt,temp3,lt           ; lt = lt+m1 -    ADD,DC  ht,%r0,ht             ; ht++ - -    ADD     C1,lt,C1              ; c1=c1+lt -    ADD,DC  ht,%r0,ht             ; bump c3 if overflow,nullify otherwise - -    ADD     C2,ht,C2              ; c2 = c2 + ht -    ADD,DC  C3,%r0,C3             ; add in carry (c3++) -.endm - - -; -;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba8 -	.proc -	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .entry -	.align 64 - -    STD     %r3,0(%sp)          ; save r3 -    STD     %r4,8(%sp)          ; save r4 -    STD     %r5,16(%sp)         ; save r5 -    STD     %r6,24(%sp)         ; save r6 -    FSTD    %fr12,32(%sp)       ; save r6 -    FSTD    %fr13,40(%sp)       ; save r7 - -	; -	; Zero out carries -	; -	COPY     %r0,c1 -	COPY     %r0,c2 -	COPY     %r0,c3 - -	LDO      128(%sp),%sp       ; bump stack -    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32 - -	; -	; Load up all of the values we are going to use -	; -    FLDD      0(a_ptr),a0        -    FLDD      8(a_ptr),a1        -    FLDD     16(a_ptr),a2        -    FLDD     24(a_ptr),a3        -    FLDD     32(a_ptr),a4        -    FLDD     40(a_ptr),a5        -    FLDD     48(a_ptr),a6        -    FLDD     56(a_ptr),a7        - -    FLDD      0(b_ptr),b0        -    FLDD      8(b_ptr),b1        -    FLDD     16(b_ptr),b2        -    FLDD     24(b_ptr),b3        -    FLDD     32(b_ptr),b4        -    FLDD     40(b_ptr),b5        -    FLDD     48(b_ptr),b6        -    FLDD     56(b_ptr),b7        - -	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 -	STD       c1,0(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 -	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 -	STD       c2,8(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 -	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 -	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 -	STD       c3,16(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 -	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 -	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 -	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 -	STD       c1,24(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 -	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 -	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 -	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 -	MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 -	STD       c2,32(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 -	MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 -	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 -	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 -	MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 -	MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 -	STD       c3,40(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 -	MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 -	MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 -	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 -	MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 -	MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 -	MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 -	STD       c1,48(r_ptr) -	COPY      %r0,c1 -	 -	MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 -	MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 -	MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 -	MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 -	MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 -	MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 -	MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 -	MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 -	STD       c2,56(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 -	MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 -	MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 -	MUL_ADD_C 
a4L,a4R,b4L,b4R,c3,c1,c2 -	MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 -	MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 -	MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 -	STD       c3,64(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 -	MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 -	MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 -	MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 -	MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 -	MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 -	STD       c1,72(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 -	MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 -	MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 -	MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 -	MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 -	STD       c2,80(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 -	MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 -	MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 -	MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 -	STD       c3,88(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 -	MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 -	MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 -	STD       c1,96(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 -	MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 -	STD       c2,104(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 -	STD       c3,112(r_ptr) -	STD       c1,120(r_ptr) - -    .EXIT -    FLDD    -88(%sp),%fr13  -    FLDD    -96(%sp),%fr12  -    LDD     -104(%sp),%r6        ; restore r6 -    LDD     -112(%sp),%r5        ; restore r5 -    LDD     -120(%sp),%r4        ; restore r4 -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3 - -	.PROCEND	 - -;----------------------------------------------------------------------------- -; -;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba4 -	.proc -	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE -	.EXPORT	bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN -    .entry -	.align 64 - -    STD     %r3,0(%sp)          ; save r3 -    STD     %r4,8(%sp)          ; save r4 -    STD     %r5,16(%sp)         ; save r5 -    STD     %r6,24(%sp)         ; save r6 -    FSTD    %fr12,32(%sp)       ; save r6 -    FSTD    %fr13,40(%sp)       ; save r7 - -	; -	; Zero out carries -	; -	COPY     %r0,c1 -	COPY     %r0,c2 -	COPY     %r0,c3 - -	LDO      128(%sp),%sp       ; bump stack -    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32 - -	; -	; Load up all of the values we are going to use -	; -    FLDD      0(a_ptr),a0        -    FLDD      8(a_ptr),a1        -    FLDD     16(a_ptr),a2        -    FLDD     24(a_ptr),a3        - -    FLDD      0(b_ptr),b0        -    FLDD      8(b_ptr),b1        -    FLDD     16(b_ptr),b2        -    FLDD     24(b_ptr),b3        - -	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 -	STD       c1,0(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 -	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 -	STD       c2,8(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 -	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 -	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 -	STD       c3,16(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 -	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 -	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 -	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 -	STD       c1,24(r_ptr) -	COPY      %r0,c1 - -	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 -	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 -	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 -	STD       c2,32(r_ptr) -	COPY      %r0,c2 - -	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 -	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 -	STD       c3,40(r_ptr) -	COPY      %r0,c3 - -	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 -	STD       
c1,48(r_ptr) -	STD       c2,56(r_ptr) - -    .EXIT -    FLDD    -88(%sp),%fr13  -    FLDD    -96(%sp),%fr12  -    LDD     -104(%sp),%r6        ; restore r6 -    LDD     -112(%sp),%r5        ; restore r5 -    LDD     -120(%sp),%r4        ; restore r4 -    BVE     (%rp) -    LDD,MB  -128(%sp),%r3 - -	.PROCEND	 - - -	.SPACE	$TEXT$ -	.SUBSPA	$CODE$ -	.SPACE	$PRIVATE$,SORT=16 -	.IMPORT	$global$,DATA -	.SPACE	$TEXT$ -	.SUBSPA	$CODE$ -	.SUBSPA	$LIT$,ACCESS=0x2c -C$4 -	.ALIGN	8 -	.STRINGZ	"Division would overflow (%d)\n" -	.END diff --git a/app/openssl/crypto/bn/bn_mont.c b/app/openssl/crypto/bn/bn_mont.c index ee8532c7..427b5cf4 100644 --- a/app/openssl/crypto/bn/bn_mont.c +++ b/app/openssl/crypto/bn/bn_mont.c @@ -478,38 +478,32 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)  BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,  					const BIGNUM *mod, BN_CTX *ctx)  	{ +	int got_write_lock = 0;  	BN_MONT_CTX *ret;  	CRYPTO_r_lock(lock); -	ret = *pmont; -	CRYPTO_r_unlock(lock); -	if (ret) -		return ret; - -	/* We don't want to serialise globally while doing our lazy-init math in -	 * BN_MONT_CTX_set. That punishes threads that are doing independent -	 * things. Instead, punish the case where more than one thread tries to -	 * lazy-init the same 'pmont', by having each do the lazy-init math work -	 * independently and only use the one from the thread that wins the race -	 * (the losers throw away the work they've done). */ -	ret = BN_MONT_CTX_new(); -	if (!ret) -		return NULL; -	if (!BN_MONT_CTX_set(ret, mod, ctx)) +	if (!*pmont)  		{ -		BN_MONT_CTX_free(ret); -		return NULL; -		} +		CRYPTO_r_unlock(lock); +		CRYPTO_w_lock(lock); +		got_write_lock = 1; -	/* The locked compare-and-set, after the local work is done. */ -	CRYPTO_w_lock(lock); -	if (*pmont) -		{ -		BN_MONT_CTX_free(ret); -		ret = *pmont; +		if (!*pmont) +			{ +			ret = BN_MONT_CTX_new(); +			if (ret && !BN_MONT_CTX_set(ret, mod, ctx)) +				BN_MONT_CTX_free(ret); +			else +				*pmont = ret; +			}  		} +	 +	ret = *pmont; +	 +	if (got_write_lock) +		CRYPTO_w_unlock(lock);  	else -		*pmont = ret; -	CRYPTO_w_unlock(lock); +		CRYPTO_r_unlock(lock); +		  	return ret;  	} diff --git a/app/openssl/crypto/cms/cms_env.c b/app/openssl/crypto/cms/cms_env.c index add00bf9..be20b1c0 100644 --- a/app/openssl/crypto/cms/cms_env.c +++ b/app/openssl/crypto/cms/cms_env.c @@ -185,8 +185,6 @@ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms,  	if (flags & CMS_USE_KEYID)  		{  		ktri->version = 2; -		if (env->version < 2) -			env->version = 2;  		type = CMS_RECIPINFO_KEYIDENTIFIER;  		}  	else diff --git a/app/openssl/crypto/cms/cms_sd.c b/app/openssl/crypto/cms/cms_sd.c index 51dd33a1..77fbd135 100644 --- a/app/openssl/crypto/cms/cms_sd.c +++ b/app/openssl/crypto/cms/cms_sd.c @@ -158,8 +158,8 @@ static void cms_sd_set_version(CMS_SignedData *sd)  			if (sd->version < 3)  				sd->version = 3;  			} -		else if (si->version < 1) -			si->version = 1; +		else +			sd->version = 1;  		}  	if (sd->version < 1) diff --git a/app/openssl/crypto/cms/cms_smime.c b/app/openssl/crypto/cms/cms_smime.c index 1af9f3a6..8c56e3a8 100644 --- a/app/openssl/crypto/cms/cms_smime.c +++ b/app/openssl/crypto/cms/cms_smime.c @@ -611,7 +611,7 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert)  	STACK_OF(CMS_RecipientInfo) *ris;  	CMS_RecipientInfo *ri;  	int i, r; -	int debug = 0, ri_match = 0; +	int debug = 0;  	ris = CMS_get0_RecipientInfos(cms);  	if (ris)  		debug = 
cms->d.envelopedData->encryptedContentInfo->debug; @@ -620,7 +620,6 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert)  		ri = sk_CMS_RecipientInfo_value(ris, i);  		if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_TRANS)  				continue; -		ri_match = 1;  		/* If we have a cert try matching RecipientInfo  		 * otherwise try them all.  		 */ @@ -656,7 +655,7 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert)  			}  		}  	/* If no cert and not debugging always return success */ -	if (ri_match && !cert && !debug) +	if (!cert && !debug)  		{  		ERR_clear_error();  		return 1; diff --git a/app/openssl/crypto/dso/dso_dlfcn.c b/app/openssl/crypto/dso/dso_dlfcn.c index 4a56aace..5f225480 100644 --- a/app/openssl/crypto/dso/dso_dlfcn.c +++ b/app/openssl/crypto/dso/dso_dlfcn.c @@ -464,7 +464,7 @@ static int dlfcn_pathbyaddr(void *addr,char *path,int sz)  		return len;  		} -	ERR_add_error_data(2, "dlfcn_pathbyaddr(): ", dlerror()); +	ERR_add_error_data(4, "dlfcn_pathbyaddr(): ", dlerror());  #endif  	return -1;  	} diff --git a/app/openssl/crypto/ec/ec_ameth.c b/app/openssl/crypto/ec/ec_ameth.c index f715a238..0ce45240 100644 --- a/app/openssl/crypto/ec/ec_ameth.c +++ b/app/openssl/crypto/ec/ec_ameth.c @@ -352,7 +352,6 @@ static int eckey_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey)  		EC_KEY_set_enc_flags(ec_key, old_flags);  		OPENSSL_free(ep);  		ECerr(EC_F_ECKEY_PRIV_ENCODE, ERR_R_EC_LIB); -		return 0;  	}  	/* restore old encoding flags */  	EC_KEY_set_enc_flags(ec_key, old_flags); diff --git a/app/openssl/crypto/ec/ec_asn1.c b/app/openssl/crypto/ec/ec_asn1.c index e94f34e1..145807b6 100644 --- a/app/openssl/crypto/ec/ec_asn1.c +++ b/app/openssl/crypto/ec/ec_asn1.c @@ -1435,11 +1435,8 @@ int i2o_ECPublicKey(EC_KEY *a, unsigned char **out)  				*out, buf_len, NULL))  		{  		ECerr(EC_F_I2O_ECPUBLICKEY, ERR_R_EC_LIB); -		if (new_buffer) -			{ -			OPENSSL_free(*out); -			*out = NULL; -			} +		OPENSSL_free(*out); +		*out = NULL;  		return 0;  		}  	if (!new_buffer) diff --git a/app/openssl/crypto/ec/ec_lcl.h b/app/openssl/crypto/ec/ec_lcl.h index dae91483..6f714c75 100644 --- a/app/openssl/crypto/ec/ec_lcl.h +++ b/app/openssl/crypto/ec/ec_lcl.h @@ -405,7 +405,7 @@ int ec_GF2m_simple_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,  int ec_GF2m_precompute_mult(EC_GROUP *group, BN_CTX *ctx);  int ec_GF2m_have_precompute_mult(const EC_GROUP *group); -#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +#ifndef OPENSSL_EC_NISTP_64_GCC_128  /* method functions in ecp_nistp224.c */  int ec_GFp_nistp224_group_init(EC_GROUP *group);  int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *n, BN_CTX *); diff --git a/app/openssl/crypto/evp/bio_b64.c b/app/openssl/crypto/evp/bio_b64.c index 16863fe2..ac6d441a 100644 --- a/app/openssl/crypto/evp/bio_b64.c +++ b/app/openssl/crypto/evp/bio_b64.c @@ -226,7 +226,6 @@ static int b64_read(BIO *b, char *out, int outl)  		else if (ctx->start)  			{  			q=p=(unsigned char *)ctx->tmp; -			num = 0;  			for (j=0; j<i; j++)  				{  				if (*(q++) != '\n') continue; diff --git a/app/openssl/crypto/evp/e_aes.c b/app/openssl/crypto/evp/e_aes.c index ad0f7a4a..41cee42d 100644 --- a/app/openssl/crypto/evp/e_aes.c +++ b/app/openssl/crypto/evp/e_aes.c @@ -62,7 +62,7 @@  typedef struct  	{ -	union { double align; AES_KEY ks; } ks; +	AES_KEY ks;  	block128_f block;  	union {  		cbc128_f cbc; @@ -72,7 +72,7 @@ typedef struct  typedef struct  	{ -	union { double align; AES_KEY 
ks; } ks;	/* AES key schedule to use */ +	AES_KEY ks;		/* AES key schedule to use */  	int key_set;		/* Set if key initialised */  	int iv_set;		/* Set if an iv is set */  	GCM128_CONTEXT gcm; @@ -86,7 +86,7 @@ typedef struct  typedef struct  	{ -	union { double align; AES_KEY ks; } ks1, ks2;	/* AES key schedules to use */ +	AES_KEY ks1, ks2;	/* AES key schedules to use */  	XTS128_CONTEXT xts;  	void     (*stream)(const unsigned char *in,  			unsigned char *out, size_t length, @@ -96,7 +96,7 @@ typedef struct  typedef struct  	{ -	union { double align; AES_KEY ks; } ks;	/* AES key schedule to use */ +	AES_KEY ks;		/* AES key schedule to use */  	int key_set;		/* Set if key initialised */  	int iv_set;		/* Set if an iv is set */  	int tag_set;		/* Set if tag is valid */ @@ -160,7 +160,7 @@ void AES_xts_decrypt(const char *inp,char *out,size_t len,  	defined(_M_AMD64)	|| defined(_M_X64)	|| \  	defined(__INTEL__)				) -extern unsigned int OPENSSL_ia32cap_P[]; +extern unsigned int OPENSSL_ia32cap_P[2];  #ifdef VPAES_ASM  #define VPAES_CAPABLE	(OPENSSL_ia32cap_P[1]&(1<<(41-32))) @@ -310,7 +310,7 @@ static int aesni_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  		return 1;  	if (key)  		{ -		aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); +		aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);  		CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,  				(block128_f)aesni_encrypt);  		gctx->ctr = (ctr128_f)aesni_ctr32_encrypt_blocks; @@ -355,19 +355,19 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  		/* key_len is two AES keys */  		if (enc)  			{ -			aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); +			aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);  			xctx->xts.block1 = (block128_f)aesni_encrypt;  			xctx->stream = aesni_xts_encrypt;  			}  		else  			{ -			aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); +			aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);  			xctx->xts.block1 = (block128_f)aesni_decrypt;  			xctx->stream = aesni_xts_decrypt;  			}  		aesni_set_encrypt_key(key + ctx->key_len/2, -						ctx->key_len * 4, &xctx->ks2.ks); +						ctx->key_len * 4, &xctx->ks2);  		xctx->xts.block2 = (block128_f)aesni_encrypt;  		xctx->xts.key1 = &xctx->ks1; @@ -394,7 +394,7 @@ static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  		return 1;  	if (key)  		{ -		aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); +		aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);  		CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,  					&cctx->ks, (block128_f)aesni_encrypt);  		cctx->str = enc?(ccm128_f)aesni_ccm64_encrypt_blocks : @@ -482,38 +482,14 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \  	NULL,NULL,aes_##mode##_ctrl,NULL }; \  const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \  { return &aes_##keylen##_##mode; } +  #endif -#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__)) +#if defined(AES_ASM) && defined(BSAES_ASM) && (defined(__arm__) || defined(__arm))  #include "arm_arch.h"  #if __ARM_ARCH__>=7 -# if defined(BSAES_ASM) -#  define BSAES_CAPABLE	(OPENSSL_armcap_P & ARMV7_NEON) -# endif -# define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES) -# define HWAES_set_encrypt_key aes_v8_set_encrypt_key -# define HWAES_set_decrypt_key aes_v8_set_decrypt_key -# define HWAES_encrypt aes_v8_encrypt -# define HWAES_decrypt aes_v8_decrypt -# define HWAES_cbc_encrypt aes_v8_cbc_encrypt -# define HWAES_ctr32_encrypt_blocks 
aes_v8_ctr32_encrypt_blocks -#endif +#define BSAES_CAPABLE  (OPENSSL_armcap_P & ARMV7_NEON)  #endif - -#if defined(HWAES_CAPABLE) -int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits, -	AES_KEY *key); -int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits, -	AES_KEY *key); -void HWAES_encrypt(const unsigned char *in, unsigned char *out, -	const AES_KEY *key); -void HWAES_decrypt(const unsigned char *in, unsigned char *out, -	const AES_KEY *key); -void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out, -	size_t length, const AES_KEY *key, -	unsigned char *ivec, const int enc); -void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, -	size_t len, const AES_KEY *key, const unsigned char ivec[16]);  #endif  #define BLOCK_CIPHER_generic_pack(nid,keylen,flags)		\ @@ -534,23 +510,10 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  	mode = ctx->cipher->flags & EVP_CIPH_MODE;  	if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE)  	    && !enc) -#ifdef HWAES_CAPABLE -	    if (HWAES_CAPABLE) -		{ -		ret = HWAES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); -		dat->block      = (block128_f)HWAES_decrypt; -		dat->stream.cbc = NULL; -#ifdef HWAES_cbc_encrypt -		if (mode==EVP_CIPH_CBC_MODE) -		    dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt; -#endif -		} -	    else -#endif  #ifdef BSAES_CAPABLE  	    if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE)  		{ -		ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); +		ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks);  		dat->block	= (block128_f)AES_decrypt;  		dat->stream.cbc	= (cbc128_f)bsaes_cbc_encrypt;  		} @@ -559,7 +522,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  #ifdef VPAES_CAPABLE  	    if (VPAES_CAPABLE)  		{ -		ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); +		ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks);  		dat->block	= (block128_f)vpaes_decrypt;  		dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?  					(cbc128_f)vpaes_cbc_encrypt : @@ -568,37 +531,17 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  	    else  #endif  		{ -		ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); +		ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks);  		dat->block	= (block128_f)AES_decrypt;  		dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?  					
(cbc128_f)AES_cbc_encrypt :  					NULL;  		}  	else -#ifdef HWAES_CAPABLE -	    if (HWAES_CAPABLE) -		{ -		ret = HWAES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); -		dat->block      = (block128_f)HWAES_encrypt; -		dat->stream.cbc = NULL; -#ifdef HWAES_cbc_encrypt -		if (mode==EVP_CIPH_CBC_MODE) -		    dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt; -		else -#endif -#ifdef HWAES_ctr32_encrypt_blocks -		if (mode==EVP_CIPH_CTR_MODE) -		    dat->stream.ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks; -		else -#endif -		(void)0;	/* terminate potentially open 'else' */ -		} -	    else -#endif  #ifdef BSAES_CAPABLE  	    if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE)  		{ -		ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); +		ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks);  		dat->block	= (block128_f)AES_encrypt;  		dat->stream.ctr	= (ctr128_f)bsaes_ctr32_encrypt_blocks;  		} @@ -607,7 +550,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  #ifdef VPAES_CAPABLE  	    if (VPAES_CAPABLE)  		{ -		ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); +		ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks);  		dat->block	= (block128_f)vpaes_encrypt;  		dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?  					(cbc128_f)vpaes_cbc_encrypt : @@ -616,7 +559,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  	    else  #endif  		{ -		ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); +		ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks);  		dat->block	= (block128_f)AES_encrypt;  		dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?  					(cbc128_f)AES_cbc_encrypt : @@ -887,25 +830,10 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  		return 1;  	if (key)  		{ do { -#ifdef HWAES_CAPABLE -		if (HWAES_CAPABLE) -			{ -			HWAES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); -			CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, -					(block128_f)HWAES_encrypt); -#ifdef HWAES_ctr32_encrypt_blocks -			gctx->ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks; -#else -			gctx->ctr = NULL; -#endif -			break; -			} -		else -#endif  #ifdef BSAES_CAPABLE  		if (BSAES_CAPABLE)  			{ -			AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); +			AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks);  			CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,  					(block128_f)AES_encrypt);  			gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; @@ -916,7 +844,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  #ifdef VPAES_CAPABLE  		if (VPAES_CAPABLE)  			{ -			vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); +			vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks);  			CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,  					(block128_f)vpaes_encrypt);  			gctx->ctr = NULL; @@ -926,7 +854,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  #endif  		(void)0;	/* terminate potentially open 'else' */ -		AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); +		AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);  		CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encrypt);  #ifdef AES_CTR_ASM  		gctx->ctr = (ctr128_f)AES_ctr32_encrypt; @@ -1147,50 +1075,29 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  		xctx->stream = NULL;  #endif  		/* key_len is two AES keys */ -#ifdef HWAES_CAPABLE -		if (HWAES_CAPABLE) -			{ -			if (enc) -			    { -			    HWAES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); -			    xctx->xts.block1 = (block128_f)HWAES_encrypt; 
-			    } -			else -			    { -			    HWAES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); -			    xctx->xts.block1 = (block128_f)HWAES_decrypt; -			    } - -			HWAES_set_encrypt_key(key + ctx->key_len/2, -						    ctx->key_len * 4, &xctx->ks2.ks); -			xctx->xts.block2 = (block128_f)HWAES_encrypt; - -			xctx->xts.key1 = &xctx->ks1; -			break; -			} -		else -#endif +#if !(defined(__arm__) || defined(__arm))      /* not yet? */  #ifdef BSAES_CAPABLE  		if (BSAES_CAPABLE)  			xctx->stream = enc ? bsaes_xts_encrypt : bsaes_xts_decrypt;  		else  #endif +#endif  #ifdef VPAES_CAPABLE  		if (VPAES_CAPABLE)  		    {  		    if (enc)  			{ -			vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); +			vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);  			xctx->xts.block1 = (block128_f)vpaes_encrypt;  			}  		    else  			{ -			vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); +			vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);  			xctx->xts.block1 = (block128_f)vpaes_decrypt;  			}  		    vpaes_set_encrypt_key(key + ctx->key_len/2, -						ctx->key_len * 4, &xctx->ks2.ks); +						ctx->key_len * 4, &xctx->ks2);  		    xctx->xts.block2 = (block128_f)vpaes_encrypt;  		    xctx->xts.key1 = &xctx->ks1; @@ -1202,17 +1109,17 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  		if (enc)  			{ -			AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); +			AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);  			xctx->xts.block1 = (block128_f)AES_encrypt;  			}  		else  			{ -			AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); +			AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);  			xctx->xts.block1 = (block128_f)AES_decrypt;  			}  		AES_set_encrypt_key(key + ctx->key_len/2, -						ctx->key_len * 4, &xctx->ks2.ks); +						ctx->key_len * 4, &xctx->ks2);  		xctx->xts.block2 = (block128_f)AES_encrypt;  		xctx->xts.key1 = &xctx->ks1; @@ -1320,23 +1227,10 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  		return 1;  	if (key) do  		{ -#ifdef HWAES_CAPABLE -		if (HWAES_CAPABLE) -			{ -			HWAES_set_encrypt_key(key,ctx->key_len*8,&cctx->ks.ks); - -			CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, -					&cctx->ks, (block128_f)HWAES_encrypt); -			cctx->str = NULL; -			cctx->key_set = 1; -			break; -			} -		else -#endif  #ifdef VPAES_CAPABLE  		if (VPAES_CAPABLE)  			{ -			vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks.ks); +			vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks);  			CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,  					&cctx->ks, (block128_f)vpaes_encrypt);  			cctx->str = NULL; @@ -1344,7 +1238,7 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,  			break;  			}  #endif -		AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); +		AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);  		CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,  					&cctx->ks, (block128_f)AES_encrypt);  		cctx->str = NULL; diff --git a/app/openssl/crypto/evp/encode.c b/app/openssl/crypto/evp/encode.c index 4654bdc6..28546a84 100644 --- a/app/openssl/crypto/evp/encode.c +++ b/app/openssl/crypto/evp/encode.c @@ -324,7 +324,6 @@ int EVP_DecodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl,  				v=EVP_DecodeBlock(out,d,n);  				n=0;  				if (v < 0) { rv=0; goto end; } -				if (eof > v) { rv=-1; goto end; }  				ret+=(v-eof);  				}  			else diff --git a/app/openssl/crypto/evp/p_lib.c b/app/openssl/crypto/evp/p_lib.c index 8ee53c1d..bd1977d7 100644 --- 
a/app/openssl/crypto/evp/p_lib.c +++ b/app/openssl/crypto/evp/p_lib.c @@ -202,7 +202,7 @@ EVP_PKEY *EVP_PKEY_new(void)  EVP_PKEY *EVP_PKEY_dup(EVP_PKEY *pkey)  	{ -	CRYPTO_add(&pkey->references,1,CRYPTO_LOCK_EVP_PKEY); +	CRYPTO_add(&pkey->references, 1, CRYPTO_LOCK_EVP_PKEY);  	return pkey;  	} diff --git a/app/openssl/crypto/modes/asm/ghash-armv4.S b/app/openssl/crypto/modes/asm/ghash-armv4.S index 6c453774..d66c4cbf 100644 --- a/app/openssl/crypto/modes/asm/ghash-armv4.S +++ b/app/openssl/crypto/modes/asm/ghash-armv4.S @@ -309,213 +309,99 @@ gcm_gmult_4bit:  #if __ARM_ARCH__>=7  .fpu	neon -.global	gcm_init_neon -.type	gcm_init_neon,%function -.align	4 -gcm_init_neon: -	vld1.64		d7,[r1,:64]!	@ load H -	vmov.i8		q8,#0xe1 -	vld1.64		d6,[r1,:64] -	vshl.i64	d17,#57 -	vshr.u64	d16,#63		@ t0=0xc2....01 -	vdup.8		q9,d7[7] -	vshr.u64	d26,d6,#63 -	vshr.s8		q9,#7			@ broadcast carry bit -	vshl.i64	q3,q3,#1 -	vand		q8,q8,q9 -	vorr		d7,d26		@ H<<<=1 -	veor		q3,q3,q8		@ twisted H -	vstmia		r0,{q3} - -	bx	lr					@ bx lr -.size	gcm_init_neon,.-gcm_init_neon -  .global	gcm_gmult_neon  .type	gcm_gmult_neon,%function  .align	4  gcm_gmult_neon: -	vld1.64		d7,[r0,:64]!	@ load Xi -	vld1.64		d6,[r0,:64]! -	vmov.i64	d29,#0x0000ffffffffffff -	vldmia		r1,{d26-d27}	@ load twisted H -	vmov.i64	d30,#0x00000000ffffffff +	sub		r1,#16		@ point at H in GCM128_CTX +	vld1.64		d29,[r0,:64]!@ load Xi +	vmov.i32	d5,#0xe1		@ our irreducible polynomial +	vld1.64		d28,[r0,:64]! +	vshr.u64	d5,#32 +	vldmia		r1,{d0-d1}	@ load H +	veor		q12,q12  #ifdef __ARMEL__ -	vrev64.8	q3,q3 +	vrev64.8	q14,q14  #endif -	vmov.i64	d31,#0x000000000000ffff -	veor		d28,d26,d27		@ Karatsuba pre-processing +	veor		q13,q13 +	veor		q11,q11 +	mov		r1,#16 +	veor		q10,q10  	mov		r3,#16 -	b		.Lgmult_neon +	veor		d2,d2 +	vdup.8		d4,d28[0]	@ broadcast lowest byte +	b		.Linner_neon  .size	gcm_gmult_neon,.-gcm_gmult_neon  .global	gcm_ghash_neon  .type	gcm_ghash_neon,%function  .align	4  gcm_ghash_neon: -	vld1.64		d1,[r0,:64]!	@ load Xi -	vld1.64		d0,[r0,:64]! -	vmov.i64	d29,#0x0000ffffffffffff -	vldmia		r1,{d26-d27}	@ load twisted H -	vmov.i64	d30,#0x00000000ffffffff +	vld1.64		d21,[r0,:64]!	@ load Xi +	vmov.i32	d5,#0xe1		@ our irreducible polynomial +	vld1.64		d20,[r0,:64]! +	vshr.u64	d5,#32 +	vldmia		r0,{d0-d1}		@ load H +	veor		q12,q12 +	nop  #ifdef __ARMEL__ -	vrev64.8	q0,q0 +	vrev64.8	q10,q10  #endif -	vmov.i64	d31,#0x000000000000ffff -	veor		d28,d26,d27		@ Karatsuba pre-processing - -.Loop_neon: -	vld1.64		d7,[r2]!		@ load inp -	vld1.64		d6,[r2]! +.Louter_neon: +	vld1.64		d29,[r2]!	@ load inp +	veor		q13,q13 +	vld1.64		d28,[r2]! 
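For context on what gcm_gmult_neon and gcm_ghash_neon compute: GHASH multiplies the accumulator Xi by the hash key H in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1, with the bits of each block in reflected order; the `vmov.i32 d5,#0xe1` in the restored code loads the top byte of the reduction constant E1 || 0^120. One common bit-at-a-time way to write that multiplication in C (a sketch of the math with names of my choosing, not a transcription of either assembly version):

    #include <stdint.h>

    /* Xi and H as two big-endian 64-bit halves: hi holds bits 0..63
     * of the block in GCM's left-to-right bit numbering. */
    typedef struct { uint64_t hi, lo; } be128_t;

    static void ghash_mult_sketch(be128_t *Xi, const be128_t *H)
    {
        be128_t Z = { 0, 0 }, V = *H;

        for (int i = 0; i < 128; i++) {
            uint64_t w = (i < 64) ? Xi->hi : Xi->lo;
            if ((w >> (63 - (i & 63))) & 1) {   /* bit i of Xi set? */
                Z.hi ^= V.hi;
                Z.lo ^= V.lo;
            }
            /* V = V * x: shift right, fold the carry with E1||0^120 */
            uint64_t carry = V.lo & 1;
            V.lo = (V.lo >> 1) | (V.hi << 63);
            V.hi >>= 1;
            if (carry)
                V.hi ^= 0xe100000000000000ULL;
        }
        *Xi = Z;
    }

Both variants in this hunk compute that same product: the removed 2014 code via 64-bit Karatsuba polynomial multiplication, the restored code one byte of Xi at a time over 16 inner iterations.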
+	veor		q11,q11 +	mov		r1,#16  #ifdef __ARMEL__ -	vrev64.8	q3,q3 +	vrev64.8	q14,q14  #endif -	veor		q3,q0			@ inp^=Xi -.Lgmult_neon: -	vext.8		d16, d26, d26, #1	@ A1 -	vmull.p8	q8, d16, d6		@ F = A1*B -	vext.8		d0, d6, d6, #1	@ B1 -	vmull.p8	q0, d26, d0		@ E = A*B1 -	vext.8		d18, d26, d26, #2	@ A2 -	vmull.p8	q9, d18, d6		@ H = A2*B -	vext.8		d22, d6, d6, #2	@ B2 -	vmull.p8	q11, d26, d22		@ G = A*B2 -	vext.8		d20, d26, d26, #3	@ A3 -	veor		q8, q8, q0		@ L = E + F -	vmull.p8	q10, d20, d6		@ J = A3*B -	vext.8		d0, d6, d6, #3	@ B3 -	veor		q9, q9, q11		@ M = G + H -	vmull.p8	q0, d26, d0		@ I = A*B3 -	veor		d16, d16, d17	@ t0 = (L) (P0 + P1) << 8 -	vand		d17, d17, d29 -	vext.8		d22, d6, d6, #4	@ B4 -	veor		d18, d18, d19	@ t1 = (M) (P2 + P3) << 16 -	vand		d19, d19, d30 -	vmull.p8	q11, d26, d22		@ K = A*B4 -	veor		q10, q10, q0		@ N = I + J -	veor		d16, d16, d17 -	veor		d18, d18, d19 -	veor		d20, d20, d21	@ t2 = (N) (P4 + P5) << 24 -	vand		d21, d21, d31 -	vext.8		q8, q8, q8, #15 -	veor		d22, d22, d23	@ t3 = (K) (P6 + P7) << 32 -	vmov.i64	d23, #0 -	vext.8		q9, q9, q9, #14 -	veor		d20, d20, d21 -	vmull.p8	q0, d26, d6		@ D = A*B -	vext.8		q11, q11, q11, #12 -	vext.8		q10, q10, q10, #13 -	veor		q8, q8, q9 -	veor		q10, q10, q11 -	veor		q0, q0, q8 -	veor		q0, q0, q10 -	veor		d6,d6,d7	@ Karatsuba pre-processing -	vext.8		d16, d28, d28, #1	@ A1 -	vmull.p8	q8, d16, d6		@ F = A1*B -	vext.8		d2, d6, d6, #1	@ B1 -	vmull.p8	q1, d28, d2		@ E = A*B1 -	vext.8		d18, d28, d28, #2	@ A2 -	vmull.p8	q9, d18, d6		@ H = A2*B -	vext.8		d22, d6, d6, #2	@ B2 -	vmull.p8	q11, d28, d22		@ G = A*B2 -	vext.8		d20, d28, d28, #3	@ A3 -	veor		q8, q8, q1		@ L = E + F -	vmull.p8	q10, d20, d6		@ J = A3*B -	vext.8		d2, d6, d6, #3	@ B3 -	veor		q9, q9, q11		@ M = G + H -	vmull.p8	q1, d28, d2		@ I = A*B3 -	veor		d16, d16, d17	@ t0 = (L) (P0 + P1) << 8 -	vand		d17, d17, d29 -	vext.8		d22, d6, d6, #4	@ B4 -	veor		d18, d18, d19	@ t1 = (M) (P2 + P3) << 16 -	vand		d19, d19, d30 -	vmull.p8	q11, d28, d22		@ K = A*B4 -	veor		q10, q10, q1		@ N = I + J -	veor		d16, d16, d17 -	veor		d18, d18, d19 -	veor		d20, d20, d21	@ t2 = (N) (P4 + P5) << 24 -	vand		d21, d21, d31 -	vext.8		q8, q8, q8, #15 -	veor		d22, d22, d23	@ t3 = (K) (P6 + P7) << 32 -	vmov.i64	d23, #0 -	vext.8		q9, q9, q9, #14 -	veor		d20, d20, d21 -	vmull.p8	q1, d28, d6		@ D = A*B -	vext.8		q11, q11, q11, #12 -	vext.8		q10, q10, q10, #13 -	veor		q8, q8, q9 -	veor		q10, q10, q11 -	veor		q1, q1, q8 -	veor		q1, q1, q10 -	vext.8		d16, d27, d27, #1	@ A1 -	vmull.p8	q8, d16, d7		@ F = A1*B -	vext.8		d4, d7, d7, #1	@ B1 -	vmull.p8	q2, d27, d4		@ E = A*B1 -	vext.8		d18, d27, d27, #2	@ A2 -	vmull.p8	q9, d18, d7		@ H = A2*B -	vext.8		d22, d7, d7, #2	@ B2 -	vmull.p8	q11, d27, d22		@ G = A*B2 -	vext.8		d20, d27, d27, #3	@ A3 -	veor		q8, q8, q2		@ L = E + F -	vmull.p8	q10, d20, d7		@ J = A3*B -	vext.8		d4, d7, d7, #3	@ B3 -	veor		q9, q9, q11		@ M = G + H -	vmull.p8	q2, d27, d4		@ I = A*B3 -	veor		d16, d16, d17	@ t0 = (L) (P0 + P1) << 8 -	vand		d17, d17, d29 -	vext.8		d22, d7, d7, #4	@ B4 -	veor		d18, d18, d19	@ t1 = (M) (P2 + P3) << 16 -	vand		d19, d19, d30 -	vmull.p8	q11, d27, d22		@ K = A*B4 -	veor		q10, q10, q2		@ N = I + J -	veor		d16, d16, d17 -	veor		d18, d18, d19 -	veor		d20, d20, d21	@ t2 = (N) (P4 + P5) << 24 -	vand		d21, d21, d31 -	vext.8		q8, q8, q8, #15 -	veor		d22, d22, d23	@ t3 = (K) (P6 + P7) << 32 -	vmov.i64	d23, #0 -	vext.8		q9, q9, q9, #14 -	veor		d20, d20, d21 -	vmull.p8	q2, d27, d7		@ D = A*B -	vext.8		q11, q11, q11, #12 -	vext.8		q10, q10, q10, #13 -	veor		q8, q8, q9 -	veor		q10, q10, 
q11 -	veor		q2, q2, q8 -	veor		q2, q2, q10 -	veor		q1,q1,q0		@ Karatsuba post-processing -	veor		q1,q1,q2 -	veor		d1,d1,d2 -	veor		d4,d4,d3	@ Xh|Xl - 256-bit result +	veor		d2,d2 +	veor		q14,q10			@ inp^=Xi +	veor		q10,q10 +	vdup.8		d4,d28[0]	@ broadcast lowest byte +.Linner_neon: +	subs		r1,r1,#1 +	vmull.p8	q9,d1,d4		@ H.lo·Xi[i] +	vmull.p8	q8,d0,d4		@ H.hi·Xi[i] +	vext.8		q14,q12,#1		@ IN>>=8 + +	veor		q10,q13		@ modulo-scheduled part +	vshl.i64	d22,#48 +	vdup.8		d4,d28[0]	@ broadcast lowest byte +	veor		d3,d18,d20 + +	veor		d21,d22 +	vuzp.8		q9,q8 +	vsli.8		d2,d3,#1		@ compose the "carry" byte +	vext.8		q10,q12,#1		@ Z>>=8 -	@ equivalent of reduction_avx from ghash-x86_64.pl -	vshl.i64	q9,q0,#57		@ 1st phase -	vshl.i64	q10,q0,#62 -	veor		q10,q10,q9		@ -	vshl.i64	q9,q0,#63 -	veor		q10, q10, q9		@ - 	veor		d1,d1,d20	@ -	veor		d4,d4,d21 +	vmull.p8	q11,d2,d5		@ "carry"·0xe1 +	vshr.u8		d2,d3,#7		@ save Z's bottom bit +	vext.8		q13,q9,q12,#1	@ Qlo>>=8 +	veor		q10,q8 +	bne		.Linner_neon -	vshr.u64	q10,q0,#1		@ 2nd phase -	veor		q2,q2,q0 -	veor		q0,q0,q10		@ -	vshr.u64	q10,q10,#6 -	vshr.u64	q0,q0,#1		@ -	veor		q0,q0,q2		@ -	veor		q0,q0,q10		@ +	veor		q10,q13		@ modulo-scheduled artefact +	vshl.i64	d22,#48 +	veor		d21,d22 +	@ finalization, normalize Z:Zo +	vand		d2,d5		@ suffices to mask the bit +	vshr.u64	d3,d20,#63 +	vshl.i64	q10,#1  	subs		r3,#16 -	bne		.Loop_neon +	vorr		q10,q1		@ Z=Z:Zo<<1 +	bne		.Louter_neon  #ifdef __ARMEL__ -	vrev64.8	q0,q0 +	vrev64.8	q10,q10  #endif  	sub		r0,#16	 -	vst1.64		d1,[r0,:64]!	@ write out Xi -	vst1.64		d0,[r0,:64] +	vst1.64		d21,[r0,:64]!	@ write out Xi +	vst1.64		d20,[r0,:64] -	bx	lr					@ bx lr +	.word	0xe12fff1e  .size	gcm_ghash_neon,.-gcm_ghash_neon  #endif  .asciz  "GHASH for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>" diff --git a/app/openssl/crypto/modes/asm/ghash-armv4.pl b/app/openssl/crypto/modes/asm/ghash-armv4.pl index b79ecbcc..e46f8e34 100644 --- a/app/openssl/crypto/modes/asm/ghash-armv4.pl +++ b/app/openssl/crypto/modes/asm/ghash-armv4.pl @@ -35,20 +35,6 @@  # Add NEON implementation featuring polynomial multiplication, i.e. no  # lookup tables involved. On Cortex A8 it was measured to process one  # byte in 15 cycles or 55% faster than integer-only code. -# -# April 2014 -# -# Switch to multiplication algorithm suggested in paper referred -# below and combine it with reduction algorithm from x86 module. -# Performance improvement over previous version varies from 65% on -# Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8 -# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 - -# in 9.33. -# -# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software -# Polynomial Multiplication on ARM Processors using the NEON Engine. -#  -# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf  # ====================================================================  # Note about "528B" variant. 
In ARM case it makes lesser sense to @@ -317,160 +303,117 @@ $code.=<<___;  .size	gcm_gmult_4bit,.-gcm_gmult_4bit  ___  { -my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3)); -my ($t0,$t1,$t2,$t3)=map("q$_",(8..12)); -my ($Hlo,$Hhi,$Hhl,$k48,$k32,$k16)=map("d$_",(26..31)); +my $cnt=$Htbl;	# $Htbl is used once in the very beginning -sub clmul64x64 { -my ($r,$a,$b)=@_; -$code.=<<___; -	vext.8		$t0#lo, $a, $a, #1	@ A1 -	vmull.p8	$t0, $t0#lo, $b		@ F = A1*B -	vext.8		$r#lo, $b, $b, #1	@ B1 -	vmull.p8	$r, $a, $r#lo		@ E = A*B1 -	vext.8		$t1#lo, $a, $a, #2	@ A2 -	vmull.p8	$t1, $t1#lo, $b		@ H = A2*B -	vext.8		$t3#lo, $b, $b, #2	@ B2 -	vmull.p8	$t3, $a, $t3#lo		@ G = A*B2 -	vext.8		$t2#lo, $a, $a, #3	@ A3 -	veor		$t0, $t0, $r		@ L = E + F -	vmull.p8	$t2, $t2#lo, $b		@ J = A3*B -	vext.8		$r#lo, $b, $b, #3	@ B3 -	veor		$t1, $t1, $t3		@ M = G + H -	vmull.p8	$r, $a, $r#lo		@ I = A*B3 -	veor		$t0#lo, $t0#lo, $t0#hi	@ t0 = (L) (P0 + P1) << 8 -	vand		$t0#hi, $t0#hi, $k48 -	vext.8		$t3#lo, $b, $b, #4	@ B4 -	veor		$t1#lo, $t1#lo, $t1#hi	@ t1 = (M) (P2 + P3) << 16 -	vand		$t1#hi, $t1#hi, $k32 -	vmull.p8	$t3, $a, $t3#lo		@ K = A*B4 -	veor		$t2, $t2, $r		@ N = I + J -	veor		$t0#lo, $t0#lo, $t0#hi -	veor		$t1#lo, $t1#lo, $t1#hi -	veor		$t2#lo, $t2#lo, $t2#hi	@ t2 = (N) (P4 + P5) << 24 -	vand		$t2#hi, $t2#hi, $k16 -	vext.8		$t0, $t0, $t0, #15 -	veor		$t3#lo, $t3#lo, $t3#hi	@ t3 = (K) (P6 + P7) << 32 -	vmov.i64	$t3#hi, #0 -	vext.8		$t1, $t1, $t1, #14 -	veor		$t2#lo, $t2#lo, $t2#hi -	vmull.p8	$r, $a, $b		@ D = A*B -	vext.8		$t3, $t3, $t3, #12 -	vext.8		$t2, $t2, $t2, #13 -	veor		$t0, $t0, $t1 -	veor		$t2, $t2, $t3 -	veor		$r, $r, $t0 -	veor		$r, $r, $t2 -___ -} +my ($Hhi, $Hlo, $Zo, $T, $xi, $mod) = map("d$_",(0..7)); +my ($Qhi, $Qlo, $Z,  $R, $zero, $Qpost, $IN) = map("q$_",(8..15)); + +# Z:Zo keeps 128-bit result shifted by 1 to the right, with bottom bit +# in Zo. Or should I say "top bit", because GHASH is specified in +# reverse bit order? Otherwise straightforward 128-bt H by one input +# byte multiplication and modulo-reduction, times 16. + +sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     } +sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   } +sub Q()     { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; }  $code.=<<___;  #if __ARM_ARCH__>=7  .fpu	neon -.global	gcm_init_neon -.type	gcm_init_neon,%function -.align	4 -gcm_init_neon: -	vld1.64		$IN#hi,[r1,:64]!	@ load H -	vmov.i8		$t0,#0xe1 -	vld1.64		$IN#lo,[r1,:64] -	vshl.i64	$t0#hi,#57 -	vshr.u64	$t0#lo,#63		@ t0=0xc2....01 -	vdup.8		$t1,$IN#hi[7] -	vshr.u64	$Hlo,$IN#lo,#63 -	vshr.s8		$t1,#7			@ broadcast carry bit -	vshl.i64	$IN,$IN,#1 -	vand		$t0,$t0,$t1 -	vorr		$IN#hi,$Hlo		@ H<<<=1 -	veor		$IN,$IN,$t0		@ twisted H -	vstmia		r0,{$IN} - -	ret					@ bx lr -.size	gcm_init_neon,.-gcm_init_neon -  .global	gcm_gmult_neon  .type	gcm_gmult_neon,%function  .align	4  gcm_gmult_neon: -	vld1.64		$IN#hi,[$Xi,:64]!	@ load Xi -	vld1.64		$IN#lo,[$Xi,:64]! -	vmov.i64	$k48,#0x0000ffffffffffff -	vldmia		$Htbl,{$Hlo-$Hhi}	@ load twisted H -	vmov.i64	$k32,#0x00000000ffffffff +	sub		$Htbl,#16		@ point at H in GCM128_CTX +	vld1.64		`&Dhi("$IN")`,[$Xi,:64]!@ load Xi +	vmov.i32	$mod,#0xe1		@ our irreducible polynomial +	vld1.64		`&Dlo("$IN")`,[$Xi,:64]! 
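A note on vmull.p8, which both versions of this Perl source lean on: it performs eight independent 8x8 -> 16-bit carry-less (polynomial) multiplications, one per byte lane. Per lane it computes the equivalent of this C sketch (hypothetical helper name, for illustration only):

    #include <stdint.h>

    /* carry-less multiply of two GF(2) polynomials of degree < 8 */
    static uint16_t clmul8_sketch(uint8_t a, uint8_t b)
    {
        uint16_t r = 0;
        for (int i = 0; i < 8; i++)
            if ((b >> i) & 1)             /* coefficient x^i of b set? */
                r ^= (uint16_t)(a << i);  /* add a * x^i */
        return r;
    }

The restored gcm_gmult_neon below broadcasts one byte of Xi ($xi) and multiplies it against H per .Linner_neon iteration, folding the 16 partial products together with byte shifts and the 0xe1 reduction.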
+	vshr.u64	$mod,#32 +	vldmia		$Htbl,{$Hhi-$Hlo}	@ load H +	veor		$zero,$zero  #ifdef __ARMEL__  	vrev64.8	$IN,$IN  #endif -	vmov.i64	$k16,#0x000000000000ffff -	veor		$Hhl,$Hlo,$Hhi		@ Karatsuba pre-processing +	veor		$Qpost,$Qpost +	veor		$R,$R +	mov		$cnt,#16 +	veor		$Z,$Z  	mov		$len,#16 -	b		.Lgmult_neon +	veor		$Zo,$Zo +	vdup.8		$xi,`&Dlo("$IN")`[0]	@ broadcast lowest byte +	b		.Linner_neon  .size	gcm_gmult_neon,.-gcm_gmult_neon  .global	gcm_ghash_neon  .type	gcm_ghash_neon,%function  .align	4  gcm_ghash_neon: -	vld1.64		$Xl#hi,[$Xi,:64]!	@ load Xi -	vld1.64		$Xl#lo,[$Xi,:64]! -	vmov.i64	$k48,#0x0000ffffffffffff -	vldmia		$Htbl,{$Hlo-$Hhi}	@ load twisted H -	vmov.i64	$k32,#0x00000000ffffffff +	vld1.64		`&Dhi("$Z")`,[$Xi,:64]!	@ load Xi +	vmov.i32	$mod,#0xe1		@ our irreducible polynomial +	vld1.64		`&Dlo("$Z")`,[$Xi,:64]! +	vshr.u64	$mod,#32 +	vldmia		$Xi,{$Hhi-$Hlo}		@ load H +	veor		$zero,$zero +	nop  #ifdef __ARMEL__ -	vrev64.8	$Xl,$Xl +	vrev64.8	$Z,$Z  #endif -	vmov.i64	$k16,#0x000000000000ffff -	veor		$Hhl,$Hlo,$Hhi		@ Karatsuba pre-processing - -.Loop_neon: -	vld1.64		$IN#hi,[$inp]!		@ load inp -	vld1.64		$IN#lo,[$inp]! +.Louter_neon: +	vld1.64		`&Dhi($IN)`,[$inp]!	@ load inp +	veor		$Qpost,$Qpost +	vld1.64		`&Dlo($IN)`,[$inp]! +	veor		$R,$R +	mov		$cnt,#16  #ifdef __ARMEL__  	vrev64.8	$IN,$IN  #endif -	veor		$IN,$Xl			@ inp^=Xi -.Lgmult_neon: -___ -	&clmul64x64	($Xl,$Hlo,"$IN#lo");	# H.lo·Xi.lo -$code.=<<___; -	veor		$IN#lo,$IN#lo,$IN#hi	@ Karatsuba pre-processing -___ -	&clmul64x64	($Xm,$Hhl,"$IN#lo");	# (H.lo+H.hi)·(Xi.lo+Xi.hi) -	&clmul64x64	($Xh,$Hhi,"$IN#hi");	# H.hi·Xi.hi -$code.=<<___; -	veor		$Xm,$Xm,$Xl		@ Karatsuba post-processing -	veor		$Xm,$Xm,$Xh -	veor		$Xl#hi,$Xl#hi,$Xm#lo -	veor		$Xh#lo,$Xh#lo,$Xm#hi	@ Xh|Xl - 256-bit result - -	@ equivalent of reduction_avx from ghash-x86_64.pl -	vshl.i64	$t1,$Xl,#57		@ 1st phase -	vshl.i64	$t2,$Xl,#62 -	veor		$t2,$t2,$t1		@ -	vshl.i64	$t1,$Xl,#63 -	veor		$t2, $t2, $t1		@ - 	veor		$Xl#hi,$Xl#hi,$t2#lo	@ -	veor		$Xh#lo,$Xh#lo,$t2#hi - -	vshr.u64	$t2,$Xl,#1		@ 2nd phase -	veor		$Xh,$Xh,$Xl -	veor		$Xl,$Xl,$t2		@ -	vshr.u64	$t2,$t2,#6 -	vshr.u64	$Xl,$Xl,#1		@ -	veor		$Xl,$Xl,$Xh		@ -	veor		$Xl,$Xl,$t2		@ - +	veor		$Zo,$Zo +	veor		$IN,$Z			@ inp^=Xi +	veor		$Z,$Z +	vdup.8		$xi,`&Dlo("$IN")`[0]	@ broadcast lowest byte +.Linner_neon: +	subs		$cnt,$cnt,#1 +	vmull.p8	$Qlo,$Hlo,$xi		@ H.lo·Xi[i] +	vmull.p8	$Qhi,$Hhi,$xi		@ H.hi·Xi[i] +	vext.8		$IN,$zero,#1		@ IN>>=8 + +	veor		$Z,$Qpost		@ modulo-scheduled part +	vshl.i64	`&Dlo("$R")`,#48 +	vdup.8		$xi,`&Dlo("$IN")`[0]	@ broadcast lowest byte +	veor		$T,`&Dlo("$Qlo")`,`&Dlo("$Z")` + +	veor		`&Dhi("$Z")`,`&Dlo("$R")` +	vuzp.8		$Qlo,$Qhi +	vsli.8		$Zo,$T,#1		@ compose the "carry" byte +	vext.8		$Z,$zero,#1		@ Z>>=8 + +	vmull.p8	$R,$Zo,$mod		@ "carry"·0xe1 +	vshr.u8		$Zo,$T,#7		@ save Z's bottom bit +	vext.8		$Qpost,$Qlo,$zero,#1	@ Qlo>>=8 +	veor		$Z,$Qhi +	bne		.Linner_neon + +	veor		$Z,$Qpost		@ modulo-scheduled artefact +	vshl.i64	`&Dlo("$R")`,#48 +	veor		`&Dhi("$Z")`,`&Dlo("$R")` + +	@ finalization, normalize Z:Zo +	vand		$Zo,$mod		@ suffices to mask the bit +	vshr.u64	`&Dhi(&Q("$Zo"))`,`&Dlo("$Z")`,#63 +	vshl.i64	$Z,#1  	subs		$len,#16 -	bne		.Loop_neon +	vorr		$Z,`&Q("$Zo")`		@ Z=Z:Zo<<1 +	bne		.Louter_neon  #ifdef __ARMEL__ -	vrev64.8	$Xl,$Xl +	vrev64.8	$Z,$Z  #endif  	sub		$Xi,#16	 -	vst1.64		$Xl#hi,[$Xi,:64]!	@ write out Xi -	vst1.64		$Xl#lo,[$Xi,:64] +	vst1.64		`&Dhi("$Z")`,[$Xi,:64]!	
@ write out Xi +	vst1.64		`&Dlo("$Z")`,[$Xi,:64] -	ret					@ bx lr +	bx	lr  .size	gcm_ghash_neon,.-gcm_ghash_neon  #endif  ___ @@ -480,13 +423,7 @@ $code.=<<___;  .align  2  ___ -foreach (split("\n",$code)) { -	s/\`([^\`]*)\`/eval $1/geo; - -	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or -	s/\bret\b/bx	lr/go		or -	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;    # make it possible to compile with -march=armv4 - -	print $_,"\n"; -} +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4 +print $code;  close STDOUT; # enforce flush diff --git a/app/openssl/crypto/modes/asm/ghashv8-armx-64.S b/app/openssl/crypto/modes/asm/ghashv8-armx-64.S deleted file mode 100644 index b77b6c40..00000000 --- a/app/openssl/crypto/modes/asm/ghashv8-armx-64.S +++ /dev/null @@ -1,115 +0,0 @@ -#include "arm_arch.h" - -.text -.arch	armv8-a+crypto -.global	gcm_init_v8 -.type	gcm_init_v8,%function -.align	4 -gcm_init_v8: -	ld1		{v17.2d},[x1]		//load H -	movi		v16.16b,#0xe1 -	ext		v3.16b,v17.16b,v17.16b,#8 -	shl	v16.2d,v16.2d,#57 -	ushr	v18.2d,v16.2d,#63 -	ext		v16.16b,v18.16b,v16.16b,#8		//t0=0xc2....01 -	dup		v17.4s,v17.s[1] -	ushr	v19.2d,v3.2d,#63 -	sshr	v17.4s,v17.4s,#31		//broadcast carry bit -	and		v19.16b,v19.16b,v16.16b -	shl	v3.2d,v3.2d,#1 -	ext		v19.16b,v19.16b,v19.16b,#8 -	and		v16.16b,v16.16b,v17.16b -	orr		v3.16b,v3.16b,v19.16b		//H<<<=1 -	eor		v3.16b,v3.16b,v16.16b		//twisted H -	st1		{v3.2d},[x0] - -	ret -.size	gcm_init_v8,.-gcm_init_v8 - -.global	gcm_gmult_v8 -.type	gcm_gmult_v8,%function -.align	4 -gcm_gmult_v8: -	ld1		{v17.2d},[x0]		//load Xi -	movi		v19.16b,#0xe1 -	ld1		{v20.2d},[x1]		//load twisted H -	shl	v19.2d,v19.2d,#57 -#ifndef __ARMEB__ -	rev64	v17.16b,v17.16b -#endif -	ext		v21.16b,v20.16b,v20.16b,#8 -	mov		x3,#0 -	ext		v3.16b,v17.16b,v17.16b,#8 -	mov		x12,#0 -	eor		v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing -	mov		x2,x0 -	b		.Lgmult_v8 -.size	gcm_gmult_v8,.-gcm_gmult_v8 - -.global	gcm_ghash_v8 -.type	gcm_ghash_v8,%function -.align	4 -gcm_ghash_v8: -	ld1		{v0.2d},[x0]		//load [rotated] Xi -	subs		x3,x3,#16 -	movi		v19.16b,#0xe1 -	mov		x12,#16 -	ld1		{v20.2d},[x1]		//load twisted H -	csel	x12,xzr,x12,eq -	ext		v0.16b,v0.16b,v0.16b,#8 -	shl	v19.2d,v19.2d,#57 -	ld1		{v17.2d},[x2],x12	//load [rotated] inp -	ext		v21.16b,v20.16b,v20.16b,#8 -#ifndef __ARMEB__ -	rev64	v0.16b,v0.16b -	rev64	v17.16b,v17.16b -#endif -	eor		v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing -	ext		v3.16b,v17.16b,v17.16b,#8 -	b		.Loop_v8 - -.align	4 -.Loop_v8: -	ext		v18.16b,v0.16b,v0.16b,#8 -	eor		v3.16b,v3.16b,v0.16b		//inp^=Xi -	eor		v17.16b,v17.16b,v18.16b		//v17.16b is rotated inp^Xi - -.Lgmult_v8: -	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo -	eor		v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing -	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi -	subs		x3,x3,#16 -	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi) -	csel	x12,xzr,x12,eq - -	ext		v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing -	eor		v18.16b,v0.16b,v2.16b -	eor		v1.16b,v1.16b,v17.16b -	 ld1	{v17.2d},[x2],x12	//load [rotated] inp -	eor		v1.16b,v1.16b,v18.16b -	pmull	v18.1q,v0.1d,v19.1d		//1st phase - -	ins	v2.d[0],v1.d[1] -	ins	v1.d[1],v0.d[0] -#ifndef __ARMEB__ -	 rev64	v17.16b,v17.16b -#endif -	eor		v0.16b,v1.16b,v18.16b -	 ext		v3.16b,v17.16b,v17.16b,#8 - -	ext		v18.16b,v0.16b,v0.16b,#8		//2nd phase -	pmull	v0.1q,v0.1d,v19.1d -	eor		v18.16b,v18.16b,v2.16b -	eor		v0.16b,v0.16b,v18.16b -	b.hs		.Loop_v8 - -#ifndef __ARMEB__ -	rev64	v0.16b,v0.16b -#endif -	ext	
	v0.16b,v0.16b,v0.16b,#8 -	st1		{v0.2d},[x0]		//write out Xi - -	ret -.size	gcm_ghash_v8,.-gcm_ghash_v8 -.asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>" -.align  2 diff --git a/app/openssl/crypto/modes/asm/ghashv8-armx.S b/app/openssl/crypto/modes/asm/ghashv8-armx.S deleted file mode 100644 index f388c54e..00000000 --- a/app/openssl/crypto/modes/asm/ghashv8-armx.S +++ /dev/null @@ -1,116 +0,0 @@ -#include "arm_arch.h" - -.text -.fpu	neon -.code	32 -.global	gcm_init_v8 -.type	gcm_init_v8,%function -.align	4 -gcm_init_v8: -	vld1.64		{q9},[r1]		@ load H -	vmov.i8		q8,#0xe1 -	vext.8		q3,q9,q9,#8 -	vshl.i64	q8,q8,#57 -	vshr.u64	q10,q8,#63 -	vext.8		q8,q10,q8,#8		@ t0=0xc2....01 -	vdup.32	q9,d18[1] -	vshr.u64	q11,q3,#63 -	vshr.s32	q9,q9,#31		@ broadcast carry bit -	vand		q11,q11,q8 -	vshl.i64	q3,q3,#1 -	vext.8		q11,q11,q11,#8 -	vand		q8,q8,q9 -	vorr		q3,q3,q11		@ H<<<=1 -	veor		q3,q3,q8		@ twisted H -	vst1.64		{q3},[r0] - -	bx	lr -.size	gcm_init_v8,.-gcm_init_v8 - -.global	gcm_gmult_v8 -.type	gcm_gmult_v8,%function -.align	4 -gcm_gmult_v8: -	vld1.64		{q9},[r0]		@ load Xi -	vmov.i8		q11,#0xe1 -	vld1.64		{q12},[r1]		@ load twisted H -	vshl.u64	q11,q11,#57 -#ifndef __ARMEB__ -	vrev64.8	q9,q9 -#endif -	vext.8		q13,q12,q12,#8 -	mov		r3,#0 -	vext.8		q3,q9,q9,#8 -	mov		r12,#0 -	veor		q13,q13,q12		@ Karatsuba pre-processing -	mov		r2,r0 -	b		.Lgmult_v8 -.size	gcm_gmult_v8,.-gcm_gmult_v8 - -.global	gcm_ghash_v8 -.type	gcm_ghash_v8,%function -.align	4 -gcm_ghash_v8: -	vld1.64		{q0},[r0]		@ load [rotated] Xi -	subs		r3,r3,#16 -	vmov.i8		q11,#0xe1 -	mov		r12,#16 -	vld1.64		{q12},[r1]		@ load twisted H -	moveq	r12,#0 -	vext.8		q0,q0,q0,#8 -	vshl.u64	q11,q11,#57 -	vld1.64		{q9},[r2],r12	@ load [rotated] inp -	vext.8		q13,q12,q12,#8 -#ifndef __ARMEB__ -	vrev64.8	q0,q0 -	vrev64.8	q9,q9 -#endif -	veor		q13,q13,q12		@ Karatsuba pre-processing -	vext.8		q3,q9,q9,#8 -	b		.Loop_v8 - -.align	4 -.Loop_v8: -	vext.8		q10,q0,q0,#8 -	veor		q3,q3,q0		@ inp^=Xi -	veor		q9,q9,q10		@ q9 is rotated inp^Xi - -.Lgmult_v8: -	.byte	0x86,0x0e,0xa8,0xf2	@ pmull q0,q12,q3		@ H.lo·Xi.lo -	veor		q9,q9,q3		@ Karatsuba pre-processing -	.byte	0x87,0x4e,0xa9,0xf2	@ pmull2 q2,q12,q3		@ H.hi·Xi.hi -	subs		r3,r3,#16 -	.byte	0xa2,0x2e,0xaa,0xf2	@ pmull q1,q13,q9		@ (H.lo+H.hi)·(Xi.lo+Xi.hi) -	moveq	r12,#0 - -	vext.8		q9,q0,q2,#8		@ Karatsuba post-processing -	veor		q10,q0,q2 -	veor		q1,q1,q9 -	 vld1.64	{q9},[r2],r12	@ load [rotated] inp -	veor		q1,q1,q10 -	.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase - -	vmov		d4,d3		@ Xh|Xm - 256-bit result -	vmov		d3,d0		@ Xm is rotated Xl -#ifndef __ARMEB__ -	 vrev64.8	q9,q9 -#endif -	veor		q0,q1,q10 -	 vext.8		q3,q9,q9,#8 - -	vext.8		q10,q0,q0,#8		@ 2nd phase -	.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11 -	veor		q10,q10,q2 -	veor		q0,q0,q10 -	bhs		.Loop_v8 - -#ifndef __ARMEB__ -	vrev64.8	q0,q0 -#endif -	vext.8		q0,q0,q0,#8 -	vst1.64		{q0},[r0]		@ write out Xi - -	bx	lr -.size	gcm_ghash_v8,.-gcm_ghash_v8 -.asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>" -.align  2 diff --git a/app/openssl/crypto/modes/asm/ghashv8-armx.pl b/app/openssl/crypto/modes/asm/ghashv8-armx.pl deleted file mode 100644 index 69e863e7..00000000 --- a/app/openssl/crypto/modes/asm/ghashv8-armx.pl +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. 
The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication. -# -# June 2014 -# -# Initial version was developed in tight cooperation with Ard -# Biesheuvel <ard.biesheuvel@linaro.org> from bits-n-pieces from -# other assembly modules. Just like aesv8-armx.pl this module -# supports both AArch32 and AArch64 execution modes. -# -# Current performance in cycles per processed byte: -# -#		PMULL[2]	32-bit NEON(*) -# Apple A7	1.76		5.62 -# Cortex-A5x	n/a		n/a -# -# (*)	presented for reference/comparison purposes; - -$flavour = shift; -open STDOUT,">".shift; - -$Xi="x0";	# argument block -$Htbl="x1"; -$inp="x2"; -$len="x3"; - -$inc="x12"; - -{ -my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3)); -my ($t0,$t1,$t2,$t3,$H,$Hhl)=map("q$_",(8..14)); - -$code=<<___; -#include "arm_arch.h" - -.text -___ -$code.=".arch	armv8-a+crypto\n"	if ($flavour =~ /64/); -$code.=".fpu	neon\n.code	32\n"	if ($flavour !~ /64/); - -$code.=<<___; -.global	gcm_init_v8 -.type	gcm_init_v8,%function -.align	4 -gcm_init_v8: -	vld1.64		{$t1},[x1]		@ load H -	vmov.i8		$t0,#0xe1 -	vext.8		$IN,$t1,$t1,#8 -	vshl.i64	$t0,$t0,#57 -	vshr.u64	$t2,$t0,#63 -	vext.8		$t0,$t2,$t0,#8		@ t0=0xc2....01 -	vdup.32		$t1,${t1}[1] -	vshr.u64	$t3,$IN,#63 -	vshr.s32	$t1,$t1,#31		@ broadcast carry bit -	vand		$t3,$t3,$t0 -	vshl.i64	$IN,$IN,#1 -	vext.8		$t3,$t3,$t3,#8 -	vand		$t0,$t0,$t1 -	vorr		$IN,$IN,$t3		@ H<<<=1 -	veor		$IN,$IN,$t0		@ twisted H -	vst1.64		{$IN},[x0] - -	ret -.size	gcm_init_v8,.-gcm_init_v8 - -.global	gcm_gmult_v8 -.type	gcm_gmult_v8,%function -.align	4 -gcm_gmult_v8: -	vld1.64		{$t1},[$Xi]		@ load Xi -	vmov.i8		$t3,#0xe1 -	vld1.64		{$H},[$Htbl]		@ load twisted H -	vshl.u64	$t3,$t3,#57 -#ifndef __ARMEB__ -	vrev64.8	$t1,$t1 -#endif -	vext.8		$Hhl,$H,$H,#8 -	mov		$len,#0 -	vext.8		$IN,$t1,$t1,#8 -	mov		$inc,#0 -	veor		$Hhl,$Hhl,$H		@ Karatsuba pre-processing -	mov		$inp,$Xi -	b		.Lgmult_v8 -.size	gcm_gmult_v8,.-gcm_gmult_v8 - -.global	gcm_ghash_v8 -.type	gcm_ghash_v8,%function -.align	4 -gcm_ghash_v8: -	vld1.64		{$Xl},[$Xi]		@ load [rotated] Xi -	subs		$len,$len,#16 -	vmov.i8		$t3,#0xe1 -	mov		$inc,#16 -	vld1.64		{$H},[$Htbl]		@ load twisted H -	cclr		$inc,eq -	vext.8		$Xl,$Xl,$Xl,#8 -	vshl.u64	$t3,$t3,#57 -	vld1.64		{$t1},[$inp],$inc	@ load [rotated] inp -	vext.8		$Hhl,$H,$H,#8 -#ifndef __ARMEB__ -	vrev64.8	$Xl,$Xl -	vrev64.8	$t1,$t1 -#endif -	veor		$Hhl,$Hhl,$H		@ Karatsuba pre-processing -	vext.8		$IN,$t1,$t1,#8 -	b		.Loop_v8 - -.align	4 -.Loop_v8: -	vext.8		$t2,$Xl,$Xl,#8 -	veor		$IN,$IN,$Xl		@ inp^=Xi -	veor		$t1,$t1,$t2		@ $t1 is rotated inp^Xi - -.Lgmult_v8: -	vpmull.p64	$Xl,$H,$IN		@ H.lo·Xi.lo -	veor		$t1,$t1,$IN		@ Karatsuba pre-processing -	vpmull2.p64	$Xh,$H,$IN		@ H.hi·Xi.hi -	subs		$len,$len,#16 -	vpmull.p64	$Xm,$Hhl,$t1		@ (H.lo+H.hi)·(Xi.lo+Xi.hi) -	cclr		$inc,eq - -	vext.8		$t1,$Xl,$Xh,#8		@ Karatsuba post-processing -	veor		$t2,$Xl,$Xh -	veor		$Xm,$Xm,$t1 -	 vld1.64	{$t1},[$inp],$inc	@ load [rotated] inp -	veor		$Xm,$Xm,$t2 -	vpmull.p64	$t2,$Xl,$t3		@ 1st phase - -	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result -	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl -#ifndef __ARMEB__ -	 vrev64.8	$t1,$t1 -#endif -	veor		$Xl,$Xm,$t2 -	 vext.8		$IN,$t1,$t1,#8 - -	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase -	vpmull.p64	$Xl,$Xl,$t3 -	veor		$t2,$t2,$Xh -	veor		$Xl,$Xl,$t2 -	b.hs		.Loop_v8 
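The "Karatsuba pre-processing" and "Karatsuba post-processing" comments in this deleted PMULL path refer to forming a 128x128 carry-less product from three 64x64 multiplies instead of four, using (a_hi + a_lo)(b_hi + b_lo) = a_hi*b_hi + a_hi*b_lo + a_lo*b_hi + a_lo*b_lo over GF(2). A self-contained C sketch of the identity, where clmul64_sketch stands in for the PMULL/PMULL2 instructions (all names are mine, for illustration):

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } u128_t;

    /* Stand-in for PMULL/PMULL2: 64x64 -> 128-bit carry-less multiply. */
    static u128_t clmul64_sketch(uint64_t a, uint64_t b)
    {
        u128_t r = { 0, 0 };
        for (int i = 0; i < 64; i++)
            if ((b >> i) & 1) {
                r.lo ^= a << i;
                if (i)
                    r.hi ^= a >> (64 - i);
            }
        return r;
    }

    /* 128x128 -> 256-bit carry-less multiply with three 64x64
     * multiplies (Karatsuba) instead of four. */
    static void karatsuba128_sketch(const u128_t *A, const u128_t *B,
                                    u128_t out[2])
    {
        u128_t D  = clmul64_sketch(A->lo, B->lo);   /* low  halves */
        u128_t Hh = clmul64_sketch(A->hi, B->hi);   /* high halves */
        u128_t M  = clmul64_sketch(A->lo ^ A->hi, B->lo ^ B->hi);

        M.lo ^= D.lo ^ Hh.lo;         /* recover the true middle term */
        M.hi ^= D.hi ^ Hh.hi;

        out[0].lo = D.lo;             /* bits   0..63  */
        out[0].hi = D.hi ^ M.lo;      /* bits  64..127 */
        out[1].lo = Hh.lo ^ M.hi;     /* bits 128..191 */
        out[1].hi = Hh.hi;            /* bits 192..255 */
    }

The veor pair after .Lgmult_v8 performs exactly that recovery of the middle term, and the subsequent vpmull.p64 by the 0xe1-based constant carries out the two-phase reduction of the 256-bit product back into GF(2^128).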
- -#ifndef __ARMEB__ -	vrev64.8	$Xl,$Xl -#endif -	vext.8		$Xl,$Xl,$Xl,#8 -	vst1.64		{$Xl},[$Xi]		@ write out Xi - -	ret -.size	gcm_ghash_v8,.-gcm_ghash_v8 -___ -} -$code.=<<___; -.asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" -.align  2 -___ - -if ($flavour =~ /64/) {			######## 64-bit code -    sub unvmov { -	my $arg=shift; - -	$arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o && -	sprintf	"ins	v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1; -    } -    foreach(split("\n",$code)) { -	s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel	$1$2,$1zr,$1$2,$3/o	or -	s/vmov\.i8/movi/o		or	# fix up legacy mnemonics -	s/vmov\s+(.*)/unvmov($1)/geo	or -	s/vext\.8/ext/o			or -	s/vshr\.s/sshr\.s/o		or -	s/vshr/ushr/o			or -	s/^(\s+)v/$1/o			or	# strip off v prefix -	s/\bbx\s+lr\b/ret/o; - -	s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;	# old->new registers -	s/@\s/\/\//o;				# old->new style commentary - -	# fix up remainig legacy suffixes -	s/\.[ui]?8(\s)/$1/o; -	s/\.[uis]?32//o and s/\.16b/\.4s/go; -	m/\.p64/o and s/\.16b/\.1q/o;		# 1st pmull argument -	m/l\.p64/o and s/\.16b/\.1d/go;		# 2nd and 3rd pmull arguments -	s/\.[uisp]?64//o and s/\.16b/\.2d/go; -	s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; - -	print $_,"\n"; -    } -} else {				######## 32-bit code -    sub unvdup32 { -	my $arg=shift; - -	$arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && -	sprintf	"vdup.32	q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; -    } -    sub unvpmullp64 { -	my ($mnemonic,$arg)=@_; - -	if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) { -	    my $word = 0xf2a00e00|(($1&7)<<13)|(($1&8)<<19) -				 |(($2&7)<<17)|(($2&8)<<4) -				 |(($3&7)<<1) |(($3&8)<<2); -	    $word |= 0x00010001	 if ($mnemonic =~ "2"); -	    # since ARMv7 instructions are always encoded little-endian. -	    # correct solution is to use .inst directive, but older -	    # assemblers don't implement it:-( -	    sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", -			$word&0xff,($word>>8)&0xff, -			($word>>16)&0xff,($word>>24)&0xff, -			$mnemonic,$arg; -	} -    } - -    foreach(split("\n",$code)) { -	s/\b[wx]([0-9]+)\b/r$1/go;		# new->old registers -	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers -        s/\/\/\s?/@ /o;				# new->old style commentary - -	# fix up remainig new-style suffixes -	s/\],#[0-9]+/]!/o; - -	s/cclr\s+([^,]+),\s*([a-z]+)/mov$2	$1,#0/o			or -	s/vdup\.32\s+(.*)/unvdup32($1)/geo				or -	s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo		or -	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or -	s/^(\s+)b\./$1b/o						or -	s/^(\s+)ret/$1bx\tlr/o; - -        print $_,"\n"; -    } -} - -close STDOUT; # enforce flush diff --git a/app/openssl/crypto/modes/gcm128.c b/app/openssl/crypto/modes/gcm128.c index 79ebb66e..e1dc2b0f 100644 --- a/app/openssl/crypto/modes/gcm128.c +++ b/app/openssl/crypto/modes/gcm128.c @@ -642,7 +642,7 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])  #endif -#if	TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) +#if	TABLE_BITS==4 && defined(GHASH_ASM)  # if	!defined(I386_ONLY) && \  	(defined(__i386)	|| defined(__i386__)	|| \  	 defined(__x86_64)	|| defined(__x86_64__)	|| \ @@ -663,21 +663,13 @@ void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len  void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);  void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);  #  endif -# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) +# elif defined(__arm__) || defined(__arm)  #  include "arm_arch.h"  # 
 if __ARM_ARCH__>=7  #   define GHASH_ASM_ARM  #   define GCM_FUNCREF_4BIT -#   define PMULL_CAPABLE	(OPENSSL_armcap_P & ARMV8_PMULL) -#   if defined(__arm__) || defined(__arm) -#    define NEON_CAPABLE	(OPENSSL_armcap_P & ARMV7_NEON) -#   endif -void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);  void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);  void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); -void gcm_init_v8(u128 Htable[16],const u64 Xi[2]); -void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]); -void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);  #  endif  # endif  #endif @@ -747,21 +739,10 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)  	ctx->ghash = gcm_ghash_4bit;  #  endif  # elif	defined(GHASH_ASM_ARM) -#  ifdef PMULL_CAPABLE -	if (PMULL_CAPABLE) { -		gcm_init_v8(ctx->Htable,ctx->H.u); -		ctx->gmult = gcm_gmult_v8; -		ctx->ghash = gcm_ghash_v8; -	} else -#  endif -#  ifdef NEON_CAPABLE -	if (NEON_CAPABLE) { -		gcm_init_neon(ctx->Htable,ctx->H.u); +	if (OPENSSL_armcap_P & ARMV7_NEON) {  		ctx->gmult = gcm_gmult_neon;  		ctx->ghash = gcm_ghash_neon; -	} else -#  endif -	{ +	} else {  		gcm_init_4bit(ctx->Htable,ctx->H.u);  		ctx->gmult = gcm_gmult_4bit;  		ctx->ghash = gcm_ghash_4bit; diff --git a/app/openssl/crypto/opensslconf-32.h b/app/openssl/crypto/opensslconf-32.h index caf6f1b8..d6625489 100644 --- a/app/openssl/crypto/opensslconf-32.h +++ b/app/openssl/crypto/opensslconf-32.h @@ -53,9 +53,6 @@  #ifndef OPENSSL_NO_RFC3779  # define OPENSSL_NO_RFC3779  #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif  #ifndef OPENSSL_NO_RSAX  # define OPENSSL_NO_RSAX  #endif @@ -140,9 +137,6 @@  # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)  #  define NO_RFC3779  # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -#  define NO_RIPEMD -# endif  # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)  #  define NO_RSAX  # endif diff --git a/app/openssl/crypto/opensslconf-64.h b/app/openssl/crypto/opensslconf-64.h index 88fb0419..70c5a2cb 100644 --- a/app/openssl/crypto/opensslconf-64.h +++ b/app/openssl/crypto/opensslconf-64.h @@ -53,9 +53,6 @@  #ifndef OPENSSL_NO_RFC3779  # define OPENSSL_NO_RFC3779  #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif  #ifndef OPENSSL_NO_RSAX  # define OPENSSL_NO_RSAX  #endif @@ -140,9 +137,6 @@  # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)  #  define NO_RFC3779  # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -#  define NO_RIPEMD -# endif  # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)  #  define NO_RSAX  # endif diff --git a/app/openssl/crypto/opensslconf-static-32.h b/app/openssl/crypto/opensslconf-static-32.h index caf6f1b8..d6625489 100644 --- a/app/openssl/crypto/opensslconf-static-32.h +++ b/app/openssl/crypto/opensslconf-static-32.h @@ -53,9 +53,6 @@  #ifndef OPENSSL_NO_RFC3779  # define OPENSSL_NO_RFC3779  #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif  #ifndef OPENSSL_NO_RSAX  # define OPENSSL_NO_RSAX  #endif @@ -140,9 +137,6 @@  # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)  #  define NO_RFC3779  # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -#  define NO_RIPEMD -# endif  # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)  #  define NO_RSAX  # endif diff --git a/app/openssl/crypto/opensslconf-static-64.h b/app/openssl/crypto/opensslconf-static-64.h index 88fb0419..70c5a2cb 100644 --- 
a/app/openssl/crypto/opensslconf-static-64.h +++ b/app/openssl/crypto/opensslconf-static-64.h @@ -53,9 +53,6 @@  #ifndef OPENSSL_NO_RFC3779  # define OPENSSL_NO_RFC3779  #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif  #ifndef OPENSSL_NO_RSAX  # define OPENSSL_NO_RSAX  #endif @@ -140,9 +137,6 @@  # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)  #  define NO_RFC3779  # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -#  define NO_RIPEMD -# endif  # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)  #  define NO_RSAX  # endif diff --git a/app/openssl/crypto/opensslv.h b/app/openssl/crypto/opensslv.h index c3b6acec..ebe71807 100644 --- a/app/openssl/crypto/opensslv.h +++ b/app/openssl/crypto/opensslv.h @@ -25,11 +25,11 @@   * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for   *  major minor fix final patch/beta)   */ -#define OPENSSL_VERSION_NUMBER	0x1000108fL +#define OPENSSL_VERSION_NUMBER	0x1000107fL  #ifdef OPENSSL_FIPS -#define OPENSSL_VERSION_TEXT	"OpenSSL 1.0.1h-fips 5 Jun 2014" +#define OPENSSL_VERSION_TEXT	"OpenSSL 1.0.1g-fips 7 Apr 2014"  #else -#define OPENSSL_VERSION_TEXT	"OpenSSL 1.0.1h 5 Jun 2014" +#define OPENSSL_VERSION_TEXT	"OpenSSL 1.0.1g 7 Apr 2014"  #endif  #define OPENSSL_VERSION_PTEXT	" part of " OPENSSL_VERSION_TEXT diff --git a/app/openssl/crypto/pkcs12/p12_crt.c b/app/openssl/crypto/pkcs12/p12_crt.c index 35e8a4a8..a34915d0 100644 --- a/app/openssl/crypto/pkcs12/p12_crt.c +++ b/app/openssl/crypto/pkcs12/p12_crt.c @@ -96,11 +96,7 @@ PKCS12 *PKCS12_create(char *pass, char *name, EVP_PKEY *pkey, X509 *cert,  			nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC;  		else  #endif -#ifdef OPENSSL_NO_RC2 -		nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; -#else  		nid_cert = NID_pbe_WithSHA1And40BitRC2_CBC; -#endif  		}  	if (!nid_key)  		nid_key = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; @@ -290,11 +286,7 @@ int PKCS12_add_safe(STACK_OF(PKCS7) **psafes, STACK_OF(PKCS12_SAFEBAG) *bags,  		free_safes = 0;  	if (nid_safe == 0) -#ifdef OPENSSL_NO_RC2 -		nid_safe = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; -#else  		nid_safe = NID_pbe_WithSHA1And40BitRC2_CBC; -#endif  	if (nid_safe == -1)  		p7 = PKCS12_pack_p7data(bags); diff --git a/app/openssl/crypto/pkcs12/p12_kiss.c b/app/openssl/crypto/pkcs12/p12_kiss.c index c9b7ab61..206b1b0b 100644 --- a/app/openssl/crypto/pkcs12/p12_kiss.c +++ b/app/openssl/crypto/pkcs12/p12_kiss.c @@ -269,7 +269,7 @@ static int parse_bag(PKCS12_SAFEBAG *bag, const char *pass, int passlen,  			int len, r;  			unsigned char *data;  			len = ASN1_STRING_to_UTF8(&data, fname); -			if(len >= 0) { +			if(len > 0) {  				r = X509_alias_set1(x509, data, len);  				OPENSSL_free(data);  				if (!r) diff --git a/app/openssl/crypto/pkcs7/pk7_doit.c b/app/openssl/crypto/pkcs7/pk7_doit.c index d91aa116..77fda3b8 100644 --- a/app/openssl/crypto/pkcs7/pk7_doit.c +++ b/app/openssl/crypto/pkcs7/pk7_doit.c @@ -440,11 +440,6 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert)  		{  	case NID_pkcs7_signed:  		data_body=PKCS7_get_octet_string(p7->d.sign->contents); -		if (!PKCS7_is_detached(p7) && data_body == NULL) -			{ -			PKCS7err(PKCS7_F_PKCS7_DATADECODE,PKCS7_R_INVALID_SIGNED_DATA_TYPE); -			goto err; -			}  		md_sk=p7->d.sign->md_algs;  		break;  	case NID_pkcs7_signedAndEnveloped: @@ -933,7 +928,6 @@ int PKCS7_SIGNER_INFO_sign(PKCS7_SIGNER_INFO *si)  	if (EVP_DigestSignUpdate(&mctx,abuf,alen) <= 0)  		goto err;  	OPENSSL_free(abuf); -	abuf = NULL;  	if (EVP_DigestSignFinal(&mctx, 
NULL, &siglen) <= 0)  		goto err;  	abuf = OPENSSL_malloc(siglen); diff --git a/app/openssl/crypto/pkcs7/pkcs7.h b/app/openssl/crypto/pkcs7/pkcs7.h index 04f60379..e4d44319 100644 --- a/app/openssl/crypto/pkcs7/pkcs7.h +++ b/app/openssl/crypto/pkcs7/pkcs7.h @@ -453,7 +453,6 @@ void ERR_load_PKCS7_strings(void);  #define PKCS7_R_ERROR_SETTING_CIPHER			 121  #define PKCS7_R_INVALID_MIME_TYPE			 131  #define PKCS7_R_INVALID_NULL_POINTER			 143 -#define PKCS7_R_INVALID_SIGNED_DATA_TYPE		 155  #define PKCS7_R_MIME_NO_CONTENT_TYPE			 132  #define PKCS7_R_MIME_PARSE_ERROR			 133  #define PKCS7_R_MIME_SIG_PARSE_ERROR			 134 diff --git a/app/openssl/crypto/pkcs7/pkcs7err.c b/app/openssl/crypto/pkcs7/pkcs7err.c index f3db08e0..d0af32a2 100644 --- a/app/openssl/crypto/pkcs7/pkcs7err.c +++ b/app/openssl/crypto/pkcs7/pkcs7err.c @@ -1,6 +1,6 @@  /* crypto/pkcs7/pkcs7err.c */  /* ==================================================================== - * Copyright (c) 1999-2014 The OpenSSL Project.  All rights reserved. + * Copyright (c) 1999-2007 The OpenSSL Project.  All rights reserved.   *   * Redistribution and use in source and binary forms, with or without   * modification, are permitted provided that the following conditions @@ -130,7 +130,6 @@ static ERR_STRING_DATA PKCS7_str_reasons[]=  {ERR_REASON(PKCS7_R_ERROR_SETTING_CIPHER),"error setting cipher"},  {ERR_REASON(PKCS7_R_INVALID_MIME_TYPE)   ,"invalid mime type"},  {ERR_REASON(PKCS7_R_INVALID_NULL_POINTER),"invalid null pointer"}, -{ERR_REASON(PKCS7_R_INVALID_SIGNED_DATA_TYPE),"invalid signed data type"},  {ERR_REASON(PKCS7_R_MIME_NO_CONTENT_TYPE),"mime no content type"},  {ERR_REASON(PKCS7_R_MIME_PARSE_ERROR)    ,"mime parse error"},  {ERR_REASON(PKCS7_R_MIME_SIG_PARSE_ERROR),"mime sig parse error"}, diff --git a/app/openssl/crypto/rsa/rsa_ameth.c b/app/openssl/crypto/rsa/rsa_ameth.c index 4c8ecd92..5a2062f9 100644 --- a/app/openssl/crypto/rsa/rsa_ameth.c +++ b/app/openssl/crypto/rsa/rsa_ameth.c @@ -358,7 +358,7 @@ static int rsa_pss_param_print(BIO *bp, RSA_PSS_PARAMS *pss,  		if (i2a_ASN1_INTEGER(bp, pss->saltLength) <= 0)  			goto err;  		} -	else if (BIO_puts(bp, "14 (default)") <= 0) +	else if (BIO_puts(bp, "0x14 (default)") <= 0)  		goto err;  	BIO_puts(bp, "\n"); diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.pl b/app/openssl/crypto/sha/asm/sha1-armv4-large.pl index 50bd07b3..33da3e0e 100644 --- a/app/openssl/crypto/sha/asm/sha1-armv4-large.pl +++ b/app/openssl/crypto/sha/asm/sha1-armv4-large.pl @@ -1,7 +1,7 @@  #!/usr/bin/env perl  # ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL  # project. The module is, however, dual licensed under OpenSSL and  # CRYPTOGAMS licenses depending on where you obtain it. For further  # details see http://www.openssl.org/~appro/cryptogams/. @@ -52,20 +52,6 @@  # Profiler-assisted and platform-specific optimization resulted in 10%  # improvement on Cortex A8 core and 12.2 cycles per byte. -# September 2013. -# -# Add NEON implementation (see sha1-586.pl for background info). On -# Cortex A8 it was measured to process one byte in 6.7 cycles or >80% -# faster than integer-only code. Because [fully unrolled] NEON code -# is ~2.5x larger and there are some redundant instructions executed -# when processing last block, improvement is not as big for smallest -# blocks, only ~30%. 
Snapdragon S4 is a tad faster, 6.4 cycles per -# byte, which is also >80% faster than integer-only code. - -# May 2014. -# -# Add ARMv8 code path performing at 2.35 cpb on Apple A7. -  while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}  open STDOUT,">$output"; @@ -167,22 +153,12 @@ $code=<<___;  #include "arm_arch.h"  .text -.code	32  .global	sha1_block_data_order  .type	sha1_block_data_order,%function -.align	5 +.align	2  sha1_block_data_order: -#if __ARM_ARCH__>=7 -	sub	r3,pc,#8		@ sha1_block_data_order -	ldr	r12,.LOPENSSL_armcap -	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P -	tst	r12,#ARMV8_SHA1 -	bne	.LARMv8 -	tst	r12,#ARMV7_NEON -	bne	.LNEON -#endif  	stmdb	sp!,{r4-r12,lr}  	add	$len,$inp,$len,lsl#6	@ $len to point at the end of $inp  	ldmia	$ctx,{$a,$b,$c,$d,$e} @@ -257,422 +233,16 @@ $code.=<<___;  	moveq	pc,lr			@ be binary compatible with V4, yet  	bx	lr			@ interoperable with Thumb ISA:-)  #endif -.size	sha1_block_data_order,.-sha1_block_data_order - -.align	5 +.align	2  .LK_00_19:	.word	0x5a827999  .LK_20_39:	.word	0x6ed9eba1  .LK_40_59:	.word	0x8f1bbcdc  .LK_60_79:	.word	0xca62c1d6 -.LOPENSSL_armcap: -.word	OPENSSL_armcap_P-sha1_block_data_order -.asciz	"SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" -.align	5 -___ -##################################################################### -# NEON stuff -# -{{{ -my @V=($a,$b,$c,$d,$e); -my ($K_XX_XX,$Ki,$t0,$t1,$Xfer,$saved_sp)=map("r$_",(8..12,14)); -my $Xi=4; -my @X=map("q$_",(8..11,0..3)); -my @Tx=("q12","q13"); -my ($K,$zero)=("q14","q15"); -my $j=0; - -sub AUTOLOAD()          # thunk [simplified] x86-style perlasm -{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; -  my $arg = pop; -    $arg = "#$arg" if ($arg*1 eq $arg); -    $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; -} - -sub body_00_19 () { -	( -	'($a,$b,$c,$d,$e)=@V;'.		# '$code.="@ $j\n";'. -	'&bic	($t0,$d,$b)', -	'&add	($e,$e,$Ki)',		# e+=X[i]+K -	'&and	($t1,$c,$b)', -	'&ldr	($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))', -	'&add	($e,$e,$a,"ror#27")',	# e+=ROR(A,27) -	'&eor	($t1,$t1,$t0)',		# F_00_19 -	'&mov	($b,$b,"ror#2")',	# b=ROR(b,2) -	'&add	($e,$e,$t1);'.		# e+=F_00_19 -	'$j++;	unshift(@V,pop(@V));' -	) -} -sub body_20_39 () { -	( -	'($a,$b,$c,$d,$e)=@V;'.		# '$code.="@ $j\n";'. -	'&eor	($t0,$b,$d)', -	'&add	($e,$e,$Ki)',		# e+=X[i]+K -	'&ldr	($Ki,sprintf "[sp,#%d]",4*(($j+1)&15)) if ($j<79)', -	'&eor	($t1,$t0,$c)',		# F_20_39 -	'&add	($e,$e,$a,"ror#27")',	# e+=ROR(A,27) -	'&mov	($b,$b,"ror#2")',	# b=ROR(b,2) -	'&add	($e,$e,$t1);'.		# e+=F_20_39 -	'$j++;	unshift(@V,pop(@V));' -	) -} -sub body_40_59 () { -	( -	'($a,$b,$c,$d,$e)=@V;'.		# '$code.="@ $j\n";'. -	'&add	($e,$e,$Ki)',		# e+=X[i]+K -	'&and	($t0,$c,$d)', -	'&ldr	($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))', -	'&add	($e,$e,$a,"ror#27")',	# e+=ROR(A,27) -	'&eor	($t1,$c,$d)', -	'&add	($e,$e,$t0)', -	'&and	($t1,$t1,$b)', -	'&mov	($b,$b,"ror#2")',	# b=ROR(b,2) -	'&add	($e,$e,$t1);'.		
# e+=F_40_59 -	'$j++;	unshift(@V,pop(@V));' -	) -} - -sub Xupdate_16_31 () -{ use integer; -  my $body = shift; -  my @insns = (&$body,&$body,&$body,&$body); -  my ($a,$b,$c,$d,$e); - -	&vext_8		(@X[0],@X[-4&7],@X[-3&7],8);	# compose "X[-14]" in "X[0]" -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vadd_i32	(@Tx[1],@X[-1&7],$K); -	 eval(shift(@insns)); -	  &vld1_32	("{$K\[]}","[$K_XX_XX,:32]!")	if ($Xi%5==0); -	 eval(shift(@insns)); -	&vext_8		(@Tx[0],@X[-1&7],$zero,4);	# "X[-3]", 3 words -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&veor		(@X[0],@X[0],@X[-4&7]);		# "X[0]"^="X[-16]" -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&veor		(@Tx[0],@Tx[0],@X[-2&7]);	# "X[-3]"^"X[-8]" -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&veor		(@Tx[0],@Tx[0],@X[0]);		# "X[0]"^="X[-3]"^"X[-8] -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vst1_32	("{@Tx[1]}","[$Xfer,:128]!");	# X[]+K xfer -	  &sub		($Xfer,$Xfer,64)		if ($Xi%4==0); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vext_8		(@Tx[1],$zero,@Tx[0],4);	# "X[0]"<<96, extract one dword -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vadd_i32	(@X[0],@Tx[0],@Tx[0]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vsri_32	(@X[0],@Tx[0],31);		# "X[0]"<<<=1 -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vshr_u32	(@Tx[0],@Tx[1],30); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vshl_u32	(@Tx[1],@Tx[1],2); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&veor		(@X[0],@X[0],@Tx[0]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&veor		(@X[0],@X[0],@Tx[1]);		# "X[0]"^=("X[0]">>96)<<<2 - -	foreach (@insns) { eval; }	# remaining instructions [if any] - -  $Xi++;	push(@X,shift(@X));	# "rotate" X[] -} - -sub Xupdate_32_79 () -{ use integer; -  my $body = shift; -  my @insns = (&$body,&$body,&$body,&$body); -  my ($a,$b,$c,$d,$e); - -	&vext_8		(@Tx[0],@X[-2&7],@X[-1&7],8);	# compose "X[-6]" -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&veor		(@X[0],@X[0],@X[-4&7]);		# "X[0]"="X[-32]"^"X[-16]" -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&veor		(@X[0],@X[0],@X[-7&7]);		# "X[0]"^="X[-28]" -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vadd_i32	(@Tx[1],@X[-1&7],$K); -	 eval(shift(@insns)); -	  &vld1_32	("{$K\[]}","[$K_XX_XX,:32]!")	if ($Xi%5==0); -	 eval(shift(@insns)); -	&veor		(@Tx[0],@Tx[0],@X[0]);		# "X[-6]"^="X[0]" -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vshr_u32	(@X[0],@Tx[0],30); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vst1_32	("{@Tx[1]}","[$Xfer,:128]!");	# X[]+K xfer -	  &sub		($Xfer,$Xfer,64)		if ($Xi%4==0); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vsli_32	(@X[0],@Tx[0],2);		# "X[0]"="X[-6]"<<<2 - -	foreach (@insns) { eval; }	# remaining instructions [if any] - -  $Xi++;	push(@X,shift(@X));	# "rotate" X[] -} - -sub Xuplast_80 () -{ use integer; -  my $body = shift; -  my @insns = (&$body,&$body,&$body,&$body); -  my ($a,$b,$c,$d,$e); - -	&vadd_i32	(@Tx[1],@X[-1&7],$K); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vst1_32	("{@Tx[1]}","[$Xfer,:128]!"); -	&sub		($Xfer,$Xfer,64); - -	&teq		($inp,$len); -	&sub		($K_XX_XX,$K_XX_XX,16);	# rewind $K_XX_XX -	&subeq		($inp,$inp,64);		# reload last block to avoid SEGV -	&vld1_8		("{@X[-4&7]-@X[-3&7]}","[$inp]!"); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vld1_8		("{@X[-2&7]-@X[-1&7]}","[$inp]!"); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vld1_32	("{$K\[]}","[$K_XX_XX,:32]!");	# load K_00_19 -	 
eval(shift(@insns)); -	 eval(shift(@insns)); -	&vrev32_8	(@X[-4&7],@X[-4&7]); - -	foreach (@insns) { eval; }		# remaining instructions - -   $Xi=0; -} - -sub Xloop() -{ use integer; -  my $body = shift; -  my @insns = (&$body,&$body,&$body,&$body); -  my ($a,$b,$c,$d,$e); - -	&vrev32_8	(@X[($Xi-3)&7],@X[($Xi-3)&7]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vadd_i32	(@X[$Xi&7],@X[($Xi-4)&7],$K); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vst1_32	("{@X[$Xi&7]}","[$Xfer,:128]!");# X[]+K xfer to IALU - -	foreach (@insns) { eval; } - -  $Xi++; -} - -$code.=<<___; -#if __ARM_ARCH__>=7 -.fpu	neon - -.type	sha1_block_data_order_neon,%function -.align	4 -sha1_block_data_order_neon: -.LNEON: -	stmdb	sp!,{r4-r12,lr} -	add	$len,$inp,$len,lsl#6	@ $len to point at the end of $inp -	@ dmb				@ errata #451034 on early Cortex A8 -	@ vstmdb	sp!,{d8-d15}	@ ABI specification says so -	mov	$saved_sp,sp -	sub	sp,sp,#64		@ alloca -	adr	$K_XX_XX,.LK_00_19 -	bic	sp,sp,#15		@ align for 128-bit stores - -	ldmia	$ctx,{$a,$b,$c,$d,$e}	@ load context -	mov	$Xfer,sp - -	vld1.8		{@X[-4&7]-@X[-3&7]},[$inp]!	@ handles unaligned -	veor		$zero,$zero,$zero -	vld1.8		{@X[-2&7]-@X[-1&7]},[$inp]! -	vld1.32		{${K}\[]},[$K_XX_XX,:32]!	@ load K_00_19 -	vrev32.8	@X[-4&7],@X[-4&7]		@ yes, even on -	vrev32.8	@X[-3&7],@X[-3&7]		@ big-endian... -	vrev32.8	@X[-2&7],@X[-2&7] -	vadd.i32	@X[0],@X[-4&7],$K -	vrev32.8	@X[-1&7],@X[-1&7] -	vadd.i32	@X[1],@X[-3&7],$K -	vst1.32		{@X[0]},[$Xfer,:128]! -	vadd.i32	@X[2],@X[-2&7],$K -	vst1.32		{@X[1]},[$Xfer,:128]! -	vst1.32		{@X[2]},[$Xfer,:128]! -	ldr		$Ki,[sp]			@ big RAW stall - -.Loop_neon: -___ -	&Xupdate_16_31(\&body_00_19); -	&Xupdate_16_31(\&body_00_19); -	&Xupdate_16_31(\&body_00_19); -	&Xupdate_16_31(\&body_00_19); -	&Xupdate_32_79(\&body_00_19); -	&Xupdate_32_79(\&body_20_39); -	&Xupdate_32_79(\&body_20_39); -	&Xupdate_32_79(\&body_20_39); -	&Xupdate_32_79(\&body_20_39); -	&Xupdate_32_79(\&body_20_39); -	&Xupdate_32_79(\&body_40_59); -	&Xupdate_32_79(\&body_40_59); -	&Xupdate_32_79(\&body_40_59); -	&Xupdate_32_79(\&body_40_59); -	&Xupdate_32_79(\&body_40_59); -	&Xupdate_32_79(\&body_20_39); -	&Xuplast_80(\&body_20_39); -	&Xloop(\&body_20_39); -	&Xloop(\&body_20_39); -	&Xloop(\&body_20_39); -$code.=<<___; -	ldmia	$ctx,{$Ki,$t0,$t1,$Xfer}	@ accumulate context -	add	$a,$a,$Ki -	ldr	$Ki,[$ctx,#16] -	add	$b,$b,$t0 -	add	$c,$c,$t1 -	add	$d,$d,$Xfer -	moveq	sp,$saved_sp -	add	$e,$e,$Ki -	ldrne	$Ki,[sp] -	stmia	$ctx,{$a,$b,$c,$d,$e} -	addne	$Xfer,sp,#3*16 -	bne	.Loop_neon - -	@ vldmia	sp!,{d8-d15} -	ldmia	sp!,{r4-r12,pc} -.size	sha1_block_data_order_neon,.-sha1_block_data_order_neon -#endif -___ -}}} -##################################################################### -# ARMv8 stuff -# -{{{ -my ($ABCD,$E,$E0,$E1)=map("q$_",(0..3)); -my @MSG=map("q$_",(4..7)); -my @Kxx=map("q$_",(8..11)); -my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14)); - -$code.=<<___; -#if __ARM_ARCH__>=7 -.type	sha1_block_data_order_armv8,%function -.align	5 -sha1_block_data_order_armv8: -.LARMv8: -	vstmdb	sp!,{d8-d15}		@ ABI specification says so - -	veor	$E,$E,$E -	adr	r3,.LK_00_19 -	vld1.32	{$ABCD},[$ctx]! -	vld1.32	{$E\[0]},[$ctx] -	sub	$ctx,$ctx,#16 -	vld1.32	{@Kxx[0]\[]},[r3,:32]! -	vld1.32	{@Kxx[1]\[]},[r3,:32]! -	vld1.32	{@Kxx[2]\[]},[r3,:32]! -	vld1.32	{@Kxx[3]\[]},[r3,:32] - -.Loop_v8: -	vld1.8		{@MSG[0]-@MSG[1]},[$inp]! -	vld1.8		{@MSG[2]-@MSG[3]},[$inp]! 
-	vrev32.8	@MSG[0],@MSG[0] -	vrev32.8	@MSG[1],@MSG[1] - -	vadd.i32	$W0,@Kxx[0],@MSG[0] -	vrev32.8	@MSG[2],@MSG[2] -	vmov		$ABCD_SAVE,$ABCD	@ offload -	subs		$len,$len,#1 - -	vadd.i32	$W1,@Kxx[0],@MSG[1] -	vrev32.8	@MSG[3],@MSG[3] -	sha1h		$E1,$ABCD		@ 0 -	sha1c		$ABCD,$E,$W0 -	vadd.i32	$W0,@Kxx[$j],@MSG[2] -	sha1su0		@MSG[0],@MSG[1],@MSG[2] -___ -for ($j=0,$i=1;$i<20-3;$i++) { -my $f=("c","p","m","p")[$i/5]; -$code.=<<___; -	sha1h		$E0,$ABCD		@ $i -	sha1$f		$ABCD,$E1,$W1 -	vadd.i32	$W1,@Kxx[$j],@MSG[3] -	sha1su1		@MSG[0],@MSG[3] -___ -$code.=<<___ if ($i<20-4); -	sha1su0		@MSG[1],@MSG[2],@MSG[3] -___ -	($E0,$E1)=($E1,$E0);	($W0,$W1)=($W1,$W0); -	push(@MSG,shift(@MSG));	$j++ if ((($i+3)%5)==0); -} -$code.=<<___; -	sha1h		$E0,$ABCD		@ $i -	sha1p		$ABCD,$E1,$W1 -	vadd.i32	$W1,@Kxx[$j],@MSG[3] - -	sha1h		$E1,$ABCD		@ 18 -	sha1p		$ABCD,$E0,$W0 - -	sha1h		$E0,$ABCD		@ 19 -	sha1p		$ABCD,$E1,$W1 - -	vadd.i32	$E,$E,$E0 -	vadd.i32	$ABCD,$ABCD,$ABCD_SAVE -	bne		.Loop_v8 - -	vst1.32		{$ABCD},[$ctx]! -	vst1.32		{$E\[0]},[$ctx] - -	vldmia	sp!,{d8-d15} -	ret					@ bx lr -.size	sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 -#endif -___ -}}} -$code.=<<___; -.comm	OPENSSL_armcap_P,4,4 +.size	sha1_block_data_order,.-sha1_block_data_order +.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" +.align	2  ___ -{   my  %opcode = ( -	"sha1c"		=> 0xf2000c40,	"sha1p"		=> 0xf2100c40, -	"sha1m"		=> 0xf2200c40,	"sha1su0"	=> 0xf2300c40, -	"sha1h"		=> 0xf3b902c0,	"sha1su1"	=> 0xf3ba0380	); - -    sub unsha1 { -	my ($mnemonic,$arg)=@_; - -	if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { -	    my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) -					 |(($2&7)<<17)|(($2&8)<<4) -					 |(($3&7)<<1) |(($3&8)<<2); -	    # since ARMv7 instructions are always encoded little-endian. 
-	    # correct solution is to use .inst directive, but older -	    # assemblers don't implement it:-( -	    sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", -			$word&0xff,($word>>8)&0xff, -			($word>>16)&0xff,($word>>24)&0xff, -			$mnemonic,$arg; -	} -    } -} - -foreach (split($/,$code)) { -	s/{q([0-9]+)\[\]}/sprintf "{d%d[],d%d[]}",2*$1,2*$1+1/eo	or -	s/{q([0-9]+)\[0\]}/sprintf "{d%d[0]}",2*$1/eo; - -	s/\b(sha1\w+)\s+(q.*)/unsha1($1,$2)/geo; - -	s/\bret\b/bx	lr/o		or -	s/\bbx\s+lr\b/.word\t0xe12fff1e/o;	# make it possible to compile with -march=armv4 - -	print $_,$/; -} - +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4 +print $code;  close STDOUT; # enforce flush diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.s b/app/openssl/crypto/sha/asm/sha1-armv4-large.s index a1562883..639ae78a 100644 --- a/app/openssl/crypto/sha/asm/sha1-armv4-large.s +++ b/app/openssl/crypto/sha/asm/sha1-armv4-large.s @@ -1,22 +1,12 @@  #include "arm_arch.h"  .text -.code	32  .global	sha1_block_data_order  .type	sha1_block_data_order,%function -.align	5 +.align	2  sha1_block_data_order: -#if __ARM_ARCH__>=7 -	sub	r3,pc,#8		@ sha1_block_data_order -	ldr	r12,.LOPENSSL_armcap -	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P -	tst	r12,#ARMV8_SHA1 -	bne	.LARMv8 -	tst	r12,#ARMV7_NEON -	bne	.LNEON -#endif  	stmdb	sp!,{r4-r12,lr}  	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1  	ldmia	r0,{r3,r4,r5,r6,r7} @@ -452,999 +442,11 @@ sha1_block_data_order:  	moveq	pc,lr			@ be binary compatible with V4, yet  	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)  #endif -.size	sha1_block_data_order,.-sha1_block_data_order - -.align	5 +.align	2  .LK_00_19:	.word	0x5a827999  .LK_20_39:	.word	0x6ed9eba1  .LK_40_59:	.word	0x8f1bbcdc  .LK_60_79:	.word	0xca62c1d6 -.LOPENSSL_armcap: -.word	OPENSSL_armcap_P-sha1_block_data_order -.asciz	"SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>" -.align	5 -#if __ARM_ARCH__>=7 -.fpu	neon - -.type	sha1_block_data_order_neon,%function -.align	4 -sha1_block_data_order_neon: -.LNEON: -	stmdb	sp!,{r4-r12,lr} -	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1 -	@ dmb				@ errata #451034 on early Cortex A8 -	@ vstmdb	sp!,{d8-d15}	@ ABI specification says so -	mov	r14,sp -	sub	sp,sp,#64		@ alloca -	adr	r8,.LK_00_19 -	bic	sp,sp,#15		@ align for 128-bit stores - -	ldmia	r0,{r3,r4,r5,r6,r7}	@ load context -	mov	r12,sp - -	vld1.8		{q0-q1},[r1]!	@ handles unaligned -	veor		q15,q15,q15 -	vld1.8		{q2-q3},[r1]! -	vld1.32		{d28[],d29[]},[r8,:32]!	@ load K_00_19 -	vrev32.8	q0,q0		@ yes, even on -	vrev32.8	q1,q1		@ big-endian... -	vrev32.8	q2,q2 -	vadd.i32	q8,q0,q14 -	vrev32.8	q3,q3 -	vadd.i32	q9,q1,q14 -	vst1.32		{q8},[r12,:128]! -	vadd.i32	q10,q2,q14 -	vst1.32		{q9},[r12,:128]! -	vst1.32		{q10},[r12,:128]! -	ldr		r9,[sp]			@ big RAW stall - -.Loop_neon: -	vext.8	q8,q0,q1,#8 -	bic	r10,r6,r4 -	add	r7,r7,r9 -	and	r11,r5,r4 -	vadd.i32	q13,q3,q14 -	ldr	r9,[sp,#4] -	add	r7,r7,r3,ror#27 -	vext.8	q12,q3,q15,#4 -	eor	r11,r11,r10 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	veor	q8,q8,q0 -	bic	r10,r5,r3 -	add	r6,r6,r9 -	veor	q12,q12,q2 -	and	r11,r4,r3 -	ldr	r9,[sp,#8] -	veor	q12,q12,q8 -	add	r6,r6,r7,ror#27 -	eor	r11,r11,r10 -	vst1.32	{q13},[r12,:128]! 
-	sub	r12,r12,#64 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	vext.8	q13,q15,q12,#4 -	bic	r10,r4,r7 -	add	r5,r5,r9 -	vadd.i32	q8,q12,q12 -	and	r11,r3,r7 -	ldr	r9,[sp,#12] -	vsri.32	q8,q12,#31 -	add	r5,r5,r6,ror#27 -	eor	r11,r11,r10 -	mov	r7,r7,ror#2 -	vshr.u32	q12,q13,#30 -	add	r5,r5,r11 -	bic	r10,r3,r6 -	vshl.u32	q13,q13,#2 -	add	r4,r4,r9 -	and	r11,r7,r6 -	veor	q8,q8,q12 -	ldr	r9,[sp,#16] -	add	r4,r4,r5,ror#27 -	veor	q8,q8,q13 -	eor	r11,r11,r10 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	vext.8	q9,q1,q2,#8 -	bic	r10,r7,r5 -	add	r3,r3,r9 -	and	r11,r6,r5 -	vadd.i32	q13,q8,q14 -	ldr	r9,[sp,#20] -	vld1.32	{d28[],d29[]},[r8,:32]! -	add	r3,r3,r4,ror#27 -	vext.8	q12,q8,q15,#4 -	eor	r11,r11,r10 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	veor	q9,q9,q1 -	bic	r10,r6,r4 -	add	r7,r7,r9 -	veor	q12,q12,q3 -	and	r11,r5,r4 -	ldr	r9,[sp,#24] -	veor	q12,q12,q9 -	add	r7,r7,r3,ror#27 -	eor	r11,r11,r10 -	vst1.32	{q13},[r12,:128]! -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	vext.8	q13,q15,q12,#4 -	bic	r10,r5,r3 -	add	r6,r6,r9 -	vadd.i32	q9,q12,q12 -	and	r11,r4,r3 -	ldr	r9,[sp,#28] -	vsri.32	q9,q12,#31 -	add	r6,r6,r7,ror#27 -	eor	r11,r11,r10 -	mov	r3,r3,ror#2 -	vshr.u32	q12,q13,#30 -	add	r6,r6,r11 -	bic	r10,r4,r7 -	vshl.u32	q13,q13,#2 -	add	r5,r5,r9 -	and	r11,r3,r7 -	veor	q9,q9,q12 -	ldr	r9,[sp,#32] -	add	r5,r5,r6,ror#27 -	veor	q9,q9,q13 -	eor	r11,r11,r10 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	vext.8	q10,q2,q3,#8 -	bic	r10,r3,r6 -	add	r4,r4,r9 -	and	r11,r7,r6 -	vadd.i32	q13,q9,q14 -	ldr	r9,[sp,#36] -	add	r4,r4,r5,ror#27 -	vext.8	q12,q9,q15,#4 -	eor	r11,r11,r10 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	veor	q10,q10,q2 -	bic	r10,r7,r5 -	add	r3,r3,r9 -	veor	q12,q12,q8 -	and	r11,r6,r5 -	ldr	r9,[sp,#40] -	veor	q12,q12,q10 -	add	r3,r3,r4,ror#27 -	eor	r11,r11,r10 -	vst1.32	{q13},[r12,:128]! -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	vext.8	q13,q15,q12,#4 -	bic	r10,r6,r4 -	add	r7,r7,r9 -	vadd.i32	q10,q12,q12 -	and	r11,r5,r4 -	ldr	r9,[sp,#44] -	vsri.32	q10,q12,#31 -	add	r7,r7,r3,ror#27 -	eor	r11,r11,r10 -	mov	r4,r4,ror#2 -	vshr.u32	q12,q13,#30 -	add	r7,r7,r11 -	bic	r10,r5,r3 -	vshl.u32	q13,q13,#2 -	add	r6,r6,r9 -	and	r11,r4,r3 -	veor	q10,q10,q12 -	ldr	r9,[sp,#48] -	add	r6,r6,r7,ror#27 -	veor	q10,q10,q13 -	eor	r11,r11,r10 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	vext.8	q11,q3,q8,#8 -	bic	r10,r4,r7 -	add	r5,r5,r9 -	and	r11,r3,r7 -	vadd.i32	q13,q10,q14 -	ldr	r9,[sp,#52] -	add	r5,r5,r6,ror#27 -	vext.8	q12,q10,q15,#4 -	eor	r11,r11,r10 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	veor	q11,q11,q3 -	bic	r10,r3,r6 -	add	r4,r4,r9 -	veor	q12,q12,q9 -	and	r11,r7,r6 -	ldr	r9,[sp,#56] -	veor	q12,q12,q11 -	add	r4,r4,r5,ror#27 -	eor	r11,r11,r10 -	vst1.32	{q13},[r12,:128]! -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	vext.8	q13,q15,q12,#4 -	bic	r10,r7,r5 -	add	r3,r3,r9 -	vadd.i32	q11,q12,q12 -	and	r11,r6,r5 -	ldr	r9,[sp,#60] -	vsri.32	q11,q12,#31 -	add	r3,r3,r4,ror#27 -	eor	r11,r11,r10 -	mov	r5,r5,ror#2 -	vshr.u32	q12,q13,#30 -	add	r3,r3,r11 -	bic	r10,r6,r4 -	vshl.u32	q13,q13,#2 -	add	r7,r7,r9 -	and	r11,r5,r4 -	veor	q11,q11,q12 -	ldr	r9,[sp,#0] -	add	r7,r7,r3,ror#27 -	veor	q11,q11,q13 -	eor	r11,r11,r10 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	vext.8	q12,q10,q11,#8 -	bic	r10,r5,r3 -	add	r6,r6,r9 -	and	r11,r4,r3 -	veor	q0,q0,q8 -	ldr	r9,[sp,#4] -	add	r6,r6,r7,ror#27 -	veor	q0,q0,q1 -	eor	r11,r11,r10 -	mov	r3,r3,ror#2 -	vadd.i32	q13,q11,q14 -	add	r6,r6,r11 -	bic	r10,r4,r7 -	veor	q12,q12,q0 -	add	r5,r5,r9 -	and	r11,r3,r7 -	vshr.u32	q0,q12,#30 -	ldr	r9,[sp,#8] -	add	r5,r5,r6,ror#27 -	vst1.32	{q13},[r12,:128]! 
-	sub	r12,r12,#64 -	eor	r11,r11,r10 -	mov	r7,r7,ror#2 -	vsli.32	q0,q12,#2 -	add	r5,r5,r11 -	bic	r10,r3,r6 -	add	r4,r4,r9 -	and	r11,r7,r6 -	ldr	r9,[sp,#12] -	add	r4,r4,r5,ror#27 -	eor	r11,r11,r10 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	bic	r10,r7,r5 -	add	r3,r3,r9 -	and	r11,r6,r5 -	ldr	r9,[sp,#16] -	add	r3,r3,r4,ror#27 -	eor	r11,r11,r10 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	vext.8	q12,q11,q0,#8 -	eor	r10,r4,r6 -	add	r7,r7,r9 -	ldr	r9,[sp,#20] -	veor	q1,q1,q9 -	eor	r11,r10,r5 -	add	r7,r7,r3,ror#27 -	veor	q1,q1,q2 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	vadd.i32	q13,q0,q14 -	eor	r10,r3,r5 -	add	r6,r6,r9 -	veor	q12,q12,q1 -	ldr	r9,[sp,#24] -	eor	r11,r10,r4 -	vshr.u32	q1,q12,#30 -	add	r6,r6,r7,ror#27 -	mov	r3,r3,ror#2 -	vst1.32	{q13},[r12,:128]! -	add	r6,r6,r11 -	eor	r10,r7,r4 -	vsli.32	q1,q12,#2 -	add	r5,r5,r9 -	ldr	r9,[sp,#28] -	eor	r11,r10,r3 -	add	r5,r5,r6,ror#27 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	eor	r10,r6,r3 -	add	r4,r4,r9 -	ldr	r9,[sp,#32] -	eor	r11,r10,r7 -	add	r4,r4,r5,ror#27 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	vext.8	q12,q0,q1,#8 -	eor	r10,r5,r7 -	add	r3,r3,r9 -	ldr	r9,[sp,#36] -	veor	q2,q2,q10 -	eor	r11,r10,r6 -	add	r3,r3,r4,ror#27 -	veor	q2,q2,q3 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	vadd.i32	q13,q1,q14 -	eor	r10,r4,r6 -	vld1.32	{d28[],d29[]},[r8,:32]! -	add	r7,r7,r9 -	veor	q12,q12,q2 -	ldr	r9,[sp,#40] -	eor	r11,r10,r5 -	vshr.u32	q2,q12,#30 -	add	r7,r7,r3,ror#27 -	mov	r4,r4,ror#2 -	vst1.32	{q13},[r12,:128]! -	add	r7,r7,r11 -	eor	r10,r3,r5 -	vsli.32	q2,q12,#2 -	add	r6,r6,r9 -	ldr	r9,[sp,#44] -	eor	r11,r10,r4 -	add	r6,r6,r7,ror#27 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	eor	r10,r7,r4 -	add	r5,r5,r9 -	ldr	r9,[sp,#48] -	eor	r11,r10,r3 -	add	r5,r5,r6,ror#27 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	vext.8	q12,q1,q2,#8 -	eor	r10,r6,r3 -	add	r4,r4,r9 -	ldr	r9,[sp,#52] -	veor	q3,q3,q11 -	eor	r11,r10,r7 -	add	r4,r4,r5,ror#27 -	veor	q3,q3,q8 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	vadd.i32	q13,q2,q14 -	eor	r10,r5,r7 -	add	r3,r3,r9 -	veor	q12,q12,q3 -	ldr	r9,[sp,#56] -	eor	r11,r10,r6 -	vshr.u32	q3,q12,#30 -	add	r3,r3,r4,ror#27 -	mov	r5,r5,ror#2 -	vst1.32	{q13},[r12,:128]! -	add	r3,r3,r11 -	eor	r10,r4,r6 -	vsli.32	q3,q12,#2 -	add	r7,r7,r9 -	ldr	r9,[sp,#60] -	eor	r11,r10,r5 -	add	r7,r7,r3,ror#27 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	eor	r10,r3,r5 -	add	r6,r6,r9 -	ldr	r9,[sp,#0] -	eor	r11,r10,r4 -	add	r6,r6,r7,ror#27 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	vext.8	q12,q2,q3,#8 -	eor	r10,r7,r4 -	add	r5,r5,r9 -	ldr	r9,[sp,#4] -	veor	q8,q8,q0 -	eor	r11,r10,r3 -	add	r5,r5,r6,ror#27 -	veor	q8,q8,q9 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	vadd.i32	q13,q3,q14 -	eor	r10,r6,r3 -	add	r4,r4,r9 -	veor	q12,q12,q8 -	ldr	r9,[sp,#8] -	eor	r11,r10,r7 -	vshr.u32	q8,q12,#30 -	add	r4,r4,r5,ror#27 -	mov	r6,r6,ror#2 -	vst1.32	{q13},[r12,:128]! -	sub	r12,r12,#64 -	add	r4,r4,r11 -	eor	r10,r5,r7 -	vsli.32	q8,q12,#2 -	add	r3,r3,r9 -	ldr	r9,[sp,#12] -	eor	r11,r10,r6 -	add	r3,r3,r4,ror#27 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	eor	r10,r4,r6 -	add	r7,r7,r9 -	ldr	r9,[sp,#16] -	eor	r11,r10,r5 -	add	r7,r7,r3,ror#27 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	vext.8	q12,q3,q8,#8 -	eor	r10,r3,r5 -	add	r6,r6,r9 -	ldr	r9,[sp,#20] -	veor	q9,q9,q1 -	eor	r11,r10,r4 -	add	r6,r6,r7,ror#27 -	veor	q9,q9,q10 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	vadd.i32	q13,q8,q14 -	eor	r10,r7,r4 -	add	r5,r5,r9 -	veor	q12,q12,q9 -	ldr	r9,[sp,#24] -	eor	r11,r10,r3 -	vshr.u32	q9,q12,#30 -	add	r5,r5,r6,ror#27 -	mov	r7,r7,ror#2 -	vst1.32	{q13},[r12,:128]! 
-	add	r5,r5,r11 -	eor	r10,r6,r3 -	vsli.32	q9,q12,#2 -	add	r4,r4,r9 -	ldr	r9,[sp,#28] -	eor	r11,r10,r7 -	add	r4,r4,r5,ror#27 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	eor	r10,r5,r7 -	add	r3,r3,r9 -	ldr	r9,[sp,#32] -	eor	r11,r10,r6 -	add	r3,r3,r4,ror#27 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	vext.8	q12,q8,q9,#8 -	add	r7,r7,r9 -	and	r10,r5,r6 -	ldr	r9,[sp,#36] -	veor	q10,q10,q2 -	add	r7,r7,r3,ror#27 -	eor	r11,r5,r6 -	veor	q10,q10,q11 -	add	r7,r7,r10 -	and	r11,r11,r4 -	vadd.i32	q13,q9,q14 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	veor	q12,q12,q10 -	add	r6,r6,r9 -	and	r10,r4,r5 -	vshr.u32	q10,q12,#30 -	ldr	r9,[sp,#40] -	add	r6,r6,r7,ror#27 -	vst1.32	{q13},[r12,:128]! -	eor	r11,r4,r5 -	add	r6,r6,r10 -	vsli.32	q10,q12,#2 -	and	r11,r11,r3 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	add	r5,r5,r9 -	and	r10,r3,r4 -	ldr	r9,[sp,#44] -	add	r5,r5,r6,ror#27 -	eor	r11,r3,r4 -	add	r5,r5,r10 -	and	r11,r11,r7 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	add	r4,r4,r9 -	and	r10,r7,r3 -	ldr	r9,[sp,#48] -	add	r4,r4,r5,ror#27 -	eor	r11,r7,r3 -	add	r4,r4,r10 -	and	r11,r11,r6 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	vext.8	q12,q9,q10,#8 -	add	r3,r3,r9 -	and	r10,r6,r7 -	ldr	r9,[sp,#52] -	veor	q11,q11,q3 -	add	r3,r3,r4,ror#27 -	eor	r11,r6,r7 -	veor	q11,q11,q0 -	add	r3,r3,r10 -	and	r11,r11,r5 -	vadd.i32	q13,q10,q14 -	mov	r5,r5,ror#2 -	vld1.32	{d28[],d29[]},[r8,:32]! -	add	r3,r3,r11 -	veor	q12,q12,q11 -	add	r7,r7,r9 -	and	r10,r5,r6 -	vshr.u32	q11,q12,#30 -	ldr	r9,[sp,#56] -	add	r7,r7,r3,ror#27 -	vst1.32	{q13},[r12,:128]! -	eor	r11,r5,r6 -	add	r7,r7,r10 -	vsli.32	q11,q12,#2 -	and	r11,r11,r4 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	add	r6,r6,r9 -	and	r10,r4,r5 -	ldr	r9,[sp,#60] -	add	r6,r6,r7,ror#27 -	eor	r11,r4,r5 -	add	r6,r6,r10 -	and	r11,r11,r3 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	add	r5,r5,r9 -	and	r10,r3,r4 -	ldr	r9,[sp,#0] -	add	r5,r5,r6,ror#27 -	eor	r11,r3,r4 -	add	r5,r5,r10 -	and	r11,r11,r7 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	vext.8	q12,q10,q11,#8 -	add	r4,r4,r9 -	and	r10,r7,r3 -	ldr	r9,[sp,#4] -	veor	q0,q0,q8 -	add	r4,r4,r5,ror#27 -	eor	r11,r7,r3 -	veor	q0,q0,q1 -	add	r4,r4,r10 -	and	r11,r11,r6 -	vadd.i32	q13,q11,q14 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	veor	q12,q12,q0 -	add	r3,r3,r9 -	and	r10,r6,r7 -	vshr.u32	q0,q12,#30 -	ldr	r9,[sp,#8] -	add	r3,r3,r4,ror#27 -	vst1.32	{q13},[r12,:128]! -	sub	r12,r12,#64 -	eor	r11,r6,r7 -	add	r3,r3,r10 -	vsli.32	q0,q12,#2 -	and	r11,r11,r5 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	add	r7,r7,r9 -	and	r10,r5,r6 -	ldr	r9,[sp,#12] -	add	r7,r7,r3,ror#27 -	eor	r11,r5,r6 -	add	r7,r7,r10 -	and	r11,r11,r4 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	add	r6,r6,r9 -	and	r10,r4,r5 -	ldr	r9,[sp,#16] -	add	r6,r6,r7,ror#27 -	eor	r11,r4,r5 -	add	r6,r6,r10 -	and	r11,r11,r3 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	vext.8	q12,q11,q0,#8 -	add	r5,r5,r9 -	and	r10,r3,r4 -	ldr	r9,[sp,#20] -	veor	q1,q1,q9 -	add	r5,r5,r6,ror#27 -	eor	r11,r3,r4 -	veor	q1,q1,q2 -	add	r5,r5,r10 -	and	r11,r11,r7 -	vadd.i32	q13,q0,q14 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	veor	q12,q12,q1 -	add	r4,r4,r9 -	and	r10,r7,r3 -	vshr.u32	q1,q12,#30 -	ldr	r9,[sp,#24] -	add	r4,r4,r5,ror#27 -	vst1.32	{q13},[r12,:128]! 
-	eor	r11,r7,r3 -	add	r4,r4,r10 -	vsli.32	q1,q12,#2 -	and	r11,r11,r6 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	add	r3,r3,r9 -	and	r10,r6,r7 -	ldr	r9,[sp,#28] -	add	r3,r3,r4,ror#27 -	eor	r11,r6,r7 -	add	r3,r3,r10 -	and	r11,r11,r5 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	add	r7,r7,r9 -	and	r10,r5,r6 -	ldr	r9,[sp,#32] -	add	r7,r7,r3,ror#27 -	eor	r11,r5,r6 -	add	r7,r7,r10 -	and	r11,r11,r4 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	vext.8	q12,q0,q1,#8 -	add	r6,r6,r9 -	and	r10,r4,r5 -	ldr	r9,[sp,#36] -	veor	q2,q2,q10 -	add	r6,r6,r7,ror#27 -	eor	r11,r4,r5 -	veor	q2,q2,q3 -	add	r6,r6,r10 -	and	r11,r11,r3 -	vadd.i32	q13,q1,q14 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	veor	q12,q12,q2 -	add	r5,r5,r9 -	and	r10,r3,r4 -	vshr.u32	q2,q12,#30 -	ldr	r9,[sp,#40] -	add	r5,r5,r6,ror#27 -	vst1.32	{q13},[r12,:128]! -	eor	r11,r3,r4 -	add	r5,r5,r10 -	vsli.32	q2,q12,#2 -	and	r11,r11,r7 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	add	r4,r4,r9 -	and	r10,r7,r3 -	ldr	r9,[sp,#44] -	add	r4,r4,r5,ror#27 -	eor	r11,r7,r3 -	add	r4,r4,r10 -	and	r11,r11,r6 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	add	r3,r3,r9 -	and	r10,r6,r7 -	ldr	r9,[sp,#48] -	add	r3,r3,r4,ror#27 -	eor	r11,r6,r7 -	add	r3,r3,r10 -	and	r11,r11,r5 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	vext.8	q12,q1,q2,#8 -	eor	r10,r4,r6 -	add	r7,r7,r9 -	ldr	r9,[sp,#52] -	veor	q3,q3,q11 -	eor	r11,r10,r5 -	add	r7,r7,r3,ror#27 -	veor	q3,q3,q8 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	vadd.i32	q13,q2,q14 -	eor	r10,r3,r5 -	add	r6,r6,r9 -	veor	q12,q12,q3 -	ldr	r9,[sp,#56] -	eor	r11,r10,r4 -	vshr.u32	q3,q12,#30 -	add	r6,r6,r7,ror#27 -	mov	r3,r3,ror#2 -	vst1.32	{q13},[r12,:128]! -	add	r6,r6,r11 -	eor	r10,r7,r4 -	vsli.32	q3,q12,#2 -	add	r5,r5,r9 -	ldr	r9,[sp,#60] -	eor	r11,r10,r3 -	add	r5,r5,r6,ror#27 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	eor	r10,r6,r3 -	add	r4,r4,r9 -	ldr	r9,[sp,#0] -	eor	r11,r10,r7 -	add	r4,r4,r5,ror#27 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	vadd.i32	q13,q3,q14 -	eor	r10,r5,r7 -	add	r3,r3,r9 -	vst1.32	{q13},[r12,:128]! -	sub	r12,r12,#64 -	teq	r1,r2 -	sub	r8,r8,#16 -	subeq	r1,r1,#64 -	vld1.8	{q0-q1},[r1]! -	ldr	r9,[sp,#4] -	eor	r11,r10,r6 -	vld1.8	{q2-q3},[r1]! -	add	r3,r3,r4,ror#27 -	mov	r5,r5,ror#2 -	vld1.32	{d28[],d29[]},[r8,:32]! -	add	r3,r3,r11 -	eor	r10,r4,r6 -	vrev32.8	q0,q0 -	add	r7,r7,r9 -	ldr	r9,[sp,#8] -	eor	r11,r10,r5 -	add	r7,r7,r3,ror#27 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	eor	r10,r3,r5 -	add	r6,r6,r9 -	ldr	r9,[sp,#12] -	eor	r11,r10,r4 -	add	r6,r6,r7,ror#27 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	eor	r10,r7,r4 -	add	r5,r5,r9 -	ldr	r9,[sp,#16] -	eor	r11,r10,r3 -	add	r5,r5,r6,ror#27 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	vrev32.8	q1,q1 -	eor	r10,r6,r3 -	add	r4,r4,r9 -	vadd.i32	q8,q0,q14 -	ldr	r9,[sp,#20] -	eor	r11,r10,r7 -	vst1.32	{q8},[r12,:128]! -	add	r4,r4,r5,ror#27 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	eor	r10,r5,r7 -	add	r3,r3,r9 -	ldr	r9,[sp,#24] -	eor	r11,r10,r6 -	add	r3,r3,r4,ror#27 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	eor	r10,r4,r6 -	add	r7,r7,r9 -	ldr	r9,[sp,#28] -	eor	r11,r10,r5 -	add	r7,r7,r3,ror#27 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	eor	r10,r3,r5 -	add	r6,r6,r9 -	ldr	r9,[sp,#32] -	eor	r11,r10,r4 -	add	r6,r6,r7,ror#27 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	vrev32.8	q2,q2 -	eor	r10,r7,r4 -	add	r5,r5,r9 -	vadd.i32	q9,q1,q14 -	ldr	r9,[sp,#36] -	eor	r11,r10,r3 -	vst1.32	{q9},[r12,:128]! 
-	add	r5,r5,r6,ror#27 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	eor	r10,r6,r3 -	add	r4,r4,r9 -	ldr	r9,[sp,#40] -	eor	r11,r10,r7 -	add	r4,r4,r5,ror#27 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	eor	r10,r5,r7 -	add	r3,r3,r9 -	ldr	r9,[sp,#44] -	eor	r11,r10,r6 -	add	r3,r3,r4,ror#27 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	eor	r10,r4,r6 -	add	r7,r7,r9 -	ldr	r9,[sp,#48] -	eor	r11,r10,r5 -	add	r7,r7,r3,ror#27 -	mov	r4,r4,ror#2 -	add	r7,r7,r11 -	vrev32.8	q3,q3 -	eor	r10,r3,r5 -	add	r6,r6,r9 -	vadd.i32	q10,q2,q14 -	ldr	r9,[sp,#52] -	eor	r11,r10,r4 -	vst1.32	{q10},[r12,:128]! -	add	r6,r6,r7,ror#27 -	mov	r3,r3,ror#2 -	add	r6,r6,r11 -	eor	r10,r7,r4 -	add	r5,r5,r9 -	ldr	r9,[sp,#56] -	eor	r11,r10,r3 -	add	r5,r5,r6,ror#27 -	mov	r7,r7,ror#2 -	add	r5,r5,r11 -	eor	r10,r6,r3 -	add	r4,r4,r9 -	ldr	r9,[sp,#60] -	eor	r11,r10,r7 -	add	r4,r4,r5,ror#27 -	mov	r6,r6,ror#2 -	add	r4,r4,r11 -	eor	r10,r5,r7 -	add	r3,r3,r9 -	eor	r11,r10,r6 -	add	r3,r3,r4,ror#27 -	mov	r5,r5,ror#2 -	add	r3,r3,r11 -	ldmia	r0,{r9,r10,r11,r12}	@ accumulate context -	add	r3,r3,r9 -	ldr	r9,[r0,#16] -	add	r4,r4,r10 -	add	r5,r5,r11 -	add	r6,r6,r12 -	moveq	sp,r14 -	add	r7,r7,r9 -	ldrne	r9,[sp] -	stmia	r0,{r3,r4,r5,r6,r7} -	addne	r12,sp,#3*16 -	bne	.Loop_neon - -	@ vldmia	sp!,{d8-d15} -	ldmia	sp!,{r4-r12,pc} -.size	sha1_block_data_order_neon,.-sha1_block_data_order_neon -#endif -#if __ARM_ARCH__>=7 -.type	sha1_block_data_order_armv8,%function -.align	5 -sha1_block_data_order_armv8: -.LARMv8: -	vstmdb	sp!,{d8-d15}		@ ABI specification says so - -	veor	q1,q1,q1 -	adr	r3,.LK_00_19 -	vld1.32	{q0},[r0]! -	vld1.32	{d2[0]},[r0] -	sub	r0,r0,#16 -	vld1.32	{d16[],d17[]},[r3,:32]! -	vld1.32	{d18[],d19[]},[r3,:32]! -	vld1.32	{d20[],d21[]},[r3,:32]! -	vld1.32	{d22[],d23[]},[r3,:32] - -.Loop_v8: -	vld1.8		{q4-q5},[r1]! -	vld1.8		{q6-q7},[r1]! 
-	vrev32.8	q4,q4 -	vrev32.8	q5,q5 - -	vadd.i32	q12,q8,q4 -	vrev32.8	q6,q6 -	vmov		q14,q0	@ offload -	subs		r2,r2,#1 - -	vadd.i32	q13,q8,q5 -	vrev32.8	q7,q7 -	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 0 -	.byte	0x68,0x0c,0x02,0xf2	@ sha1c q0,q1,q12 -	vadd.i32	q12,q8,q6 -	.byte	0x4c,0x8c,0x3a,0xf2	@ sha1su0 q4,q5,q6 -	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 1 -	.byte	0x6a,0x0c,0x06,0xf2	@ sha1c q0,q3,q13 -	vadd.i32	q13,q8,q7 -	.byte	0x8e,0x83,0xba,0xf3	@ sha1su1 q4,q7 -	.byte	0x4e,0xac,0x3c,0xf2	@ sha1su0 q5,q6,q7 -	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 2 -	.byte	0x68,0x0c,0x04,0xf2	@ sha1c q0,q2,q12 -	vadd.i32	q12,q8,q4 -	.byte	0x88,0xa3,0xba,0xf3	@ sha1su1 q5,q4 -	.byte	0x48,0xcc,0x3e,0xf2	@ sha1su0 q6,q7,q4 -	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 3 -	.byte	0x6a,0x0c,0x06,0xf2	@ sha1c q0,q3,q13 -	vadd.i32	q13,q9,q5 -	.byte	0x8a,0xc3,0xba,0xf3	@ sha1su1 q6,q5 -	.byte	0x4a,0xec,0x38,0xf2	@ sha1su0 q7,q4,q5 -	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 4 -	.byte	0x68,0x0c,0x04,0xf2	@ sha1c q0,q2,q12 -	vadd.i32	q12,q9,q6 -	.byte	0x8c,0xe3,0xba,0xf3	@ sha1su1 q7,q6 -	.byte	0x4c,0x8c,0x3a,0xf2	@ sha1su0 q4,q5,q6 -	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 5 -	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13 -	vadd.i32	q13,q9,q7 -	.byte	0x8e,0x83,0xba,0xf3	@ sha1su1 q4,q7 -	.byte	0x4e,0xac,0x3c,0xf2	@ sha1su0 q5,q6,q7 -	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 6 -	.byte	0x68,0x0c,0x14,0xf2	@ sha1p q0,q2,q12 -	vadd.i32	q12,q9,q4 -	.byte	0x88,0xa3,0xba,0xf3	@ sha1su1 q5,q4 -	.byte	0x48,0xcc,0x3e,0xf2	@ sha1su0 q6,q7,q4 -	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 7 -	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13 -	vadd.i32	q13,q9,q5 -	.byte	0x8a,0xc3,0xba,0xf3	@ sha1su1 q6,q5 -	.byte	0x4a,0xec,0x38,0xf2	@ sha1su0 q7,q4,q5 -	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 8 -	.byte	0x68,0x0c,0x14,0xf2	@ sha1p q0,q2,q12 -	vadd.i32	q12,q10,q6 -	.byte	0x8c,0xe3,0xba,0xf3	@ sha1su1 q7,q6 -	.byte	0x4c,0x8c,0x3a,0xf2	@ sha1su0 q4,q5,q6 -	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 9 -	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13 -	vadd.i32	q13,q10,q7 -	.byte	0x8e,0x83,0xba,0xf3	@ sha1su1 q4,q7 -	.byte	0x4e,0xac,0x3c,0xf2	@ sha1su0 q5,q6,q7 -	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 10 -	.byte	0x68,0x0c,0x24,0xf2	@ sha1m q0,q2,q12 -	vadd.i32	q12,q10,q4 -	.byte	0x88,0xa3,0xba,0xf3	@ sha1su1 q5,q4 -	.byte	0x48,0xcc,0x3e,0xf2	@ sha1su0 q6,q7,q4 -	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 11 -	.byte	0x6a,0x0c,0x26,0xf2	@ sha1m q0,q3,q13 -	vadd.i32	q13,q10,q5 -	.byte	0x8a,0xc3,0xba,0xf3	@ sha1su1 q6,q5 -	.byte	0x4a,0xec,0x38,0xf2	@ sha1su0 q7,q4,q5 -	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 12 -	.byte	0x68,0x0c,0x24,0xf2	@ sha1m q0,q2,q12 -	vadd.i32	q12,q10,q6 -	.byte	0x8c,0xe3,0xba,0xf3	@ sha1su1 q7,q6 -	.byte	0x4c,0x8c,0x3a,0xf2	@ sha1su0 q4,q5,q6 -	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 13 -	.byte	0x6a,0x0c,0x26,0xf2	@ sha1m q0,q3,q13 -	vadd.i32	q13,q11,q7 -	.byte	0x8e,0x83,0xba,0xf3	@ sha1su1 q4,q7 -	.byte	0x4e,0xac,0x3c,0xf2	@ sha1su0 q5,q6,q7 -	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 14 -	.byte	0x68,0x0c,0x24,0xf2	@ sha1m q0,q2,q12 -	vadd.i32	q12,q11,q4 -	.byte	0x88,0xa3,0xba,0xf3	@ sha1su1 q5,q4 -	.byte	0x48,0xcc,0x3e,0xf2	@ sha1su0 q6,q7,q4 -	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 15 -	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13 -	vadd.i32	q13,q11,q5 -	.byte	0x8a,0xc3,0xba,0xf3	@ sha1su1 q6,q5 -	.byte	0x4a,0xec,0x38,0xf2	@ sha1su0 q7,q4,q5 -	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 16 -	.byte	0x68,0x0c,0x14,0xf2	@ sha1p q0,q2,q12 -	vadd.i32	q12,q11,q6 -	.byte	0x8c,0xe3,0xba,0xf3	@ 
sha1su1 q7,q6 -	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 17 -	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13 -	vadd.i32	q13,q11,q7 - -	.byte	0xc0,0x62,0xb9,0xf3	@ sha1h q3,q0		@ 18 -	.byte	0x68,0x0c,0x14,0xf2	@ sha1p q0,q2,q12 - -	.byte	0xc0,0x42,0xb9,0xf3	@ sha1h q2,q0		@ 19 -	.byte	0x6a,0x0c,0x16,0xf2	@ sha1p q0,q3,q13 - -	vadd.i32	q1,q1,q2 -	vadd.i32	q0,q0,q14 -	bne		.Loop_v8 - -	vst1.32		{q0},[r0]! -	vst1.32		{d2[0]},[r0] - -	vldmia	sp!,{d8-d15} -	bx	lr					@ bx lr -.size	sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 -#endif -.comm	OPENSSL_armcap_P,4,4 +.size	sha1_block_data_order,.-sha1_block_data_order +.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>" +.align	2 diff --git a/app/openssl/crypto/sha/asm/sha1-armv8.S b/app/openssl/crypto/sha/asm/sha1-armv8.S deleted file mode 100644 index f9d12625..00000000 --- a/app/openssl/crypto/sha/asm/sha1-armv8.S +++ /dev/null @@ -1,1211 +0,0 @@ -#include "arm_arch.h" - -.text - -.globl	sha1_block_data_order -.type	sha1_block_data_order,%function -.align	6 -sha1_block_data_order: -	ldr	x16,.LOPENSSL_armcap_P -	adr	x17,.LOPENSSL_armcap_P -	add	x16,x16,x17 -	ldr	w16,[x16] -	tst	w16,#ARMV8_SHA1 -	b.ne	.Lv8_entry - -	stp	x29,x30,[sp,#-96]! -	add	x29,sp,#0 -	stp	x19,x20,[sp,#16] -	stp	x21,x22,[sp,#32] -	stp	x23,x24,[sp,#48] -	stp	x25,x26,[sp,#64] -	stp	x27,x28,[sp,#80] - -	ldp	w20,w21,[x0] -	ldp	w22,w23,[x0,#8] -	ldr	w24,[x0,#16] - -.Loop: -	ldr	x3,[x1],#64 -	movz	w28,#0x7999 -	sub	x2,x2,#1 -	movk	w28,#0x5a82,lsl#16 -#ifdef	__ARMEB__ -	ror	x3,x3,#32 -#else -	rev32	x3,x3 -#endif -	add	w24,w24,w28		// warm it up -	add	w24,w24,w3 -	lsr	x4,x3,#32 -	ldr	x5,[x1,#-56] -	bic	w25,w23,w21 -	and	w26,w22,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	orr	w25,w25,w26 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	add	w23,w23,w4	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -#ifdef	__ARMEB__ -	ror	x5,x5,#32 -#else -	rev32	x5,x5 -#endif -	bic	w25,w22,w20 -	and	w26,w21,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	orr	w25,w25,w26 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	add	w22,w22,w5	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	lsr	x6,x5,#32 -	ldr	x7,[x1,#-48] -	bic	w25,w21,w24 -	and	w26,w20,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	orr	w25,w25,w26 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	add	w21,w21,w6	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -#ifdef	__ARMEB__ -	ror	x7,x7,#32 -#else -	rev32	x7,x7 -#endif -	bic	w25,w20,w23 -	and	w26,w24,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	orr	w25,w25,w26 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	add	w20,w20,w7	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	lsr	x8,x7,#32 -	ldr	x9,[x1,#-40] -	bic	w25,w24,w22 -	and	w26,w23,w22 -	ror	w27,w21,#27 -	add	w24,w24,w28		// future e+=K -	orr	w25,w25,w26 -	add	w20,w20,w27		// e+=rot(a,5) -	ror	w22,w22,#2 -	add	w24,w24,w8	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -#ifdef	__ARMEB__ -	ror	x9,x9,#32 -#else -	rev32	x9,x9 -#endif -	bic	w25,w23,w21 -	and	w26,w22,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	orr	w25,w25,w26 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	add	w23,w23,w9	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	lsr	x10,x9,#32 -	ldr	x11,[x1,#-32] -	bic	w25,w22,w20 -	and	w26,w21,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	orr	w25,w25,w26 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	add	w22,w22,w10	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -#ifdef	
__ARMEB__ -	ror	x11,x11,#32 -#else -	rev32	x11,x11 -#endif -	bic	w25,w21,w24 -	and	w26,w20,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	orr	w25,w25,w26 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	add	w21,w21,w11	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	lsr	x12,x11,#32 -	ldr	x13,[x1,#-24] -	bic	w25,w20,w23 -	and	w26,w24,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	orr	w25,w25,w26 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	add	w20,w20,w12	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -#ifdef	__ARMEB__ -	ror	x13,x13,#32 -#else -	rev32	x13,x13 -#endif -	bic	w25,w24,w22 -	and	w26,w23,w22 -	ror	w27,w21,#27 -	add	w24,w24,w28		// future e+=K -	orr	w25,w25,w26 -	add	w20,w20,w27		// e+=rot(a,5) -	ror	w22,w22,#2 -	add	w24,w24,w13	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	lsr	x14,x13,#32 -	ldr	x15,[x1,#-16] -	bic	w25,w23,w21 -	and	w26,w22,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	orr	w25,w25,w26 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	add	w23,w23,w14	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -#ifdef	__ARMEB__ -	ror	x15,x15,#32 -#else -	rev32	x15,x15 -#endif -	bic	w25,w22,w20 -	and	w26,w21,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	orr	w25,w25,w26 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	add	w22,w22,w15	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	lsr	x16,x15,#32 -	ldr	x17,[x1,#-8] -	bic	w25,w21,w24 -	and	w26,w20,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	orr	w25,w25,w26 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	add	w21,w21,w16	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -#ifdef	__ARMEB__ -	ror	x17,x17,#32 -#else -	rev32	x17,x17 -#endif -	bic	w25,w20,w23 -	and	w26,w24,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	orr	w25,w25,w26 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	add	w20,w20,w17	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	lsr	x19,x17,#32 -	 eor	w3,w3,w5 -	bic	w25,w24,w22 -	and	w26,w23,w22 -	ror	w27,w21,#27 -	 eor	w3,w3,w11 -	add	w24,w24,w28		// future e+=K -	orr	w25,w25,w26 -	add	w20,w20,w27		// e+=rot(a,5) -	 eor	w3,w3,w16 -	ror	w22,w22,#2 -	add	w24,w24,w19	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w3,w3,#31 -	 eor	w4,w4,w6 -	bic	w25,w23,w21 -	and	w26,w22,w21 -	ror	w27,w20,#27 -	 eor	w4,w4,w12 -	add	w23,w23,w28		// future e+=K -	orr	w25,w25,w26 -	add	w24,w24,w27		// e+=rot(a,5) -	 eor	w4,w4,w17 -	ror	w21,w21,#2 -	add	w23,w23,w3	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w4,w4,#31 -	 eor	w5,w5,w7 -	bic	w25,w22,w20 -	and	w26,w21,w20 -	ror	w27,w24,#27 -	 eor	w5,w5,w13 -	add	w22,w22,w28		// future e+=K -	orr	w25,w25,w26 -	add	w23,w23,w27		// e+=rot(a,5) -	 eor	w5,w5,w19 -	ror	w20,w20,#2 -	add	w22,w22,w4	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w5,w5,#31 -	 eor	w6,w6,w8 -	bic	w25,w21,w24 -	and	w26,w20,w24 -	ror	w27,w23,#27 -	 eor	w6,w6,w14 -	add	w21,w21,w28		// future e+=K -	orr	w25,w25,w26 -	add	w22,w22,w27		// e+=rot(a,5) -	 eor	w6,w6,w3 -	ror	w24,w24,#2 -	add	w21,w21,w5	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w6,w6,#31 -	 eor	w7,w7,w9 -	bic	w25,w20,w23 -	and	w26,w24,w23 -	ror	w27,w22,#27 -	 eor	w7,w7,w15 -	add	w20,w20,w28		// future e+=K -	orr	w25,w25,w26 -	add	w21,w21,w27		// e+=rot(a,5) -	 eor	w7,w7,w4 -	ror	w23,w23,#2 -	add	w20,w20,w6	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w7,w7,#31 -	movz	w28,#0xeba1 -	movk	w28,#0x6ed9,lsl#16 -	 eor	w8,w8,w10 -	bic	w25,w24,w22 -	and	w26,w23,w22 -	ror	
w27,w21,#27 -	 eor	w8,w8,w16 -	add	w24,w24,w28		// future e+=K -	orr	w25,w25,w26 -	add	w20,w20,w27		// e+=rot(a,5) -	 eor	w8,w8,w5 -	ror	w22,w22,#2 -	add	w24,w24,w7	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w8,w8,#31 -	 eor	w9,w9,w11 -	eor	w25,w23,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	 eor	w9,w9,w17 -	eor	w25,w25,w22 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	 eor	w9,w9,w6 -	add	w23,w23,w8	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w9,w9,#31 -	 eor	w10,w10,w12 -	eor	w25,w22,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	 eor	w10,w10,w19 -	eor	w25,w25,w21 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	 eor	w10,w10,w7 -	add	w22,w22,w9	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w10,w10,#31 -	 eor	w11,w11,w13 -	eor	w25,w21,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	 eor	w11,w11,w3 -	eor	w25,w25,w20 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	 eor	w11,w11,w8 -	add	w21,w21,w10	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w11,w11,#31 -	 eor	w12,w12,w14 -	eor	w25,w20,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	 eor	w12,w12,w4 -	eor	w25,w25,w24 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	 eor	w12,w12,w9 -	add	w20,w20,w11	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w12,w12,#31 -	 eor	w13,w13,w15 -	eor	w25,w24,w22 -	ror	w27,w21,#27 -	add	w24,w24,w28		// future e+=K -	 eor	w13,w13,w5 -	eor	w25,w25,w23 -	add	w20,w20,w27		// e+=rot(a,5) -	ror	w22,w22,#2 -	 eor	w13,w13,w10 -	add	w24,w24,w12	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w13,w13,#31 -	 eor	w14,w14,w16 -	eor	w25,w23,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	 eor	w14,w14,w6 -	eor	w25,w25,w22 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	 eor	w14,w14,w11 -	add	w23,w23,w13	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w14,w14,#31 -	 eor	w15,w15,w17 -	eor	w25,w22,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	 eor	w15,w15,w7 -	eor	w25,w25,w21 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	 eor	w15,w15,w12 -	add	w22,w22,w14	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w15,w15,#31 -	 eor	w16,w16,w19 -	eor	w25,w21,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	 eor	w16,w16,w8 -	eor	w25,w25,w20 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	 eor	w16,w16,w13 -	add	w21,w21,w15	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w16,w16,#31 -	 eor	w17,w17,w3 -	eor	w25,w20,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	 eor	w17,w17,w9 -	eor	w25,w25,w24 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	 eor	w17,w17,w14 -	add	w20,w20,w16	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w17,w17,#31 -	 eor	w19,w19,w4 -	eor	w25,w24,w22 -	ror	w27,w21,#27 -	add	w24,w24,w28		// future e+=K -	 eor	w19,w19,w10 -	eor	w25,w25,w23 -	add	w20,w20,w27		// e+=rot(a,5) -	ror	w22,w22,#2 -	 eor	w19,w19,w15 -	add	w24,w24,w17	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w19,w19,#31 -	 eor	w3,w3,w5 -	eor	w25,w23,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	 eor	w3,w3,w11 -	eor	w25,w25,w22 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	 eor	w3,w3,w16 -	add	w23,w23,w19	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w3,w3,#31 -	 eor	w4,w4,w6 -	eor	w25,w22,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	 eor	w4,w4,w12 -	eor	w25,w25,w21 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	 eor	w4,w4,w17 -	add	w22,w22,w3	// future 
e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w4,w4,#31 -	 eor	w5,w5,w7 -	eor	w25,w21,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	 eor	w5,w5,w13 -	eor	w25,w25,w20 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	 eor	w5,w5,w19 -	add	w21,w21,w4	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w5,w5,#31 -	 eor	w6,w6,w8 -	eor	w25,w20,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	 eor	w6,w6,w14 -	eor	w25,w25,w24 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	 eor	w6,w6,w3 -	add	w20,w20,w5	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w6,w6,#31 -	 eor	w7,w7,w9 -	eor	w25,w24,w22 -	ror	w27,w21,#27 -	add	w24,w24,w28		// future e+=K -	 eor	w7,w7,w15 -	eor	w25,w25,w23 -	add	w20,w20,w27		// e+=rot(a,5) -	ror	w22,w22,#2 -	 eor	w7,w7,w4 -	add	w24,w24,w6	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w7,w7,#31 -	 eor	w8,w8,w10 -	eor	w25,w23,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	 eor	w8,w8,w16 -	eor	w25,w25,w22 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	 eor	w8,w8,w5 -	add	w23,w23,w7	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w8,w8,#31 -	 eor	w9,w9,w11 -	eor	w25,w22,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	 eor	w9,w9,w17 -	eor	w25,w25,w21 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	 eor	w9,w9,w6 -	add	w22,w22,w8	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w9,w9,#31 -	 eor	w10,w10,w12 -	eor	w25,w21,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	 eor	w10,w10,w19 -	eor	w25,w25,w20 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	 eor	w10,w10,w7 -	add	w21,w21,w9	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w10,w10,#31 -	 eor	w11,w11,w13 -	eor	w25,w20,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	 eor	w11,w11,w3 -	eor	w25,w25,w24 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	 eor	w11,w11,w8 -	add	w20,w20,w10	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w11,w11,#31 -	movz	w28,#0xbcdc -	movk	w28,#0x8f1b,lsl#16 -	 eor	w12,w12,w14 -	eor	w25,w24,w22 -	ror	w27,w21,#27 -	add	w24,w24,w28		// future e+=K -	 eor	w12,w12,w4 -	eor	w25,w25,w23 -	add	w20,w20,w27		// e+=rot(a,5) -	ror	w22,w22,#2 -	 eor	w12,w12,w9 -	add	w24,w24,w11	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w12,w12,#31 -	orr	w25,w21,w22 -	and	w26,w21,w22 -	 eor	w13,w13,w15 -	ror	w27,w20,#27 -	and	w25,w25,w23 -	add	w23,w23,w28		// future e+=K -	 eor	w13,w13,w5 -	add	w24,w24,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w21,w21,#2 -	 eor	w13,w13,w10 -	add	w23,w23,w12	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w13,w13,#31 -	orr	w25,w20,w21 -	and	w26,w20,w21 -	 eor	w14,w14,w16 -	ror	w27,w24,#27 -	and	w25,w25,w22 -	add	w22,w22,w28		// future e+=K -	 eor	w14,w14,w6 -	add	w23,w23,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w20,w20,#2 -	 eor	w14,w14,w11 -	add	w22,w22,w13	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w14,w14,#31 -	orr	w25,w24,w20 -	and	w26,w24,w20 -	 eor	w15,w15,w17 -	ror	w27,w23,#27 -	and	w25,w25,w21 -	add	w21,w21,w28		// future e+=K -	 eor	w15,w15,w7 -	add	w22,w22,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w24,w24,#2 -	 eor	w15,w15,w12 -	add	w21,w21,w14	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w15,w15,#31 -	orr	w25,w23,w24 -	and	w26,w23,w24 -	 eor	w16,w16,w19 -	ror	w27,w22,#27 -	and	w25,w25,w20 -	add	w20,w20,w28		// future e+=K -	 eor	w16,w16,w8 -	add	w21,w21,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w23,w23,#2 -	 eor	w16,w16,w13 -	add	w20,w20,w15	// future e+=X[i] -	
add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w16,w16,#31 -	orr	w25,w22,w23 -	and	w26,w22,w23 -	 eor	w17,w17,w3 -	ror	w27,w21,#27 -	and	w25,w25,w24 -	add	w24,w24,w28		// future e+=K -	 eor	w17,w17,w9 -	add	w20,w20,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w22,w22,#2 -	 eor	w17,w17,w14 -	add	w24,w24,w16	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w17,w17,#31 -	orr	w25,w21,w22 -	and	w26,w21,w22 -	 eor	w19,w19,w4 -	ror	w27,w20,#27 -	and	w25,w25,w23 -	add	w23,w23,w28		// future e+=K -	 eor	w19,w19,w10 -	add	w24,w24,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w21,w21,#2 -	 eor	w19,w19,w15 -	add	w23,w23,w17	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w19,w19,#31 -	orr	w25,w20,w21 -	and	w26,w20,w21 -	 eor	w3,w3,w5 -	ror	w27,w24,#27 -	and	w25,w25,w22 -	add	w22,w22,w28		// future e+=K -	 eor	w3,w3,w11 -	add	w23,w23,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w20,w20,#2 -	 eor	w3,w3,w16 -	add	w22,w22,w19	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w3,w3,#31 -	orr	w25,w24,w20 -	and	w26,w24,w20 -	 eor	w4,w4,w6 -	ror	w27,w23,#27 -	and	w25,w25,w21 -	add	w21,w21,w28		// future e+=K -	 eor	w4,w4,w12 -	add	w22,w22,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w24,w24,#2 -	 eor	w4,w4,w17 -	add	w21,w21,w3	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w4,w4,#31 -	orr	w25,w23,w24 -	and	w26,w23,w24 -	 eor	w5,w5,w7 -	ror	w27,w22,#27 -	and	w25,w25,w20 -	add	w20,w20,w28		// future e+=K -	 eor	w5,w5,w13 -	add	w21,w21,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w23,w23,#2 -	 eor	w5,w5,w19 -	add	w20,w20,w4	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w5,w5,#31 -	orr	w25,w22,w23 -	and	w26,w22,w23 -	 eor	w6,w6,w8 -	ror	w27,w21,#27 -	and	w25,w25,w24 -	add	w24,w24,w28		// future e+=K -	 eor	w6,w6,w14 -	add	w20,w20,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w22,w22,#2 -	 eor	w6,w6,w3 -	add	w24,w24,w5	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w6,w6,#31 -	orr	w25,w21,w22 -	and	w26,w21,w22 -	 eor	w7,w7,w9 -	ror	w27,w20,#27 -	and	w25,w25,w23 -	add	w23,w23,w28		// future e+=K -	 eor	w7,w7,w15 -	add	w24,w24,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w21,w21,#2 -	 eor	w7,w7,w4 -	add	w23,w23,w6	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w7,w7,#31 -	orr	w25,w20,w21 -	and	w26,w20,w21 -	 eor	w8,w8,w10 -	ror	w27,w24,#27 -	and	w25,w25,w22 -	add	w22,w22,w28		// future e+=K -	 eor	w8,w8,w16 -	add	w23,w23,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w20,w20,#2 -	 eor	w8,w8,w5 -	add	w22,w22,w7	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w8,w8,#31 -	orr	w25,w24,w20 -	and	w26,w24,w20 -	 eor	w9,w9,w11 -	ror	w27,w23,#27 -	and	w25,w25,w21 -	add	w21,w21,w28		// future e+=K -	 eor	w9,w9,w17 -	add	w22,w22,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w24,w24,#2 -	 eor	w9,w9,w6 -	add	w21,w21,w8	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w9,w9,#31 -	orr	w25,w23,w24 -	and	w26,w23,w24 -	 eor	w10,w10,w12 -	ror	w27,w22,#27 -	and	w25,w25,w20 -	add	w20,w20,w28		// future e+=K -	 eor	w10,w10,w19 -	add	w21,w21,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w23,w23,#2 -	 eor	w10,w10,w7 -	add	w20,w20,w9	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w10,w10,#31 -	orr	w25,w22,w23 -	and	w26,w22,w23 -	 eor	w11,w11,w13 -	ror	w27,w21,#27 -	and	w25,w25,w24 -	add	w24,w24,w28		// future e+=K -	 eor	w11,w11,w3 -	add	w20,w20,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w22,w22,#2 -	 eor	w11,w11,w8 -	add	w24,w24,w10	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w11,w11,#31 -	orr	w25,w21,w22 -	and	w26,w21,w22 -	 eor	
w12,w12,w14 -	ror	w27,w20,#27 -	and	w25,w25,w23 -	add	w23,w23,w28		// future e+=K -	 eor	w12,w12,w4 -	add	w24,w24,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w21,w21,#2 -	 eor	w12,w12,w9 -	add	w23,w23,w11	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w12,w12,#31 -	orr	w25,w20,w21 -	and	w26,w20,w21 -	 eor	w13,w13,w15 -	ror	w27,w24,#27 -	and	w25,w25,w22 -	add	w22,w22,w28		// future e+=K -	 eor	w13,w13,w5 -	add	w23,w23,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w20,w20,#2 -	 eor	w13,w13,w10 -	add	w22,w22,w12	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w13,w13,#31 -	orr	w25,w24,w20 -	and	w26,w24,w20 -	 eor	w14,w14,w16 -	ror	w27,w23,#27 -	and	w25,w25,w21 -	add	w21,w21,w28		// future e+=K -	 eor	w14,w14,w6 -	add	w22,w22,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w24,w24,#2 -	 eor	w14,w14,w11 -	add	w21,w21,w13	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w14,w14,#31 -	orr	w25,w23,w24 -	and	w26,w23,w24 -	 eor	w15,w15,w17 -	ror	w27,w22,#27 -	and	w25,w25,w20 -	add	w20,w20,w28		// future e+=K -	 eor	w15,w15,w7 -	add	w21,w21,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w23,w23,#2 -	 eor	w15,w15,w12 -	add	w20,w20,w14	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w15,w15,#31 -	movz	w28,#0xc1d6 -	movk	w28,#0xca62,lsl#16 -	orr	w25,w22,w23 -	and	w26,w22,w23 -	 eor	w16,w16,w19 -	ror	w27,w21,#27 -	and	w25,w25,w24 -	add	w24,w24,w28		// future e+=K -	 eor	w16,w16,w8 -	add	w20,w20,w27		// e+=rot(a,5) -	orr	w25,w25,w26 -	ror	w22,w22,#2 -	 eor	w16,w16,w13 -	add	w24,w24,w15	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w16,w16,#31 -	 eor	w17,w17,w3 -	eor	w25,w23,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	 eor	w17,w17,w9 -	eor	w25,w25,w22 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	 eor	w17,w17,w14 -	add	w23,w23,w16	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w17,w17,#31 -	 eor	w19,w19,w4 -	eor	w25,w22,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	 eor	w19,w19,w10 -	eor	w25,w25,w21 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	 eor	w19,w19,w15 -	add	w22,w22,w17	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w19,w19,#31 -	 eor	w3,w3,w5 -	eor	w25,w21,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	 eor	w3,w3,w11 -	eor	w25,w25,w20 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	 eor	w3,w3,w16 -	add	w21,w21,w19	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w3,w3,#31 -	 eor	w4,w4,w6 -	eor	w25,w20,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	 eor	w4,w4,w12 -	eor	w25,w25,w24 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	 eor	w4,w4,w17 -	add	w20,w20,w3	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w4,w4,#31 -	 eor	w5,w5,w7 -	eor	w25,w24,w22 -	ror	w27,w21,#27 -	add	w24,w24,w28		// future e+=K -	 eor	w5,w5,w13 -	eor	w25,w25,w23 -	add	w20,w20,w27		// e+=rot(a,5) -	ror	w22,w22,#2 -	 eor	w5,w5,w19 -	add	w24,w24,w4	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w5,w5,#31 -	 eor	w6,w6,w8 -	eor	w25,w23,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	 eor	w6,w6,w14 -	eor	w25,w25,w22 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	 eor	w6,w6,w3 -	add	w23,w23,w5	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w6,w6,#31 -	 eor	w7,w7,w9 -	eor	w25,w22,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	 eor	w7,w7,w15 -	eor	w25,w25,w21 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	 eor	w7,w7,w4 -	add	w22,w22,w6	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w7,w7,#31 -	 eor	
w8,w8,w10 -	eor	w25,w21,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	 eor	w8,w8,w16 -	eor	w25,w25,w20 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	 eor	w8,w8,w5 -	add	w21,w21,w7	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w8,w8,#31 -	 eor	w9,w9,w11 -	eor	w25,w20,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	 eor	w9,w9,w17 -	eor	w25,w25,w24 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	 eor	w9,w9,w6 -	add	w20,w20,w8	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w9,w9,#31 -	 eor	w10,w10,w12 -	eor	w25,w24,w22 -	ror	w27,w21,#27 -	add	w24,w24,w28		// future e+=K -	 eor	w10,w10,w19 -	eor	w25,w25,w23 -	add	w20,w20,w27		// e+=rot(a,5) -	ror	w22,w22,#2 -	 eor	w10,w10,w7 -	add	w24,w24,w9	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w10,w10,#31 -	 eor	w11,w11,w13 -	eor	w25,w23,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	 eor	w11,w11,w3 -	eor	w25,w25,w22 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	 eor	w11,w11,w8 -	add	w23,w23,w10	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w11,w11,#31 -	 eor	w12,w12,w14 -	eor	w25,w22,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	 eor	w12,w12,w4 -	eor	w25,w25,w21 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	 eor	w12,w12,w9 -	add	w22,w22,w11	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w12,w12,#31 -	 eor	w13,w13,w15 -	eor	w25,w21,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	 eor	w13,w13,w5 -	eor	w25,w25,w20 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	 eor	w13,w13,w10 -	add	w21,w21,w12	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w13,w13,#31 -	 eor	w14,w14,w16 -	eor	w25,w20,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	 eor	w14,w14,w6 -	eor	w25,w25,w24 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	 eor	w14,w14,w11 -	add	w20,w20,w13	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	 ror	w14,w14,#31 -	 eor	w15,w15,w17 -	eor	w25,w24,w22 -	ror	w27,w21,#27 -	add	w24,w24,w28		// future e+=K -	 eor	w15,w15,w7 -	eor	w25,w25,w23 -	add	w20,w20,w27		// e+=rot(a,5) -	ror	w22,w22,#2 -	 eor	w15,w15,w12 -	add	w24,w24,w14	// future e+=X[i] -	add	w20,w20,w25		// e+=F(b,c,d) -	 ror	w15,w15,#31 -	 eor	w16,w16,w19 -	eor	w25,w23,w21 -	ror	w27,w20,#27 -	add	w23,w23,w28		// future e+=K -	 eor	w16,w16,w8 -	eor	w25,w25,w22 -	add	w24,w24,w27		// e+=rot(a,5) -	ror	w21,w21,#2 -	 eor	w16,w16,w13 -	add	w23,w23,w15	// future e+=X[i] -	add	w24,w24,w25		// e+=F(b,c,d) -	 ror	w16,w16,#31 -	 eor	w17,w17,w3 -	eor	w25,w22,w20 -	ror	w27,w24,#27 -	add	w22,w22,w28		// future e+=K -	 eor	w17,w17,w9 -	eor	w25,w25,w21 -	add	w23,w23,w27		// e+=rot(a,5) -	ror	w20,w20,#2 -	 eor	w17,w17,w14 -	add	w22,w22,w16	// future e+=X[i] -	add	w23,w23,w25		// e+=F(b,c,d) -	 ror	w17,w17,#31 -	 eor	w19,w19,w4 -	eor	w25,w21,w24 -	ror	w27,w23,#27 -	add	w21,w21,w28		// future e+=K -	 eor	w19,w19,w10 -	eor	w25,w25,w20 -	add	w22,w22,w27		// e+=rot(a,5) -	ror	w24,w24,#2 -	 eor	w19,w19,w15 -	add	w21,w21,w17	// future e+=X[i] -	add	w22,w22,w25		// e+=F(b,c,d) -	 ror	w19,w19,#31 -	ldp	w4,w5,[x0] -	eor	w25,w20,w23 -	ror	w27,w22,#27 -	add	w20,w20,w28		// future e+=K -	eor	w25,w25,w24 -	add	w21,w21,w27		// e+=rot(a,5) -	ror	w23,w23,#2 -	add	w20,w20,w19	// future e+=X[i] -	add	w21,w21,w25		// e+=F(b,c,d) -	ldp	w6,w7,[x0,#8] -	eor	w25,w24,w22 -	ror	w27,w21,#27 -	eor	w25,w25,w23 -	add	w20,w20,w27		// e+=rot(a,5) -	ror	w22,w22,#2 -	ldr	w8,[x0,#16] -	add	w20,w20,w25		// e+=F(b,c,d) -	add	w21,w21,w5 -	add	w22,w22,w6 -	add	w20,w20,w4 -	add	
w23,w23,w7 -	add	w24,w24,w8 -	stp	w20,w21,[x0] -	stp	w22,w23,[x0,#8] -	str	w24,[x0,#16] -	cbnz	x2,.Loop - -	ldp	x19,x20,[sp,#16] -	ldp	x21,x22,[sp,#32] -	ldp	x23,x24,[sp,#48] -	ldp	x25,x26,[sp,#64] -	ldp	x27,x28,[sp,#80] -	ldr	x29,[sp],#96 -	ret -.size	sha1_block_data_order,.-sha1_block_data_order -.type	sha1_block_armv8,%function -.align	6 -sha1_block_armv8: -.Lv8_entry: -	stp	x29,x30,[sp,#-16]! -	add	x29,sp,#0 - -	adr	x4,.Lconst -	eor	v1.16b,v1.16b,v1.16b -	ld1	{v0.4s},[x0],#16 -	ld1	{v1.s}[0],[x0] -	sub	x0,x0,#16 -	ld1	{v16.4s-v19.4s},[x4] - -.Loop_hw: -	ld1	{v4.16b-v7.16b},[x1],#64 -	sub	x2,x2,#1 -	rev32	v4.16b,v4.16b -	rev32	v5.16b,v5.16b - -	add	v20.4s,v16.4s,v4.4s -	rev32	v6.16b,v6.16b -	orr	v22.16b,v0.16b,v0.16b	// offload - -	add	v21.4s,v16.4s,v5.4s -	rev32	v7.16b,v7.16b -	.inst	0x5e280803	//sha1h v3.16b,v0.16b -	.inst	0x5e140020	//sha1c v0.16b,v1.16b,v20.4s		// 0 -	add	v20.4s,v16.4s,v6.4s -	.inst	0x5e0630a4	//sha1su0 v4.16b,v5.16b,v6.16b -	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 1 -	.inst	0x5e150060	//sha1c v0.16b,v3.16b,v21.4s -	add	v21.4s,v16.4s,v7.4s -	.inst	0x5e2818e4	//sha1su1 v4.16b,v7.16b -	.inst	0x5e0730c5	//sha1su0 v5.16b,v6.16b,v7.16b -	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 2 -	.inst	0x5e140040	//sha1c v0.16b,v2.16b,v20.4s -	add	v20.4s,v16.4s,v4.4s -	.inst	0x5e281885	//sha1su1 v5.16b,v4.16b -	.inst	0x5e0430e6	//sha1su0 v6.16b,v7.16b,v4.16b -	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 3 -	.inst	0x5e150060	//sha1c v0.16b,v3.16b,v21.4s -	add	v21.4s,v17.4s,v5.4s -	.inst	0x5e2818a6	//sha1su1 v6.16b,v5.16b -	.inst	0x5e053087	//sha1su0 v7.16b,v4.16b,v5.16b -	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 4 -	.inst	0x5e140040	//sha1c v0.16b,v2.16b,v20.4s -	add	v20.4s,v17.4s,v6.4s -	.inst	0x5e2818c7	//sha1su1 v7.16b,v6.16b -	.inst	0x5e0630a4	//sha1su0 v4.16b,v5.16b,v6.16b -	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 5 -	.inst	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s -	add	v21.4s,v17.4s,v7.4s -	.inst	0x5e2818e4	//sha1su1 v4.16b,v7.16b -	.inst	0x5e0730c5	//sha1su0 v5.16b,v6.16b,v7.16b -	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 6 -	.inst	0x5e141040	//sha1p v0.16b,v2.16b,v20.4s -	add	v20.4s,v17.4s,v4.4s -	.inst	0x5e281885	//sha1su1 v5.16b,v4.16b -	.inst	0x5e0430e6	//sha1su0 v6.16b,v7.16b,v4.16b -	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 7 -	.inst	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s -	add	v21.4s,v17.4s,v5.4s -	.inst	0x5e2818a6	//sha1su1 v6.16b,v5.16b -	.inst	0x5e053087	//sha1su0 v7.16b,v4.16b,v5.16b -	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 8 -	.inst	0x5e141040	//sha1p v0.16b,v2.16b,v20.4s -	add	v20.4s,v18.4s,v6.4s -	.inst	0x5e2818c7	//sha1su1 v7.16b,v6.16b -	.inst	0x5e0630a4	//sha1su0 v4.16b,v5.16b,v6.16b -	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 9 -	.inst	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s -	add	v21.4s,v18.4s,v7.4s -	.inst	0x5e2818e4	//sha1su1 v4.16b,v7.16b -	.inst	0x5e0730c5	//sha1su0 v5.16b,v6.16b,v7.16b -	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 10 -	.inst	0x5e142040	//sha1m v0.16b,v2.16b,v20.4s -	add	v20.4s,v18.4s,v4.4s -	.inst	0x5e281885	//sha1su1 v5.16b,v4.16b -	.inst	0x5e0430e6	//sha1su0 v6.16b,v7.16b,v4.16b -	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 11 -	.inst	0x5e152060	//sha1m v0.16b,v3.16b,v21.4s -	add	v21.4s,v18.4s,v5.4s -	.inst	0x5e2818a6	//sha1su1 v6.16b,v5.16b -	.inst	0x5e053087	//sha1su0 v7.16b,v4.16b,v5.16b -	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 12 -	.inst	0x5e142040	//sha1m v0.16b,v2.16b,v20.4s -	add	v20.4s,v18.4s,v6.4s -	.inst	0x5e2818c7	//sha1su1 v7.16b,v6.16b -	.inst	0x5e0630a4	//sha1su0 v4.16b,v5.16b,v6.16b -	.inst	0x5e280802	
//sha1h v2.16b,v0.16b		// 13 -	.inst	0x5e152060	//sha1m v0.16b,v3.16b,v21.4s -	add	v21.4s,v19.4s,v7.4s -	.inst	0x5e2818e4	//sha1su1 v4.16b,v7.16b -	.inst	0x5e0730c5	//sha1su0 v5.16b,v6.16b,v7.16b -	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 14 -	.inst	0x5e142040	//sha1m v0.16b,v2.16b,v20.4s -	add	v20.4s,v19.4s,v4.4s -	.inst	0x5e281885	//sha1su1 v5.16b,v4.16b -	.inst	0x5e0430e6	//sha1su0 v6.16b,v7.16b,v4.16b -	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 15 -	.inst	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s -	add	v21.4s,v19.4s,v5.4s -	.inst	0x5e2818a6	//sha1su1 v6.16b,v5.16b -	.inst	0x5e053087	//sha1su0 v7.16b,v4.16b,v5.16b -	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 16 -	.inst	0x5e141040	//sha1p v0.16b,v2.16b,v20.4s -	add	v20.4s,v19.4s,v6.4s -	.inst	0x5e2818c7	//sha1su1 v7.16b,v6.16b -	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 17 -	.inst	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s -	add	v21.4s,v19.4s,v7.4s - -	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 18 -	.inst	0x5e141040	//sha1p v0.16b,v2.16b,v20.4s - -	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 19 -	.inst	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s - -	add	v1.4s,v1.4s,v2.4s -	add	v0.4s,v0.4s,v22.4s - -	cbnz	x2,.Loop_hw - -	st1	{v0.4s},[x0],#16 -	st1	{v1.s}[0],[x0] - -	ldr	x29,[sp],#16 -	ret -.size	sha1_block_armv8,.-sha1_block_armv8 -.align	6 -.Lconst: -.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	//K_00_19 -.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	//K_20_39 -.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	//K_40_59 -.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	//K_60_79 -.LOPENSSL_armcap_P: -.quad	OPENSSL_armcap_P-. -.asciz	"SHA1 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>" -.align	2 -.comm	OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha1-armv8.pl b/app/openssl/crypto/sha/asm/sha1-armv8.pl deleted file mode 100644 index c1f552b6..00000000 --- a/app/openssl/crypto/sha/asm/sha1-armv8.pl +++ /dev/null @@ -1,333 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# SHA1 for ARMv8. -# -# Performance in cycles per processed byte and improvement coefficient -# over code generated with "default" compiler: -# -#		hardware-assisted	software(*) -# Apple A7	2.31			4.13 (+14%) -# Cortex-A5x	n/a			n/a -# -# (*)	Software results are presented mostly for reference purposes. 
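[Editor's note — not part of the patch.] The deleted sha1-armv8.pl below is a Perl code generator: its BODY_00_19, BODY_20_39 and BODY_40_59 subs each emit the AArch64 instructions for one unrolled SHA-1 round (the integer path seen above with w20..w24 holding A..E), and the {{{ ... }}} section emits the hardware-assisted sha1c/sha1p/sha1m path. As orientation, here is a minimal pure-Perl reference model of the per-round update those blocks unroll; it follows the standard SHA-1 definition, and all names here are illustrative rather than taken from the module:

	use strict; use warnings;

	# Round constants K_00_19..K_60_79, matching the .Lconst table above.
	my @K = (0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6);

	sub rol { my ($x, $n) = @_; (($x << $n) | ($x >> (32 - $n))) & 0xffffffff }

	# F(b,c,d) switches every 20 rounds: Ch, Parity, Maj, Parity.  The bic/and/orr
	# sequence in BODY_00_19 computes Ch; the eor pairs in BODY_20_39 compute
	# Parity; the orr/and/and/orr sequence in BODY_40_59 computes Maj.
	sub F {
	    my ($i, $b, $c, $d) = @_;
	    return ($b & $c) | (~$b & $d)            if $i < 20;              # Ch
	    return $b ^ $c ^ $d                      if $i < 40 or $i >= 60;  # Parity
	    return ($b & $c) | ($b & $d) | ($c & $d);                         # Maj
	}

	# One round: e += rol(a,5) + F(b,c,d) + K + X[i]; then b = rol(b,30).
	sub round {
	    my ($i, $a, $b, $c, $d, $e, $x) = @_;
	    my $t = (rol($a, 5) + F($i, $b, $c, $d) + $e + $K[int($i / 20)] + $x) & 0xffffffff;
	    return ($t, $a, rol($b, 30), $c, $d);    # new (a,b,c,d,e)
	}

Iterating round() 80 times over the expanded message schedule and adding the result back into the five state words reproduces what the unrolled integer path computes with the "future e+=K / future e+=X[i]" software pipelining seen above.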
- -$flavour = shift; -open STDOUT,">".shift; - -($ctx,$inp,$num)=("x0","x1","x2"); -@Xw=map("w$_",(3..17,19)); -@Xx=map("x$_",(3..17,19)); -@V=($A,$B,$C,$D,$E)=map("w$_",(20..24)); -($t0,$t1,$t2,$K)=map("w$_",(25..28)); - - -sub BODY_00_19 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=($i+2)&15; - -$code.=<<___ if ($i<15 && !($i&1)); -	lsr	@Xx[$i+1],@Xx[$i],#32 -___ -$code.=<<___ if ($i<14 && !($i&1)); -	ldr	@Xx[$i+2],[$inp,#`($i+2)*4-64`] -___ -$code.=<<___ if ($i<14 && ($i&1)); -#ifdef	__ARMEB__ -	ror	@Xx[$i+1],@Xx[$i+1],#32 -#else -	rev32	@Xx[$i+1],@Xx[$i+1] -#endif -___ -$code.=<<___ if ($i<14); -	bic	$t0,$d,$b -	and	$t1,$c,$b -	ror	$t2,$a,#27 -	add	$d,$d,$K		// future e+=K -	orr	$t0,$t0,$t1 -	add	$e,$e,$t2		// e+=rot(a,5) -	ror	$b,$b,#2 -	add	$d,$d,@Xw[($i+1)&15]	// future e+=X[i] -	add	$e,$e,$t0		// e+=F(b,c,d) -___ -$code.=<<___ if ($i==19); -	movz	$K,#0xeba1 -	movk	$K,#0x6ed9,lsl#16 -___ -$code.=<<___ if ($i>=14); -	 eor	@Xw[$j],@Xw[$j],@Xw[($j+2)&15] -	bic	$t0,$d,$b -	and	$t1,$c,$b -	ror	$t2,$a,#27 -	 eor	@Xw[$j],@Xw[$j],@Xw[($j+8)&15] -	add	$d,$d,$K		// future e+=K -	orr	$t0,$t0,$t1 -	add	$e,$e,$t2		// e+=rot(a,5) -	 eor	@Xw[$j],@Xw[$j],@Xw[($j+13)&15] -	ror	$b,$b,#2 -	add	$d,$d,@Xw[($i+1)&15]	// future e+=X[i] -	add	$e,$e,$t0		// e+=F(b,c,d) -	 ror	@Xw[$j],@Xw[$j],#31 -___ -} - -sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=($i+2)&15; - -$code.=<<___ if ($i==59); -	movz	$K,#0xc1d6 -	movk	$K,#0xca62,lsl#16 -___ -$code.=<<___; -	orr	$t0,$b,$c -	and	$t1,$b,$c -	 eor	@Xw[$j],@Xw[$j],@Xw[($j+2)&15] -	ror	$t2,$a,#27 -	and	$t0,$t0,$d -	add	$d,$d,$K		// future e+=K -	 eor	@Xw[$j],@Xw[$j],@Xw[($j+8)&15] -	add	$e,$e,$t2		// e+=rot(a,5) -	orr	$t0,$t0,$t1 -	ror	$b,$b,#2 -	 eor	@Xw[$j],@Xw[$j],@Xw[($j+13)&15] -	add	$d,$d,@Xw[($i+1)&15]	// future e+=X[i] -	add	$e,$e,$t0		// e+=F(b,c,d) -	 ror	@Xw[$j],@Xw[$j],#31 -___ -} - -sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=($i+2)&15; - -$code.=<<___ if ($i==39); -	movz	$K,#0xbcdc -	movk	$K,#0x8f1b,lsl#16 -___ -$code.=<<___ if ($i<78); -	 eor	@Xw[$j],@Xw[$j],@Xw[($j+2)&15] -	eor	$t0,$d,$b -	ror	$t2,$a,#27 -	add	$d,$d,$K		// future e+=K -	 eor	@Xw[$j],@Xw[$j],@Xw[($j+8)&15] -	eor	$t0,$t0,$c -	add	$e,$e,$t2		// e+=rot(a,5) -	ror	$b,$b,#2 -	 eor	@Xw[$j],@Xw[$j],@Xw[($j+13)&15] -	add	$d,$d,@Xw[($i+1)&15]	// future e+=X[i] -	add	$e,$e,$t0		// e+=F(b,c,d) -	 ror	@Xw[$j],@Xw[$j],#31 -___ -$code.=<<___ if ($i==78); -	ldp	@Xw[1],@Xw[2],[$ctx] -	eor	$t0,$d,$b -	ror	$t2,$a,#27 -	add	$d,$d,$K		// future e+=K -	eor	$t0,$t0,$c -	add	$e,$e,$t2		// e+=rot(a,5) -	ror	$b,$b,#2 -	add	$d,$d,@Xw[($i+1)&15]	// future e+=X[i] -	add	$e,$e,$t0		// e+=F(b,c,d) -___ -$code.=<<___ if ($i==79); -	ldp	@Xw[3],@Xw[4],[$ctx,#8] -	eor	$t0,$d,$b -	ror	$t2,$a,#27 -	eor	$t0,$t0,$c -	add	$e,$e,$t2		// e+=rot(a,5) -	ror	$b,$b,#2 -	ldr	@Xw[5],[$ctx,#16] -	add	$e,$e,$t0		// e+=F(b,c,d) -___ -} - -$code.=<<___; -#include "arm_arch.h" - -.text - -.globl	sha1_block_data_order -.type	sha1_block_data_order,%function -.align	6 -sha1_block_data_order: -	ldr	x16,.LOPENSSL_armcap_P -	adr	x17,.LOPENSSL_armcap_P -	add	x16,x16,x17 -	ldr	w16,[x16] -	tst	w16,#ARMV8_SHA1 -	b.ne	.Lv8_entry - -	stp	x29,x30,[sp,#-96]! 
-	add	x29,sp,#0 -	stp	x19,x20,[sp,#16] -	stp	x21,x22,[sp,#32] -	stp	x23,x24,[sp,#48] -	stp	x25,x26,[sp,#64] -	stp	x27,x28,[sp,#80] - -	ldp	$A,$B,[$ctx] -	ldp	$C,$D,[$ctx,#8] -	ldr	$E,[$ctx,#16] - -.Loop: -	ldr	@Xx[0],[$inp],#64 -	movz	$K,#0x7999 -	sub	$num,$num,#1 -	movk	$K,#0x5a82,lsl#16 -#ifdef	__ARMEB__ -	ror	$Xx[0],@Xx[0],#32 -#else -	rev32	@Xx[0],@Xx[0] -#endif -	add	$E,$E,$K		// warm it up -	add	$E,$E,@Xw[0] -___ -for($i=0;$i<20;$i++)	{ &BODY_00_19($i,@V); unshift(@V,pop(@V)); } -for(;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -for(;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); } -for(;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; -	add	$B,$B,@Xw[2] -	add	$C,$C,@Xw[3] -	add	$A,$A,@Xw[1] -	add	$D,$D,@Xw[4] -	add	$E,$E,@Xw[5] -	stp	$A,$B,[$ctx] -	stp	$C,$D,[$ctx,#8] -	str	$E,[$ctx,#16] -	cbnz	$num,.Loop - -	ldp	x19,x20,[sp,#16] -	ldp	x21,x22,[sp,#32] -	ldp	x23,x24,[sp,#48] -	ldp	x25,x26,[sp,#64] -	ldp	x27,x28,[sp,#80] -	ldr	x29,[sp],#96 -	ret -.size	sha1_block_data_order,.-sha1_block_data_order -___ -{{{ -my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3)); -my @MSG=map("v$_.16b",(4..7)); -my @Kxx=map("v$_.4s",(16..19)); -my ($W0,$W1)=("v20.4s","v21.4s"); -my $ABCD_SAVE="v22.16b"; - -$code.=<<___; -.type	sha1_block_armv8,%function -.align	6 -sha1_block_armv8: -.Lv8_entry: -	stp	x29,x30,[sp,#-16]! -	add	x29,sp,#0 - -	adr	x4,.Lconst -	eor	$E,$E,$E -	ld1.32	{$ABCD},[$ctx],#16 -	ld1.32	{$E}[0],[$ctx] -	sub	$ctx,$ctx,#16 -	ld1.32	{@Kxx[0]-@Kxx[3]},[x4] - -.Loop_hw: -	ld1	{@MSG[0]-@MSG[3]},[$inp],#64 -	sub	$num,$num,#1 -	rev32	@MSG[0],@MSG[0] -	rev32	@MSG[1],@MSG[1] - -	add.i32	$W0,@Kxx[0],@MSG[0] -	rev32	@MSG[2],@MSG[2] -	orr	$ABCD_SAVE,$ABCD,$ABCD	// offload - -	add.i32	$W1,@Kxx[0],@MSG[1] -	rev32	@MSG[3],@MSG[3] -	sha1h	$E1,$ABCD -	sha1c	$ABCD,$E,$W0		// 0 -	add.i32	$W0,@Kxx[$j],@MSG[2] -	sha1su0	@MSG[0],@MSG[1],@MSG[2] -___ -for ($j=0,$i=1;$i<20-3;$i++) { -my $f=("c","p","m","p")[$i/5]; -$code.=<<___; -	sha1h	$E0,$ABCD		// $i -	sha1$f	$ABCD,$E1,$W1 -	add.i32	$W1,@Kxx[$j],@MSG[3] -	sha1su1	@MSG[0],@MSG[3] -___ -$code.=<<___ if ($i<20-4); -	sha1su0	@MSG[1],@MSG[2],@MSG[3] -___ -	($E0,$E1)=($E1,$E0);		($W0,$W1)=($W1,$W0); -	push(@MSG,shift(@MSG));		$j++ if ((($i+3)%5)==0); -} -$code.=<<___; -	sha1h	$E0,$ABCD		// $i -	sha1p	$ABCD,$E1,$W1 -	add.i32	$W1,@Kxx[$j],@MSG[3] - -	sha1h	$E1,$ABCD		// 18 -	sha1p	$ABCD,$E0,$W0 - -	sha1h	$E0,$ABCD		// 19 -	sha1p	$ABCD,$E1,$W1 - -	add.i32	$E,$E,$E0 -	add.i32	$ABCD,$ABCD,$ABCD_SAVE - -	cbnz	$num,.Loop_hw - -	st1.32	{$ABCD},[$ctx],#16 -	st1.32	{$E}[0],[$ctx] - -	ldr	x29,[sp],#16 -	ret -.size	sha1_block_armv8,.-sha1_block_armv8 -.align	6 -.Lconst: -.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	//K_00_19 -.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	//K_20_39 -.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	//K_40_59 -.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	//K_60_79 -.LOPENSSL_armcap_P: -.quad	OPENSSL_armcap_P-. 
-.asciz	"SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" -.align	2 -.comm	OPENSSL_armcap_P,4,4 -___ -}}} - -{   my	%opcode = ( -	"sha1c"		=> 0x5e000000,	"sha1p"		=> 0x5e001000, -	"sha1m"		=> 0x5e002000,	"sha1su0"	=> 0x5e003000, -	"sha1h"		=> 0x5e280800,	"sha1su1"	=> 0x5e281800	); - -    sub unsha1 { -	my ($mnemonic,$arg)=@_; - -	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o -	&& -	sprintf ".inst\t0x%08x\t//%s %s", -			$opcode{$mnemonic}|$1|($2<<5)|($3<<16), -			$mnemonic,$arg; -    } -} - -foreach(split("\n",$code)) { - -	s/\`([^\`]*)\`/eval($1)/geo; - -	s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/geo; - -	s/\.\w?32\b//o		and s/\.16b/\.4s/go; -	m/(ld|st)1[^\[]+\[0\]/o	and s/\.4s/\.s/go; - -	print $_,"\n"; -} - -close STDOUT; diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.pl b/app/openssl/crypto/sha/asm/sha256-armv4.pl index 505ca8f3..9c84e8d9 100644 --- a/app/openssl/crypto/sha/asm/sha256-armv4.pl +++ b/app/openssl/crypto/sha/asm/sha256-armv4.pl @@ -1,7 +1,7 @@  #!/usr/bin/env perl  # ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL  # project. The module is, however, dual licensed under OpenSSL and  # CRYPTOGAMS licenses depending on where you obtain it. For further  # details see http://www.openssl.org/~appro/cryptogams/. @@ -21,27 +21,15 @@  # February 2011.  #  # Profiler-assisted and platform-specific optimization resulted in 16% -# improvement on Cortex A8 core and ~15.4 cycles per processed byte. - -# September 2013. -# -# Add NEON implementation. On Cortex A8 it was measured to process one -# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon -# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only -# code (meaning that latter performs sub-optimally, nothing was done -# about it). - -# May 2014. -# -# Add ARMv8 code path performing at 2.0 cpb on Apple A7. +# improvement on Cortex A8 core and ~17 cycles per processed byte.  
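[Editor's note — not part of the patch.] The BODY_00_15 template that follows interleaves one SHA-256 round with the message load; the patch's '+' lines restore the older scheduling in which T1, Sigma0(a) and Maj(a,b,c) are computed with explicit eor/orr/and sequences. For reference, a minimal pure-Perl model of the round being emitted — the rotation counts are the standard FIPS 180-4 ones (the script's @Sigma0/@Sigma1 arrays, presumably defined earlier in the file and not shown in this hunk), and the variable names are illustrative:

	use strict; use warnings;

	sub ror { my ($x, $n) = @_; (($x >> $n) | ($x << (32 - $n))) & 0xffffffff }

	sub Sigma1 { my $e = shift; ror($e, 6) ^ ror($e, 11) ^ ror($e, 25) }  # the ror#$Sigma1[*] eors
	sub Sigma0 { my $a = shift; ror($a, 2) ^ ror($a, 13) ^ ror($a, 22) }  # the ror#$Sigma0[*] eors
	sub Ch     { my ($e, $f, $g) = @_; ($e & $f) ^ (~$e & $g) }           # eor/and/eor in BODY_00_15
	sub Maj    { my ($a, $b, $c) = @_; ($a & $b) | (($a | $b) & $c) }     # orr/and/and/orr in BODY_00_15

	# One SHA-256 round: T1 = h + Sigma1(e) + Ch(e,f,g) + K[i] + X[i],
	# T2 = Sigma0(a) + Maj(a,b,c); then d += T1 and the new a is T1 + T2.
	sub round {
	    my ($a, $b, $c, $d, $e, $f, $g, $h, $k, $x) = @_;
	    my $T1 = ($h + Sigma1($e) + Ch($e, $f, $g) + $k + $x) & 0xffffffff;
	    my $T2 = (Sigma0($a) + Maj($a, $b, $c))              & 0xffffffff;
	    return ((($T1 + $T2) & 0xffffffff), $a, $b, $c,
	            (($d + $T1) & 0xffffffff),  $e, $f, $g);    # new (a..h)
	}

Run 64 times per block against the K256 table and the sigma0/sigma1-expanded schedule (BODY_16_XX below), this matches the state the stmia at the end of .Loop writes back to the context.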
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}  open STDOUT,">$output";  $ctx="r0";	$t0="r0"; -$inp="r1";	$t4="r1"; +$inp="r1";	$t3="r1";  $len="r2";	$t1="r2"; -$T1="r3";	$t3="r3"; +$T1="r3";  $A="r4";  $B="r5";  $C="r6"; @@ -64,88 +52,71 @@ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;  $code.=<<___ if ($i<16);  #if __ARM_ARCH__>=7 -	@ ldr	$t1,[$inp],#4			@ $i -# if $i==15 -	str	$inp,[sp,#17*4]			@ make room for $t4 -# endif -	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` -	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past -	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e) -	rev	$t1,$t1 +	ldr	$T1,[$inp],#4  #else -	@ ldrb	$t1,[$inp,#3]			@ $i -	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past +	ldrb	$T1,[$inp,#3]			@ $i  	ldrb	$t2,[$inp,#2] -	ldrb	$t0,[$inp,#1] -	orr	$t1,$t1,$t2,lsl#8 -	ldrb	$t2,[$inp],#4 -	orr	$t1,$t1,$t0,lsl#16 -# if $i==15 -	str	$inp,[sp,#17*4]			@ make room for $t4 -# endif -	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` -	orr	$t1,$t1,$t2,lsl#24 -	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e) +	ldrb	$t1,[$inp,#1] +	ldrb	$t0,[$inp],#4 +	orr	$T1,$T1,$t2,lsl#8 +	orr	$T1,$T1,$t1,lsl#16 +	orr	$T1,$T1,$t0,lsl#24  #endif  ___  $code.=<<___; +	mov	$t0,$e,ror#$Sigma1[0]  	ldr	$t2,[$Ktbl],#4			@ *K256++ -	add	$h,$h,$t1			@ h+=X[i] -	str	$t1,[sp,#`$i%16`*4] +	eor	$t0,$t0,$e,ror#$Sigma1[1]  	eor	$t1,$f,$g -	add	$h,$h,$t0,ror#$Sigma1[0]	@ h+=Sigma1(e) +#if $i>=16 +	add	$T1,$T1,$t3			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	$T1,$T1 +#endif +#if $i==15 +	str	$inp,[sp,#17*4]			@ leave room for $t3 +#endif +	eor	$t0,$t0,$e,ror#$Sigma1[2]	@ Sigma1(e)  	and	$t1,$t1,$e -	add	$h,$h,$t2			@ h+=K256[i] +	str	$T1,[sp,#`$i%16`*4] +	add	$T1,$T1,$t0  	eor	$t1,$t1,$g			@ Ch(e,f,g) -	eor	$t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` -	add	$h,$h,$t1			@ h+=Ch(e,f,g) -#if $i==31 -	and	$t2,$t2,#0xff -	cmp	$t2,#0xf2			@ done? 
+	add	$T1,$T1,$h +	mov	$h,$a,ror#$Sigma0[0] +	add	$T1,$T1,$t1 +	eor	$h,$h,$a,ror#$Sigma0[1] +	add	$T1,$T1,$t2 +	eor	$h,$h,$a,ror#$Sigma0[2]		@ Sigma0(a) +#if $i>=15 +	ldr	$t3,[sp,#`($i+2)%16`*4]		@ from BODY_16_xx  #endif -#if $i<15 -# if __ARM_ARCH__>=7 -	ldr	$t1,[$inp],#4			@ prefetch -# else -	ldrb	$t1,[$inp,#3] -# endif -	eor	$t2,$a,$b			@ a^b, b^c in next round -#else -	ldr	$t1,[sp,#`($i+2)%16`*4]		@ from future BODY_16_xx -	eor	$t2,$a,$b			@ a^b, b^c in next round -	ldr	$t4,[sp,#`($i+15)%16`*4]	@ from future BODY_16_xx -#endif -	eor	$t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]`	@ Sigma0(a) -	and	$t3,$t3,$t2			@ (b^c)&=(a^b) -	add	$d,$d,$h			@ d+=h -	eor	$t3,$t3,$b			@ Maj(a,b,c) -	add	$h,$h,$t0,ror#$Sigma0[0]	@ h+=Sigma0(a) -	@ add	$h,$h,$t3			@ h+=Maj(a,b,c) +	orr	$t0,$a,$b +	and	$t1,$a,$b +	and	$t0,$t0,$c +	add	$h,$h,$T1 +	orr	$t0,$t0,$t1			@ Maj(a,b,c) +	add	$d,$d,$T1 +	add	$h,$h,$t0  ___ -	($t2,$t3)=($t3,$t2);  }  sub BODY_16_XX {  my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;  $code.=<<___; -	@ ldr	$t1,[sp,#`($i+1)%16`*4]		@ $i -	@ ldr	$t4,[sp,#`($i+14)%16`*4] -	mov	$t0,$t1,ror#$sigma0[0] -	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past -	mov	$t2,$t4,ror#$sigma1[0] -	eor	$t0,$t0,$t1,ror#$sigma0[1] -	eor	$t2,$t2,$t4,ror#$sigma1[1] -	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1]) -	ldr	$t1,[sp,#`($i+0)%16`*4] -	eor	$t2,$t2,$t4,lsr#$sigma1[2]	@ sigma1(X[i+14]) -	ldr	$t4,[sp,#`($i+9)%16`*4] - -	add	$t2,$t2,$t0 -	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`	@ from BODY_00_15 -	add	$t1,$t1,$t2 -	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e) -	add	$t1,$t1,$t4			@ X[i] +	@ ldr	$t3,[sp,#`($i+1)%16`*4]		@ $i +	ldr	$t2,[sp,#`($i+14)%16`*4] +	mov	$t0,$t3,ror#$sigma0[0] +	ldr	$T1,[sp,#`($i+0)%16`*4] +	eor	$t0,$t0,$t3,ror#$sigma0[1] +	ldr	$t1,[sp,#`($i+9)%16`*4] +	eor	$t0,$t0,$t3,lsr#$sigma0[2]	@ sigma0(X[i+1]) +	mov	$t3,$t2,ror#$sigma1[0] +	add	$T1,$T1,$t0 +	eor	$t3,$t3,$t2,ror#$sigma1[1] +	add	$T1,$T1,$t1 +	eor	$t3,$t3,$t2,lsr#$sigma1[2]	@ sigma1(X[i+14]) +	@ add	$T1,$T1,$t3  ___  	&BODY_00_15(@_);  } @@ -176,64 +147,46 @@ K256:  .word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208  .word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2  .size	K256,.-K256 -.word	0				@ terminator -.LOPENSSL_armcap: -.word	OPENSSL_armcap_P-sha256_block_data_order -.align	5  .global	sha256_block_data_order  .type	sha256_block_data_order,%function  sha256_block_data_order:  	sub	r3,pc,#8		@ sha256_block_data_order  	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp -#if __ARM_ARCH__>=7 -	ldr	r12,.LOPENSSL_armcap -	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P -	tst	r12,#ARMV8_SHA256 -	bne	.LARMv8 -	tst	r12,#ARMV7_NEON -	bne	.LNEON -#endif  	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}  	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H} -	sub	$Ktbl,r3,#256+32	@ K256 +	sub	$Ktbl,r3,#256		@ K256  	sub	sp,sp,#16*4		@ alloca(X[16])  .Loop: -# if __ARM_ARCH__>=7 -	ldr	$t1,[$inp],#4 -# else -	ldrb	$t1,[$inp,#3] -# endif -	eor	$t3,$B,$C		@ magic -	eor	$t2,$t2,$t2  ___  for($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }  $code.=".Lrounds_16_xx:\n";  for (;$i<32;$i++)	{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }  $code.=<<___; -	ldreq	$t3,[sp,#16*4]		@ pull ctx +	and	$t2,$t2,#0xff +	cmp	$t2,#0xf2  	bne	.Lrounds_16_xx -	add	$A,$A,$t2		@ h+=Maj(a,b,c) from the past -	ldr	$t0,[$t3,#0] -	ldr	$t1,[$t3,#4] -	ldr	$t2,[$t3,#8] +	ldr	$T1,[sp,#16*4]		@ pull ctx +	ldr	$t0,[$T1,#0] +	ldr	$t1,[$T1,#4] +	ldr	$t2,[$T1,#8]  	add	$A,$A,$t0 -	ldr	$t0,[$t3,#12] +	ldr	$t0,[$T1,#12]  	add	$B,$B,$t1 -	ldr	$t1,[$t3,#16] +	ldr	$t1,[$T1,#16]  	add	$C,$C,$t2 -	ldr	
$t2,[$t3,#20] +	ldr	$t2,[$T1,#20]  	add	$D,$D,$t0 -	ldr	$t0,[$t3,#24] +	ldr	$t0,[$T1,#24]  	add	$E,$E,$t1 -	ldr	$t1,[$t3,#28] +	ldr	$t1,[$T1,#28]  	add	$F,$F,$t2  	ldr	$inp,[sp,#17*4]		@ pull inp  	ldr	$t2,[sp,#18*4]		@ pull inp+len  	add	$G,$G,$t0  	add	$H,$H,$t1 -	stmia	$t3,{$A,$B,$C,$D,$E,$F,$G,$H} +	stmia	$T1,{$A,$B,$C,$D,$E,$F,$G,$H}  	cmp	$inp,$t2  	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl  	bne	.Loop @@ -247,410 +200,12 @@ $code.=<<___;  	moveq	pc,lr			@ be binary compatible with V4, yet  	bx	lr			@ interoperable with Thumb ISA:-)  #endif -.size	sha256_block_data_order,.-sha256_block_data_order -___ -###################################################################### -# NEON stuff -# -{{{ -my @X=map("q$_",(0..3)); -my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); -my $Xfer=$t4; -my $j=0; - -sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     } -sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   } - -sub AUTOLOAD()          # thunk [simplified] x86-style perlasm -{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; -  my $arg = pop; -    $arg = "#$arg" if ($arg*1 eq $arg); -    $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; -} - -sub Xupdate() -{ use integer; -  my $body = shift; -  my @insns = (&$body,&$body,&$body,&$body); -  my ($a,$b,$c,$d,$e,$f,$g,$h); - -	&vext_8		($T0,@X[0],@X[1],4);	# X[1..4] -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vext_8		($T1,@X[2],@X[3],4);	# X[9..12] -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vshr_u32	($T2,$T0,$sigma0[0]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += X[9..12] -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vshr_u32	($T1,$T0,$sigma0[2]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vsli_32	($T2,$T0,32-$sigma0[0]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vshr_u32	($T3,$T0,$sigma0[1]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&veor		($T1,$T1,$T2); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vsli_32	($T3,$T0,32-$sigma0[1]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[0]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&veor		($T1,$T1,$T3);		# sigma0(X[1..4]) -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[0]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vshr_u32	($T5,&Dhi(@X[3]),$sigma1[2]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += sigma0(X[1..4]) -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &veor		($T5,$T5,$T4); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[1]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[1]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &veor		($T5,$T5,$T4);		# sigma1(X[14..15]) -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vadd_i32	(&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[0]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[0]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vshr_u32	($T5,&Dlo(@X[0]),$sigma1[2]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &veor		($T5,$T5,$T4); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[1]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vld1_32	
("{$T0}","[$Ktbl,:128]!"); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[1]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	  &veor		($T5,$T5,$T4);		# sigma1(X[16..17]) -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vadd_i32	(&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vadd_i32	($T0,$T0,@X[0]); -	 while($#insns>=2) { eval(shift(@insns)); } -	&vst1_32	("{$T0}","[$Xfer,:128]!"); -	 eval(shift(@insns)); -	 eval(shift(@insns)); - -	push(@X,shift(@X));		# "rotate" X[] -} - -sub Xpreload() -{ use integer; -  my $body = shift; -  my @insns = (&$body,&$body,&$body,&$body); -  my ($a,$b,$c,$d,$e,$f,$g,$h); - -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vld1_32	("{$T0}","[$Ktbl,:128]!"); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vrev32_8	(@X[0],@X[0]); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	 eval(shift(@insns)); -	&vadd_i32	($T0,$T0,@X[0]); -	 foreach (@insns) { eval; }	# remaining instructions -	&vst1_32	("{$T0}","[$Xfer,:128]!"); - -	push(@X,shift(@X));		# "rotate" X[] -} - -sub body_00_15 () { -	( -	'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. -	'&add	($h,$h,$t1)',			# h+=X[i]+K[i] -	'&eor	($t1,$f,$g)', -	'&eor	($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', -	'&add	($a,$a,$t2)',			# h+=Maj(a,b,c) from the past -	'&and	($t1,$t1,$e)', -	'&eor	($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',	# Sigma1(e) -	'&eor	($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', -	'&eor	($t1,$t1,$g)',			# Ch(e,f,g) -	'&add	($h,$h,$t2,"ror#$Sigma1[0]")',	# h+=Sigma1(e) -	'&eor	($t2,$a,$b)',			# a^b, b^c in next round -	'&eor	($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',	# Sigma0(a) -	'&add	($h,$h,$t1)',			# h+=Ch(e,f,g) -	'&ldr	($t1,sprintf "[sp,#%d]",4*(($j+1)&15))	if (($j&15)!=15);'. -	'&ldr	($t1,"[$Ktbl]")				if ($j==15);'. -	'&ldr	($t1,"[sp,#64]")			if ($j==31)', -	'&and	($t3,$t3,$t2)',			# (b^c)&=(a^b) -	'&add	($d,$d,$h)',			# d+=h -	'&add	($h,$h,$t0,"ror#$Sigma0[0]");'.	# h+=Sigma0(a) -	'&eor	($t3,$t3,$b)',			# Maj(a,b,c) -	'$j++;	unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' -	) -} - -$code.=<<___; -#if __ARM_ARCH__>=7 -.fpu	neon - -.type	sha256_block_data_order_neon,%function -.align	4 -sha256_block_data_order_neon: -.LNEON: -	stmdb	sp!,{r4-r12,lr} - -	mov	$t2,sp -	sub	sp,sp,#16*4+16		@ alloca -	sub	$Ktbl,r3,#256+32	@ K256 -	bic	sp,sp,#15		@ align for 128-bit stores - -	vld1.8		{@X[0]},[$inp]! -	vld1.8		{@X[1]},[$inp]! -	vld1.8		{@X[2]},[$inp]! -	vld1.8		{@X[3]},[$inp]! -	vld1.32		{$T0},[$Ktbl,:128]! -	vld1.32		{$T1},[$Ktbl,:128]! -	vld1.32		{$T2},[$Ktbl,:128]! -	vld1.32		{$T3},[$Ktbl,:128]! -	vrev32.8	@X[0],@X[0]		@ yes, even on -	str		$ctx,[sp,#64] -	vrev32.8	@X[1],@X[1]		@ big-endian -	str		$inp,[sp,#68] -	mov		$Xfer,sp -	vrev32.8	@X[2],@X[2] -	str		$len,[sp,#72] -	vrev32.8	@X[3],@X[3] -	str		$t2,[sp,#76]		@ save original sp -	vadd.i32	$T0,$T0,@X[0] -	vadd.i32	$T1,$T1,@X[1] -	vst1.32		{$T0},[$Xfer,:128]! -	vadd.i32	$T2,$T2,@X[2] -	vst1.32		{$T1},[$Xfer,:128]! -	vadd.i32	$T3,$T3,@X[3] -	vst1.32		{$T2},[$Xfer,:128]! -	vst1.32		{$T3},[$Xfer,:128]! 
- -	ldmia		$ctx,{$A-$H} -	sub		$Xfer,$Xfer,#64 -	ldr		$t1,[sp,#0] -	eor		$t2,$t2,$t2 -	eor		$t3,$B,$C -	b		.L_00_48 - -.align	4 -.L_00_48: -___ -	&Xupdate(\&body_00_15); -	&Xupdate(\&body_00_15); -	&Xupdate(\&body_00_15); -	&Xupdate(\&body_00_15); -$code.=<<___; -	teq	$t1,#0				@ check for K256 terminator -	ldr	$t1,[sp,#0] -	sub	$Xfer,$Xfer,#64 -	bne	.L_00_48 - -	ldr		$inp,[sp,#68] -	ldr		$t0,[sp,#72] -	sub		$Ktbl,$Ktbl,#256	@ rewind $Ktbl -	teq		$inp,$t0 -	subeq		$inp,$inp,#64		@ avoid SEGV -	vld1.8		{@X[0]},[$inp]!		@ load next input block -	vld1.8		{@X[1]},[$inp]! -	vld1.8		{@X[2]},[$inp]! -	vld1.8		{@X[3]},[$inp]! -	strne		$inp,[sp,#68] -	mov		$Xfer,sp -___ -	&Xpreload(\&body_00_15); -	&Xpreload(\&body_00_15); -	&Xpreload(\&body_00_15); -	&Xpreload(\&body_00_15); -$code.=<<___; -	ldr	$t0,[$t1,#0] -	add	$A,$A,$t2			@ h+=Maj(a,b,c) from the past -	ldr	$t2,[$t1,#4] -	ldr	$t3,[$t1,#8] -	ldr	$t4,[$t1,#12] -	add	$A,$A,$t0			@ accumulate -	ldr	$t0,[$t1,#16] -	add	$B,$B,$t2 -	ldr	$t2,[$t1,#20] -	add	$C,$C,$t3 -	ldr	$t3,[$t1,#24] -	add	$D,$D,$t4 -	ldr	$t4,[$t1,#28] -	add	$E,$E,$t0 -	str	$A,[$t1],#4 -	add	$F,$F,$t2 -	str	$B,[$t1],#4 -	add	$G,$G,$t3 -	str	$C,[$t1],#4 -	add	$H,$H,$t4 -	str	$D,[$t1],#4 -	stmia	$t1,{$E-$H} - -	movne	$Xfer,sp -	ldrne	$t1,[sp,#0] -	eorne	$t2,$t2,$t2 -	ldreq	sp,[sp,#76]			@ restore original sp -	eorne	$t3,$B,$C -	bne	.L_00_48 - -	ldmia	sp!,{r4-r12,pc} -.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon -#endif -___ -}}} -###################################################################### -# ARMv8 stuff -# -{{{ -my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); -my @MSG=map("q$_",(8..11)); -my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); -my $Ktbl="r3"; - -$code.=<<___; -#if __ARM_ARCH__>=7 -.type	sha256_block_data_order_armv8,%function -.align	5 -sha256_block_data_order_armv8: -.LARMv8: -	vld1.32	{$ABCD,$EFGH},[$ctx] -	sub	$Ktbl,r3,#sha256_block_data_order-K256 - -.Loop_v8: -	vld1.8		{@MSG[0]-@MSG[1]},[$inp]! -	vld1.8		{@MSG[2]-@MSG[3]},[$inp]! -	vld1.32		{$W0},[$Ktbl]! -	vrev32.8	@MSG[0],@MSG[0] -	vrev32.8	@MSG[1],@MSG[1] -	vrev32.8	@MSG[2],@MSG[2] -	vrev32.8	@MSG[3],@MSG[3] -	vmov		$ABCD_SAVE,$ABCD	@ offload -	vmov		$EFGH_SAVE,$EFGH -	teq		$inp,$len -___ -for($i=0;$i<12;$i++) { -$code.=<<___; -	vld1.32		{$W1},[$Ktbl]! -	vadd.i32	$W0,$W0,@MSG[0] -	sha256su0	@MSG[0],@MSG[1] -	vmov		$abcd,$ABCD -	sha256h		$ABCD,$EFGH,$W0 -	sha256h2	$EFGH,$abcd,$W0 -	sha256su1	@MSG[0],@MSG[2],@MSG[3] -___ -	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG)); -} -$code.=<<___; -	vld1.32		{$W1},[$Ktbl]! -	vadd.i32	$W0,$W0,@MSG[0] -	vmov		$abcd,$ABCD -	sha256h		$ABCD,$EFGH,$W0 -	sha256h2	$EFGH,$abcd,$W0 - -	vld1.32		{$W0},[$Ktbl]! 
-	vadd.i32	$W1,$W1,@MSG[1] -	vmov		$abcd,$ABCD -	sha256h		$ABCD,$EFGH,$W1 -	sha256h2	$EFGH,$abcd,$W1 - -	vld1.32		{$W1},[$Ktbl] -	vadd.i32	$W0,$W0,@MSG[2] -	sub		$Ktbl,$Ktbl,#256-16	@ rewind -	vmov		$abcd,$ABCD -	sha256h		$ABCD,$EFGH,$W0 -	sha256h2	$EFGH,$abcd,$W0 - -	vadd.i32	$W1,$W1,@MSG[3] -	vmov		$abcd,$ABCD -	sha256h		$ABCD,$EFGH,$W1 -	sha256h2	$EFGH,$abcd,$W1 - -	vadd.i32	$ABCD,$ABCD,$ABCD_SAVE -	vadd.i32	$EFGH,$EFGH,$EFGH_SAVE -	bne		.Loop_v8 - -	vst1.32		{$ABCD,$EFGH},[$ctx] - -	ret		@ bx lr -.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 -#endif -___ -}}} -$code.=<<___; -.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" +.size   sha256_block_data_order,.-sha256_block_data_order +.asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"  .align	2 -.comm   OPENSSL_armcap_P,4,4  ___ -{   my  %opcode = ( -	"sha256h"	=> 0xf3000c40,	"sha256h2"	=> 0xf3100c40, -	"sha256su0"	=> 0xf3ba03c0,	"sha256su1"	=> 0xf3200c40	); - -    sub unsha256 { -	my ($mnemonic,$arg)=@_; - -	if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { -	    my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) -					 |(($2&7)<<17)|(($2&8)<<4) -					 |(($3&7)<<1) |(($3&8)<<2); -	    # since ARMv7 instructions are always encoded little-endian. -	    # correct solution is to use .inst directive, but older -	    # assemblers don't implement it:-( -	    sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", -			$word&0xff,($word>>8)&0xff, -			($word>>16)&0xff,($word>>24)&0xff, -			$mnemonic,$arg; -	} -    } -} - -foreach (split($/,$code)) { - -	s/\`([^\`]*)\`/eval $1/geo; - -	s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; - -	s/\bret\b/bx	lr/go		or -	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;	# make it possible to compile with -march=armv4 - -	print $_,"\n"; -} - +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4 +print $code;  close STDOUT; # enforce flush diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.s b/app/openssl/crypto/sha/asm/sha256-armv4.s index 853d7da5..9c20a63c 100644 --- a/app/openssl/crypto/sha/asm/sha256-armv4.s +++ b/app/openssl/crypto/sha/asm/sha256-armv4.s @@ -23,1721 +23,1463 @@ K256:  .word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208  .word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2  .size	K256,.-K256 -.word	0				@ terminator -.LOPENSSL_armcap: -.word	OPENSSL_armcap_P-sha256_block_data_order -.align	5  .global	sha256_block_data_order  .type	sha256_block_data_order,%function  sha256_block_data_order:  	sub	r3,pc,#8		@ sha256_block_data_order  	add	r2,r1,r2,lsl#6	@ len to point at the end of inp -#if __ARM_ARCH__>=7 -	ldr	r12,.LOPENSSL_armcap -	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P -	tst	r12,#ARMV8_SHA256 -	bne	.LARMv8 -	tst	r12,#ARMV7_NEON -	bne	.LNEON -#endif  	stmdb	sp!,{r0,r1,r2,r4-r11,lr}  	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11} -	sub	r14,r3,#256+32	@ K256 +	sub	r14,r3,#256		@ K256  	sub	sp,sp,#16*4		@ alloca(X[16])  .Loop: -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4 -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r5,r6		@ magic -	eor	r12,r12,r12  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 0 -# if 0==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r8,r8,ror#5 -	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r8,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 0 -	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past +	ldrb	r3,[r1,#3]			@ 0  	ldrb	r12,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r12,lsl#8 -	
ldrb	r12,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 0==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r8,r8,ror#5 -	orr	r2,r2,r12,lsl#24 -	eor	r0,r0,r8,ror#19	@ Sigma1(e) +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24  #endif +	mov	r0,r8,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r11,r11,r2			@ h+=X[i] -	str	r2,[sp,#0*4] +	eor	r0,r0,r8,ror#11  	eor	r2,r9,r10 -	add	r11,r11,r0,ror#6	@ h+=Sigma1(e) +#if 0>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 0==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r8,ror#25	@ Sigma1(e)  	and	r2,r2,r8 -	add	r11,r11,r12			@ h+=K256[i] +	str	r3,[sp,#0*4] +	add	r3,r3,r0  	eor	r2,r2,r10			@ Ch(e,f,g) -	eor	r0,r4,r4,ror#11 -	add	r11,r11,r2			@ h+=Ch(e,f,g) -#if 0==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? -#endif -#if 0<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r4,r5			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx -	eor	r12,r4,r5			@ a^b, b^c in next round -	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r4,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r7,r7,r11			@ d+=h -	eor	r3,r3,r5			@ Maj(a,b,c) -	add	r11,r11,r0,ror#2	@ h+=Sigma0(a) -	@ add	r11,r11,r3			@ h+=Maj(a,b,c) +	add	r3,r3,r11 +	mov	r11,r4,ror#2 +	add	r3,r3,r2 +	eor	r11,r11,r4,ror#13 +	add	r3,r3,r12 +	eor	r11,r11,r4,ror#22		@ Sigma0(a) +#if 0>=15 +	ldr	r1,[sp,#2*4]		@ from BODY_16_xx +#endif +	orr	r0,r4,r5 +	and	r2,r4,r5 +	and	r0,r0,r6 +	add	r11,r11,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r7,r7,r3 +	add	r11,r11,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 1 -# if 1==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r7,r7,ror#5 -	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r7,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 1 -	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past -	ldrb	r3,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r3,lsl#8 -	ldrb	r3,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 1==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r7,r7,ror#5 -	orr	r2,r2,r3,lsl#24 -	eor	r0,r0,r7,ror#19	@ Sigma1(e) -#endif -	ldr	r3,[r14],#4			@ *K256++ -	add	r10,r10,r2			@ h+=X[i] -	str	r2,[sp,#1*4] +	ldrb	r3,[r1,#3]			@ 1 +	ldrb	r12,[r1,#2] +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24 +#endif +	mov	r0,r7,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r7,ror#11  	eor	r2,r8,r9 -	add	r10,r10,r0,ror#6	@ h+=Sigma1(e) +#if 1>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 1==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r7,ror#25	@ Sigma1(e)  	and	r2,r2,r7 -	add	r10,r10,r3			@ h+=K256[i] +	str	r3,[sp,#1*4] +	add	r3,r3,r0  	eor	r2,r2,r9			@ Ch(e,f,g) -	eor	r0,r11,r11,ror#11 -	add	r10,r10,r2			@ h+=Ch(e,f,g) -#if 1==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 1<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r11,r4			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx -	eor	r3,r11,r4			@ a^b, b^c in next round -	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r11,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r6,r6,r10			@ d+=h -	eor	r12,r12,r4			@ Maj(a,b,c) -	add	r10,r10,r0,ror#2	@ h+=Sigma0(a) -	@ add	r10,r10,r12			@ h+=Maj(a,b,c) +	add	r3,r3,r10 +	mov	r10,r11,ror#2 +	add	r3,r3,r2 +	eor	r10,r10,r11,ror#13 +	add	r3,r3,r12 +	eor	r10,r10,r11,ror#22		@ Sigma0(a) +#if 1>=15 +	ldr	r1,[sp,#3*4]		@ from BODY_16_xx +#endif +	orr	r0,r11,r4 +	and	r2,r11,r4 +	and	r0,r0,r5 +	add	r10,r10,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r6,r6,r3 +	add	r10,r10,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 2 -# if 2==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r6,r6,ror#5 -	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r6,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 2 -	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past +	ldrb	r3,[r1,#3]			@ 2  	ldrb	r12,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r12,lsl#8 -	ldrb	r12,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 2==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r6,r6,ror#5 -	orr	r2,r2,r12,lsl#24 -	eor	r0,r0,r6,ror#19	@ Sigma1(e) +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24  #endif +	mov	r0,r6,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r9,r9,r2			@ h+=X[i] -	str	r2,[sp,#2*4] +	eor	r0,r0,r6,ror#11  	eor	r2,r7,r8 -	add	r9,r9,r0,ror#6	@ h+=Sigma1(e) +#if 2>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 2==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r6,ror#25	@ Sigma1(e)  	and	r2,r2,r6 -	add	r9,r9,r12			@ h+=K256[i] +	str	r3,[sp,#2*4] +	add	r3,r3,r0  	eor	r2,r2,r8			@ Ch(e,f,g) -	eor	r0,r10,r10,ror#11 -	add	r9,r9,r2			@ h+=Ch(e,f,g) -#if 2==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 2<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r10,r11			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx -	eor	r12,r10,r11			@ a^b, b^c in next round -	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r10,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r5,r5,r9			@ d+=h -	eor	r3,r3,r11			@ Maj(a,b,c) -	add	r9,r9,r0,ror#2	@ h+=Sigma0(a) -	@ add	r9,r9,r3			@ h+=Maj(a,b,c) +	add	r3,r3,r9 +	mov	r9,r10,ror#2 +	add	r3,r3,r2 +	eor	r9,r9,r10,ror#13 +	add	r3,r3,r12 +	eor	r9,r9,r10,ror#22		@ Sigma0(a) +#if 2>=15 +	ldr	r1,[sp,#4*4]		@ from BODY_16_xx +#endif +	orr	r0,r10,r11 +	and	r2,r10,r11 +	and	r0,r0,r4 +	add	r9,r9,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r5,r5,r3 +	add	r9,r9,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 3 -# if 3==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r5,r5,ror#5 -	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r5,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 3 -	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past -	ldrb	r3,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r3,lsl#8 -	ldrb	r3,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 3==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r5,r5,ror#5 -	orr	r2,r2,r3,lsl#24 -	eor	r0,r0,r5,ror#19	@ Sigma1(e) -#endif -	ldr	r3,[r14],#4			@ *K256++ -	add	r8,r8,r2			@ h+=X[i] -	str	r2,[sp,#3*4] +	ldrb	r3,[r1,#3]			@ 3 +	ldrb	r12,[r1,#2] +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24 +#endif +	mov	r0,r5,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r5,ror#11  	eor	r2,r6,r7 -	add	r8,r8,r0,ror#6	@ h+=Sigma1(e) +#if 3>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 3==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r5,ror#25	@ Sigma1(e)  	and	r2,r2,r5 -	add	r8,r8,r3			@ h+=K256[i] +	str	r3,[sp,#3*4] +	add	r3,r3,r0  	eor	r2,r2,r7			@ Ch(e,f,g) -	eor	r0,r9,r9,ror#11 -	add	r8,r8,r2			@ h+=Ch(e,f,g) -#if 3==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 3<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r9,r10			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx -	eor	r3,r9,r10			@ a^b, b^c in next round -	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r9,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r4,r4,r8			@ d+=h -	eor	r12,r12,r10			@ Maj(a,b,c) -	add	r8,r8,r0,ror#2	@ h+=Sigma0(a) -	@ add	r8,r8,r12			@ h+=Maj(a,b,c) +	add	r3,r3,r8 +	mov	r8,r9,ror#2 +	add	r3,r3,r2 +	eor	r8,r8,r9,ror#13 +	add	r3,r3,r12 +	eor	r8,r8,r9,ror#22		@ Sigma0(a) +#if 3>=15 +	ldr	r1,[sp,#5*4]		@ from BODY_16_xx +#endif +	orr	r0,r9,r10 +	and	r2,r9,r10 +	and	r0,r0,r11 +	add	r8,r8,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r4,r4,r3 +	add	r8,r8,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 4 -# if 4==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r4,r4,ror#5 -	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r4,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 4 -	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past +	ldrb	r3,[r1,#3]			@ 4  	ldrb	r12,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r12,lsl#8 -	ldrb	r12,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 4==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r4,r4,ror#5 -	orr	r2,r2,r12,lsl#24 -	eor	r0,r0,r4,ror#19	@ Sigma1(e) +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24  #endif +	mov	r0,r4,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r7,r7,r2			@ h+=X[i] -	str	r2,[sp,#4*4] +	eor	r0,r0,r4,ror#11  	eor	r2,r5,r6 -	add	r7,r7,r0,ror#6	@ h+=Sigma1(e) +#if 4>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 4==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r4,ror#25	@ Sigma1(e)  	and	r2,r2,r4 -	add	r7,r7,r12			@ h+=K256[i] +	str	r3,[sp,#4*4] +	add	r3,r3,r0  	eor	r2,r2,r6			@ Ch(e,f,g) -	eor	r0,r8,r8,ror#11 -	add	r7,r7,r2			@ h+=Ch(e,f,g) -#if 4==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 4<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r8,r9			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx -	eor	r12,r8,r9			@ a^b, b^c in next round -	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r8,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r11,r11,r7			@ d+=h -	eor	r3,r3,r9			@ Maj(a,b,c) -	add	r7,r7,r0,ror#2	@ h+=Sigma0(a) -	@ add	r7,r7,r3			@ h+=Maj(a,b,c) +	add	r3,r3,r7 +	mov	r7,r8,ror#2 +	add	r3,r3,r2 +	eor	r7,r7,r8,ror#13 +	add	r3,r3,r12 +	eor	r7,r7,r8,ror#22		@ Sigma0(a) +#if 4>=15 +	ldr	r1,[sp,#6*4]		@ from BODY_16_xx +#endif +	orr	r0,r8,r9 +	and	r2,r8,r9 +	and	r0,r0,r10 +	add	r7,r7,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r11,r11,r3 +	add	r7,r7,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 5 -# if 5==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r11,r11,ror#5 -	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r11,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 5 -	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past -	ldrb	r3,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r3,lsl#8 -	ldrb	r3,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 5==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r11,r11,ror#5 -	orr	r2,r2,r3,lsl#24 -	eor	r0,r0,r11,ror#19	@ Sigma1(e) -#endif -	ldr	r3,[r14],#4			@ *K256++ -	add	r6,r6,r2			@ h+=X[i] -	str	r2,[sp,#5*4] +	ldrb	r3,[r1,#3]			@ 5 +	ldrb	r12,[r1,#2] +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24 +#endif +	mov	r0,r11,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r11,ror#11  	eor	r2,r4,r5 -	add	r6,r6,r0,ror#6	@ h+=Sigma1(e) +#if 5>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 5==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r11,ror#25	@ Sigma1(e)  	and	r2,r2,r11 -	add	r6,r6,r3			@ h+=K256[i] +	str	r3,[sp,#5*4] +	add	r3,r3,r0  	eor	r2,r2,r5			@ Ch(e,f,g) -	eor	r0,r7,r7,ror#11 -	add	r6,r6,r2			@ h+=Ch(e,f,g) -#if 5==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 5<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r7,r8			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx -	eor	r3,r7,r8			@ a^b, b^c in next round -	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r7,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r10,r10,r6			@ d+=h -	eor	r12,r12,r8			@ Maj(a,b,c) -	add	r6,r6,r0,ror#2	@ h+=Sigma0(a) -	@ add	r6,r6,r12			@ h+=Maj(a,b,c) +	add	r3,r3,r6 +	mov	r6,r7,ror#2 +	add	r3,r3,r2 +	eor	r6,r6,r7,ror#13 +	add	r3,r3,r12 +	eor	r6,r6,r7,ror#22		@ Sigma0(a) +#if 5>=15 +	ldr	r1,[sp,#7*4]		@ from BODY_16_xx +#endif +	orr	r0,r7,r8 +	and	r2,r7,r8 +	and	r0,r0,r9 +	add	r6,r6,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r10,r10,r3 +	add	r6,r6,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 6 -# if 6==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r10,r10,ror#5 -	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r10,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 6 -	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past +	ldrb	r3,[r1,#3]			@ 6  	ldrb	r12,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r12,lsl#8 -	ldrb	r12,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 6==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r10,r10,ror#5 -	orr	r2,r2,r12,lsl#24 -	eor	r0,r0,r10,ror#19	@ Sigma1(e) +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24  #endif +	mov	r0,r10,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r5,r5,r2			@ h+=X[i] -	str	r2,[sp,#6*4] +	eor	r0,r0,r10,ror#11  	eor	r2,r11,r4 -	add	r5,r5,r0,ror#6	@ h+=Sigma1(e) +#if 6>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 6==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r10,ror#25	@ Sigma1(e)  	and	r2,r2,r10 -	add	r5,r5,r12			@ h+=K256[i] +	str	r3,[sp,#6*4] +	add	r3,r3,r0  	eor	r2,r2,r4			@ Ch(e,f,g) -	eor	r0,r6,r6,ror#11 -	add	r5,r5,r2			@ h+=Ch(e,f,g) -#if 6==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 6<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r6,r7			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx -	eor	r12,r6,r7			@ a^b, b^c in next round -	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r6,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r9,r9,r5			@ d+=h -	eor	r3,r3,r7			@ Maj(a,b,c) -	add	r5,r5,r0,ror#2	@ h+=Sigma0(a) -	@ add	r5,r5,r3			@ h+=Maj(a,b,c) +	add	r3,r3,r5 +	mov	r5,r6,ror#2 +	add	r3,r3,r2 +	eor	r5,r5,r6,ror#13 +	add	r3,r3,r12 +	eor	r5,r5,r6,ror#22		@ Sigma0(a) +#if 6>=15 +	ldr	r1,[sp,#8*4]		@ from BODY_16_xx +#endif +	orr	r0,r6,r7 +	and	r2,r6,r7 +	and	r0,r0,r8 +	add	r5,r5,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r9,r9,r3 +	add	r5,r5,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 7 -# if 7==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r9,r9,ror#5 -	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r9,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 7 -	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past -	ldrb	r3,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r3,lsl#8 -	ldrb	r3,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 7==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r9,r9,ror#5 -	orr	r2,r2,r3,lsl#24 -	eor	r0,r0,r9,ror#19	@ Sigma1(e) -#endif -	ldr	r3,[r14],#4			@ *K256++ -	add	r4,r4,r2			@ h+=X[i] -	str	r2,[sp,#7*4] +	ldrb	r3,[r1,#3]			@ 7 +	ldrb	r12,[r1,#2] +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24 +#endif +	mov	r0,r9,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r9,ror#11  	eor	r2,r10,r11 -	add	r4,r4,r0,ror#6	@ h+=Sigma1(e) +#if 7>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 7==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r9,ror#25	@ Sigma1(e)  	and	r2,r2,r9 -	add	r4,r4,r3			@ h+=K256[i] +	str	r3,[sp,#7*4] +	add	r3,r3,r0  	eor	r2,r2,r11			@ Ch(e,f,g) -	eor	r0,r5,r5,ror#11 -	add	r4,r4,r2			@ h+=Ch(e,f,g) -#if 7==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 7<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r5,r6			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx -	eor	r3,r5,r6			@ a^b, b^c in next round -	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r5,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r8,r8,r4			@ d+=h -	eor	r12,r12,r6			@ Maj(a,b,c) -	add	r4,r4,r0,ror#2	@ h+=Sigma0(a) -	@ add	r4,r4,r12			@ h+=Maj(a,b,c) +	add	r3,r3,r4 +	mov	r4,r5,ror#2 +	add	r3,r3,r2 +	eor	r4,r4,r5,ror#13 +	add	r3,r3,r12 +	eor	r4,r4,r5,ror#22		@ Sigma0(a) +#if 7>=15 +	ldr	r1,[sp,#9*4]		@ from BODY_16_xx +#endif +	orr	r0,r5,r6 +	and	r2,r5,r6 +	and	r0,r0,r7 +	add	r4,r4,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r8,r8,r3 +	add	r4,r4,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 8 -# if 8==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r8,r8,ror#5 -	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r8,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 8 -	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past +	ldrb	r3,[r1,#3]			@ 8  	ldrb	r12,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r12,lsl#8 -	ldrb	r12,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 8==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r8,r8,ror#5 -	orr	r2,r2,r12,lsl#24 -	eor	r0,r0,r8,ror#19	@ Sigma1(e) +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24  #endif +	mov	r0,r8,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r11,r11,r2			@ h+=X[i] -	str	r2,[sp,#8*4] +	eor	r0,r0,r8,ror#11  	eor	r2,r9,r10 -	add	r11,r11,r0,ror#6	@ h+=Sigma1(e) +#if 8>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 8==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r8,ror#25	@ Sigma1(e)  	and	r2,r2,r8 -	add	r11,r11,r12			@ h+=K256[i] +	str	r3,[sp,#8*4] +	add	r3,r3,r0  	eor	r2,r2,r10			@ Ch(e,f,g) -	eor	r0,r4,r4,ror#11 -	add	r11,r11,r2			@ h+=Ch(e,f,g) -#if 8==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 8<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r4,r5			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx -	eor	r12,r4,r5			@ a^b, b^c in next round -	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r4,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r7,r7,r11			@ d+=h -	eor	r3,r3,r5			@ Maj(a,b,c) -	add	r11,r11,r0,ror#2	@ h+=Sigma0(a) -	@ add	r11,r11,r3			@ h+=Maj(a,b,c) +	add	r3,r3,r11 +	mov	r11,r4,ror#2 +	add	r3,r3,r2 +	eor	r11,r11,r4,ror#13 +	add	r3,r3,r12 +	eor	r11,r11,r4,ror#22		@ Sigma0(a) +#if 8>=15 +	ldr	r1,[sp,#10*4]		@ from BODY_16_xx +#endif +	orr	r0,r4,r5 +	and	r2,r4,r5 +	and	r0,r0,r6 +	add	r11,r11,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r7,r7,r3 +	add	r11,r11,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 9 -# if 9==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r7,r7,ror#5 -	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r7,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 9 -	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past -	ldrb	r3,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r3,lsl#8 -	ldrb	r3,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 9==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r7,r7,ror#5 -	orr	r2,r2,r3,lsl#24 -	eor	r0,r0,r7,ror#19	@ Sigma1(e) -#endif -	ldr	r3,[r14],#4			@ *K256++ -	add	r10,r10,r2			@ h+=X[i] -	str	r2,[sp,#9*4] +	ldrb	r3,[r1,#3]			@ 9 +	ldrb	r12,[r1,#2] +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24 +#endif +	mov	r0,r7,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r7,ror#11  	eor	r2,r8,r9 -	add	r10,r10,r0,ror#6	@ h+=Sigma1(e) +#if 9>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 9==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r7,ror#25	@ Sigma1(e)  	and	r2,r2,r7 -	add	r10,r10,r3			@ h+=K256[i] +	str	r3,[sp,#9*4] +	add	r3,r3,r0  	eor	r2,r2,r9			@ Ch(e,f,g) -	eor	r0,r11,r11,ror#11 -	add	r10,r10,r2			@ h+=Ch(e,f,g) -#if 9==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 9<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r11,r4			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx -	eor	r3,r11,r4			@ a^b, b^c in next round -	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r11,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r6,r6,r10			@ d+=h -	eor	r12,r12,r4			@ Maj(a,b,c) -	add	r10,r10,r0,ror#2	@ h+=Sigma0(a) -	@ add	r10,r10,r12			@ h+=Maj(a,b,c) +	add	r3,r3,r10 +	mov	r10,r11,ror#2 +	add	r3,r3,r2 +	eor	r10,r10,r11,ror#13 +	add	r3,r3,r12 +	eor	r10,r10,r11,ror#22		@ Sigma0(a) +#if 9>=15 +	ldr	r1,[sp,#11*4]		@ from BODY_16_xx +#endif +	orr	r0,r11,r4 +	and	r2,r11,r4 +	and	r0,r0,r5 +	add	r10,r10,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r6,r6,r3 +	add	r10,r10,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 10 -# if 10==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r6,r6,ror#5 -	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r6,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 10 -	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past +	ldrb	r3,[r1,#3]			@ 10  	ldrb	r12,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r12,lsl#8 -	ldrb	r12,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 10==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r6,r6,ror#5 -	orr	r2,r2,r12,lsl#24 -	eor	r0,r0,r6,ror#19	@ Sigma1(e) +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24  #endif +	mov	r0,r6,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r9,r9,r2			@ h+=X[i] -	str	r2,[sp,#10*4] +	eor	r0,r0,r6,ror#11  	eor	r2,r7,r8 -	add	r9,r9,r0,ror#6	@ h+=Sigma1(e) +#if 10>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 10==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r6,ror#25	@ Sigma1(e)  	and	r2,r2,r6 -	add	r9,r9,r12			@ h+=K256[i] +	str	r3,[sp,#10*4] +	add	r3,r3,r0  	eor	r2,r2,r8			@ Ch(e,f,g) -	eor	r0,r10,r10,ror#11 -	add	r9,r9,r2			@ h+=Ch(e,f,g) -#if 10==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 10<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r10,r11			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx -	eor	r12,r10,r11			@ a^b, b^c in next round -	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r10,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r5,r5,r9			@ d+=h -	eor	r3,r3,r11			@ Maj(a,b,c) -	add	r9,r9,r0,ror#2	@ h+=Sigma0(a) -	@ add	r9,r9,r3			@ h+=Maj(a,b,c) +	add	r3,r3,r9 +	mov	r9,r10,ror#2 +	add	r3,r3,r2 +	eor	r9,r9,r10,ror#13 +	add	r3,r3,r12 +	eor	r9,r9,r10,ror#22		@ Sigma0(a) +#if 10>=15 +	ldr	r1,[sp,#12*4]		@ from BODY_16_xx +#endif +	orr	r0,r10,r11 +	and	r2,r10,r11 +	and	r0,r0,r4 +	add	r9,r9,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r5,r5,r3 +	add	r9,r9,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 11 -# if 11==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r5,r5,ror#5 -	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r5,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 11 -	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past -	ldrb	r3,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r3,lsl#8 -	ldrb	r3,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 11==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r5,r5,ror#5 -	orr	r2,r2,r3,lsl#24 -	eor	r0,r0,r5,ror#19	@ Sigma1(e) -#endif -	ldr	r3,[r14],#4			@ *K256++ -	add	r8,r8,r2			@ h+=X[i] -	str	r2,[sp,#11*4] +	ldrb	r3,[r1,#3]			@ 11 +	ldrb	r12,[r1,#2] +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24 +#endif +	mov	r0,r5,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r5,ror#11  	eor	r2,r6,r7 -	add	r8,r8,r0,ror#6	@ h+=Sigma1(e) +#if 11>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 11==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r5,ror#25	@ Sigma1(e)  	and	r2,r2,r5 -	add	r8,r8,r3			@ h+=K256[i] +	str	r3,[sp,#11*4] +	add	r3,r3,r0  	eor	r2,r2,r7			@ Ch(e,f,g) -	eor	r0,r9,r9,ror#11 -	add	r8,r8,r2			@ h+=Ch(e,f,g) -#if 11==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 11<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r9,r10			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx -	eor	r3,r9,r10			@ a^b, b^c in next round -	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r9,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r4,r4,r8			@ d+=h -	eor	r12,r12,r10			@ Maj(a,b,c) -	add	r8,r8,r0,ror#2	@ h+=Sigma0(a) -	@ add	r8,r8,r12			@ h+=Maj(a,b,c) +	add	r3,r3,r8 +	mov	r8,r9,ror#2 +	add	r3,r3,r2 +	eor	r8,r8,r9,ror#13 +	add	r3,r3,r12 +	eor	r8,r8,r9,ror#22		@ Sigma0(a) +#if 11>=15 +	ldr	r1,[sp,#13*4]		@ from BODY_16_xx +#endif +	orr	r0,r9,r10 +	and	r2,r9,r10 +	and	r0,r0,r11 +	add	r8,r8,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r4,r4,r3 +	add	r8,r8,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 12 -# if 12==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r4,r4,ror#5 -	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r4,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 12 -	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past +	ldrb	r3,[r1,#3]			@ 12  	ldrb	r12,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r12,lsl#8 -	ldrb	r12,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 12==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r4,r4,ror#5 -	orr	r2,r2,r12,lsl#24 -	eor	r0,r0,r4,ror#19	@ Sigma1(e) +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24  #endif +	mov	r0,r4,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r7,r7,r2			@ h+=X[i] -	str	r2,[sp,#12*4] +	eor	r0,r0,r4,ror#11  	eor	r2,r5,r6 -	add	r7,r7,r0,ror#6	@ h+=Sigma1(e) +#if 12>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 12==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r4,ror#25	@ Sigma1(e)  	and	r2,r2,r4 -	add	r7,r7,r12			@ h+=K256[i] +	str	r3,[sp,#12*4] +	add	r3,r3,r0  	eor	r2,r2,r6			@ Ch(e,f,g) -	eor	r0,r8,r8,ror#11 -	add	r7,r7,r2			@ h+=Ch(e,f,g) -#if 12==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 12<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r8,r9			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx -	eor	r12,r8,r9			@ a^b, b^c in next round -	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r8,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r11,r11,r7			@ d+=h -	eor	r3,r3,r9			@ Maj(a,b,c) -	add	r7,r7,r0,ror#2	@ h+=Sigma0(a) -	@ add	r7,r7,r3			@ h+=Maj(a,b,c) +	add	r3,r3,r7 +	mov	r7,r8,ror#2 +	add	r3,r3,r2 +	eor	r7,r7,r8,ror#13 +	add	r3,r3,r12 +	eor	r7,r7,r8,ror#22		@ Sigma0(a) +#if 12>=15 +	ldr	r1,[sp,#14*4]		@ from BODY_16_xx +#endif +	orr	r0,r8,r9 +	and	r2,r8,r9 +	and	r0,r0,r10 +	add	r7,r7,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r11,r11,r3 +	add	r7,r7,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 13 -# if 13==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r11,r11,ror#5 -	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r11,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 13 -	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past -	ldrb	r3,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r3,lsl#8 -	ldrb	r3,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 13==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r11,r11,ror#5 -	orr	r2,r2,r3,lsl#24 -	eor	r0,r0,r11,ror#19	@ Sigma1(e) -#endif -	ldr	r3,[r14],#4			@ *K256++ -	add	r6,r6,r2			@ h+=X[i] -	str	r2,[sp,#13*4] +	ldrb	r3,[r1,#3]			@ 13 +	ldrb	r12,[r1,#2] +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24 +#endif +	mov	r0,r11,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r11,ror#11  	eor	r2,r4,r5 -	add	r6,r6,r0,ror#6	@ h+=Sigma1(e) +#if 13>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 13==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r11,ror#25	@ Sigma1(e)  	and	r2,r2,r11 -	add	r6,r6,r3			@ h+=K256[i] +	str	r3,[sp,#13*4] +	add	r3,r3,r0  	eor	r2,r2,r5			@ Ch(e,f,g) -	eor	r0,r7,r7,ror#11 -	add	r6,r6,r2			@ h+=Ch(e,f,g) -#if 13==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 13<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r7,r8			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx -	eor	r3,r7,r8			@ a^b, b^c in next round -	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r7,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r10,r10,r6			@ d+=h -	eor	r12,r12,r8			@ Maj(a,b,c) -	add	r6,r6,r0,ror#2	@ h+=Sigma0(a) -	@ add	r6,r6,r12			@ h+=Maj(a,b,c) +	add	r3,r3,r6 +	mov	r6,r7,ror#2 +	add	r3,r3,r2 +	eor	r6,r6,r7,ror#13 +	add	r3,r3,r12 +	eor	r6,r6,r7,ror#22		@ Sigma0(a) +#if 13>=15 +	ldr	r1,[sp,#15*4]		@ from BODY_16_xx +#endif +	orr	r0,r7,r8 +	and	r2,r7,r8 +	and	r0,r0,r9 +	add	r6,r6,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r10,r10,r3 +	add	r6,r6,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 14 -# if 14==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r10,r10,ror#5 -	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r10,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 14 -	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past +	ldrb	r3,[r1,#3]			@ 14  	ldrb	r12,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r12,lsl#8 -	ldrb	r12,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 14==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r10,r10,ror#5 -	orr	r2,r2,r12,lsl#24 -	eor	r0,r0,r10,ror#19	@ Sigma1(e) +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24  #endif +	mov	r0,r10,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r5,r5,r2			@ h+=X[i] -	str	r2,[sp,#14*4] +	eor	r0,r0,r10,ror#11  	eor	r2,r11,r4 -	add	r5,r5,r0,ror#6	@ h+=Sigma1(e) +#if 14>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 14==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r10,ror#25	@ Sigma1(e)  	and	r2,r2,r10 -	add	r5,r5,r12			@ h+=K256[i] +	str	r3,[sp,#14*4] +	add	r3,r3,r0  	eor	r2,r2,r4			@ Ch(e,f,g) -	eor	r0,r6,r6,ror#11 -	add	r5,r5,r2			@ h+=Ch(e,f,g) -#if 14==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 14<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r6,r7			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx -	eor	r12,r6,r7			@ a^b, b^c in next round -	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r6,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r9,r9,r5			@ d+=h -	eor	r3,r3,r7			@ Maj(a,b,c) -	add	r5,r5,r0,ror#2	@ h+=Sigma0(a) -	@ add	r5,r5,r3			@ h+=Maj(a,b,c) +	add	r3,r3,r5 +	mov	r5,r6,ror#2 +	add	r3,r3,r2 +	eor	r5,r5,r6,ror#13 +	add	r3,r3,r12 +	eor	r5,r5,r6,ror#22		@ Sigma0(a) +#if 14>=15 +	ldr	r1,[sp,#0*4]		@ from BODY_16_xx +#endif +	orr	r0,r6,r7 +	and	r2,r6,r7 +	and	r0,r0,r8 +	add	r5,r5,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r9,r9,r3 +	add	r5,r5,r0  #if __ARM_ARCH__>=7 -	@ ldr	r2,[r1],#4			@ 15 -# if 15==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r9,r9,ror#5 -	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past -	eor	r0,r0,r9,ror#19	@ Sigma1(e) -	rev	r2,r2 +	ldr	r3,[r1],#4  #else -	@ ldrb	r2,[r1,#3]			@ 15 -	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past -	ldrb	r3,[r1,#2] -	ldrb	r0,[r1,#1] -	orr	r2,r2,r3,lsl#8 -	ldrb	r3,[r1],#4 -	orr	r2,r2,r0,lsl#16 -# if 15==15 -	str	r1,[sp,#17*4]			@ make room for r1 -# endif -	eor	r0,r9,r9,ror#5 -	orr	r2,r2,r3,lsl#24 -	eor	r0,r0,r9,ror#19	@ Sigma1(e) -#endif -	ldr	r3,[r14],#4			@ *K256++ -	add	r4,r4,r2			@ h+=X[i] -	str	r2,[sp,#15*4] +	ldrb	r3,[r1,#3]			@ 15 +	ldrb	r12,[r1,#2] +	ldrb	r2,[r1,#1] +	ldrb	r0,[r1],#4 +	orr	r3,r3,r12,lsl#8 +	orr	r3,r3,r2,lsl#16 +	orr	r3,r3,r0,lsl#24 +#endif +	mov	r0,r9,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r9,ror#11  	eor	r2,r10,r11 -	add	r4,r4,r0,ror#6	@ h+=Sigma1(e) +#if 15>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 15==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r9,ror#25	@ Sigma1(e)  	and	r2,r2,r9 -	add	r4,r4,r3			@ h+=K256[i] +	str	r3,[sp,#15*4] +	add	r3,r3,r0  	eor	r2,r2,r11			@ Ch(e,f,g) -	eor	r0,r5,r5,ror#11 -	add	r4,r4,r2			@ h+=Ch(e,f,g) -#if 15==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 15<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r5,r6			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx -	eor	r3,r5,r6			@ a^b, b^c in next round -	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r5,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r8,r8,r4			@ d+=h -	eor	r12,r12,r6			@ Maj(a,b,c) -	add	r4,r4,r0,ror#2	@ h+=Sigma0(a) -	@ add	r4,r4,r12			@ h+=Maj(a,b,c) +	add	r3,r3,r4 +	mov	r4,r5,ror#2 +	add	r3,r3,r2 +	eor	r4,r4,r5,ror#13 +	add	r3,r3,r12 +	eor	r4,r4,r5,ror#22		@ Sigma0(a) +#if 15>=15 +	ldr	r1,[sp,#1*4]		@ from BODY_16_xx +#endif +	orr	r0,r5,r6 +	and	r2,r5,r6 +	and	r0,r0,r7 +	add	r4,r4,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r8,r8,r3 +	add	r4,r4,r0  .Lrounds_16_xx: -	@ ldr	r2,[sp,#1*4]		@ 16 -	@ ldr	r1,[sp,#14*4] -	mov	r0,r2,ror#7 -	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past -	mov	r12,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r12,r12,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#0*4] -	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#9*4] - -	add	r12,r12,r0 -	eor	r0,r8,r8,ror#5	@ from BODY_00_15 -	add	r2,r2,r12 -	eor	r0,r0,r8,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] +	@ ldr	r1,[sp,#1*4]		@ 16 +	ldr	r12,[sp,#14*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#0*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#9*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17 +	add	r3,r3,r0 +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r8,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r11,r11,r2			@ h+=X[i] -	str	r2,[sp,#0*4] +	eor	r0,r0,r8,ror#11  	eor	r2,r9,r10 -	add	r11,r11,r0,ror#6	@ h+=Sigma1(e) +#if 16>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 16==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r8,ror#25	@ Sigma1(e)  	and	r2,r2,r8 -	add	r11,r11,r12			@ h+=K256[i] +	str	r3,[sp,#0*4] +	add	r3,r3,r0  	eor	r2,r2,r10			@ Ch(e,f,g) -	eor	r0,r4,r4,ror#11 -	add	r11,r11,r2			@ h+=Ch(e,f,g) -#if 16==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 16<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r4,r5			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx -	eor	r12,r4,r5			@ a^b, b^c in next round -	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r4,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r7,r7,r11			@ d+=h -	eor	r3,r3,r5			@ Maj(a,b,c) -	add	r11,r11,r0,ror#2	@ h+=Sigma0(a) -	@ add	r11,r11,r3			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#2*4]		@ 17 -	@ ldr	r1,[sp,#15*4] -	mov	r0,r2,ror#7 -	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past -	mov	r3,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r3,r3,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#1*4] -	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#10*4] - +	add	r3,r3,r11 +	mov	r11,r4,ror#2 +	add	r3,r3,r2 +	eor	r11,r11,r4,ror#13 +	add	r3,r3,r12 +	eor	r11,r11,r4,ror#22		@ Sigma0(a) +#if 16>=15 +	ldr	r1,[sp,#2*4]		@ from BODY_16_xx +#endif +	orr	r0,r4,r5 +	and	r2,r4,r5 +	and	r0,r0,r6 +	add	r11,r11,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r7,r7,r3 +	add	r11,r11,r0 +	@ ldr	r1,[sp,#2*4]		@ 17 +	ldr	r12,[sp,#15*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#1*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#10*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17  	add	r3,r3,r0 -	eor	r0,r7,r7,ror#5	@ from BODY_00_15 -	add	r2,r2,r3 -	eor	r0,r0,r7,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] -	ldr	r3,[r14],#4			@ *K256++ -	add	r10,r10,r2			@ h+=X[i] -	str	r2,[sp,#1*4] +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r7,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r7,ror#11  	eor	r2,r8,r9 -	add	r10,r10,r0,ror#6	@ h+=Sigma1(e) +#if 17>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 17==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r7,ror#25	@ Sigma1(e)  	and	r2,r2,r7 -	add	r10,r10,r3			@ h+=K256[i] +	str	r3,[sp,#1*4] +	add	r3,r3,r0  	eor	r2,r2,r9			@ Ch(e,f,g) -	eor	r0,r11,r11,ror#11 -	add	r10,r10,r2			@ h+=Ch(e,f,g) -#if 17==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 17<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r11,r4			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx -	eor	r3,r11,r4			@ a^b, b^c in next round -	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r11,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r6,r6,r10			@ d+=h -	eor	r12,r12,r4			@ Maj(a,b,c) -	add	r10,r10,r0,ror#2	@ h+=Sigma0(a) -	@ add	r10,r10,r12			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#3*4]		@ 18 -	@ ldr	r1,[sp,#0*4] -	mov	r0,r2,ror#7 -	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past -	mov	r12,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r12,r12,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#2*4] -	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#11*4] - -	add	r12,r12,r0 -	eor	r0,r6,r6,ror#5	@ from BODY_00_15 -	add	r2,r2,r12 -	eor	r0,r0,r6,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] +	add	r3,r3,r10 +	mov	r10,r11,ror#2 +	add	r3,r3,r2 +	eor	r10,r10,r11,ror#13 +	add	r3,r3,r12 +	eor	r10,r10,r11,ror#22		@ Sigma0(a) +#if 17>=15 +	ldr	r1,[sp,#3*4]		@ from BODY_16_xx +#endif +	orr	r0,r11,r4 +	and	r2,r11,r4 +	and	r0,r0,r5 +	add	r10,r10,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r6,r6,r3 +	add	r10,r10,r0 +	@ ldr	r1,[sp,#3*4]		@ 18 +	ldr	r12,[sp,#0*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#2*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#11*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17 +	add	r3,r3,r0 +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r6,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r9,r9,r2			@ h+=X[i] -	str	r2,[sp,#2*4] +	eor	r0,r0,r6,ror#11  	eor	r2,r7,r8 -	add	r9,r9,r0,ror#6	@ h+=Sigma1(e) +#if 18>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 18==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r6,ror#25	@ Sigma1(e)  	and	r2,r2,r6 -	add	r9,r9,r12			@ h+=K256[i] +	str	r3,[sp,#2*4] +	add	r3,r3,r0  	eor	r2,r2,r8			@ Ch(e,f,g) -	eor	r0,r10,r10,ror#11 -	add	r9,r9,r2			@ h+=Ch(e,f,g) -#if 18==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 18<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r10,r11			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx -	eor	r12,r10,r11			@ a^b, b^c in next round -	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r10,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r5,r5,r9			@ d+=h -	eor	r3,r3,r11			@ Maj(a,b,c) -	add	r9,r9,r0,ror#2	@ h+=Sigma0(a) -	@ add	r9,r9,r3			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#4*4]		@ 19 -	@ ldr	r1,[sp,#1*4] -	mov	r0,r2,ror#7 -	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past -	mov	r3,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r3,r3,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#3*4] -	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#12*4] - +	add	r3,r3,r9 +	mov	r9,r10,ror#2 +	add	r3,r3,r2 +	eor	r9,r9,r10,ror#13 +	add	r3,r3,r12 +	eor	r9,r9,r10,ror#22		@ Sigma0(a) +#if 18>=15 +	ldr	r1,[sp,#4*4]		@ from BODY_16_xx +#endif +	orr	r0,r10,r11 +	and	r2,r10,r11 +	and	r0,r0,r4 +	add	r9,r9,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r5,r5,r3 +	add	r9,r9,r0 +	@ ldr	r1,[sp,#4*4]		@ 19 +	ldr	r12,[sp,#1*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#3*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#12*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17  	add	r3,r3,r0 -	eor	r0,r5,r5,ror#5	@ from BODY_00_15 -	add	r2,r2,r3 -	eor	r0,r0,r5,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] -	ldr	r3,[r14],#4			@ *K256++ -	add	r8,r8,r2			@ h+=X[i] -	str	r2,[sp,#3*4] +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r5,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r5,ror#11  	eor	r2,r6,r7 -	add	r8,r8,r0,ror#6	@ h+=Sigma1(e) +#if 19>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 19==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r5,ror#25	@ Sigma1(e)  	and	r2,r2,r5 -	add	r8,r8,r3			@ h+=K256[i] +	str	r3,[sp,#3*4] +	add	r3,r3,r0  	eor	r2,r2,r7			@ Ch(e,f,g) -	eor	r0,r9,r9,ror#11 -	add	r8,r8,r2			@ h+=Ch(e,f,g) -#if 19==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 19<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r9,r10			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx -	eor	r3,r9,r10			@ a^b, b^c in next round -	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r9,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r4,r4,r8			@ d+=h -	eor	r12,r12,r10			@ Maj(a,b,c) -	add	r8,r8,r0,ror#2	@ h+=Sigma0(a) -	@ add	r8,r8,r12			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#5*4]		@ 20 -	@ ldr	r1,[sp,#2*4] -	mov	r0,r2,ror#7 -	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past -	mov	r12,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r12,r12,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#4*4] -	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#13*4] - -	add	r12,r12,r0 -	eor	r0,r4,r4,ror#5	@ from BODY_00_15 -	add	r2,r2,r12 -	eor	r0,r0,r4,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] +	add	r3,r3,r8 +	mov	r8,r9,ror#2 +	add	r3,r3,r2 +	eor	r8,r8,r9,ror#13 +	add	r3,r3,r12 +	eor	r8,r8,r9,ror#22		@ Sigma0(a) +#if 19>=15 +	ldr	r1,[sp,#5*4]		@ from BODY_16_xx +#endif +	orr	r0,r9,r10 +	and	r2,r9,r10 +	and	r0,r0,r11 +	add	r8,r8,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r4,r4,r3 +	add	r8,r8,r0 +	@ ldr	r1,[sp,#5*4]		@ 20 +	ldr	r12,[sp,#2*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#4*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#13*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17 +	add	r3,r3,r0 +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r4,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r7,r7,r2			@ h+=X[i] -	str	r2,[sp,#4*4] +	eor	r0,r0,r4,ror#11  	eor	r2,r5,r6 -	add	r7,r7,r0,ror#6	@ h+=Sigma1(e) +#if 20>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 20==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r4,ror#25	@ Sigma1(e)  	and	r2,r2,r4 -	add	r7,r7,r12			@ h+=K256[i] +	str	r3,[sp,#4*4] +	add	r3,r3,r0  	eor	r2,r2,r6			@ Ch(e,f,g) -	eor	r0,r8,r8,ror#11 -	add	r7,r7,r2			@ h+=Ch(e,f,g) -#if 20==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 20<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r8,r9			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx -	eor	r12,r8,r9			@ a^b, b^c in next round -	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r8,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r11,r11,r7			@ d+=h -	eor	r3,r3,r9			@ Maj(a,b,c) -	add	r7,r7,r0,ror#2	@ h+=Sigma0(a) -	@ add	r7,r7,r3			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#6*4]		@ 21 -	@ ldr	r1,[sp,#3*4] -	mov	r0,r2,ror#7 -	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past -	mov	r3,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r3,r3,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#5*4] -	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#14*4] - +	add	r3,r3,r7 +	mov	r7,r8,ror#2 +	add	r3,r3,r2 +	eor	r7,r7,r8,ror#13 +	add	r3,r3,r12 +	eor	r7,r7,r8,ror#22		@ Sigma0(a) +#if 20>=15 +	ldr	r1,[sp,#6*4]		@ from BODY_16_xx +#endif +	orr	r0,r8,r9 +	and	r2,r8,r9 +	and	r0,r0,r10 +	add	r7,r7,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r11,r11,r3 +	add	r7,r7,r0 +	@ ldr	r1,[sp,#6*4]		@ 21 +	ldr	r12,[sp,#3*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#5*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#14*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17  	add	r3,r3,r0 -	eor	r0,r11,r11,ror#5	@ from BODY_00_15 -	add	r2,r2,r3 -	eor	r0,r0,r11,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] -	ldr	r3,[r14],#4			@ *K256++ -	add	r6,r6,r2			@ h+=X[i] -	str	r2,[sp,#5*4] +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r11,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r11,ror#11  	eor	r2,r4,r5 -	add	r6,r6,r0,ror#6	@ h+=Sigma1(e) +#if 21>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 21==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r11,ror#25	@ Sigma1(e)  	and	r2,r2,r11 -	add	r6,r6,r3			@ h+=K256[i] +	str	r3,[sp,#5*4] +	add	r3,r3,r0  	eor	r2,r2,r5			@ Ch(e,f,g) -	eor	r0,r7,r7,ror#11 -	add	r6,r6,r2			@ h+=Ch(e,f,g) -#if 21==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 21<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r7,r8			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx -	eor	r3,r7,r8			@ a^b, b^c in next round -	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r7,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r10,r10,r6			@ d+=h -	eor	r12,r12,r8			@ Maj(a,b,c) -	add	r6,r6,r0,ror#2	@ h+=Sigma0(a) -	@ add	r6,r6,r12			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#7*4]		@ 22 -	@ ldr	r1,[sp,#4*4] -	mov	r0,r2,ror#7 -	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past -	mov	r12,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r12,r12,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#6*4] -	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#15*4] - -	add	r12,r12,r0 -	eor	r0,r10,r10,ror#5	@ from BODY_00_15 -	add	r2,r2,r12 -	eor	r0,r0,r10,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] +	add	r3,r3,r6 +	mov	r6,r7,ror#2 +	add	r3,r3,r2 +	eor	r6,r6,r7,ror#13 +	add	r3,r3,r12 +	eor	r6,r6,r7,ror#22		@ Sigma0(a) +#if 21>=15 +	ldr	r1,[sp,#7*4]		@ from BODY_16_xx +#endif +	orr	r0,r7,r8 +	and	r2,r7,r8 +	and	r0,r0,r9 +	add	r6,r6,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r10,r10,r3 +	add	r6,r6,r0 +	@ ldr	r1,[sp,#7*4]		@ 22 +	ldr	r12,[sp,#4*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#6*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#15*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17 +	add	r3,r3,r0 +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r10,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r5,r5,r2			@ h+=X[i] -	str	r2,[sp,#6*4] +	eor	r0,r0,r10,ror#11  	eor	r2,r11,r4 -	add	r5,r5,r0,ror#6	@ h+=Sigma1(e) +#if 22>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 22==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r10,ror#25	@ Sigma1(e)  	and	r2,r2,r10 -	add	r5,r5,r12			@ h+=K256[i] +	str	r3,[sp,#6*4] +	add	r3,r3,r0  	eor	r2,r2,r4			@ Ch(e,f,g) -	eor	r0,r6,r6,ror#11 -	add	r5,r5,r2			@ h+=Ch(e,f,g) -#if 22==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 22<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r6,r7			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx -	eor	r12,r6,r7			@ a^b, b^c in next round -	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r6,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r9,r9,r5			@ d+=h -	eor	r3,r3,r7			@ Maj(a,b,c) -	add	r5,r5,r0,ror#2	@ h+=Sigma0(a) -	@ add	r5,r5,r3			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#8*4]		@ 23 -	@ ldr	r1,[sp,#5*4] -	mov	r0,r2,ror#7 -	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past -	mov	r3,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r3,r3,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#7*4] -	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#0*4] - +	add	r3,r3,r5 +	mov	r5,r6,ror#2 +	add	r3,r3,r2 +	eor	r5,r5,r6,ror#13 +	add	r3,r3,r12 +	eor	r5,r5,r6,ror#22		@ Sigma0(a) +#if 22>=15 +	ldr	r1,[sp,#8*4]		@ from BODY_16_xx +#endif +	orr	r0,r6,r7 +	and	r2,r6,r7 +	and	r0,r0,r8 +	add	r5,r5,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r9,r9,r3 +	add	r5,r5,r0 +	@ ldr	r1,[sp,#8*4]		@ 23 +	ldr	r12,[sp,#5*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#7*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#0*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17  	add	r3,r3,r0 -	eor	r0,r9,r9,ror#5	@ from BODY_00_15 -	add	r2,r2,r3 -	eor	r0,r0,r9,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] -	ldr	r3,[r14],#4			@ *K256++ -	add	r4,r4,r2			@ h+=X[i] -	str	r2,[sp,#7*4] +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r9,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r9,ror#11  	eor	r2,r10,r11 -	add	r4,r4,r0,ror#6	@ h+=Sigma1(e) +#if 23>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 23==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r9,ror#25	@ Sigma1(e)  	and	r2,r2,r9 -	add	r4,r4,r3			@ h+=K256[i] +	str	r3,[sp,#7*4] +	add	r3,r3,r0  	eor	r2,r2,r11			@ Ch(e,f,g) -	eor	r0,r5,r5,ror#11 -	add	r4,r4,r2			@ h+=Ch(e,f,g) -#if 23==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 23<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r5,r6			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx -	eor	r3,r5,r6			@ a^b, b^c in next round -	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r5,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r8,r8,r4			@ d+=h -	eor	r12,r12,r6			@ Maj(a,b,c) -	add	r4,r4,r0,ror#2	@ h+=Sigma0(a) -	@ add	r4,r4,r12			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#9*4]		@ 24 -	@ ldr	r1,[sp,#6*4] -	mov	r0,r2,ror#7 -	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past -	mov	r12,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r12,r12,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#8*4] -	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#1*4] - -	add	r12,r12,r0 -	eor	r0,r8,r8,ror#5	@ from BODY_00_15 -	add	r2,r2,r12 -	eor	r0,r0,r8,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] +	add	r3,r3,r4 +	mov	r4,r5,ror#2 +	add	r3,r3,r2 +	eor	r4,r4,r5,ror#13 +	add	r3,r3,r12 +	eor	r4,r4,r5,ror#22		@ Sigma0(a) +#if 23>=15 +	ldr	r1,[sp,#9*4]		@ from BODY_16_xx +#endif +	orr	r0,r5,r6 +	and	r2,r5,r6 +	and	r0,r0,r7 +	add	r4,r4,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r8,r8,r3 +	add	r4,r4,r0 +	@ ldr	r1,[sp,#9*4]		@ 24 +	ldr	r12,[sp,#6*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#8*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#1*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17 +	add	r3,r3,r0 +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r8,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r11,r11,r2			@ h+=X[i] -	str	r2,[sp,#8*4] +	eor	r0,r0,r8,ror#11  	eor	r2,r9,r10 -	add	r11,r11,r0,ror#6	@ h+=Sigma1(e) +#if 24>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 24==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r8,ror#25	@ Sigma1(e)  	and	r2,r2,r8 -	add	r11,r11,r12			@ h+=K256[i] +	str	r3,[sp,#8*4] +	add	r3,r3,r0  	eor	r2,r2,r10			@ Ch(e,f,g) -	eor	r0,r4,r4,ror#11 -	add	r11,r11,r2			@ h+=Ch(e,f,g) -#if 24==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 24<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r4,r5			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx -	eor	r12,r4,r5			@ a^b, b^c in next round -	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r4,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r7,r7,r11			@ d+=h -	eor	r3,r3,r5			@ Maj(a,b,c) -	add	r11,r11,r0,ror#2	@ h+=Sigma0(a) -	@ add	r11,r11,r3			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#10*4]		@ 25 -	@ ldr	r1,[sp,#7*4] -	mov	r0,r2,ror#7 -	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past -	mov	r3,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r3,r3,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#9*4] -	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#2*4] - +	add	r3,r3,r11 +	mov	r11,r4,ror#2 +	add	r3,r3,r2 +	eor	r11,r11,r4,ror#13 +	add	r3,r3,r12 +	eor	r11,r11,r4,ror#22		@ Sigma0(a) +#if 24>=15 +	ldr	r1,[sp,#10*4]		@ from BODY_16_xx +#endif +	orr	r0,r4,r5 +	and	r2,r4,r5 +	and	r0,r0,r6 +	add	r11,r11,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r7,r7,r3 +	add	r11,r11,r0 +	@ ldr	r1,[sp,#10*4]		@ 25 +	ldr	r12,[sp,#7*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#9*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#2*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17  	add	r3,r3,r0 -	eor	r0,r7,r7,ror#5	@ from BODY_00_15 -	add	r2,r2,r3 -	eor	r0,r0,r7,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] -	ldr	r3,[r14],#4			@ *K256++ -	add	r10,r10,r2			@ h+=X[i] -	str	r2,[sp,#9*4] +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r7,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r7,ror#11  	eor	r2,r8,r9 -	add	r10,r10,r0,ror#6	@ h+=Sigma1(e) +#if 25>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 25==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r7,ror#25	@ Sigma1(e)  	and	r2,r2,r7 -	add	r10,r10,r3			@ h+=K256[i] +	str	r3,[sp,#9*4] +	add	r3,r3,r0  	eor	r2,r2,r9			@ Ch(e,f,g) -	eor	r0,r11,r11,ror#11 -	add	r10,r10,r2			@ h+=Ch(e,f,g) -#if 25==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 25<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r11,r4			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx -	eor	r3,r11,r4			@ a^b, b^c in next round -	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r11,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r6,r6,r10			@ d+=h -	eor	r12,r12,r4			@ Maj(a,b,c) -	add	r10,r10,r0,ror#2	@ h+=Sigma0(a) -	@ add	r10,r10,r12			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#11*4]		@ 26 -	@ ldr	r1,[sp,#8*4] -	mov	r0,r2,ror#7 -	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past -	mov	r12,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r12,r12,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#10*4] -	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#3*4] - -	add	r12,r12,r0 -	eor	r0,r6,r6,ror#5	@ from BODY_00_15 -	add	r2,r2,r12 -	eor	r0,r0,r6,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] +	add	r3,r3,r10 +	mov	r10,r11,ror#2 +	add	r3,r3,r2 +	eor	r10,r10,r11,ror#13 +	add	r3,r3,r12 +	eor	r10,r10,r11,ror#22		@ Sigma0(a) +#if 25>=15 +	ldr	r1,[sp,#11*4]		@ from BODY_16_xx +#endif +	orr	r0,r11,r4 +	and	r2,r11,r4 +	and	r0,r0,r5 +	add	r10,r10,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r6,r6,r3 +	add	r10,r10,r0 +	@ ldr	r1,[sp,#11*4]		@ 26 +	ldr	r12,[sp,#8*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#10*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#3*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17 +	add	r3,r3,r0 +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r6,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r9,r9,r2			@ h+=X[i] -	str	r2,[sp,#10*4] +	eor	r0,r0,r6,ror#11  	eor	r2,r7,r8 -	add	r9,r9,r0,ror#6	@ h+=Sigma1(e) +#if 26>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 26==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r6,ror#25	@ Sigma1(e)  	and	r2,r2,r6 -	add	r9,r9,r12			@ h+=K256[i] +	str	r3,[sp,#10*4] +	add	r3,r3,r0  	eor	r2,r2,r8			@ Ch(e,f,g) -	eor	r0,r10,r10,ror#11 -	add	r9,r9,r2			@ h+=Ch(e,f,g) -#if 26==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 26<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r10,r11			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx -	eor	r12,r10,r11			@ a^b, b^c in next round -	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r10,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r5,r5,r9			@ d+=h -	eor	r3,r3,r11			@ Maj(a,b,c) -	add	r9,r9,r0,ror#2	@ h+=Sigma0(a) -	@ add	r9,r9,r3			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#12*4]		@ 27 -	@ ldr	r1,[sp,#9*4] -	mov	r0,r2,ror#7 -	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past -	mov	r3,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r3,r3,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#11*4] -	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#4*4] - +	add	r3,r3,r9 +	mov	r9,r10,ror#2 +	add	r3,r3,r2 +	eor	r9,r9,r10,ror#13 +	add	r3,r3,r12 +	eor	r9,r9,r10,ror#22		@ Sigma0(a) +#if 26>=15 +	ldr	r1,[sp,#12*4]		@ from BODY_16_xx +#endif +	orr	r0,r10,r11 +	and	r2,r10,r11 +	and	r0,r0,r4 +	add	r9,r9,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r5,r5,r3 +	add	r9,r9,r0 +	@ ldr	r1,[sp,#12*4]		@ 27 +	ldr	r12,[sp,#9*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#11*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#4*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17  	add	r3,r3,r0 -	eor	r0,r5,r5,ror#5	@ from BODY_00_15 -	add	r2,r2,r3 -	eor	r0,r0,r5,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] -	ldr	r3,[r14],#4			@ *K256++ -	add	r8,r8,r2			@ h+=X[i] -	str	r2,[sp,#11*4] +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r5,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r5,ror#11  	eor	r2,r6,r7 -	add	r8,r8,r0,ror#6	@ h+=Sigma1(e) +#if 27>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 27==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r5,ror#25	@ Sigma1(e)  	and	r2,r2,r5 -	add	r8,r8,r3			@ h+=K256[i] +	str	r3,[sp,#11*4] +	add	r3,r3,r0  	eor	r2,r2,r7			@ Ch(e,f,g) -	eor	r0,r9,r9,ror#11 -	add	r8,r8,r2			@ h+=Ch(e,f,g) -#if 27==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 27<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r9,r10			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx -	eor	r3,r9,r10			@ a^b, b^c in next round -	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r9,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r4,r4,r8			@ d+=h -	eor	r12,r12,r10			@ Maj(a,b,c) -	add	r8,r8,r0,ror#2	@ h+=Sigma0(a) -	@ add	r8,r8,r12			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#13*4]		@ 28 -	@ ldr	r1,[sp,#10*4] -	mov	r0,r2,ror#7 -	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past -	mov	r12,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r12,r12,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#12*4] -	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#5*4] - -	add	r12,r12,r0 -	eor	r0,r4,r4,ror#5	@ from BODY_00_15 -	add	r2,r2,r12 -	eor	r0,r0,r4,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] +	add	r3,r3,r8 +	mov	r8,r9,ror#2 +	add	r3,r3,r2 +	eor	r8,r8,r9,ror#13 +	add	r3,r3,r12 +	eor	r8,r8,r9,ror#22		@ Sigma0(a) +#if 27>=15 +	ldr	r1,[sp,#13*4]		@ from BODY_16_xx +#endif +	orr	r0,r9,r10 +	and	r2,r9,r10 +	and	r0,r0,r11 +	add	r8,r8,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r4,r4,r3 +	add	r8,r8,r0 +	@ ldr	r1,[sp,#13*4]		@ 28 +	ldr	r12,[sp,#10*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#12*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#5*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17 +	add	r3,r3,r0 +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r4,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r7,r7,r2			@ h+=X[i] -	str	r2,[sp,#12*4] +	eor	r0,r0,r4,ror#11  	eor	r2,r5,r6 -	add	r7,r7,r0,ror#6	@ h+=Sigma1(e) +#if 28>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 28==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r4,ror#25	@ Sigma1(e)  	and	r2,r2,r4 -	add	r7,r7,r12			@ h+=K256[i] +	str	r3,[sp,#12*4] +	add	r3,r3,r0  	eor	r2,r2,r6			@ Ch(e,f,g) -	eor	r0,r8,r8,ror#11 -	add	r7,r7,r2			@ h+=Ch(e,f,g) -#if 28==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 28<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r8,r9			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx -	eor	r12,r8,r9			@ a^b, b^c in next round -	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r8,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r11,r11,r7			@ d+=h -	eor	r3,r3,r9			@ Maj(a,b,c) -	add	r7,r7,r0,ror#2	@ h+=Sigma0(a) -	@ add	r7,r7,r3			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#14*4]		@ 29 -	@ ldr	r1,[sp,#11*4] -	mov	r0,r2,ror#7 -	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past -	mov	r3,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r3,r3,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#13*4] -	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#6*4] - +	add	r3,r3,r7 +	mov	r7,r8,ror#2 +	add	r3,r3,r2 +	eor	r7,r7,r8,ror#13 +	add	r3,r3,r12 +	eor	r7,r7,r8,ror#22		@ Sigma0(a) +#if 28>=15 +	ldr	r1,[sp,#14*4]		@ from BODY_16_xx +#endif +	orr	r0,r8,r9 +	and	r2,r8,r9 +	and	r0,r0,r10 +	add	r7,r7,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r11,r11,r3 +	add	r7,r7,r0 +	@ ldr	r1,[sp,#14*4]		@ 29 +	ldr	r12,[sp,#11*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#13*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#6*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17  	add	r3,r3,r0 -	eor	r0,r11,r11,ror#5	@ from BODY_00_15 -	add	r2,r2,r3 -	eor	r0,r0,r11,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] -	ldr	r3,[r14],#4			@ *K256++ -	add	r6,r6,r2			@ h+=X[i] -	str	r2,[sp,#13*4] +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r11,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r11,ror#11  	eor	r2,r4,r5 -	add	r6,r6,r0,ror#6	@ h+=Sigma1(e) +#if 29>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 29==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r11,ror#25	@ Sigma1(e)  	and	r2,r2,r11 -	add	r6,r6,r3			@ h+=K256[i] +	str	r3,[sp,#13*4] +	add	r3,r3,r0  	eor	r2,r2,r5			@ Ch(e,f,g) -	eor	r0,r7,r7,ror#11 -	add	r6,r6,r2			@ h+=Ch(e,f,g) -#if 29==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 29<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r7,r8			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx -	eor	r3,r7,r8			@ a^b, b^c in next round -	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r7,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r10,r10,r6			@ d+=h -	eor	r12,r12,r8			@ Maj(a,b,c) -	add	r6,r6,r0,ror#2	@ h+=Sigma0(a) -	@ add	r6,r6,r12			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#15*4]		@ 30 -	@ ldr	r1,[sp,#12*4] -	mov	r0,r2,ror#7 -	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past -	mov	r12,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r12,r12,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#14*4] -	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#7*4] - -	add	r12,r12,r0 -	eor	r0,r10,r10,ror#5	@ from BODY_00_15 -	add	r2,r2,r12 -	eor	r0,r0,r10,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] +	add	r3,r3,r6 +	mov	r6,r7,ror#2 +	add	r3,r3,r2 +	eor	r6,r6,r7,ror#13 +	add	r3,r3,r12 +	eor	r6,r6,r7,ror#22		@ Sigma0(a) +#if 29>=15 +	ldr	r1,[sp,#15*4]		@ from BODY_16_xx +#endif +	orr	r0,r7,r8 +	and	r2,r7,r8 +	and	r0,r0,r9 +	add	r6,r6,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r10,r10,r3 +	add	r6,r6,r0 +	@ ldr	r1,[sp,#15*4]		@ 30 +	ldr	r12,[sp,#12*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#14*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#7*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17 +	add	r3,r3,r0 +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r10,ror#6  	ldr	r12,[r14],#4			@ *K256++ -	add	r5,r5,r2			@ h+=X[i] -	str	r2,[sp,#14*4] +	eor	r0,r0,r10,ror#11  	eor	r2,r11,r4 -	add	r5,r5,r0,ror#6	@ h+=Sigma1(e) +#if 30>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 30==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r10,ror#25	@ Sigma1(e)  	and	r2,r2,r10 -	add	r5,r5,r12			@ h+=K256[i] +	str	r3,[sp,#14*4] +	add	r3,r3,r0  	eor	r2,r2,r4			@ Ch(e,f,g) -	eor	r0,r6,r6,ror#11 -	add	r5,r5,r2			@ h+=Ch(e,f,g) -#if 30==31 -	and	r12,r12,#0xff -	cmp	r12,#0xf2			@ done? 
-#endif -#if 30<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r12,r6,r7			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx -	eor	r12,r6,r7			@ a^b, b^c in next round -	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r6,ror#20	@ Sigma0(a) -	and	r3,r3,r12			@ (b^c)&=(a^b) -	add	r9,r9,r5			@ d+=h -	eor	r3,r3,r7			@ Maj(a,b,c) -	add	r5,r5,r0,ror#2	@ h+=Sigma0(a) -	@ add	r5,r5,r3			@ h+=Maj(a,b,c) -	@ ldr	r2,[sp,#0*4]		@ 31 -	@ ldr	r1,[sp,#13*4] -	mov	r0,r2,ror#7 -	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past -	mov	r3,r1,ror#17 -	eor	r0,r0,r2,ror#18 -	eor	r3,r3,r1,ror#19 -	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1]) -	ldr	r2,[sp,#15*4] -	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14]) -	ldr	r1,[sp,#8*4] - +	add	r3,r3,r5 +	mov	r5,r6,ror#2 +	add	r3,r3,r2 +	eor	r5,r5,r6,ror#13 +	add	r3,r3,r12 +	eor	r5,r5,r6,ror#22		@ Sigma0(a) +#if 30>=15 +	ldr	r1,[sp,#0*4]		@ from BODY_16_xx +#endif +	orr	r0,r6,r7 +	and	r2,r6,r7 +	and	r0,r0,r8 +	add	r5,r5,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r9,r9,r3 +	add	r5,r5,r0 +	@ ldr	r1,[sp,#0*4]		@ 31 +	ldr	r12,[sp,#13*4] +	mov	r0,r1,ror#7 +	ldr	r3,[sp,#15*4] +	eor	r0,r0,r1,ror#18 +	ldr	r2,[sp,#8*4] +	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1]) +	mov	r1,r12,ror#17  	add	r3,r3,r0 -	eor	r0,r9,r9,ror#5	@ from BODY_00_15 -	add	r2,r2,r3 -	eor	r0,r0,r9,ror#19	@ Sigma1(e) -	add	r2,r2,r1			@ X[i] -	ldr	r3,[r14],#4			@ *K256++ -	add	r4,r4,r2			@ h+=X[i] -	str	r2,[sp,#15*4] +	eor	r1,r1,r12,ror#19 +	add	r3,r3,r2 +	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14]) +	@ add	r3,r3,r1 +	mov	r0,r9,ror#6 +	ldr	r12,[r14],#4			@ *K256++ +	eor	r0,r0,r9,ror#11  	eor	r2,r10,r11 -	add	r4,r4,r0,ror#6	@ h+=Sigma1(e) +#if 31>=16 +	add	r3,r3,r1			@ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) +	rev	r3,r3 +#endif +#if 31==15 +	str	r1,[sp,#17*4]			@ leave room for r1 +#endif +	eor	r0,r0,r9,ror#25	@ Sigma1(e)  	and	r2,r2,r9 -	add	r4,r4,r3			@ h+=K256[i] +	str	r3,[sp,#15*4] +	add	r3,r3,r0  	eor	r2,r2,r11			@ Ch(e,f,g) -	eor	r0,r5,r5,ror#11 -	add	r4,r4,r2			@ h+=Ch(e,f,g) -#if 31==31 -	and	r3,r3,#0xff -	cmp	r3,#0xf2			@ done? 
-#endif -#if 31<15 -# if __ARM_ARCH__>=7 -	ldr	r2,[r1],#4			@ prefetch -# else -	ldrb	r2,[r1,#3] -# endif -	eor	r3,r5,r6			@ a^b, b^c in next round -#else -	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx -	eor	r3,r5,r6			@ a^b, b^c in next round -	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx -#endif -	eor	r0,r0,r5,ror#20	@ Sigma0(a) -	and	r12,r12,r3			@ (b^c)&=(a^b) -	add	r8,r8,r4			@ d+=h -	eor	r12,r12,r6			@ Maj(a,b,c) -	add	r4,r4,r0,ror#2	@ h+=Sigma0(a) -	@ add	r4,r4,r12			@ h+=Maj(a,b,c) -	ldreq	r3,[sp,#16*4]		@ pull ctx +	add	r3,r3,r4 +	mov	r4,r5,ror#2 +	add	r3,r3,r2 +	eor	r4,r4,r5,ror#13 +	add	r3,r3,r12 +	eor	r4,r4,r5,ror#22		@ Sigma0(a) +#if 31>=15 +	ldr	r1,[sp,#1*4]		@ from BODY_16_xx +#endif +	orr	r0,r5,r6 +	and	r2,r5,r6 +	and	r0,r0,r7 +	add	r4,r4,r3 +	orr	r0,r0,r2			@ Maj(a,b,c) +	add	r8,r8,r3 +	add	r4,r4,r0 +	and	r12,r12,#0xff +	cmp	r12,#0xf2  	bne	.Lrounds_16_xx -	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past +	ldr	r3,[sp,#16*4]		@ pull ctx  	ldr	r0,[r3,#0]  	ldr	r2,[r3,#4]  	ldr	r12,[r3,#8] @@ -1770,921 +1512,6 @@ sha256_block_data_order:  	moveq	pc,lr			@ be binary compatible with V4, yet  	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)  #endif -.size	sha256_block_data_order,.-sha256_block_data_order -#if __ARM_ARCH__>=7 -.fpu	neon - -.type	sha256_block_data_order_neon,%function -.align	4 -sha256_block_data_order_neon: -.LNEON: -	stmdb	sp!,{r4-r12,lr} - -	mov	r12,sp -	sub	sp,sp,#16*4+16		@ alloca -	sub	r14,r3,#256+32	@ K256 -	bic	sp,sp,#15		@ align for 128-bit stores - -	vld1.8		{q0},[r1]! -	vld1.8		{q1},[r1]! -	vld1.8		{q2},[r1]! -	vld1.8		{q3},[r1]! -	vld1.32		{q8},[r14,:128]! -	vld1.32		{q9},[r14,:128]! -	vld1.32		{q10},[r14,:128]! -	vld1.32		{q11},[r14,:128]! -	vrev32.8	q0,q0		@ yes, even on -	str		r0,[sp,#64] -	vrev32.8	q1,q1		@ big-endian -	str		r1,[sp,#68] -	mov		r1,sp -	vrev32.8	q2,q2 -	str		r2,[sp,#72] -	vrev32.8	q3,q3 -	str		r12,[sp,#76]		@ save original sp -	vadd.i32	q8,q8,q0 -	vadd.i32	q9,q9,q1 -	vst1.32		{q8},[r1,:128]! -	vadd.i32	q10,q10,q2 -	vst1.32		{q9},[r1,:128]! -	vadd.i32	q11,q11,q3 -	vst1.32		{q10},[r1,:128]! -	vst1.32		{q11},[r1,:128]! 
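For orientation while reading the interleaved rounds above: the deleted (1.0.1h) and restored (1.0.1g) ARMv4 code compute the same FIPS 180-4 SHA-256 round and differ only in scheduling — the deleted version defers `h+=Maj(a,b,c)` into the following round ("from the past") and derives Maj through a shared xor/and trick, while the restored version recomputes it directly with orr/and. A minimal scalar C sketch of the quantities named in the assembly comments; `ROR32` and the helper layout are illustrative, not code from this tree:

#include <stdint.h>

/* Illustrative helper, not from this tree: 32-bit rotate right, 0 < n < 32. */
static inline uint32_t ROR32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

/* The quantities named in the assembly comments (FIPS 180-4). */
static inline uint32_t Sigma0(uint32_t a) { return ROR32(a, 2) ^ ROR32(a, 13) ^ ROR32(a, 22); }
static inline uint32_t Sigma1(uint32_t e) { return ROR32(e, 6) ^ ROR32(e, 11) ^ ROR32(e, 25); }
static inline uint32_t Ch (uint32_t e, uint32_t f, uint32_t g) { return (e & f) ^ (~e & g); }
static inline uint32_t Maj(uint32_t a, uint32_t b, uint32_t c) { return (a & b) ^ (a & c) ^ (b & c); }
/* Message-schedule sigmas: "sigma0(X[i+1])" and "sigma1(X[i+14])" above. */
static inline uint32_t sigma0(uint32_t x) { return ROR32(x, 7)  ^ ROR32(x, 18) ^ (x >> 3);  }
static inline uint32_t sigma1(uint32_t x) { return ROR32(x, 17) ^ ROR32(x, 19) ^ (x >> 10); }

The rotation counts match both variants: the deleted code folds a final `ror#6`/`ror#2` into the accumulating add (so `e^e ror5^e ror19` rotated by 6 equals `ror6^ror11^ror25`), while the restored code spells the three rotates out per round.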
- -	ldmia		r0,{r4-r11} -	sub		r1,r1,#64 -	ldr		r2,[sp,#0] -	eor		r12,r12,r12 -	eor		r3,r5,r6 -	b		.L_00_48 - -.align	4 -.L_00_48: -	vext.8	q8,q0,q1,#4 -	add	r11,r11,r2 -	eor	r2,r9,r10 -	eor	r0,r8,r8,ror#5 -	vext.8	q9,q2,q3,#4 -	add	r4,r4,r12 -	and	r2,r2,r8 -	eor	r12,r0,r8,ror#19 -	vshr.u32	q10,q8,#7 -	eor	r0,r4,r4,ror#11 -	eor	r2,r2,r10 -	vadd.i32	q0,q0,q9 -	add	r11,r11,r12,ror#6 -	eor	r12,r4,r5 -	vshr.u32	q9,q8,#3 -	eor	r0,r0,r4,ror#20 -	add	r11,r11,r2 -	vsli.32	q10,q8,#25 -	ldr	r2,[sp,#4] -	and	r3,r3,r12 -	vshr.u32	q11,q8,#18 -	add	r7,r7,r11 -	add	r11,r11,r0,ror#2 -	eor	r3,r3,r5 -	veor	q9,q9,q10 -	add	r10,r10,r2 -	vsli.32	q11,q8,#14 -	eor	r2,r8,r9 -	eor	r0,r7,r7,ror#5 -	vshr.u32	d24,d7,#17 -	add	r11,r11,r3 -	and	r2,r2,r7 -	veor	q9,q9,q11 -	eor	r3,r0,r7,ror#19 -	eor	r0,r11,r11,ror#11 -	vsli.32	d24,d7,#15 -	eor	r2,r2,r9 -	add	r10,r10,r3,ror#6 -	vshr.u32	d25,d7,#10 -	eor	r3,r11,r4 -	eor	r0,r0,r11,ror#20 -	vadd.i32	q0,q0,q9 -	add	r10,r10,r2 -	ldr	r2,[sp,#8] -	veor	d25,d25,d24 -	and	r12,r12,r3 -	add	r6,r6,r10 -	vshr.u32	d24,d7,#19 -	add	r10,r10,r0,ror#2 -	eor	r12,r12,r4 -	vsli.32	d24,d7,#13 -	add	r9,r9,r2 -	eor	r2,r7,r8 -	veor	d25,d25,d24 -	eor	r0,r6,r6,ror#5 -	add	r10,r10,r12 -	vadd.i32	d0,d0,d25 -	and	r2,r2,r6 -	eor	r12,r0,r6,ror#19 -	vshr.u32	d24,d0,#17 -	eor	r0,r10,r10,ror#11 -	eor	r2,r2,r8 -	vsli.32	d24,d0,#15 -	add	r9,r9,r12,ror#6 -	eor	r12,r10,r11 -	vshr.u32	d25,d0,#10 -	eor	r0,r0,r10,ror#20 -	add	r9,r9,r2 -	veor	d25,d25,d24 -	ldr	r2,[sp,#12] -	and	r3,r3,r12 -	vshr.u32	d24,d0,#19 -	add	r5,r5,r9 -	add	r9,r9,r0,ror#2 -	eor	r3,r3,r11 -	vld1.32	{q8},[r14,:128]! -	add	r8,r8,r2 -	vsli.32	d24,d0,#13 -	eor	r2,r6,r7 -	eor	r0,r5,r5,ror#5 -	veor	d25,d25,d24 -	add	r9,r9,r3 -	and	r2,r2,r5 -	vadd.i32	d1,d1,d25 -	eor	r3,r0,r5,ror#19 -	eor	r0,r9,r9,ror#11 -	vadd.i32	q8,q8,q0 -	eor	r2,r2,r7 -	add	r8,r8,r3,ror#6 -	eor	r3,r9,r10 -	eor	r0,r0,r9,ror#20 -	add	r8,r8,r2 -	ldr	r2,[sp,#16] -	and	r12,r12,r3 -	add	r4,r4,r8 -	vst1.32	{q8},[r1,:128]! -	add	r8,r8,r0,ror#2 -	eor	r12,r12,r10 -	vext.8	q8,q1,q2,#4 -	add	r7,r7,r2 -	eor	r2,r5,r6 -	eor	r0,r4,r4,ror#5 -	vext.8	q9,q3,q0,#4 -	add	r8,r8,r12 -	and	r2,r2,r4 -	eor	r12,r0,r4,ror#19 -	vshr.u32	q10,q8,#7 -	eor	r0,r8,r8,ror#11 -	eor	r2,r2,r6 -	vadd.i32	q1,q1,q9 -	add	r7,r7,r12,ror#6 -	eor	r12,r8,r9 -	vshr.u32	q9,q8,#3 -	eor	r0,r0,r8,ror#20 -	add	r7,r7,r2 -	vsli.32	q10,q8,#25 -	ldr	r2,[sp,#20] -	and	r3,r3,r12 -	vshr.u32	q11,q8,#18 -	add	r11,r11,r7 -	add	r7,r7,r0,ror#2 -	eor	r3,r3,r9 -	veor	q9,q9,q10 -	add	r6,r6,r2 -	vsli.32	q11,q8,#14 -	eor	r2,r4,r5 -	eor	r0,r11,r11,ror#5 -	vshr.u32	d24,d1,#17 -	add	r7,r7,r3 -	and	r2,r2,r11 -	veor	q9,q9,q11 -	eor	r3,r0,r11,ror#19 -	eor	r0,r7,r7,ror#11 -	vsli.32	d24,d1,#15 -	eor	r2,r2,r5 -	add	r6,r6,r3,ror#6 -	vshr.u32	d25,d1,#10 -	eor	r3,r7,r8 -	eor	r0,r0,r7,ror#20 -	vadd.i32	q1,q1,q9 -	add	r6,r6,r2 -	ldr	r2,[sp,#24] -	veor	d25,d25,d24 -	and	r12,r12,r3 -	add	r10,r10,r6 -	vshr.u32	d24,d1,#19 -	add	r6,r6,r0,ror#2 -	eor	r12,r12,r8 -	vsli.32	d24,d1,#13 -	add	r5,r5,r2 -	eor	r2,r11,r4 -	veor	d25,d25,d24 -	eor	r0,r10,r10,ror#5 -	add	r6,r6,r12 -	vadd.i32	d2,d2,d25 -	and	r2,r2,r10 -	eor	r12,r0,r10,ror#19 -	vshr.u32	d24,d2,#17 -	eor	r0,r6,r6,ror#11 -	eor	r2,r2,r4 -	vsli.32	d24,d2,#15 -	add	r5,r5,r12,ror#6 -	eor	r12,r6,r7 -	vshr.u32	d25,d2,#10 -	eor	r0,r0,r6,ror#20 -	add	r5,r5,r2 -	veor	d25,d25,d24 -	ldr	r2,[sp,#28] -	and	r3,r3,r12 -	vshr.u32	d24,d2,#19 -	add	r9,r9,r5 -	add	r5,r5,r0,ror#2 -	eor	r3,r3,r7 -	vld1.32	{q8},[r14,:128]! 
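In the deleted NEON path above, each `vshr`/`vsli` pair builds a 32-bit per-lane rotate — `vshr.u32 #7` followed by `vsli.32 #25` yields `ror #7` — so the message schedule runs four words per instruction group. A hedged intrinsics sketch of that idiom, under the helper definitions above; `sigma0_x4` is an illustrative name, not code from this tree:

#include <arm_neon.h>

/* Four-lane sigma0, mirroring the vshr/vsli sequence in .L_00_48:
 * vsli inserts (x << n) over the low bits of the shifted-right value,
 * which reconstitutes a rotate on each 32-bit lane. */
static inline uint32x4_t sigma0_x4(uint32x4_t x)
{
    uint32x4_t r7  = vsliq_n_u32(vshrq_n_u32(x, 7),  x, 25);  /* ror #7  */
    uint32x4_t r18 = vsliq_n_u32(vshrq_n_u32(x, 18), x, 14);  /* ror #18 */
    return veorq_u32(veorq_u32(r7, r18), vshrq_n_u32(x, 3));  /* ^ (x >> 3) */
}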
-	add	r4,r4,r2 -	vsli.32	d24,d2,#13 -	eor	r2,r10,r11 -	eor	r0,r9,r9,ror#5 -	veor	d25,d25,d24 -	add	r5,r5,r3 -	and	r2,r2,r9 -	vadd.i32	d3,d3,d25 -	eor	r3,r0,r9,ror#19 -	eor	r0,r5,r5,ror#11 -	vadd.i32	q8,q8,q1 -	eor	r2,r2,r11 -	add	r4,r4,r3,ror#6 -	eor	r3,r5,r6 -	eor	r0,r0,r5,ror#20 -	add	r4,r4,r2 -	ldr	r2,[sp,#32] -	and	r12,r12,r3 -	add	r8,r8,r4 -	vst1.32	{q8},[r1,:128]! -	add	r4,r4,r0,ror#2 -	eor	r12,r12,r6 -	vext.8	q8,q2,q3,#4 -	add	r11,r11,r2 -	eor	r2,r9,r10 -	eor	r0,r8,r8,ror#5 -	vext.8	q9,q0,q1,#4 -	add	r4,r4,r12 -	and	r2,r2,r8 -	eor	r12,r0,r8,ror#19 -	vshr.u32	q10,q8,#7 -	eor	r0,r4,r4,ror#11 -	eor	r2,r2,r10 -	vadd.i32	q2,q2,q9 -	add	r11,r11,r12,ror#6 -	eor	r12,r4,r5 -	vshr.u32	q9,q8,#3 -	eor	r0,r0,r4,ror#20 -	add	r11,r11,r2 -	vsli.32	q10,q8,#25 -	ldr	r2,[sp,#36] -	and	r3,r3,r12 -	vshr.u32	q11,q8,#18 -	add	r7,r7,r11 -	add	r11,r11,r0,ror#2 -	eor	r3,r3,r5 -	veor	q9,q9,q10 -	add	r10,r10,r2 -	vsli.32	q11,q8,#14 -	eor	r2,r8,r9 -	eor	r0,r7,r7,ror#5 -	vshr.u32	d24,d3,#17 -	add	r11,r11,r3 -	and	r2,r2,r7 -	veor	q9,q9,q11 -	eor	r3,r0,r7,ror#19 -	eor	r0,r11,r11,ror#11 -	vsli.32	d24,d3,#15 -	eor	r2,r2,r9 -	add	r10,r10,r3,ror#6 -	vshr.u32	d25,d3,#10 -	eor	r3,r11,r4 -	eor	r0,r0,r11,ror#20 -	vadd.i32	q2,q2,q9 -	add	r10,r10,r2 -	ldr	r2,[sp,#40] -	veor	d25,d25,d24 -	and	r12,r12,r3 -	add	r6,r6,r10 -	vshr.u32	d24,d3,#19 -	add	r10,r10,r0,ror#2 -	eor	r12,r12,r4 -	vsli.32	d24,d3,#13 -	add	r9,r9,r2 -	eor	r2,r7,r8 -	veor	d25,d25,d24 -	eor	r0,r6,r6,ror#5 -	add	r10,r10,r12 -	vadd.i32	d4,d4,d25 -	and	r2,r2,r6 -	eor	r12,r0,r6,ror#19 -	vshr.u32	d24,d4,#17 -	eor	r0,r10,r10,ror#11 -	eor	r2,r2,r8 -	vsli.32	d24,d4,#15 -	add	r9,r9,r12,ror#6 -	eor	r12,r10,r11 -	vshr.u32	d25,d4,#10 -	eor	r0,r0,r10,ror#20 -	add	r9,r9,r2 -	veor	d25,d25,d24 -	ldr	r2,[sp,#44] -	and	r3,r3,r12 -	vshr.u32	d24,d4,#19 -	add	r5,r5,r9 -	add	r9,r9,r0,ror#2 -	eor	r3,r3,r11 -	vld1.32	{q8},[r14,:128]! -	add	r8,r8,r2 -	vsli.32	d24,d4,#13 -	eor	r2,r6,r7 -	eor	r0,r5,r5,ror#5 -	veor	d25,d25,d24 -	add	r9,r9,r3 -	and	r2,r2,r5 -	vadd.i32	d5,d5,d25 -	eor	r3,r0,r5,ror#19 -	eor	r0,r9,r9,ror#11 -	vadd.i32	q8,q8,q2 -	eor	r2,r2,r7 -	add	r8,r8,r3,ror#6 -	eor	r3,r9,r10 -	eor	r0,r0,r9,ror#20 -	add	r8,r8,r2 -	ldr	r2,[sp,#48] -	and	r12,r12,r3 -	add	r4,r4,r8 -	vst1.32	{q8},[r1,:128]! 
-	add	r8,r8,r0,ror#2 -	eor	r12,r12,r10 -	vext.8	q8,q3,q0,#4 -	add	r7,r7,r2 -	eor	r2,r5,r6 -	eor	r0,r4,r4,ror#5 -	vext.8	q9,q1,q2,#4 -	add	r8,r8,r12 -	and	r2,r2,r4 -	eor	r12,r0,r4,ror#19 -	vshr.u32	q10,q8,#7 -	eor	r0,r8,r8,ror#11 -	eor	r2,r2,r6 -	vadd.i32	q3,q3,q9 -	add	r7,r7,r12,ror#6 -	eor	r12,r8,r9 -	vshr.u32	q9,q8,#3 -	eor	r0,r0,r8,ror#20 -	add	r7,r7,r2 -	vsli.32	q10,q8,#25 -	ldr	r2,[sp,#52] -	and	r3,r3,r12 -	vshr.u32	q11,q8,#18 -	add	r11,r11,r7 -	add	r7,r7,r0,ror#2 -	eor	r3,r3,r9 -	veor	q9,q9,q10 -	add	r6,r6,r2 -	vsli.32	q11,q8,#14 -	eor	r2,r4,r5 -	eor	r0,r11,r11,ror#5 -	vshr.u32	d24,d5,#17 -	add	r7,r7,r3 -	and	r2,r2,r11 -	veor	q9,q9,q11 -	eor	r3,r0,r11,ror#19 -	eor	r0,r7,r7,ror#11 -	vsli.32	d24,d5,#15 -	eor	r2,r2,r5 -	add	r6,r6,r3,ror#6 -	vshr.u32	d25,d5,#10 -	eor	r3,r7,r8 -	eor	r0,r0,r7,ror#20 -	vadd.i32	q3,q3,q9 -	add	r6,r6,r2 -	ldr	r2,[sp,#56] -	veor	d25,d25,d24 -	and	r12,r12,r3 -	add	r10,r10,r6 -	vshr.u32	d24,d5,#19 -	add	r6,r6,r0,ror#2 -	eor	r12,r12,r8 -	vsli.32	d24,d5,#13 -	add	r5,r5,r2 -	eor	r2,r11,r4 -	veor	d25,d25,d24 -	eor	r0,r10,r10,ror#5 -	add	r6,r6,r12 -	vadd.i32	d6,d6,d25 -	and	r2,r2,r10 -	eor	r12,r0,r10,ror#19 -	vshr.u32	d24,d6,#17 -	eor	r0,r6,r6,ror#11 -	eor	r2,r2,r4 -	vsli.32	d24,d6,#15 -	add	r5,r5,r12,ror#6 -	eor	r12,r6,r7 -	vshr.u32	d25,d6,#10 -	eor	r0,r0,r6,ror#20 -	add	r5,r5,r2 -	veor	d25,d25,d24 -	ldr	r2,[sp,#60] -	and	r3,r3,r12 -	vshr.u32	d24,d6,#19 -	add	r9,r9,r5 -	add	r5,r5,r0,ror#2 -	eor	r3,r3,r7 -	vld1.32	{q8},[r14,:128]! -	add	r4,r4,r2 -	vsli.32	d24,d6,#13 -	eor	r2,r10,r11 -	eor	r0,r9,r9,ror#5 -	veor	d25,d25,d24 -	add	r5,r5,r3 -	and	r2,r2,r9 -	vadd.i32	d7,d7,d25 -	eor	r3,r0,r9,ror#19 -	eor	r0,r5,r5,ror#11 -	vadd.i32	q8,q8,q3 -	eor	r2,r2,r11 -	add	r4,r4,r3,ror#6 -	eor	r3,r5,r6 -	eor	r0,r0,r5,ror#20 -	add	r4,r4,r2 -	ldr	r2,[r14] -	and	r12,r12,r3 -	add	r8,r8,r4 -	vst1.32	{q8},[r1,:128]! -	add	r4,r4,r0,ror#2 -	eor	r12,r12,r6 -	teq	r2,#0				@ check for K256 terminator -	ldr	r2,[sp,#0] -	sub	r1,r1,#64 -	bne	.L_00_48 - -	ldr		r1,[sp,#68] -	ldr		r0,[sp,#72] -	sub		r14,r14,#256	@ rewind r14 -	teq		r1,r0 -	subeq		r1,r1,#64		@ avoid SEGV -	vld1.8		{q0},[r1]!		@ load next input block -	vld1.8		{q1},[r1]! -	vld1.8		{q2},[r1]! -	vld1.8		{q3},[r1]! -	strne		r1,[sp,#68] -	mov		r1,sp -	add	r11,r11,r2 -	eor	r2,r9,r10 -	eor	r0,r8,r8,ror#5 -	add	r4,r4,r12 -	vld1.32	{q8},[r14,:128]! 
-	and	r2,r2,r8 -	eor	r12,r0,r8,ror#19 -	eor	r0,r4,r4,ror#11 -	eor	r2,r2,r10 -	vrev32.8	q0,q0 -	add	r11,r11,r12,ror#6 -	eor	r12,r4,r5 -	eor	r0,r0,r4,ror#20 -	add	r11,r11,r2 -	vadd.i32	q8,q8,q0 -	ldr	r2,[sp,#4] -	and	r3,r3,r12 -	add	r7,r7,r11 -	add	r11,r11,r0,ror#2 -	eor	r3,r3,r5 -	add	r10,r10,r2 -	eor	r2,r8,r9 -	eor	r0,r7,r7,ror#5 -	add	r11,r11,r3 -	and	r2,r2,r7 -	eor	r3,r0,r7,ror#19 -	eor	r0,r11,r11,ror#11 -	eor	r2,r2,r9 -	add	r10,r10,r3,ror#6 -	eor	r3,r11,r4 -	eor	r0,r0,r11,ror#20 -	add	r10,r10,r2 -	ldr	r2,[sp,#8] -	and	r12,r12,r3 -	add	r6,r6,r10 -	add	r10,r10,r0,ror#2 -	eor	r12,r12,r4 -	add	r9,r9,r2 -	eor	r2,r7,r8 -	eor	r0,r6,r6,ror#5 -	add	r10,r10,r12 -	and	r2,r2,r6 -	eor	r12,r0,r6,ror#19 -	eor	r0,r10,r10,ror#11 -	eor	r2,r2,r8 -	add	r9,r9,r12,ror#6 -	eor	r12,r10,r11 -	eor	r0,r0,r10,ror#20 -	add	r9,r9,r2 -	ldr	r2,[sp,#12] -	and	r3,r3,r12 -	add	r5,r5,r9 -	add	r9,r9,r0,ror#2 -	eor	r3,r3,r11 -	add	r8,r8,r2 -	eor	r2,r6,r7 -	eor	r0,r5,r5,ror#5 -	add	r9,r9,r3 -	and	r2,r2,r5 -	eor	r3,r0,r5,ror#19 -	eor	r0,r9,r9,ror#11 -	eor	r2,r2,r7 -	add	r8,r8,r3,ror#6 -	eor	r3,r9,r10 -	eor	r0,r0,r9,ror#20 -	add	r8,r8,r2 -	ldr	r2,[sp,#16] -	and	r12,r12,r3 -	add	r4,r4,r8 -	add	r8,r8,r0,ror#2 -	eor	r12,r12,r10 -	vst1.32	{q8},[r1,:128]! -	add	r7,r7,r2 -	eor	r2,r5,r6 -	eor	r0,r4,r4,ror#5 -	add	r8,r8,r12 -	vld1.32	{q8},[r14,:128]! -	and	r2,r2,r4 -	eor	r12,r0,r4,ror#19 -	eor	r0,r8,r8,ror#11 -	eor	r2,r2,r6 -	vrev32.8	q1,q1 -	add	r7,r7,r12,ror#6 -	eor	r12,r8,r9 -	eor	r0,r0,r8,ror#20 -	add	r7,r7,r2 -	vadd.i32	q8,q8,q1 -	ldr	r2,[sp,#20] -	and	r3,r3,r12 -	add	r11,r11,r7 -	add	r7,r7,r0,ror#2 -	eor	r3,r3,r9 -	add	r6,r6,r2 -	eor	r2,r4,r5 -	eor	r0,r11,r11,ror#5 -	add	r7,r7,r3 -	and	r2,r2,r11 -	eor	r3,r0,r11,ror#19 -	eor	r0,r7,r7,ror#11 -	eor	r2,r2,r5 -	add	r6,r6,r3,ror#6 -	eor	r3,r7,r8 -	eor	r0,r0,r7,ror#20 -	add	r6,r6,r2 -	ldr	r2,[sp,#24] -	and	r12,r12,r3 -	add	r10,r10,r6 -	add	r6,r6,r0,ror#2 -	eor	r12,r12,r8 -	add	r5,r5,r2 -	eor	r2,r11,r4 -	eor	r0,r10,r10,ror#5 -	add	r6,r6,r12 -	and	r2,r2,r10 -	eor	r12,r0,r10,ror#19 -	eor	r0,r6,r6,ror#11 -	eor	r2,r2,r4 -	add	r5,r5,r12,ror#6 -	eor	r12,r6,r7 -	eor	r0,r0,r6,ror#20 -	add	r5,r5,r2 -	ldr	r2,[sp,#28] -	and	r3,r3,r12 -	add	r9,r9,r5 -	add	r5,r5,r0,ror#2 -	eor	r3,r3,r7 -	add	r4,r4,r2 -	eor	r2,r10,r11 -	eor	r0,r9,r9,ror#5 -	add	r5,r5,r3 -	and	r2,r2,r9 -	eor	r3,r0,r9,ror#19 -	eor	r0,r5,r5,ror#11 -	eor	r2,r2,r11 -	add	r4,r4,r3,ror#6 -	eor	r3,r5,r6 -	eor	r0,r0,r5,ror#20 -	add	r4,r4,r2 -	ldr	r2,[sp,#32] -	and	r12,r12,r3 -	add	r8,r8,r4 -	add	r4,r4,r0,ror#2 -	eor	r12,r12,r6 -	vst1.32	{q8},[r1,:128]! -	add	r11,r11,r2 -	eor	r2,r9,r10 -	eor	r0,r8,r8,ror#5 -	add	r4,r4,r12 -	vld1.32	{q8},[r14,:128]! 
-	and	r2,r2,r8 -	eor	r12,r0,r8,ror#19 -	eor	r0,r4,r4,ror#11 -	eor	r2,r2,r10 -	vrev32.8	q2,q2 -	add	r11,r11,r12,ror#6 -	eor	r12,r4,r5 -	eor	r0,r0,r4,ror#20 -	add	r11,r11,r2 -	vadd.i32	q8,q8,q2 -	ldr	r2,[sp,#36] -	and	r3,r3,r12 -	add	r7,r7,r11 -	add	r11,r11,r0,ror#2 -	eor	r3,r3,r5 -	add	r10,r10,r2 -	eor	r2,r8,r9 -	eor	r0,r7,r7,ror#5 -	add	r11,r11,r3 -	and	r2,r2,r7 -	eor	r3,r0,r7,ror#19 -	eor	r0,r11,r11,ror#11 -	eor	r2,r2,r9 -	add	r10,r10,r3,ror#6 -	eor	r3,r11,r4 -	eor	r0,r0,r11,ror#20 -	add	r10,r10,r2 -	ldr	r2,[sp,#40] -	and	r12,r12,r3 -	add	r6,r6,r10 -	add	r10,r10,r0,ror#2 -	eor	r12,r12,r4 -	add	r9,r9,r2 -	eor	r2,r7,r8 -	eor	r0,r6,r6,ror#5 -	add	r10,r10,r12 -	and	r2,r2,r6 -	eor	r12,r0,r6,ror#19 -	eor	r0,r10,r10,ror#11 -	eor	r2,r2,r8 -	add	r9,r9,r12,ror#6 -	eor	r12,r10,r11 -	eor	r0,r0,r10,ror#20 -	add	r9,r9,r2 -	ldr	r2,[sp,#44] -	and	r3,r3,r12 -	add	r5,r5,r9 -	add	r9,r9,r0,ror#2 -	eor	r3,r3,r11 -	add	r8,r8,r2 -	eor	r2,r6,r7 -	eor	r0,r5,r5,ror#5 -	add	r9,r9,r3 -	and	r2,r2,r5 -	eor	r3,r0,r5,ror#19 -	eor	r0,r9,r9,ror#11 -	eor	r2,r2,r7 -	add	r8,r8,r3,ror#6 -	eor	r3,r9,r10 -	eor	r0,r0,r9,ror#20 -	add	r8,r8,r2 -	ldr	r2,[sp,#48] -	and	r12,r12,r3 -	add	r4,r4,r8 -	add	r8,r8,r0,ror#2 -	eor	r12,r12,r10 -	vst1.32	{q8},[r1,:128]! -	add	r7,r7,r2 -	eor	r2,r5,r6 -	eor	r0,r4,r4,ror#5 -	add	r8,r8,r12 -	vld1.32	{q8},[r14,:128]! -	and	r2,r2,r4 -	eor	r12,r0,r4,ror#19 -	eor	r0,r8,r8,ror#11 -	eor	r2,r2,r6 -	vrev32.8	q3,q3 -	add	r7,r7,r12,ror#6 -	eor	r12,r8,r9 -	eor	r0,r0,r8,ror#20 -	add	r7,r7,r2 -	vadd.i32	q8,q8,q3 -	ldr	r2,[sp,#52] -	and	r3,r3,r12 -	add	r11,r11,r7 -	add	r7,r7,r0,ror#2 -	eor	r3,r3,r9 -	add	r6,r6,r2 -	eor	r2,r4,r5 -	eor	r0,r11,r11,ror#5 -	add	r7,r7,r3 -	and	r2,r2,r11 -	eor	r3,r0,r11,ror#19 -	eor	r0,r7,r7,ror#11 -	eor	r2,r2,r5 -	add	r6,r6,r3,ror#6 -	eor	r3,r7,r8 -	eor	r0,r0,r7,ror#20 -	add	r6,r6,r2 -	ldr	r2,[sp,#56] -	and	r12,r12,r3 -	add	r10,r10,r6 -	add	r6,r6,r0,ror#2 -	eor	r12,r12,r8 -	add	r5,r5,r2 -	eor	r2,r11,r4 -	eor	r0,r10,r10,ror#5 -	add	r6,r6,r12 -	and	r2,r2,r10 -	eor	r12,r0,r10,ror#19 -	eor	r0,r6,r6,ror#11 -	eor	r2,r2,r4 -	add	r5,r5,r12,ror#6 -	eor	r12,r6,r7 -	eor	r0,r0,r6,ror#20 -	add	r5,r5,r2 -	ldr	r2,[sp,#60] -	and	r3,r3,r12 -	add	r9,r9,r5 -	add	r5,r5,r0,ror#2 -	eor	r3,r3,r7 -	add	r4,r4,r2 -	eor	r2,r10,r11 -	eor	r0,r9,r9,ror#5 -	add	r5,r5,r3 -	and	r2,r2,r9 -	eor	r3,r0,r9,ror#19 -	eor	r0,r5,r5,ror#11 -	eor	r2,r2,r11 -	add	r4,r4,r3,ror#6 -	eor	r3,r5,r6 -	eor	r0,r0,r5,ror#20 -	add	r4,r4,r2 -	ldr	r2,[sp,#64] -	and	r12,r12,r3 -	add	r8,r8,r4 -	add	r4,r4,r0,ror#2 -	eor	r12,r12,r6 -	vst1.32	{q8},[r1,:128]! -	ldr	r0,[r2,#0] -	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past -	ldr	r12,[r2,#4] -	ldr	r3,[r2,#8] -	ldr	r1,[r2,#12] -	add	r4,r4,r0			@ accumulate -	ldr	r0,[r2,#16] -	add	r5,r5,r12 -	ldr	r12,[r2,#20] -	add	r6,r6,r3 -	ldr	r3,[r2,#24] -	add	r7,r7,r1 -	ldr	r1,[r2,#28] -	add	r8,r8,r0 -	str	r4,[r2],#4 -	add	r9,r9,r12 -	str	r5,[r2],#4 -	add	r10,r10,r3 -	str	r6,[r2],#4 -	add	r11,r11,r1 -	str	r7,[r2],#4 -	stmia	r2,{r8-r11} - -	movne	r1,sp -	ldrne	r2,[sp,#0] -	eorne	r12,r12,r12 -	ldreq	sp,[sp,#76]			@ restore original sp -	eorne	r3,r5,r6 -	bne	.L_00_48 - -	ldmia	sp!,{r4-r12,pc} -.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon -#endif -#if __ARM_ARCH__>=7 -.type	sha256_block_data_order_armv8,%function -.align	5 -sha256_block_data_order_armv8: -.LARMv8: -	vld1.32	{q0,q1},[r0] -	sub	r3,r3,#sha256_block_data_order-K256 - -.Loop_v8: -	vld1.8		{q8-q9},[r1]! -	vld1.8		{q10-q11},[r1]! -	vld1.32		{q12},[r3]! 
-	vrev32.8	q8,q8 -	vrev32.8	q9,q9 -	vrev32.8	q10,q10 -	vrev32.8	q11,q11 -	vmov		q14,q0	@ offload -	vmov		q15,q1 -	teq		r1,r2 -	vld1.32		{q13},[r3]! -	vadd.i32	q12,q12,q8 -	.byte	0xe2,0x03,0xfa,0xf3	@ sha256su0 q8,q9 -	vmov		q2,q0 -	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12 -	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12 -	.byte	0xe6,0x0c,0x64,0xf3	@ sha256su1 q8,q10,q11 -	vld1.32		{q12},[r3]! -	vadd.i32	q13,q13,q9 -	.byte	0xe4,0x23,0xfa,0xf3	@ sha256su0 q9,q10 -	vmov		q2,q0 -	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13 -	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13 -	.byte	0xe0,0x2c,0x66,0xf3	@ sha256su1 q9,q11,q8 -	vld1.32		{q13},[r3]! -	vadd.i32	q12,q12,q10 -	.byte	0xe6,0x43,0xfa,0xf3	@ sha256su0 q10,q11 -	vmov		q2,q0 -	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12 -	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12 -	.byte	0xe2,0x4c,0x60,0xf3	@ sha256su1 q10,q8,q9 -	vld1.32		{q12},[r3]! -	vadd.i32	q13,q13,q11 -	.byte	0xe0,0x63,0xfa,0xf3	@ sha256su0 q11,q8 -	vmov		q2,q0 -	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13 -	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13 -	.byte	0xe4,0x6c,0x62,0xf3	@ sha256su1 q11,q9,q10 -	vld1.32		{q13},[r3]! -	vadd.i32	q12,q12,q8 -	.byte	0xe2,0x03,0xfa,0xf3	@ sha256su0 q8,q9 -	vmov		q2,q0 -	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12 -	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12 -	.byte	0xe6,0x0c,0x64,0xf3	@ sha256su1 q8,q10,q11 -	vld1.32		{q12},[r3]! -	vadd.i32	q13,q13,q9 -	.byte	0xe4,0x23,0xfa,0xf3	@ sha256su0 q9,q10 -	vmov		q2,q0 -	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13 -	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13 -	.byte	0xe0,0x2c,0x66,0xf3	@ sha256su1 q9,q11,q8 -	vld1.32		{q13},[r3]! -	vadd.i32	q12,q12,q10 -	.byte	0xe6,0x43,0xfa,0xf3	@ sha256su0 q10,q11 -	vmov		q2,q0 -	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12 -	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12 -	.byte	0xe2,0x4c,0x60,0xf3	@ sha256su1 q10,q8,q9 -	vld1.32		{q12},[r3]! -	vadd.i32	q13,q13,q11 -	.byte	0xe0,0x63,0xfa,0xf3	@ sha256su0 q11,q8 -	vmov		q2,q0 -	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13 -	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13 -	.byte	0xe4,0x6c,0x62,0xf3	@ sha256su1 q11,q9,q10 -	vld1.32		{q13},[r3]! -	vadd.i32	q12,q12,q8 -	.byte	0xe2,0x03,0xfa,0xf3	@ sha256su0 q8,q9 -	vmov		q2,q0 -	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12 -	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12 -	.byte	0xe6,0x0c,0x64,0xf3	@ sha256su1 q8,q10,q11 -	vld1.32		{q12},[r3]! -	vadd.i32	q13,q13,q9 -	.byte	0xe4,0x23,0xfa,0xf3	@ sha256su0 q9,q10 -	vmov		q2,q0 -	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13 -	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13 -	.byte	0xe0,0x2c,0x66,0xf3	@ sha256su1 q9,q11,q8 -	vld1.32		{q13},[r3]! -	vadd.i32	q12,q12,q10 -	.byte	0xe6,0x43,0xfa,0xf3	@ sha256su0 q10,q11 -	vmov		q2,q0 -	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12 -	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12 -	.byte	0xe2,0x4c,0x60,0xf3	@ sha256su1 q10,q8,q9 -	vld1.32		{q12},[r3]! -	vadd.i32	q13,q13,q11 -	.byte	0xe0,0x63,0xfa,0xf3	@ sha256su0 q11,q8 -	vmov		q2,q0 -	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13 -	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13 -	.byte	0xe4,0x6c,0x62,0xf3	@ sha256su1 q11,q9,q10 -	vld1.32		{q13},[r3]! -	vadd.i32	q12,q12,q8 -	vmov		q2,q0 -	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12 -	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12 - -	vld1.32		{q12},[r3]! 
-	vadd.i32	q13,q13,q9 -	vmov		q2,q0 -	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13 -	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13 - -	vld1.32		{q13},[r3] -	vadd.i32	q12,q12,q10 -	sub		r3,r3,#256-16	@ rewind -	vmov		q2,q0 -	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12 -	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12 - -	vadd.i32	q13,q13,q11 -	vmov		q2,q0 -	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13 -	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13 - -	vadd.i32	q0,q0,q14 -	vadd.i32	q1,q1,q15 -	bne		.Loop_v8 - -	vst1.32		{q0,q1},[r0] - -	bx	lr		@ bx lr -.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 -#endif -.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>" +.size   sha256_block_data_order,.-sha256_block_data_order +.asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"  .align	2 -.comm   OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha256-armv8.S b/app/openssl/crypto/sha/asm/sha256-armv8.S deleted file mode 100644 index bd43b1fe..00000000 --- a/app/openssl/crypto/sha/asm/sha256-armv8.S +++ /dev/null @@ -1,1141 +0,0 @@ -#include "arm_arch.h" - -.text - -.globl	sha256_block_data_order -.type	sha256_block_data_order,%function -.align	6 -sha256_block_data_order: -	ldr	x16,.LOPENSSL_armcap_P -	adr	x17,.LOPENSSL_armcap_P -	add	x16,x16,x17 -	ldr	w16,[x16] -	tst	w16,#ARMV8_SHA256 -	b.ne	.Lv8_entry -	stp	x29,x30,[sp,#-128]! -	add	x29,sp,#0 - -	stp	x19,x20,[sp,#16] -	stp	x21,x22,[sp,#32] -	stp	x23,x24,[sp,#48] -	stp	x25,x26,[sp,#64] -	stp	x27,x28,[sp,#80] -	sub	sp,sp,#4*4 - -	ldp	w20,w21,[x0]				// load context -	ldp	w22,w23,[x0,#2*4] -	ldp	w24,w25,[x0,#4*4] -	add	x2,x1,x2,lsl#6	// end of input -	ldp	w26,w27,[x0,#6*4] -	adr	x30,K256 -	stp	x0,x2,[x29,#96] - -.Loop: -	ldp	w3,w4,[x1],#2*4 -	ldr	w19,[x30],#4			// *K++ -	eor	w28,w21,w22				// magic seed -	str	x1,[x29,#112] -#ifndef	__ARMEB__ -	rev	w3,w3			// 0 -#endif -	ror	w16,w24,#6 -	add	w27,w27,w19			// h+=K[i] -	eor	w6,w24,w24,ror#14 -	and	w17,w25,w24 -	bic	w19,w26,w24 -	add	w27,w27,w3			// h+=X[i] -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w20,w21			// a^b, b^c in next round -	eor	w16,w16,w6,ror#11	// Sigma1(e) -	ror	w6,w20,#2 -	add	w27,w27,w17			// h+=Ch(e,f,g) -	eor	w17,w20,w20,ror#9 -	add	w27,w27,w16			// h+=Sigma1(e) -	and	w28,w28,w19			// (b^c)&=(a^b) -	add	w23,w23,w27			// d+=h -	eor	w28,w28,w21			// Maj(a,b,c) -	eor	w17,w6,w17,ror#13	// Sigma0(a) -	add	w27,w27,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	//add	w27,w27,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w4,w4			// 1 -#endif -	ldp	w5,w6,[x1],#2*4 -	add	w27,w27,w17			// h+=Sigma0(a) -	ror	w16,w23,#6 -	add	w26,w26,w28			// h+=K[i] -	eor	w7,w23,w23,ror#14 -	and	w17,w24,w23 -	bic	w28,w25,w23 -	add	w26,w26,w4			// h+=X[i] -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w27,w20			// a^b, b^c in next round -	eor	w16,w16,w7,ror#11	// Sigma1(e) -	ror	w7,w27,#2 -	add	w26,w26,w17			// h+=Ch(e,f,g) -	eor	w17,w27,w27,ror#9 -	add	w26,w26,w16			// h+=Sigma1(e) -	and	w19,w19,w28			// (b^c)&=(a^b) -	add	w22,w22,w26			// d+=h -	eor	w19,w19,w20			// Maj(a,b,c) -	eor	w17,w7,w17,ror#13	// Sigma0(a) -	add	w26,w26,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	//add	w26,w26,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w5,w5			// 2 -#endif -	add	w26,w26,w17			// h+=Sigma0(a) -	ror	w16,w22,#6 -	add	w25,w25,w19			// h+=K[i] -	eor	w8,w22,w22,ror#14 -	and	w17,w23,w22 -	bic	w19,w24,w22 -	add	w25,w25,w5			// h+=X[i] -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	
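The hunks above drop the runtime-dispatched NEON and ARMv8 Crypto Extensions paths — the `.byte` sequences are hand-assembled encodings of `sha256h`/`sha256h2`/`sha256su0`/`sha256su1`, presumably kept as raw bytes so assemblers without the SHA-2 extension mnemonics can still build the file — and the aarch64 `sha256-armv8.S` is deleted outright, with the `.asciz` banner shrinking from "ARMv4/NEON/ARMv8" to "ARMv4" to match. For reference, a portable C sketch of the per-block transform every one of these paths implements, reusing the illustrative helpers sketched earlier; `sha256_block` is a hypothetical name, while `K256` is the round-constant table the assembly walks via `*K256++`:

extern const uint32_t K256[64];   /* standard SHA-256 round constants */

static void sha256_block(uint32_t H[8], const uint8_t p[64])
{
    uint32_t X[64], s[8];
    int i, j;
    for (i = 0; i < 16; i++)       /* big-endian load, cf. rev / vrev32.8 */
        X[i] = (uint32_t)p[4*i] << 24 | (uint32_t)p[4*i+1] << 16
             | (uint32_t)p[4*i+2] << 8 | p[4*i+3];
    for (i = 16; i < 64; i++)      /* message schedule: the BODY_16_xx rounds */
        X[i] = sigma1(X[i-2]) + X[i-7] + sigma0(X[i-15]) + X[i-16];
    for (i = 0; i < 8; i++) s[i] = H[i];
    for (i = 0; i < 64; i++) {     /* a..h live in s[0..7] */
        uint32_t T1 = s[7] + Sigma1(s[4]) + Ch(s[4], s[5], s[6]) + K256[i] + X[i];
        uint32_t T2 = Sigma0(s[0]) + Maj(s[0], s[1], s[2]);
        for (j = 7; j > 0; j--) s[j] = s[j-1];   /* rotate a..h down one slot */
        s[4] += T1;                /* e = d + T1, the "d+=h" of the assembly */
        s[0] = T1 + T2;            /* a = T1 + T2 */
    }
    for (i = 0; i < 8; i++) H[i] += s[i];        /* accumulate into the context */
}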
w19,w26,w27			// a^b, b^c in next round -	eor	w16,w16,w8,ror#11	// Sigma1(e) -	ror	w8,w26,#2 -	add	w25,w25,w17			// h+=Ch(e,f,g) -	eor	w17,w26,w26,ror#9 -	add	w25,w25,w16			// h+=Sigma1(e) -	and	w28,w28,w19			// (b^c)&=(a^b) -	add	w21,w21,w25			// d+=h -	eor	w28,w28,w27			// Maj(a,b,c) -	eor	w17,w8,w17,ror#13	// Sigma0(a) -	add	w25,w25,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	//add	w25,w25,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w6,w6			// 3 -#endif -	ldp	w7,w8,[x1],#2*4 -	add	w25,w25,w17			// h+=Sigma0(a) -	ror	w16,w21,#6 -	add	w24,w24,w28			// h+=K[i] -	eor	w9,w21,w21,ror#14 -	and	w17,w22,w21 -	bic	w28,w23,w21 -	add	w24,w24,w6			// h+=X[i] -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w25,w26			// a^b, b^c in next round -	eor	w16,w16,w9,ror#11	// Sigma1(e) -	ror	w9,w25,#2 -	add	w24,w24,w17			// h+=Ch(e,f,g) -	eor	w17,w25,w25,ror#9 -	add	w24,w24,w16			// h+=Sigma1(e) -	and	w19,w19,w28			// (b^c)&=(a^b) -	add	w20,w20,w24			// d+=h -	eor	w19,w19,w26			// Maj(a,b,c) -	eor	w17,w9,w17,ror#13	// Sigma0(a) -	add	w24,w24,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	//add	w24,w24,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w7,w7			// 4 -#endif -	add	w24,w24,w17			// h+=Sigma0(a) -	ror	w16,w20,#6 -	add	w23,w23,w19			// h+=K[i] -	eor	w10,w20,w20,ror#14 -	and	w17,w21,w20 -	bic	w19,w22,w20 -	add	w23,w23,w7			// h+=X[i] -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w24,w25			// a^b, b^c in next round -	eor	w16,w16,w10,ror#11	// Sigma1(e) -	ror	w10,w24,#2 -	add	w23,w23,w17			// h+=Ch(e,f,g) -	eor	w17,w24,w24,ror#9 -	add	w23,w23,w16			// h+=Sigma1(e) -	and	w28,w28,w19			// (b^c)&=(a^b) -	add	w27,w27,w23			// d+=h -	eor	w28,w28,w25			// Maj(a,b,c) -	eor	w17,w10,w17,ror#13	// Sigma0(a) -	add	w23,w23,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	//add	w23,w23,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w8,w8			// 5 -#endif -	ldp	w9,w10,[x1],#2*4 -	add	w23,w23,w17			// h+=Sigma0(a) -	ror	w16,w27,#6 -	add	w22,w22,w28			// h+=K[i] -	eor	w11,w27,w27,ror#14 -	and	w17,w20,w27 -	bic	w28,w21,w27 -	add	w22,w22,w8			// h+=X[i] -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w23,w24			// a^b, b^c in next round -	eor	w16,w16,w11,ror#11	// Sigma1(e) -	ror	w11,w23,#2 -	add	w22,w22,w17			// h+=Ch(e,f,g) -	eor	w17,w23,w23,ror#9 -	add	w22,w22,w16			// h+=Sigma1(e) -	and	w19,w19,w28			// (b^c)&=(a^b) -	add	w26,w26,w22			// d+=h -	eor	w19,w19,w24			// Maj(a,b,c) -	eor	w17,w11,w17,ror#13	// Sigma0(a) -	add	w22,w22,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	//add	w22,w22,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w9,w9			// 6 -#endif -	add	w22,w22,w17			// h+=Sigma0(a) -	ror	w16,w26,#6 -	add	w21,w21,w19			// h+=K[i] -	eor	w12,w26,w26,ror#14 -	and	w17,w27,w26 -	bic	w19,w20,w26 -	add	w21,w21,w9			// h+=X[i] -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w22,w23			// a^b, b^c in next round -	eor	w16,w16,w12,ror#11	// Sigma1(e) -	ror	w12,w22,#2 -	add	w21,w21,w17			// h+=Ch(e,f,g) -	eor	w17,w22,w22,ror#9 -	add	w21,w21,w16			// h+=Sigma1(e) -	and	w28,w28,w19			// (b^c)&=(a^b) -	add	w25,w25,w21			// d+=h -	eor	w28,w28,w23			// Maj(a,b,c) -	eor	w17,w12,w17,ror#13	// Sigma0(a) -	add	w21,w21,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	//add	w21,w21,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w10,w10			// 7 -#endif -	ldp	w11,w12,[x1],#2*4 -	add	w21,w21,w17			// h+=Sigma0(a) -	ror	w16,w25,#6 -	add	w20,w20,w28			// h+=K[i] -	eor	w13,w25,w25,ror#14 -	and	w17,w26,w25 -	bic	w28,w27,w25 -	add	
w20,w20,w10			// h+=X[i] -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w21,w22			// a^b, b^c in next round -	eor	w16,w16,w13,ror#11	// Sigma1(e) -	ror	w13,w21,#2 -	add	w20,w20,w17			// h+=Ch(e,f,g) -	eor	w17,w21,w21,ror#9 -	add	w20,w20,w16			// h+=Sigma1(e) -	and	w19,w19,w28			// (b^c)&=(a^b) -	add	w24,w24,w20			// d+=h -	eor	w19,w19,w22			// Maj(a,b,c) -	eor	w17,w13,w17,ror#13	// Sigma0(a) -	add	w20,w20,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	//add	w20,w20,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w11,w11			// 8 -#endif -	add	w20,w20,w17			// h+=Sigma0(a) -	ror	w16,w24,#6 -	add	w27,w27,w19			// h+=K[i] -	eor	w14,w24,w24,ror#14 -	and	w17,w25,w24 -	bic	w19,w26,w24 -	add	w27,w27,w11			// h+=X[i] -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w20,w21			// a^b, b^c in next round -	eor	w16,w16,w14,ror#11	// Sigma1(e) -	ror	w14,w20,#2 -	add	w27,w27,w17			// h+=Ch(e,f,g) -	eor	w17,w20,w20,ror#9 -	add	w27,w27,w16			// h+=Sigma1(e) -	and	w28,w28,w19			// (b^c)&=(a^b) -	add	w23,w23,w27			// d+=h -	eor	w28,w28,w21			// Maj(a,b,c) -	eor	w17,w14,w17,ror#13	// Sigma0(a) -	add	w27,w27,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	//add	w27,w27,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w12,w12			// 9 -#endif -	ldp	w13,w14,[x1],#2*4 -	add	w27,w27,w17			// h+=Sigma0(a) -	ror	w16,w23,#6 -	add	w26,w26,w28			// h+=K[i] -	eor	w15,w23,w23,ror#14 -	and	w17,w24,w23 -	bic	w28,w25,w23 -	add	w26,w26,w12			// h+=X[i] -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w27,w20			// a^b, b^c in next round -	eor	w16,w16,w15,ror#11	// Sigma1(e) -	ror	w15,w27,#2 -	add	w26,w26,w17			// h+=Ch(e,f,g) -	eor	w17,w27,w27,ror#9 -	add	w26,w26,w16			// h+=Sigma1(e) -	and	w19,w19,w28			// (b^c)&=(a^b) -	add	w22,w22,w26			// d+=h -	eor	w19,w19,w20			// Maj(a,b,c) -	eor	w17,w15,w17,ror#13	// Sigma0(a) -	add	w26,w26,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	//add	w26,w26,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w13,w13			// 10 -#endif -	add	w26,w26,w17			// h+=Sigma0(a) -	ror	w16,w22,#6 -	add	w25,w25,w19			// h+=K[i] -	eor	w0,w22,w22,ror#14 -	and	w17,w23,w22 -	bic	w19,w24,w22 -	add	w25,w25,w13			// h+=X[i] -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w26,w27			// a^b, b^c in next round -	eor	w16,w16,w0,ror#11	// Sigma1(e) -	ror	w0,w26,#2 -	add	w25,w25,w17			// h+=Ch(e,f,g) -	eor	w17,w26,w26,ror#9 -	add	w25,w25,w16			// h+=Sigma1(e) -	and	w28,w28,w19			// (b^c)&=(a^b) -	add	w21,w21,w25			// d+=h -	eor	w28,w28,w27			// Maj(a,b,c) -	eor	w17,w0,w17,ror#13	// Sigma0(a) -	add	w25,w25,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	//add	w25,w25,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w14,w14			// 11 -#endif -	ldp	w15,w0,[x1],#2*4 -	add	w25,w25,w17			// h+=Sigma0(a) -	str	w6,[sp,#12] -	ror	w16,w21,#6 -	add	w24,w24,w28			// h+=K[i] -	eor	w6,w21,w21,ror#14 -	and	w17,w22,w21 -	bic	w28,w23,w21 -	add	w24,w24,w14			// h+=X[i] -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w25,w26			// a^b, b^c in next round -	eor	w16,w16,w6,ror#11	// Sigma1(e) -	ror	w6,w25,#2 -	add	w24,w24,w17			// h+=Ch(e,f,g) -	eor	w17,w25,w25,ror#9 -	add	w24,w24,w16			// h+=Sigma1(e) -	and	w19,w19,w28			// (b^c)&=(a^b) -	add	w20,w20,w24			// d+=h -	eor	w19,w19,w26			// Maj(a,b,c) -	eor	w17,w6,w17,ror#13	// Sigma0(a) -	add	w24,w24,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	//add	w24,w24,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w15,w15			// 12 -#endif -	add	w24,w24,w17			// h+=Sigma0(a) -	str	w7,[sp,#0] -	ror	w16,w20,#6 -	add	
w23,w23,w19			// h+=K[i] -	eor	w7,w20,w20,ror#14 -	and	w17,w21,w20 -	bic	w19,w22,w20 -	add	w23,w23,w15			// h+=X[i] -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w24,w25			// a^b, b^c in next round -	eor	w16,w16,w7,ror#11	// Sigma1(e) -	ror	w7,w24,#2 -	add	w23,w23,w17			// h+=Ch(e,f,g) -	eor	w17,w24,w24,ror#9 -	add	w23,w23,w16			// h+=Sigma1(e) -	and	w28,w28,w19			// (b^c)&=(a^b) -	add	w27,w27,w23			// d+=h -	eor	w28,w28,w25			// Maj(a,b,c) -	eor	w17,w7,w17,ror#13	// Sigma0(a) -	add	w23,w23,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	//add	w23,w23,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w0,w0			// 13 -#endif -	ldp	w1,w2,[x1] -	add	w23,w23,w17			// h+=Sigma0(a) -	str	w8,[sp,#4] -	ror	w16,w27,#6 -	add	w22,w22,w28			// h+=K[i] -	eor	w8,w27,w27,ror#14 -	and	w17,w20,w27 -	bic	w28,w21,w27 -	add	w22,w22,w0			// h+=X[i] -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w23,w24			// a^b, b^c in next round -	eor	w16,w16,w8,ror#11	// Sigma1(e) -	ror	w8,w23,#2 -	add	w22,w22,w17			// h+=Ch(e,f,g) -	eor	w17,w23,w23,ror#9 -	add	w22,w22,w16			// h+=Sigma1(e) -	and	w19,w19,w28			// (b^c)&=(a^b) -	add	w26,w26,w22			// d+=h -	eor	w19,w19,w24			// Maj(a,b,c) -	eor	w17,w8,w17,ror#13	// Sigma0(a) -	add	w22,w22,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	//add	w22,w22,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w1,w1			// 14 -#endif -	ldr	w6,[sp,#12] -	add	w22,w22,w17			// h+=Sigma0(a) -	str	w9,[sp,#8] -	ror	w16,w26,#6 -	add	w21,w21,w19			// h+=K[i] -	eor	w9,w26,w26,ror#14 -	and	w17,w27,w26 -	bic	w19,w20,w26 -	add	w21,w21,w1			// h+=X[i] -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w22,w23			// a^b, b^c in next round -	eor	w16,w16,w9,ror#11	// Sigma1(e) -	ror	w9,w22,#2 -	add	w21,w21,w17			// h+=Ch(e,f,g) -	eor	w17,w22,w22,ror#9 -	add	w21,w21,w16			// h+=Sigma1(e) -	and	w28,w28,w19			// (b^c)&=(a^b) -	add	w25,w25,w21			// d+=h -	eor	w28,w28,w23			// Maj(a,b,c) -	eor	w17,w9,w17,ror#13	// Sigma0(a) -	add	w21,w21,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	//add	w21,w21,w17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	w2,w2			// 15 -#endif -	ldr	w7,[sp,#0] -	add	w21,w21,w17			// h+=Sigma0(a) -	str	w10,[sp,#12] -	ror	w16,w25,#6 -	add	w20,w20,w28			// h+=K[i] -	ror	w9,w4,#7 -	and	w17,w26,w25 -	ror	w8,w1,#17 -	bic	w28,w27,w25 -	ror	w10,w21,#2 -	add	w20,w20,w2			// h+=X[i] -	eor	w16,w16,w25,ror#11 -	eor	w9,w9,w4,ror#18 -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w21,w22			// a^b, b^c in next round -	eor	w16,w16,w25,ror#25	// Sigma1(e) -	eor	w10,w10,w21,ror#13 -	add	w20,w20,w17			// h+=Ch(e,f,g) -	and	w19,w19,w28			// (b^c)&=(a^b) -	eor	w8,w8,w1,ror#19 -	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1]) -	add	w20,w20,w16			// h+=Sigma1(e) -	eor	w19,w19,w22			// Maj(a,b,c) -	eor	w17,w10,w21,ror#22	// Sigma0(a) -	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14]) -	add	w3,w3,w12 -	add	w24,w24,w20			// d+=h -	add	w20,w20,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	add	w3,w3,w9 -	add	w20,w20,w17			// h+=Sigma0(a) -	add	w3,w3,w8 -.Loop_16_xx: -	ldr	w8,[sp,#4] -	str	w11,[sp,#0] -	ror	w16,w24,#6 -	add	w27,w27,w19			// h+=K[i] -	ror	w10,w5,#7 -	and	w17,w25,w24 -	ror	w9,w2,#17 -	bic	w19,w26,w24 -	ror	w11,w20,#2 -	add	w27,w27,w3			// h+=X[i] -	eor	w16,w16,w24,ror#11 -	eor	w10,w10,w5,ror#18 -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w20,w21			// a^b, b^c in next round -	eor	w16,w16,w24,ror#25	// Sigma1(e) -	eor	w11,w11,w20,ror#13 -	add	w27,w27,w17			// h+=Ch(e,f,g) -	and	w28,w28,w19			// (b^c)&=(a^b) -	eor	w9,w9,w2,ror#19 -	eor	w10,w10,w5,lsr#3	// 
sigma0(X[i+1]) -	add	w27,w27,w16			// h+=Sigma1(e) -	eor	w28,w28,w21			// Maj(a,b,c) -	eor	w17,w11,w20,ror#22	// Sigma0(a) -	eor	w9,w9,w2,lsr#10	// sigma1(X[i+14]) -	add	w4,w4,w13 -	add	w23,w23,w27			// d+=h -	add	w27,w27,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	add	w4,w4,w10 -	add	w27,w27,w17			// h+=Sigma0(a) -	add	w4,w4,w9 -	ldr	w9,[sp,#8] -	str	w12,[sp,#4] -	ror	w16,w23,#6 -	add	w26,w26,w28			// h+=K[i] -	ror	w11,w6,#7 -	and	w17,w24,w23 -	ror	w10,w3,#17 -	bic	w28,w25,w23 -	ror	w12,w27,#2 -	add	w26,w26,w4			// h+=X[i] -	eor	w16,w16,w23,ror#11 -	eor	w11,w11,w6,ror#18 -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w27,w20			// a^b, b^c in next round -	eor	w16,w16,w23,ror#25	// Sigma1(e) -	eor	w12,w12,w27,ror#13 -	add	w26,w26,w17			// h+=Ch(e,f,g) -	and	w19,w19,w28			// (b^c)&=(a^b) -	eor	w10,w10,w3,ror#19 -	eor	w11,w11,w6,lsr#3	// sigma0(X[i+1]) -	add	w26,w26,w16			// h+=Sigma1(e) -	eor	w19,w19,w20			// Maj(a,b,c) -	eor	w17,w12,w27,ror#22	// Sigma0(a) -	eor	w10,w10,w3,lsr#10	// sigma1(X[i+14]) -	add	w5,w5,w14 -	add	w22,w22,w26			// d+=h -	add	w26,w26,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	add	w5,w5,w11 -	add	w26,w26,w17			// h+=Sigma0(a) -	add	w5,w5,w10 -	ldr	w10,[sp,#12] -	str	w13,[sp,#8] -	ror	w16,w22,#6 -	add	w25,w25,w19			// h+=K[i] -	ror	w12,w7,#7 -	and	w17,w23,w22 -	ror	w11,w4,#17 -	bic	w19,w24,w22 -	ror	w13,w26,#2 -	add	w25,w25,w5			// h+=X[i] -	eor	w16,w16,w22,ror#11 -	eor	w12,w12,w7,ror#18 -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w26,w27			// a^b, b^c in next round -	eor	w16,w16,w22,ror#25	// Sigma1(e) -	eor	w13,w13,w26,ror#13 -	add	w25,w25,w17			// h+=Ch(e,f,g) -	and	w28,w28,w19			// (b^c)&=(a^b) -	eor	w11,w11,w4,ror#19 -	eor	w12,w12,w7,lsr#3	// sigma0(X[i+1]) -	add	w25,w25,w16			// h+=Sigma1(e) -	eor	w28,w28,w27			// Maj(a,b,c) -	eor	w17,w13,w26,ror#22	// Sigma0(a) -	eor	w11,w11,w4,lsr#10	// sigma1(X[i+14]) -	add	w6,w6,w15 -	add	w21,w21,w25			// d+=h -	add	w25,w25,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	add	w6,w6,w12 -	add	w25,w25,w17			// h+=Sigma0(a) -	add	w6,w6,w11 -	ldr	w11,[sp,#0] -	str	w14,[sp,#12] -	ror	w16,w21,#6 -	add	w24,w24,w28			// h+=K[i] -	ror	w13,w8,#7 -	and	w17,w22,w21 -	ror	w12,w5,#17 -	bic	w28,w23,w21 -	ror	w14,w25,#2 -	add	w24,w24,w6			// h+=X[i] -	eor	w16,w16,w21,ror#11 -	eor	w13,w13,w8,ror#18 -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w25,w26			// a^b, b^c in next round -	eor	w16,w16,w21,ror#25	// Sigma1(e) -	eor	w14,w14,w25,ror#13 -	add	w24,w24,w17			// h+=Ch(e,f,g) -	and	w19,w19,w28			// (b^c)&=(a^b) -	eor	w12,w12,w5,ror#19 -	eor	w13,w13,w8,lsr#3	// sigma0(X[i+1]) -	add	w24,w24,w16			// h+=Sigma1(e) -	eor	w19,w19,w26			// Maj(a,b,c) -	eor	w17,w14,w25,ror#22	// Sigma0(a) -	eor	w12,w12,w5,lsr#10	// sigma1(X[i+14]) -	add	w7,w7,w0 -	add	w20,w20,w24			// d+=h -	add	w24,w24,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	add	w7,w7,w13 -	add	w24,w24,w17			// h+=Sigma0(a) -	add	w7,w7,w12 -	ldr	w12,[sp,#4] -	str	w15,[sp,#0] -	ror	w16,w20,#6 -	add	w23,w23,w19			// h+=K[i] -	ror	w14,w9,#7 -	and	w17,w21,w20 -	ror	w13,w6,#17 -	bic	w19,w22,w20 -	ror	w15,w24,#2 -	add	w23,w23,w7			// h+=X[i] -	eor	w16,w16,w20,ror#11 -	eor	w14,w14,w9,ror#18 -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w24,w25			// a^b, b^c in next round -	eor	w16,w16,w20,ror#25	// Sigma1(e) -	eor	w15,w15,w24,ror#13 -	add	w23,w23,w17			// h+=Ch(e,f,g) -	and	w28,w28,w19			// (b^c)&=(a^b) -	eor	w13,w13,w6,ror#19 -	eor	w14,w14,w9,lsr#3	// sigma0(X[i+1]) -	add	w23,w23,w16			// h+=Sigma1(e) -	eor	
w28,w28,w25			// Maj(a,b,c) -	eor	w17,w15,w24,ror#22	// Sigma0(a) -	eor	w13,w13,w6,lsr#10	// sigma1(X[i+14]) -	add	w8,w8,w1 -	add	w27,w27,w23			// d+=h -	add	w23,w23,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	add	w8,w8,w14 -	add	w23,w23,w17			// h+=Sigma0(a) -	add	w8,w8,w13 -	ldr	w13,[sp,#8] -	str	w0,[sp,#4] -	ror	w16,w27,#6 -	add	w22,w22,w28			// h+=K[i] -	ror	w15,w10,#7 -	and	w17,w20,w27 -	ror	w14,w7,#17 -	bic	w28,w21,w27 -	ror	w0,w23,#2 -	add	w22,w22,w8			// h+=X[i] -	eor	w16,w16,w27,ror#11 -	eor	w15,w15,w10,ror#18 -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w23,w24			// a^b, b^c in next round -	eor	w16,w16,w27,ror#25	// Sigma1(e) -	eor	w0,w0,w23,ror#13 -	add	w22,w22,w17			// h+=Ch(e,f,g) -	and	w19,w19,w28			// (b^c)&=(a^b) -	eor	w14,w14,w7,ror#19 -	eor	w15,w15,w10,lsr#3	// sigma0(X[i+1]) -	add	w22,w22,w16			// h+=Sigma1(e) -	eor	w19,w19,w24			// Maj(a,b,c) -	eor	w17,w0,w23,ror#22	// Sigma0(a) -	eor	w14,w14,w7,lsr#10	// sigma1(X[i+14]) -	add	w9,w9,w2 -	add	w26,w26,w22			// d+=h -	add	w22,w22,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	add	w9,w9,w15 -	add	w22,w22,w17			// h+=Sigma0(a) -	add	w9,w9,w14 -	ldr	w14,[sp,#12] -	str	w1,[sp,#8] -	ror	w16,w26,#6 -	add	w21,w21,w19			// h+=K[i] -	ror	w0,w11,#7 -	and	w17,w27,w26 -	ror	w15,w8,#17 -	bic	w19,w20,w26 -	ror	w1,w22,#2 -	add	w21,w21,w9			// h+=X[i] -	eor	w16,w16,w26,ror#11 -	eor	w0,w0,w11,ror#18 -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w22,w23			// a^b, b^c in next round -	eor	w16,w16,w26,ror#25	// Sigma1(e) -	eor	w1,w1,w22,ror#13 -	add	w21,w21,w17			// h+=Ch(e,f,g) -	and	w28,w28,w19			// (b^c)&=(a^b) -	eor	w15,w15,w8,ror#19 -	eor	w0,w0,w11,lsr#3	// sigma0(X[i+1]) -	add	w21,w21,w16			// h+=Sigma1(e) -	eor	w28,w28,w23			// Maj(a,b,c) -	eor	w17,w1,w22,ror#22	// Sigma0(a) -	eor	w15,w15,w8,lsr#10	// sigma1(X[i+14]) -	add	w10,w10,w3 -	add	w25,w25,w21			// d+=h -	add	w21,w21,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	add	w10,w10,w0 -	add	w21,w21,w17			// h+=Sigma0(a) -	add	w10,w10,w15 -	ldr	w15,[sp,#0] -	str	w2,[sp,#12] -	ror	w16,w25,#6 -	add	w20,w20,w28			// h+=K[i] -	ror	w1,w12,#7 -	and	w17,w26,w25 -	ror	w0,w9,#17 -	bic	w28,w27,w25 -	ror	w2,w21,#2 -	add	w20,w20,w10			// h+=X[i] -	eor	w16,w16,w25,ror#11 -	eor	w1,w1,w12,ror#18 -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w21,w22			// a^b, b^c in next round -	eor	w16,w16,w25,ror#25	// Sigma1(e) -	eor	w2,w2,w21,ror#13 -	add	w20,w20,w17			// h+=Ch(e,f,g) -	and	w19,w19,w28			// (b^c)&=(a^b) -	eor	w0,w0,w9,ror#19 -	eor	w1,w1,w12,lsr#3	// sigma0(X[i+1]) -	add	w20,w20,w16			// h+=Sigma1(e) -	eor	w19,w19,w22			// Maj(a,b,c) -	eor	w17,w2,w21,ror#22	// Sigma0(a) -	eor	w0,w0,w9,lsr#10	// sigma1(X[i+14]) -	add	w11,w11,w4 -	add	w24,w24,w20			// d+=h -	add	w20,w20,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	add	w11,w11,w1 -	add	w20,w20,w17			// h+=Sigma0(a) -	add	w11,w11,w0 -	ldr	w0,[sp,#4] -	str	w3,[sp,#0] -	ror	w16,w24,#6 -	add	w27,w27,w19			// h+=K[i] -	ror	w2,w13,#7 -	and	w17,w25,w24 -	ror	w1,w10,#17 -	bic	w19,w26,w24 -	ror	w3,w20,#2 -	add	w27,w27,w11			// h+=X[i] -	eor	w16,w16,w24,ror#11 -	eor	w2,w2,w13,ror#18 -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w20,w21			// a^b, b^c in next round -	eor	w16,w16,w24,ror#25	// Sigma1(e) -	eor	w3,w3,w20,ror#13 -	add	w27,w27,w17			// h+=Ch(e,f,g) -	and	w28,w28,w19			// (b^c)&=(a^b) -	eor	w1,w1,w10,ror#19 -	eor	w2,w2,w13,lsr#3	// sigma0(X[i+1]) -	add	w27,w27,w16			// h+=Sigma1(e) -	eor	w28,w28,w21			// Maj(a,b,c) -	eor	w17,w3,w20,ror#22	// Sigma0(a) -	eor	
w1,w1,w10,lsr#10	// sigma1(X[i+14]) -	add	w12,w12,w5 -	add	w23,w23,w27			// d+=h -	add	w27,w27,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	add	w12,w12,w2 -	add	w27,w27,w17			// h+=Sigma0(a) -	add	w12,w12,w1 -	ldr	w1,[sp,#8] -	str	w4,[sp,#4] -	ror	w16,w23,#6 -	add	w26,w26,w28			// h+=K[i] -	ror	w3,w14,#7 -	and	w17,w24,w23 -	ror	w2,w11,#17 -	bic	w28,w25,w23 -	ror	w4,w27,#2 -	add	w26,w26,w12			// h+=X[i] -	eor	w16,w16,w23,ror#11 -	eor	w3,w3,w14,ror#18 -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w27,w20			// a^b, b^c in next round -	eor	w16,w16,w23,ror#25	// Sigma1(e) -	eor	w4,w4,w27,ror#13 -	add	w26,w26,w17			// h+=Ch(e,f,g) -	and	w19,w19,w28			// (b^c)&=(a^b) -	eor	w2,w2,w11,ror#19 -	eor	w3,w3,w14,lsr#3	// sigma0(X[i+1]) -	add	w26,w26,w16			// h+=Sigma1(e) -	eor	w19,w19,w20			// Maj(a,b,c) -	eor	w17,w4,w27,ror#22	// Sigma0(a) -	eor	w2,w2,w11,lsr#10	// sigma1(X[i+14]) -	add	w13,w13,w6 -	add	w22,w22,w26			// d+=h -	add	w26,w26,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	add	w13,w13,w3 -	add	w26,w26,w17			// h+=Sigma0(a) -	add	w13,w13,w2 -	ldr	w2,[sp,#12] -	str	w5,[sp,#8] -	ror	w16,w22,#6 -	add	w25,w25,w19			// h+=K[i] -	ror	w4,w15,#7 -	and	w17,w23,w22 -	ror	w3,w12,#17 -	bic	w19,w24,w22 -	ror	w5,w26,#2 -	add	w25,w25,w13			// h+=X[i] -	eor	w16,w16,w22,ror#11 -	eor	w4,w4,w15,ror#18 -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w26,w27			// a^b, b^c in next round -	eor	w16,w16,w22,ror#25	// Sigma1(e) -	eor	w5,w5,w26,ror#13 -	add	w25,w25,w17			// h+=Ch(e,f,g) -	and	w28,w28,w19			// (b^c)&=(a^b) -	eor	w3,w3,w12,ror#19 -	eor	w4,w4,w15,lsr#3	// sigma0(X[i+1]) -	add	w25,w25,w16			// h+=Sigma1(e) -	eor	w28,w28,w27			// Maj(a,b,c) -	eor	w17,w5,w26,ror#22	// Sigma0(a) -	eor	w3,w3,w12,lsr#10	// sigma1(X[i+14]) -	add	w14,w14,w7 -	add	w21,w21,w25			// d+=h -	add	w25,w25,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	add	w14,w14,w4 -	add	w25,w25,w17			// h+=Sigma0(a) -	add	w14,w14,w3 -	ldr	w3,[sp,#0] -	str	w6,[sp,#12] -	ror	w16,w21,#6 -	add	w24,w24,w28			// h+=K[i] -	ror	w5,w0,#7 -	and	w17,w22,w21 -	ror	w4,w13,#17 -	bic	w28,w23,w21 -	ror	w6,w25,#2 -	add	w24,w24,w14			// h+=X[i] -	eor	w16,w16,w21,ror#11 -	eor	w5,w5,w0,ror#18 -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w25,w26			// a^b, b^c in next round -	eor	w16,w16,w21,ror#25	// Sigma1(e) -	eor	w6,w6,w25,ror#13 -	add	w24,w24,w17			// h+=Ch(e,f,g) -	and	w19,w19,w28			// (b^c)&=(a^b) -	eor	w4,w4,w13,ror#19 -	eor	w5,w5,w0,lsr#3	// sigma0(X[i+1]) -	add	w24,w24,w16			// h+=Sigma1(e) -	eor	w19,w19,w26			// Maj(a,b,c) -	eor	w17,w6,w25,ror#22	// Sigma0(a) -	eor	w4,w4,w13,lsr#10	// sigma1(X[i+14]) -	add	w15,w15,w8 -	add	w20,w20,w24			// d+=h -	add	w24,w24,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	add	w15,w15,w5 -	add	w24,w24,w17			// h+=Sigma0(a) -	add	w15,w15,w4 -	ldr	w4,[sp,#4] -	str	w7,[sp,#0] -	ror	w16,w20,#6 -	add	w23,w23,w19			// h+=K[i] -	ror	w6,w1,#7 -	and	w17,w21,w20 -	ror	w5,w14,#17 -	bic	w19,w22,w20 -	ror	w7,w24,#2 -	add	w23,w23,w15			// h+=X[i] -	eor	w16,w16,w20,ror#11 -	eor	w6,w6,w1,ror#18 -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w24,w25			// a^b, b^c in next round -	eor	w16,w16,w20,ror#25	// Sigma1(e) -	eor	w7,w7,w24,ror#13 -	add	w23,w23,w17			// h+=Ch(e,f,g) -	and	w28,w28,w19			// (b^c)&=(a^b) -	eor	w5,w5,w14,ror#19 -	eor	w6,w6,w1,lsr#3	// sigma0(X[i+1]) -	add	w23,w23,w16			// h+=Sigma1(e) -	eor	w28,w28,w25			// Maj(a,b,c) -	eor	w17,w7,w24,ror#22	// Sigma0(a) -	eor	w5,w5,w14,lsr#10	// sigma1(X[i+14]) -	add	w0,w0,w9 -	add	w27,w27,w23			// d+=h -	
add	w23,w23,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	add	w0,w0,w6 -	add	w23,w23,w17			// h+=Sigma0(a) -	add	w0,w0,w5 -	ldr	w5,[sp,#8] -	str	w8,[sp,#4] -	ror	w16,w27,#6 -	add	w22,w22,w28			// h+=K[i] -	ror	w7,w2,#7 -	and	w17,w20,w27 -	ror	w6,w15,#17 -	bic	w28,w21,w27 -	ror	w8,w23,#2 -	add	w22,w22,w0			// h+=X[i] -	eor	w16,w16,w27,ror#11 -	eor	w7,w7,w2,ror#18 -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w23,w24			// a^b, b^c in next round -	eor	w16,w16,w27,ror#25	// Sigma1(e) -	eor	w8,w8,w23,ror#13 -	add	w22,w22,w17			// h+=Ch(e,f,g) -	and	w19,w19,w28			// (b^c)&=(a^b) -	eor	w6,w6,w15,ror#19 -	eor	w7,w7,w2,lsr#3	// sigma0(X[i+1]) -	add	w22,w22,w16			// h+=Sigma1(e) -	eor	w19,w19,w24			// Maj(a,b,c) -	eor	w17,w8,w23,ror#22	// Sigma0(a) -	eor	w6,w6,w15,lsr#10	// sigma1(X[i+14]) -	add	w1,w1,w10 -	add	w26,w26,w22			// d+=h -	add	w22,w22,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	add	w1,w1,w7 -	add	w22,w22,w17			// h+=Sigma0(a) -	add	w1,w1,w6 -	ldr	w6,[sp,#12] -	str	w9,[sp,#8] -	ror	w16,w26,#6 -	add	w21,w21,w19			// h+=K[i] -	ror	w8,w3,#7 -	and	w17,w27,w26 -	ror	w7,w0,#17 -	bic	w19,w20,w26 -	ror	w9,w22,#2 -	add	w21,w21,w1			// h+=X[i] -	eor	w16,w16,w26,ror#11 -	eor	w8,w8,w3,ror#18 -	orr	w17,w17,w19			// Ch(e,f,g) -	eor	w19,w22,w23			// a^b, b^c in next round -	eor	w16,w16,w26,ror#25	// Sigma1(e) -	eor	w9,w9,w22,ror#13 -	add	w21,w21,w17			// h+=Ch(e,f,g) -	and	w28,w28,w19			// (b^c)&=(a^b) -	eor	w7,w7,w0,ror#19 -	eor	w8,w8,w3,lsr#3	// sigma0(X[i+1]) -	add	w21,w21,w16			// h+=Sigma1(e) -	eor	w28,w28,w23			// Maj(a,b,c) -	eor	w17,w9,w22,ror#22	// Sigma0(a) -	eor	w7,w7,w0,lsr#10	// sigma1(X[i+14]) -	add	w2,w2,w11 -	add	w25,w25,w21			// d+=h -	add	w21,w21,w28			// h+=Maj(a,b,c) -	ldr	w28,[x30],#4		// *K++, w19 in next round -	add	w2,w2,w8 -	add	w21,w21,w17			// h+=Sigma0(a) -	add	w2,w2,w7 -	ldr	w7,[sp,#0] -	str	w10,[sp,#12] -	ror	w16,w25,#6 -	add	w20,w20,w28			// h+=K[i] -	ror	w9,w4,#7 -	and	w17,w26,w25 -	ror	w8,w1,#17 -	bic	w28,w27,w25 -	ror	w10,w21,#2 -	add	w20,w20,w2			// h+=X[i] -	eor	w16,w16,w25,ror#11 -	eor	w9,w9,w4,ror#18 -	orr	w17,w17,w28			// Ch(e,f,g) -	eor	w28,w21,w22			// a^b, b^c in next round -	eor	w16,w16,w25,ror#25	// Sigma1(e) -	eor	w10,w10,w21,ror#13 -	add	w20,w20,w17			// h+=Ch(e,f,g) -	and	w19,w19,w28			// (b^c)&=(a^b) -	eor	w8,w8,w1,ror#19 -	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1]) -	add	w20,w20,w16			// h+=Sigma1(e) -	eor	w19,w19,w22			// Maj(a,b,c) -	eor	w17,w10,w21,ror#22	// Sigma0(a) -	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14]) -	add	w3,w3,w12 -	add	w24,w24,w20			// d+=h -	add	w20,w20,w19			// h+=Maj(a,b,c) -	ldr	w19,[x30],#4		// *K++, w28 in next round -	add	w3,w3,w9 -	add	w20,w20,w17			// h+=Sigma0(a) -	add	w3,w3,w8 -	cbnz	w19,.Loop_16_xx - -	ldp	x0,x2,[x29,#96] -	ldr	x1,[x29,#112] -	sub	x30,x30,#260		// rewind - -	ldp	w3,w4,[x0] -	ldp	w5,w6,[x0,#2*4] -	add	x1,x1,#14*4			// advance input pointer -	ldp	w7,w8,[x0,#4*4] -	add	w20,w20,w3 -	ldp	w9,w10,[x0,#6*4] -	add	w21,w21,w4 -	add	w22,w22,w5 -	add	w23,w23,w6 -	stp	w20,w21,[x0] -	add	w24,w24,w7 -	add	w25,w25,w8 -	stp	w22,w23,[x0,#2*4] -	add	w26,w26,w9 -	add	w27,w27,w10 -	cmp	x1,x2 -	stp	w24,w25,[x0,#4*4] -	stp	w26,w27,[x0,#6*4] -	b.ne	.Loop - -	ldp	x19,x20,[x29,#16] -	add	sp,sp,#4*4 -	ldp	x21,x22,[x29,#32] -	ldp	x23,x24,[x29,#48] -	ldp	x25,x26,[x29,#64] -	ldp	x27,x28,[x29,#80] -	ldp	x29,x30,[sp],#128 -	ret -.size	sha256_block_data_order,.-sha256_block_data_order - -.align	6 -.type	K256,%object -K256: -	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -	.long	
0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 -	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 -	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da -	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 -	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 -	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070 -	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 -	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -	.long	0	//terminator -.size	K256,.-K256 -.align	3 -.LOPENSSL_armcap_P: -	.quad	OPENSSL_armcap_P-. -.asciz	"SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>" -.align	2 -.type	sha256_block_armv8,%function -.align	6 -sha256_block_armv8: -.Lv8_entry: -	stp		x29,x30,[sp,#-16]! -	add		x29,sp,#0 - -	ld1		{v0.4s,v1.4s},[x0] -	adr		x3,K256 - -.Loop_hw: -	ld1		{v4.16b-v7.16b},[x1],#64 -	sub		x2,x2,#1 -	ld1		{v16.4s},[x3],#16 -	rev32		v4.16b,v4.16b -	rev32		v5.16b,v5.16b -	rev32		v6.16b,v6.16b -	rev32		v7.16b,v7.16b -	orr		v18.16b,v0.16b,v0.16b		// offload -	orr		v19.16b,v1.16b,v1.16b -	ld1		{v17.4s},[x3],#16 -	add		v16.4s,v16.4s,v4.4s -	.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s -	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s -	.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b -	ld1		{v16.4s},[x3],#16 -	add		v17.4s,v17.4s,v5.4s -	.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s -	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s -	.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b -	ld1		{v17.4s},[x3],#16 -	add		v16.4s,v16.4s,v6.4s -	.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s -	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s -	.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b -	ld1		{v16.4s},[x3],#16 -	add		v17.4s,v17.4s,v7.4s -	.inst	0x5e282887	//sha256su0 v7.16b,v4.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s -	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s -	.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b -	ld1		{v17.4s},[x3],#16 -	add		v16.4s,v16.4s,v4.4s -	.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s -	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s -	.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b -	ld1		{v16.4s},[x3],#16 -	add		v17.4s,v17.4s,v5.4s -	.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s -	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s -	.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b -	ld1		{v17.4s},[x3],#16 -	add		v16.4s,v16.4s,v6.4s -	.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s -	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s -	.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b -	ld1		{v16.4s},[x3],#16 -	add		v17.4s,v17.4s,v7.4s -	.inst	0x5e282887	//sha256su0 v7.16b,v4.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s -	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s -	.inst	
0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b -	ld1		{v17.4s},[x3],#16 -	add		v16.4s,v16.4s,v4.4s -	.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s -	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s -	.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b -	ld1		{v16.4s},[x3],#16 -	add		v17.4s,v17.4s,v5.4s -	.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s -	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s -	.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b -	ld1		{v17.4s},[x3],#16 -	add		v16.4s,v16.4s,v6.4s -	.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s -	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s -	.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b -	ld1		{v16.4s},[x3],#16 -	add		v17.4s,v17.4s,v7.4s -	.inst	0x5e282887	//sha256su0 v7.16b,v4.16b -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s -	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s -	.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b -	ld1		{v17.4s},[x3],#16 -	add		v16.4s,v16.4s,v4.4s -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s -	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s - -	ld1		{v16.4s},[x3],#16 -	add		v17.4s,v17.4s,v5.4s -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s -	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s - -	ld1		{v17.4s},[x3] -	add		v16.4s,v16.4s,v6.4s -	sub		x3,x3,#64*4-16	// rewind -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s -	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s - -	add		v17.4s,v17.4s,v7.4s -	orr		v2.16b,v0.16b,v0.16b -	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s -	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s - -	add		v0.4s,v0.4s,v18.4s -	add		v1.4s,v1.4s,v19.4s - -	cbnz		x2,.Loop_hw - -	st1		{v0.4s,v1.4s},[x0] - -	ldr		x29,[sp],#16 -	ret -.size	sha256_block_armv8,.-sha256_block_armv8 -.comm	OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.pl b/app/openssl/crypto/sha/asm/sha512-armv4.pl index 71aa9356..7faf37b1 100644 --- a/app/openssl/crypto/sha/asm/sha512-armv4.pl +++ b/app/openssl/crypto/sha/asm/sha512-armv4.pl @@ -565,7 +565,7 @@ $code.=<<___;  	bne		.Loop_neon  	vldmia	sp!,{d8-d15}		@ epilogue -	ret				@ bx lr +	bx	lr  #endif  ___  } @@ -578,6 +578,5 @@ ___  $code =~ s/\`([^\`]*)\`/eval $1/gem;  $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4 -$code =~ s/\bret\b/bx	lr/gm;  print $code;  close STDOUT; # enforce flush diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.s b/app/openssl/crypto/sha/asm/sha512-armv4.s index fd462771..57301922 100644 --- a/app/openssl/crypto/sha/asm/sha512-armv4.s +++ b/app/openssl/crypto/sha/asm/sha512-armv4.s @@ -1775,7 +1775,7 @@ sha512_block_data_order:  	bne		.Loop_neon  	vldmia	sp!,{d8-d15}		@ epilogue -	bx	lr				@ .word	0xe12fff1e +	.word	0xe12fff1e  #endif  .size	sha512_block_data_order,.-sha512_block_data_order  .asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>" diff --git a/app/openssl/crypto/sha/asm/sha512-armv8.S b/app/openssl/crypto/sha/asm/sha512-armv8.S deleted file mode 100644 index 6b0d1940..00000000 --- a/app/openssl/crypto/sha/asm/sha512-armv8.S +++ /dev/null @@ -1,1021 +0,0 @@ -#include "arm_arch.h" - -.text - -.globl	sha512_block_data_order -.type	sha512_block_data_order,%function -.align	6 
-sha512_block_data_order: -	stp	x29,x30,[sp,#-128]! -	add	x29,sp,#0 - -	stp	x19,x20,[sp,#16] -	stp	x21,x22,[sp,#32] -	stp	x23,x24,[sp,#48] -	stp	x25,x26,[sp,#64] -	stp	x27,x28,[sp,#80] -	sub	sp,sp,#4*8 - -	ldp	x20,x21,[x0]				// load context -	ldp	x22,x23,[x0,#2*8] -	ldp	x24,x25,[x0,#4*8] -	add	x2,x1,x2,lsl#7	// end of input -	ldp	x26,x27,[x0,#6*8] -	adr	x30,K512 -	stp	x0,x2,[x29,#96] - -.Loop: -	ldp	x3,x4,[x1],#2*8 -	ldr	x19,[x30],#8			// *K++ -	eor	x28,x21,x22				// magic seed -	str	x1,[x29,#112] -#ifndef	__ARMEB__ -	rev	x3,x3			// 0 -#endif -	ror	x16,x24,#14 -	add	x27,x27,x19			// h+=K[i] -	eor	x6,x24,x24,ror#23 -	and	x17,x25,x24 -	bic	x19,x26,x24 -	add	x27,x27,x3			// h+=X[i] -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x20,x21			// a^b, b^c in next round -	eor	x16,x16,x6,ror#18	// Sigma1(e) -	ror	x6,x20,#28 -	add	x27,x27,x17			// h+=Ch(e,f,g) -	eor	x17,x20,x20,ror#5 -	add	x27,x27,x16			// h+=Sigma1(e) -	and	x28,x28,x19			// (b^c)&=(a^b) -	add	x23,x23,x27			// d+=h -	eor	x28,x28,x21			// Maj(a,b,c) -	eor	x17,x6,x17,ror#34	// Sigma0(a) -	add	x27,x27,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	//add	x27,x27,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x4,x4			// 1 -#endif -	ldp	x5,x6,[x1],#2*8 -	add	x27,x27,x17			// h+=Sigma0(a) -	ror	x16,x23,#14 -	add	x26,x26,x28			// h+=K[i] -	eor	x7,x23,x23,ror#23 -	and	x17,x24,x23 -	bic	x28,x25,x23 -	add	x26,x26,x4			// h+=X[i] -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x27,x20			// a^b, b^c in next round -	eor	x16,x16,x7,ror#18	// Sigma1(e) -	ror	x7,x27,#28 -	add	x26,x26,x17			// h+=Ch(e,f,g) -	eor	x17,x27,x27,ror#5 -	add	x26,x26,x16			// h+=Sigma1(e) -	and	x19,x19,x28			// (b^c)&=(a^b) -	add	x22,x22,x26			// d+=h -	eor	x19,x19,x20			// Maj(a,b,c) -	eor	x17,x7,x17,ror#34	// Sigma0(a) -	add	x26,x26,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	//add	x26,x26,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x5,x5			// 2 -#endif -	add	x26,x26,x17			// h+=Sigma0(a) -	ror	x16,x22,#14 -	add	x25,x25,x19			// h+=K[i] -	eor	x8,x22,x22,ror#23 -	and	x17,x23,x22 -	bic	x19,x24,x22 -	add	x25,x25,x5			// h+=X[i] -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x26,x27			// a^b, b^c in next round -	eor	x16,x16,x8,ror#18	// Sigma1(e) -	ror	x8,x26,#28 -	add	x25,x25,x17			// h+=Ch(e,f,g) -	eor	x17,x26,x26,ror#5 -	add	x25,x25,x16			// h+=Sigma1(e) -	and	x28,x28,x19			// (b^c)&=(a^b) -	add	x21,x21,x25			// d+=h -	eor	x28,x28,x27			// Maj(a,b,c) -	eor	x17,x8,x17,ror#34	// Sigma0(a) -	add	x25,x25,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	//add	x25,x25,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x6,x6			// 3 -#endif -	ldp	x7,x8,[x1],#2*8 -	add	x25,x25,x17			// h+=Sigma0(a) -	ror	x16,x21,#14 -	add	x24,x24,x28			// h+=K[i] -	eor	x9,x21,x21,ror#23 -	and	x17,x22,x21 -	bic	x28,x23,x21 -	add	x24,x24,x6			// h+=X[i] -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x25,x26			// a^b, b^c in next round -	eor	x16,x16,x9,ror#18	// Sigma1(e) -	ror	x9,x25,#28 -	add	x24,x24,x17			// h+=Ch(e,f,g) -	eor	x17,x25,x25,ror#5 -	add	x24,x24,x16			// h+=Sigma1(e) -	and	x19,x19,x28			// (b^c)&=(a^b) -	add	x20,x20,x24			// d+=h -	eor	x19,x19,x26			// Maj(a,b,c) -	eor	x17,x9,x17,ror#34	// Sigma0(a) -	add	x24,x24,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	//add	x24,x24,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x7,x7			// 4 -#endif -	add	x24,x24,x17			// h+=Sigma0(a) -	ror	x16,x20,#14 -	add	x23,x23,x19			// h+=K[i] -	eor	x10,x20,x20,ror#23 -	and	x17,x21,x20 -	bic	x19,x22,x20 -	add	x23,x23,x7			// 
h+=X[i] -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x24,x25			// a^b, b^c in next round -	eor	x16,x16,x10,ror#18	// Sigma1(e) -	ror	x10,x24,#28 -	add	x23,x23,x17			// h+=Ch(e,f,g) -	eor	x17,x24,x24,ror#5 -	add	x23,x23,x16			// h+=Sigma1(e) -	and	x28,x28,x19			// (b^c)&=(a^b) -	add	x27,x27,x23			// d+=h -	eor	x28,x28,x25			// Maj(a,b,c) -	eor	x17,x10,x17,ror#34	// Sigma0(a) -	add	x23,x23,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	//add	x23,x23,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x8,x8			// 5 -#endif -	ldp	x9,x10,[x1],#2*8 -	add	x23,x23,x17			// h+=Sigma0(a) -	ror	x16,x27,#14 -	add	x22,x22,x28			// h+=K[i] -	eor	x11,x27,x27,ror#23 -	and	x17,x20,x27 -	bic	x28,x21,x27 -	add	x22,x22,x8			// h+=X[i] -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x23,x24			// a^b, b^c in next round -	eor	x16,x16,x11,ror#18	// Sigma1(e) -	ror	x11,x23,#28 -	add	x22,x22,x17			// h+=Ch(e,f,g) -	eor	x17,x23,x23,ror#5 -	add	x22,x22,x16			// h+=Sigma1(e) -	and	x19,x19,x28			// (b^c)&=(a^b) -	add	x26,x26,x22			// d+=h -	eor	x19,x19,x24			// Maj(a,b,c) -	eor	x17,x11,x17,ror#34	// Sigma0(a) -	add	x22,x22,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	//add	x22,x22,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x9,x9			// 6 -#endif -	add	x22,x22,x17			// h+=Sigma0(a) -	ror	x16,x26,#14 -	add	x21,x21,x19			// h+=K[i] -	eor	x12,x26,x26,ror#23 -	and	x17,x27,x26 -	bic	x19,x20,x26 -	add	x21,x21,x9			// h+=X[i] -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x22,x23			// a^b, b^c in next round -	eor	x16,x16,x12,ror#18	// Sigma1(e) -	ror	x12,x22,#28 -	add	x21,x21,x17			// h+=Ch(e,f,g) -	eor	x17,x22,x22,ror#5 -	add	x21,x21,x16			// h+=Sigma1(e) -	and	x28,x28,x19			// (b^c)&=(a^b) -	add	x25,x25,x21			// d+=h -	eor	x28,x28,x23			// Maj(a,b,c) -	eor	x17,x12,x17,ror#34	// Sigma0(a) -	add	x21,x21,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	//add	x21,x21,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x10,x10			// 7 -#endif -	ldp	x11,x12,[x1],#2*8 -	add	x21,x21,x17			// h+=Sigma0(a) -	ror	x16,x25,#14 -	add	x20,x20,x28			// h+=K[i] -	eor	x13,x25,x25,ror#23 -	and	x17,x26,x25 -	bic	x28,x27,x25 -	add	x20,x20,x10			// h+=X[i] -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x21,x22			// a^b, b^c in next round -	eor	x16,x16,x13,ror#18	// Sigma1(e) -	ror	x13,x21,#28 -	add	x20,x20,x17			// h+=Ch(e,f,g) -	eor	x17,x21,x21,ror#5 -	add	x20,x20,x16			// h+=Sigma1(e) -	and	x19,x19,x28			// (b^c)&=(a^b) -	add	x24,x24,x20			// d+=h -	eor	x19,x19,x22			// Maj(a,b,c) -	eor	x17,x13,x17,ror#34	// Sigma0(a) -	add	x20,x20,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	//add	x20,x20,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x11,x11			// 8 -#endif -	add	x20,x20,x17			// h+=Sigma0(a) -	ror	x16,x24,#14 -	add	x27,x27,x19			// h+=K[i] -	eor	x14,x24,x24,ror#23 -	and	x17,x25,x24 -	bic	x19,x26,x24 -	add	x27,x27,x11			// h+=X[i] -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x20,x21			// a^b, b^c in next round -	eor	x16,x16,x14,ror#18	// Sigma1(e) -	ror	x14,x20,#28 -	add	x27,x27,x17			// h+=Ch(e,f,g) -	eor	x17,x20,x20,ror#5 -	add	x27,x27,x16			// h+=Sigma1(e) -	and	x28,x28,x19			// (b^c)&=(a^b) -	add	x23,x23,x27			// d+=h -	eor	x28,x28,x21			// Maj(a,b,c) -	eor	x17,x14,x17,ror#34	// Sigma0(a) -	add	x27,x27,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	//add	x27,x27,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x12,x12			// 9 -#endif -	ldp	x13,x14,[x1],#2*8 -	add	x27,x27,x17			// h+=Sigma0(a) -	ror	x16,x23,#14 -	add	x26,x26,x28			// h+=K[i] -	
eor	x15,x23,x23,ror#23 -	and	x17,x24,x23 -	bic	x28,x25,x23 -	add	x26,x26,x12			// h+=X[i] -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x27,x20			// a^b, b^c in next round -	eor	x16,x16,x15,ror#18	// Sigma1(e) -	ror	x15,x27,#28 -	add	x26,x26,x17			// h+=Ch(e,f,g) -	eor	x17,x27,x27,ror#5 -	add	x26,x26,x16			// h+=Sigma1(e) -	and	x19,x19,x28			// (b^c)&=(a^b) -	add	x22,x22,x26			// d+=h -	eor	x19,x19,x20			// Maj(a,b,c) -	eor	x17,x15,x17,ror#34	// Sigma0(a) -	add	x26,x26,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	//add	x26,x26,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x13,x13			// 10 -#endif -	add	x26,x26,x17			// h+=Sigma0(a) -	ror	x16,x22,#14 -	add	x25,x25,x19			// h+=K[i] -	eor	x0,x22,x22,ror#23 -	and	x17,x23,x22 -	bic	x19,x24,x22 -	add	x25,x25,x13			// h+=X[i] -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x26,x27			// a^b, b^c in next round -	eor	x16,x16,x0,ror#18	// Sigma1(e) -	ror	x0,x26,#28 -	add	x25,x25,x17			// h+=Ch(e,f,g) -	eor	x17,x26,x26,ror#5 -	add	x25,x25,x16			// h+=Sigma1(e) -	and	x28,x28,x19			// (b^c)&=(a^b) -	add	x21,x21,x25			// d+=h -	eor	x28,x28,x27			// Maj(a,b,c) -	eor	x17,x0,x17,ror#34	// Sigma0(a) -	add	x25,x25,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	//add	x25,x25,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x14,x14			// 11 -#endif -	ldp	x15,x0,[x1],#2*8 -	add	x25,x25,x17			// h+=Sigma0(a) -	str	x6,[sp,#24] -	ror	x16,x21,#14 -	add	x24,x24,x28			// h+=K[i] -	eor	x6,x21,x21,ror#23 -	and	x17,x22,x21 -	bic	x28,x23,x21 -	add	x24,x24,x14			// h+=X[i] -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x25,x26			// a^b, b^c in next round -	eor	x16,x16,x6,ror#18	// Sigma1(e) -	ror	x6,x25,#28 -	add	x24,x24,x17			// h+=Ch(e,f,g) -	eor	x17,x25,x25,ror#5 -	add	x24,x24,x16			// h+=Sigma1(e) -	and	x19,x19,x28			// (b^c)&=(a^b) -	add	x20,x20,x24			// d+=h -	eor	x19,x19,x26			// Maj(a,b,c) -	eor	x17,x6,x17,ror#34	// Sigma0(a) -	add	x24,x24,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	//add	x24,x24,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x15,x15			// 12 -#endif -	add	x24,x24,x17			// h+=Sigma0(a) -	str	x7,[sp,#0] -	ror	x16,x20,#14 -	add	x23,x23,x19			// h+=K[i] -	eor	x7,x20,x20,ror#23 -	and	x17,x21,x20 -	bic	x19,x22,x20 -	add	x23,x23,x15			// h+=X[i] -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x24,x25			// a^b, b^c in next round -	eor	x16,x16,x7,ror#18	// Sigma1(e) -	ror	x7,x24,#28 -	add	x23,x23,x17			// h+=Ch(e,f,g) -	eor	x17,x24,x24,ror#5 -	add	x23,x23,x16			// h+=Sigma1(e) -	and	x28,x28,x19			// (b^c)&=(a^b) -	add	x27,x27,x23			// d+=h -	eor	x28,x28,x25			// Maj(a,b,c) -	eor	x17,x7,x17,ror#34	// Sigma0(a) -	add	x23,x23,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	//add	x23,x23,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x0,x0			// 13 -#endif -	ldp	x1,x2,[x1] -	add	x23,x23,x17			// h+=Sigma0(a) -	str	x8,[sp,#8] -	ror	x16,x27,#14 -	add	x22,x22,x28			// h+=K[i] -	eor	x8,x27,x27,ror#23 -	and	x17,x20,x27 -	bic	x28,x21,x27 -	add	x22,x22,x0			// h+=X[i] -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x23,x24			// a^b, b^c in next round -	eor	x16,x16,x8,ror#18	// Sigma1(e) -	ror	x8,x23,#28 -	add	x22,x22,x17			// h+=Ch(e,f,g) -	eor	x17,x23,x23,ror#5 -	add	x22,x22,x16			// h+=Sigma1(e) -	and	x19,x19,x28			// (b^c)&=(a^b) -	add	x26,x26,x22			// d+=h -	eor	x19,x19,x24			// Maj(a,b,c) -	eor	x17,x8,x17,ror#34	// Sigma0(a) -	add	x22,x22,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	//add	x22,x22,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x1,x1			// 14 
-#endif -	ldr	x6,[sp,#24] -	add	x22,x22,x17			// h+=Sigma0(a) -	str	x9,[sp,#16] -	ror	x16,x26,#14 -	add	x21,x21,x19			// h+=K[i] -	eor	x9,x26,x26,ror#23 -	and	x17,x27,x26 -	bic	x19,x20,x26 -	add	x21,x21,x1			// h+=X[i] -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x22,x23			// a^b, b^c in next round -	eor	x16,x16,x9,ror#18	// Sigma1(e) -	ror	x9,x22,#28 -	add	x21,x21,x17			// h+=Ch(e,f,g) -	eor	x17,x22,x22,ror#5 -	add	x21,x21,x16			// h+=Sigma1(e) -	and	x28,x28,x19			// (b^c)&=(a^b) -	add	x25,x25,x21			// d+=h -	eor	x28,x28,x23			// Maj(a,b,c) -	eor	x17,x9,x17,ror#34	// Sigma0(a) -	add	x21,x21,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	//add	x21,x21,x17			// h+=Sigma0(a) -#ifndef	__ARMEB__ -	rev	x2,x2			// 15 -#endif -	ldr	x7,[sp,#0] -	add	x21,x21,x17			// h+=Sigma0(a) -	str	x10,[sp,#24] -	ror	x16,x25,#14 -	add	x20,x20,x28			// h+=K[i] -	ror	x9,x4,#1 -	and	x17,x26,x25 -	ror	x8,x1,#19 -	bic	x28,x27,x25 -	ror	x10,x21,#28 -	add	x20,x20,x2			// h+=X[i] -	eor	x16,x16,x25,ror#18 -	eor	x9,x9,x4,ror#8 -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x21,x22			// a^b, b^c in next round -	eor	x16,x16,x25,ror#41	// Sigma1(e) -	eor	x10,x10,x21,ror#34 -	add	x20,x20,x17			// h+=Ch(e,f,g) -	and	x19,x19,x28			// (b^c)&=(a^b) -	eor	x8,x8,x1,ror#61 -	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1]) -	add	x20,x20,x16			// h+=Sigma1(e) -	eor	x19,x19,x22			// Maj(a,b,c) -	eor	x17,x10,x21,ror#39	// Sigma0(a) -	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14]) -	add	x3,x3,x12 -	add	x24,x24,x20			// d+=h -	add	x20,x20,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	add	x3,x3,x9 -	add	x20,x20,x17			// h+=Sigma0(a) -	add	x3,x3,x8 -.Loop_16_xx: -	ldr	x8,[sp,#8] -	str	x11,[sp,#0] -	ror	x16,x24,#14 -	add	x27,x27,x19			// h+=K[i] -	ror	x10,x5,#1 -	and	x17,x25,x24 -	ror	x9,x2,#19 -	bic	x19,x26,x24 -	ror	x11,x20,#28 -	add	x27,x27,x3			// h+=X[i] -	eor	x16,x16,x24,ror#18 -	eor	x10,x10,x5,ror#8 -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x20,x21			// a^b, b^c in next round -	eor	x16,x16,x24,ror#41	// Sigma1(e) -	eor	x11,x11,x20,ror#34 -	add	x27,x27,x17			// h+=Ch(e,f,g) -	and	x28,x28,x19			// (b^c)&=(a^b) -	eor	x9,x9,x2,ror#61 -	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1]) -	add	x27,x27,x16			// h+=Sigma1(e) -	eor	x28,x28,x21			// Maj(a,b,c) -	eor	x17,x11,x20,ror#39	// Sigma0(a) -	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14]) -	add	x4,x4,x13 -	add	x23,x23,x27			// d+=h -	add	x27,x27,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	add	x4,x4,x10 -	add	x27,x27,x17			// h+=Sigma0(a) -	add	x4,x4,x9 -	ldr	x9,[sp,#16] -	str	x12,[sp,#8] -	ror	x16,x23,#14 -	add	x26,x26,x28			// h+=K[i] -	ror	x11,x6,#1 -	and	x17,x24,x23 -	ror	x10,x3,#19 -	bic	x28,x25,x23 -	ror	x12,x27,#28 -	add	x26,x26,x4			// h+=X[i] -	eor	x16,x16,x23,ror#18 -	eor	x11,x11,x6,ror#8 -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x27,x20			// a^b, b^c in next round -	eor	x16,x16,x23,ror#41	// Sigma1(e) -	eor	x12,x12,x27,ror#34 -	add	x26,x26,x17			// h+=Ch(e,f,g) -	and	x19,x19,x28			// (b^c)&=(a^b) -	eor	x10,x10,x3,ror#61 -	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1]) -	add	x26,x26,x16			// h+=Sigma1(e) -	eor	x19,x19,x20			// Maj(a,b,c) -	eor	x17,x12,x27,ror#39	// Sigma0(a) -	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14]) -	add	x5,x5,x14 -	add	x22,x22,x26			// d+=h -	add	x26,x26,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	add	x5,x5,x11 -	add	x26,x26,x17			// h+=Sigma0(a) -	add	x5,x5,x10 -	ldr	x10,[sp,#24] -	str	x13,[sp,#16] -	ror	x16,x22,#14 -	add	x25,x25,x19			// h+=K[i] -	ror	x12,x7,#1 -	and	x17,x23,x22 -	ror	x11,x4,#19 -	bic	
x19,x24,x22 -	ror	x13,x26,#28 -	add	x25,x25,x5			// h+=X[i] -	eor	x16,x16,x22,ror#18 -	eor	x12,x12,x7,ror#8 -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x26,x27			// a^b, b^c in next round -	eor	x16,x16,x22,ror#41	// Sigma1(e) -	eor	x13,x13,x26,ror#34 -	add	x25,x25,x17			// h+=Ch(e,f,g) -	and	x28,x28,x19			// (b^c)&=(a^b) -	eor	x11,x11,x4,ror#61 -	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1]) -	add	x25,x25,x16			// h+=Sigma1(e) -	eor	x28,x28,x27			// Maj(a,b,c) -	eor	x17,x13,x26,ror#39	// Sigma0(a) -	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14]) -	add	x6,x6,x15 -	add	x21,x21,x25			// d+=h -	add	x25,x25,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	add	x6,x6,x12 -	add	x25,x25,x17			// h+=Sigma0(a) -	add	x6,x6,x11 -	ldr	x11,[sp,#0] -	str	x14,[sp,#24] -	ror	x16,x21,#14 -	add	x24,x24,x28			// h+=K[i] -	ror	x13,x8,#1 -	and	x17,x22,x21 -	ror	x12,x5,#19 -	bic	x28,x23,x21 -	ror	x14,x25,#28 -	add	x24,x24,x6			// h+=X[i] -	eor	x16,x16,x21,ror#18 -	eor	x13,x13,x8,ror#8 -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x25,x26			// a^b, b^c in next round -	eor	x16,x16,x21,ror#41	// Sigma1(e) -	eor	x14,x14,x25,ror#34 -	add	x24,x24,x17			// h+=Ch(e,f,g) -	and	x19,x19,x28			// (b^c)&=(a^b) -	eor	x12,x12,x5,ror#61 -	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1]) -	add	x24,x24,x16			// h+=Sigma1(e) -	eor	x19,x19,x26			// Maj(a,b,c) -	eor	x17,x14,x25,ror#39	// Sigma0(a) -	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14]) -	add	x7,x7,x0 -	add	x20,x20,x24			// d+=h -	add	x24,x24,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	add	x7,x7,x13 -	add	x24,x24,x17			// h+=Sigma0(a) -	add	x7,x7,x12 -	ldr	x12,[sp,#8] -	str	x15,[sp,#0] -	ror	x16,x20,#14 -	add	x23,x23,x19			// h+=K[i] -	ror	x14,x9,#1 -	and	x17,x21,x20 -	ror	x13,x6,#19 -	bic	x19,x22,x20 -	ror	x15,x24,#28 -	add	x23,x23,x7			// h+=X[i] -	eor	x16,x16,x20,ror#18 -	eor	x14,x14,x9,ror#8 -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x24,x25			// a^b, b^c in next round -	eor	x16,x16,x20,ror#41	// Sigma1(e) -	eor	x15,x15,x24,ror#34 -	add	x23,x23,x17			// h+=Ch(e,f,g) -	and	x28,x28,x19			// (b^c)&=(a^b) -	eor	x13,x13,x6,ror#61 -	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1]) -	add	x23,x23,x16			// h+=Sigma1(e) -	eor	x28,x28,x25			// Maj(a,b,c) -	eor	x17,x15,x24,ror#39	// Sigma0(a) -	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14]) -	add	x8,x8,x1 -	add	x27,x27,x23			// d+=h -	add	x23,x23,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	add	x8,x8,x14 -	add	x23,x23,x17			// h+=Sigma0(a) -	add	x8,x8,x13 -	ldr	x13,[sp,#16] -	str	x0,[sp,#8] -	ror	x16,x27,#14 -	add	x22,x22,x28			// h+=K[i] -	ror	x15,x10,#1 -	and	x17,x20,x27 -	ror	x14,x7,#19 -	bic	x28,x21,x27 -	ror	x0,x23,#28 -	add	x22,x22,x8			// h+=X[i] -	eor	x16,x16,x27,ror#18 -	eor	x15,x15,x10,ror#8 -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x23,x24			// a^b, b^c in next round -	eor	x16,x16,x27,ror#41	// Sigma1(e) -	eor	x0,x0,x23,ror#34 -	add	x22,x22,x17			// h+=Ch(e,f,g) -	and	x19,x19,x28			// (b^c)&=(a^b) -	eor	x14,x14,x7,ror#61 -	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1]) -	add	x22,x22,x16			// h+=Sigma1(e) -	eor	x19,x19,x24			// Maj(a,b,c) -	eor	x17,x0,x23,ror#39	// Sigma0(a) -	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14]) -	add	x9,x9,x2 -	add	x26,x26,x22			// d+=h -	add	x22,x22,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	add	x9,x9,x15 -	add	x22,x22,x17			// h+=Sigma0(a) -	add	x9,x9,x14 -	ldr	x14,[sp,#24] -	str	x1,[sp,#16] -	ror	x16,x26,#14 -	add	x21,x21,x19			// h+=K[i] -	ror	x0,x11,#1 -	and	x17,x27,x26 -	ror	x15,x8,#19 -	bic	x19,x20,x26 -	ror	x1,x22,#28 -	add	x21,x21,x9			// 
h+=X[i] -	eor	x16,x16,x26,ror#18 -	eor	x0,x0,x11,ror#8 -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x22,x23			// a^b, b^c in next round -	eor	x16,x16,x26,ror#41	// Sigma1(e) -	eor	x1,x1,x22,ror#34 -	add	x21,x21,x17			// h+=Ch(e,f,g) -	and	x28,x28,x19			// (b^c)&=(a^b) -	eor	x15,x15,x8,ror#61 -	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1]) -	add	x21,x21,x16			// h+=Sigma1(e) -	eor	x28,x28,x23			// Maj(a,b,c) -	eor	x17,x1,x22,ror#39	// Sigma0(a) -	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14]) -	add	x10,x10,x3 -	add	x25,x25,x21			// d+=h -	add	x21,x21,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	add	x10,x10,x0 -	add	x21,x21,x17			// h+=Sigma0(a) -	add	x10,x10,x15 -	ldr	x15,[sp,#0] -	str	x2,[sp,#24] -	ror	x16,x25,#14 -	add	x20,x20,x28			// h+=K[i] -	ror	x1,x12,#1 -	and	x17,x26,x25 -	ror	x0,x9,#19 -	bic	x28,x27,x25 -	ror	x2,x21,#28 -	add	x20,x20,x10			// h+=X[i] -	eor	x16,x16,x25,ror#18 -	eor	x1,x1,x12,ror#8 -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x21,x22			// a^b, b^c in next round -	eor	x16,x16,x25,ror#41	// Sigma1(e) -	eor	x2,x2,x21,ror#34 -	add	x20,x20,x17			// h+=Ch(e,f,g) -	and	x19,x19,x28			// (b^c)&=(a^b) -	eor	x0,x0,x9,ror#61 -	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1]) -	add	x20,x20,x16			// h+=Sigma1(e) -	eor	x19,x19,x22			// Maj(a,b,c) -	eor	x17,x2,x21,ror#39	// Sigma0(a) -	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14]) -	add	x11,x11,x4 -	add	x24,x24,x20			// d+=h -	add	x20,x20,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	add	x11,x11,x1 -	add	x20,x20,x17			// h+=Sigma0(a) -	add	x11,x11,x0 -	ldr	x0,[sp,#8] -	str	x3,[sp,#0] -	ror	x16,x24,#14 -	add	x27,x27,x19			// h+=K[i] -	ror	x2,x13,#1 -	and	x17,x25,x24 -	ror	x1,x10,#19 -	bic	x19,x26,x24 -	ror	x3,x20,#28 -	add	x27,x27,x11			// h+=X[i] -	eor	x16,x16,x24,ror#18 -	eor	x2,x2,x13,ror#8 -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x20,x21			// a^b, b^c in next round -	eor	x16,x16,x24,ror#41	// Sigma1(e) -	eor	x3,x3,x20,ror#34 -	add	x27,x27,x17			// h+=Ch(e,f,g) -	and	x28,x28,x19			// (b^c)&=(a^b) -	eor	x1,x1,x10,ror#61 -	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1]) -	add	x27,x27,x16			// h+=Sigma1(e) -	eor	x28,x28,x21			// Maj(a,b,c) -	eor	x17,x3,x20,ror#39	// Sigma0(a) -	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14]) -	add	x12,x12,x5 -	add	x23,x23,x27			// d+=h -	add	x27,x27,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	add	x12,x12,x2 -	add	x27,x27,x17			// h+=Sigma0(a) -	add	x12,x12,x1 -	ldr	x1,[sp,#16] -	str	x4,[sp,#8] -	ror	x16,x23,#14 -	add	x26,x26,x28			// h+=K[i] -	ror	x3,x14,#1 -	and	x17,x24,x23 -	ror	x2,x11,#19 -	bic	x28,x25,x23 -	ror	x4,x27,#28 -	add	x26,x26,x12			// h+=X[i] -	eor	x16,x16,x23,ror#18 -	eor	x3,x3,x14,ror#8 -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x27,x20			// a^b, b^c in next round -	eor	x16,x16,x23,ror#41	// Sigma1(e) -	eor	x4,x4,x27,ror#34 -	add	x26,x26,x17			// h+=Ch(e,f,g) -	and	x19,x19,x28			// (b^c)&=(a^b) -	eor	x2,x2,x11,ror#61 -	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1]) -	add	x26,x26,x16			// h+=Sigma1(e) -	eor	x19,x19,x20			// Maj(a,b,c) -	eor	x17,x4,x27,ror#39	// Sigma0(a) -	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14]) -	add	x13,x13,x6 -	add	x22,x22,x26			// d+=h -	add	x26,x26,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	add	x13,x13,x3 -	add	x26,x26,x17			// h+=Sigma0(a) -	add	x13,x13,x2 -	ldr	x2,[sp,#24] -	str	x5,[sp,#16] -	ror	x16,x22,#14 -	add	x25,x25,x19			// h+=K[i] -	ror	x4,x15,#1 -	and	x17,x23,x22 -	ror	x3,x12,#19 -	bic	x19,x24,x22 -	ror	x5,x26,#28 -	add	x25,x25,x13			// h+=X[i] -	eor	x16,x16,x22,ror#18 -	eor	x4,x4,x15,ror#8 -	orr	x17,x17,x19		
	// Ch(e,f,g) -	eor	x19,x26,x27			// a^b, b^c in next round -	eor	x16,x16,x22,ror#41	// Sigma1(e) -	eor	x5,x5,x26,ror#34 -	add	x25,x25,x17			// h+=Ch(e,f,g) -	and	x28,x28,x19			// (b^c)&=(a^b) -	eor	x3,x3,x12,ror#61 -	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1]) -	add	x25,x25,x16			// h+=Sigma1(e) -	eor	x28,x28,x27			// Maj(a,b,c) -	eor	x17,x5,x26,ror#39	// Sigma0(a) -	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14]) -	add	x14,x14,x7 -	add	x21,x21,x25			// d+=h -	add	x25,x25,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	add	x14,x14,x4 -	add	x25,x25,x17			// h+=Sigma0(a) -	add	x14,x14,x3 -	ldr	x3,[sp,#0] -	str	x6,[sp,#24] -	ror	x16,x21,#14 -	add	x24,x24,x28			// h+=K[i] -	ror	x5,x0,#1 -	and	x17,x22,x21 -	ror	x4,x13,#19 -	bic	x28,x23,x21 -	ror	x6,x25,#28 -	add	x24,x24,x14			// h+=X[i] -	eor	x16,x16,x21,ror#18 -	eor	x5,x5,x0,ror#8 -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x25,x26			// a^b, b^c in next round -	eor	x16,x16,x21,ror#41	// Sigma1(e) -	eor	x6,x6,x25,ror#34 -	add	x24,x24,x17			// h+=Ch(e,f,g) -	and	x19,x19,x28			// (b^c)&=(a^b) -	eor	x4,x4,x13,ror#61 -	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1]) -	add	x24,x24,x16			// h+=Sigma1(e) -	eor	x19,x19,x26			// Maj(a,b,c) -	eor	x17,x6,x25,ror#39	// Sigma0(a) -	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14]) -	add	x15,x15,x8 -	add	x20,x20,x24			// d+=h -	add	x24,x24,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	add	x15,x15,x5 -	add	x24,x24,x17			// h+=Sigma0(a) -	add	x15,x15,x4 -	ldr	x4,[sp,#8] -	str	x7,[sp,#0] -	ror	x16,x20,#14 -	add	x23,x23,x19			// h+=K[i] -	ror	x6,x1,#1 -	and	x17,x21,x20 -	ror	x5,x14,#19 -	bic	x19,x22,x20 -	ror	x7,x24,#28 -	add	x23,x23,x15			// h+=X[i] -	eor	x16,x16,x20,ror#18 -	eor	x6,x6,x1,ror#8 -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x24,x25			// a^b, b^c in next round -	eor	x16,x16,x20,ror#41	// Sigma1(e) -	eor	x7,x7,x24,ror#34 -	add	x23,x23,x17			// h+=Ch(e,f,g) -	and	x28,x28,x19			// (b^c)&=(a^b) -	eor	x5,x5,x14,ror#61 -	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1]) -	add	x23,x23,x16			// h+=Sigma1(e) -	eor	x28,x28,x25			// Maj(a,b,c) -	eor	x17,x7,x24,ror#39	// Sigma0(a) -	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14]) -	add	x0,x0,x9 -	add	x27,x27,x23			// d+=h -	add	x23,x23,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	add	x0,x0,x6 -	add	x23,x23,x17			// h+=Sigma0(a) -	add	x0,x0,x5 -	ldr	x5,[sp,#16] -	str	x8,[sp,#8] -	ror	x16,x27,#14 -	add	x22,x22,x28			// h+=K[i] -	ror	x7,x2,#1 -	and	x17,x20,x27 -	ror	x6,x15,#19 -	bic	x28,x21,x27 -	ror	x8,x23,#28 -	add	x22,x22,x0			// h+=X[i] -	eor	x16,x16,x27,ror#18 -	eor	x7,x7,x2,ror#8 -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x23,x24			// a^b, b^c in next round -	eor	x16,x16,x27,ror#41	// Sigma1(e) -	eor	x8,x8,x23,ror#34 -	add	x22,x22,x17			// h+=Ch(e,f,g) -	and	x19,x19,x28			// (b^c)&=(a^b) -	eor	x6,x6,x15,ror#61 -	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1]) -	add	x22,x22,x16			// h+=Sigma1(e) -	eor	x19,x19,x24			// Maj(a,b,c) -	eor	x17,x8,x23,ror#39	// Sigma0(a) -	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14]) -	add	x1,x1,x10 -	add	x26,x26,x22			// d+=h -	add	x22,x22,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	add	x1,x1,x7 -	add	x22,x22,x17			// h+=Sigma0(a) -	add	x1,x1,x6 -	ldr	x6,[sp,#24] -	str	x9,[sp,#16] -	ror	x16,x26,#14 -	add	x21,x21,x19			// h+=K[i] -	ror	x8,x3,#1 -	and	x17,x27,x26 -	ror	x7,x0,#19 -	bic	x19,x20,x26 -	ror	x9,x22,#28 -	add	x21,x21,x1			// h+=X[i] -	eor	x16,x16,x26,ror#18 -	eor	x8,x8,x3,ror#8 -	orr	x17,x17,x19			// Ch(e,f,g) -	eor	x19,x22,x23			// a^b, b^c in next round -	eor	x16,x16,x26,ror#41	// Sigma1(e) -	
eor	x9,x9,x22,ror#34 -	add	x21,x21,x17			// h+=Ch(e,f,g) -	and	x28,x28,x19			// (b^c)&=(a^b) -	eor	x7,x7,x0,ror#61 -	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1]) -	add	x21,x21,x16			// h+=Sigma1(e) -	eor	x28,x28,x23			// Maj(a,b,c) -	eor	x17,x9,x22,ror#39	// Sigma0(a) -	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14]) -	add	x2,x2,x11 -	add	x25,x25,x21			// d+=h -	add	x21,x21,x28			// h+=Maj(a,b,c) -	ldr	x28,[x30],#8		// *K++, x19 in next round -	add	x2,x2,x8 -	add	x21,x21,x17			// h+=Sigma0(a) -	add	x2,x2,x7 -	ldr	x7,[sp,#0] -	str	x10,[sp,#24] -	ror	x16,x25,#14 -	add	x20,x20,x28			// h+=K[i] -	ror	x9,x4,#1 -	and	x17,x26,x25 -	ror	x8,x1,#19 -	bic	x28,x27,x25 -	ror	x10,x21,#28 -	add	x20,x20,x2			// h+=X[i] -	eor	x16,x16,x25,ror#18 -	eor	x9,x9,x4,ror#8 -	orr	x17,x17,x28			// Ch(e,f,g) -	eor	x28,x21,x22			// a^b, b^c in next round -	eor	x16,x16,x25,ror#41	// Sigma1(e) -	eor	x10,x10,x21,ror#34 -	add	x20,x20,x17			// h+=Ch(e,f,g) -	and	x19,x19,x28			// (b^c)&=(a^b) -	eor	x8,x8,x1,ror#61 -	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1]) -	add	x20,x20,x16			// h+=Sigma1(e) -	eor	x19,x19,x22			// Maj(a,b,c) -	eor	x17,x10,x21,ror#39	// Sigma0(a) -	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14]) -	add	x3,x3,x12 -	add	x24,x24,x20			// d+=h -	add	x20,x20,x19			// h+=Maj(a,b,c) -	ldr	x19,[x30],#8		// *K++, x28 in next round -	add	x3,x3,x9 -	add	x20,x20,x17			// h+=Sigma0(a) -	add	x3,x3,x8 -	cbnz	x19,.Loop_16_xx - -	ldp	x0,x2,[x29,#96] -	ldr	x1,[x29,#112] -	sub	x30,x30,#648		// rewind - -	ldp	x3,x4,[x0] -	ldp	x5,x6,[x0,#2*8] -	add	x1,x1,#14*8			// advance input pointer -	ldp	x7,x8,[x0,#4*8] -	add	x20,x20,x3 -	ldp	x9,x10,[x0,#6*8] -	add	x21,x21,x4 -	add	x22,x22,x5 -	add	x23,x23,x6 -	stp	x20,x21,[x0] -	add	x24,x24,x7 -	add	x25,x25,x8 -	stp	x22,x23,[x0,#2*8] -	add	x26,x26,x9 -	add	x27,x27,x10 -	cmp	x1,x2 -	stp	x24,x25,[x0,#4*8] -	stp	x26,x27,[x0,#6*8] -	b.ne	.Loop - -	ldp	x19,x20,[x29,#16] -	add	sp,sp,#4*8 -	ldp	x21,x22,[x29,#32] -	ldp	x23,x24,[x29,#48] -	ldp	x25,x26,[x29,#64] -	ldp	x27,x28,[x29,#80] -	ldp	x29,x30,[sp],#128 -	ret -.size	sha512_block_data_order,.-sha512_block_data_order - -.align	6 -.type	K512,%object -K512: -	.quad	0x428a2f98d728ae22,0x7137449123ef65cd -	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc -	.quad	0x3956c25bf348b538,0x59f111f1b605d019 -	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118 -	.quad	0xd807aa98a3030242,0x12835b0145706fbe -	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 -	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1 -	.quad	0x9bdc06a725c71235,0xc19bf174cf692694 -	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3 -	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 -	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483 -	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5 -	.quad	0x983e5152ee66dfab,0xa831c66d2db43210 -	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4 -	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725 -	.quad	0x06ca6351e003826f,0x142929670a0e6e70 -	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926 -	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df -	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8 -	.quad	0x81c2c92e47edaee6,0x92722c851482353b -	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001 -	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30 -	.quad	0xd192e819d6ef5218,0xd69906245565a910 -	.quad	0xf40e35855771202a,0x106aa07032bbd1b8 -	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53 -	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 -	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb -	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 -	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60 -	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec -	.quad	0x90befffa23631e28,0xa4506cebde82bde9 -	.quad	
0xbef9a3f7b2c67915,0xc67178f2e372532b -	.quad	0xca273eceea26619c,0xd186b8c721c0c207 -	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 -	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6 -	.quad	0x113f9804bef90dae,0x1b710b35131c471b -	.quad	0x28db77f523047d84,0x32caab7b40c72493 -	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c -	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a -	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817 -	.quad	0	// terminator -.size	K512,.-K512 -.align	3 -.LOPENSSL_armcap_P: -	.quad	OPENSSL_armcap_P-. -.asciz	"SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>" -.align	2 -.comm	OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha512-armv8.pl b/app/openssl/crypto/sha/asm/sha512-armv8.pl deleted file mode 100644 index 6935ed65..00000000 --- a/app/openssl/crypto/sha/asm/sha512-armv8.pl +++ /dev/null @@ -1,414 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# SHA256/512 for ARMv8. -# -# Performance in cycles per processed byte and improvement coefficient -# over code generated with "default" compiler: -# -#		SHA256-hw	SHA256(*)	SHA512 -# Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**)) -# Cortex-A5x	n/a		n/a		n/a -#  -# (*)	Software SHA256 results are of lesser relevance, presented -#	mostly for informational purposes. -# (**)	The result is a trade-off: it's possible to improve it by -#	10%, but at the cost of 20% loss on Cortex-A5x. - -$flavour=shift; -$output=shift; -open STDOUT,">$output"; - -if ($output =~ /512/) { -	$BITS=512; -	$SZ=8; -	@Sigma0=(28,34,39); -	@Sigma1=(14,18,41); -	@sigma0=(1,  8, 7); -	@sigma1=(19,61, 6); -	$rounds=80; -	$reg_t="x"; -} else { -	$BITS=256; -	$SZ=4; -	@Sigma0=( 2,13,22); -	@Sigma1=( 6,11,25); -	@sigma0=( 7,18, 3); -	@sigma1=(17,19,10); -	$rounds=64; -	$reg_t="w"; -} - -$func="sha${BITS}_block_data_order"; - -($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30)); - -@X=map("$reg_t$_",(3..15,0..2)); -@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27)); -($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28)); - -sub BODY_00_xx { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; -my $j=($i+1)&15; -my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]); -   $T0=@X[$i+3] if ($i<11); - -$code.=<<___	if ($i<16); -#ifndef	__ARMEB__ -	rev	@X[$i],@X[$i]			// $i -#endif -___ -$code.=<<___	if ($i<13 && ($i&1)); -	ldp	@X[$i+1],@X[$i+2],[$inp],#2*$SZ -___ -$code.=<<___	if ($i==13); -	ldp	@X[14],@X[15],[$inp] -___ -$code.=<<___	if ($i>=14); -	ldr	@X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`] -___ -$code.=<<___	if ($i>0 && $i<16); -	add	$a,$a,$t1			// h+=Sigma0(a) -___ -$code.=<<___	if ($i>=11); -	str	@X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`] -___ -# While ARMv8 specifies merged rotate-n-logical operation such as -# 'eor x,y,z,ror#n', it was found to negatively affect performance -# on Apple A7. The reason seems to be that it requires even 'y' to -# be available earlier. This means that such merged instruction is -# not necessarily best choice on critical path... On the other hand -# Cortex-A5x handles merged instructions much better than disjoint -# rotate and logical... See (**) footnote above. 
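
(Aside for readers tracing the deleted code: the Ch/Maj/Sigma comments running through the scalar rounds above, and the BODY_00_xx template below, all encode the standard FIPS 180-4 compression round; the only SHA-256/SHA-512 differences are the register width and the rotation counts in the @Sigma0/@Sigma1/@sigma0/@sigma1 arrays a few lines up. The following is a minimal C sketch, not OpenSSL code — the names sha256_round and ror32 are invented for illustration — showing one round written with the same Ch and Maj rewrites the assembly uses: and/bic/orr for Ch, and reusing this round's a^b as the next round's b^c for Maj.)

#include <stdint.h>

/* Illustrative sketch only -- not part of OpenSSL. */
static inline uint32_t ror32(uint32_t x, unsigned n)
{
	return (x >> n) | (x << (32 - n));
}

/* One SHA-256 round: s[0..7] = a..h, Ki = K[i], Wi = X[i]. */
static void sha256_round(uint32_t s[8], uint32_t Ki, uint32_t Wi)
{
	uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

	uint32_t Sigma1 = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
	uint32_t Sigma0 = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
	uint32_t Ch     = (e & f) | (~e & g);        /* and + bic + orr in the asm */
	uint32_t Maj    = ((a ^ b) & (b ^ c)) ^ b;   /* (b^c)&(a^b), then eor b    */

	uint32_t T1 = h + Sigma1 + Ch + Ki + Wi;     /* h+=K[i]; h+=X[i]; ...      */
	uint32_t T2 = Sigma0 + Maj;

	s[7] = g; s[6] = f; s[5] = e; s[4] = d + T1; /* d+=h                       */
	s[3] = c; s[2] = b; s[1] = a; s[0] = T1 + T2;/* h+=Sigma0(a), h+=Maj       */
}

(The message schedule seen in the round comments — "sigma0(X[i+1])", "sigma1(X[i+14])" — is likewise the standard W[i] = W[i-16] + sigma0(W[i-15]) + W[i-7] + sigma1(W[i-2]) over a 16-entry circular buffer, with sigma0(x) = ror(x,7)^ror(x,18)^(x>>3) and sigma1(x) = ror(x,17)^ror(x,19)^(x>>10). The deleted code keeps the eight working variables live in w20-w27 (x20-x27 for SHA-512) and renames them each round instead of shuffling values, which is why the same logical step reappears with rotating register numbers.)
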
-$code.=<<___	if ($i<15); -	ror	$t0,$e,#$Sigma1[0] -	add	$h,$h,$t2			// h+=K[i] -	eor	$T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]` -	and	$t1,$f,$e -	bic	$t2,$g,$e -	add	$h,$h,@X[$i&15]			// h+=X[i] -	orr	$t1,$t1,$t2			// Ch(e,f,g) -	eor	$t2,$a,$b			// a^b, b^c in next round -	eor	$t0,$t0,$T0,ror#$Sigma1[1]	// Sigma1(e) -	ror	$T0,$a,#$Sigma0[0] -	add	$h,$h,$t1			// h+=Ch(e,f,g) -	eor	$t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]` -	add	$h,$h,$t0			// h+=Sigma1(e) -	and	$t3,$t3,$t2			// (b^c)&=(a^b) -	add	$d,$d,$h			// d+=h -	eor	$t3,$t3,$b			// Maj(a,b,c) -	eor	$t1,$T0,$t1,ror#$Sigma0[1]	// Sigma0(a) -	add	$h,$h,$t3			// h+=Maj(a,b,c) -	ldr	$t3,[$Ktbl],#$SZ		// *K++, $t2 in next round -	//add	$h,$h,$t1			// h+=Sigma0(a) -___ -$code.=<<___	if ($i>=15); -	ror	$t0,$e,#$Sigma1[0] -	add	$h,$h,$t2			// h+=K[i] -	ror	$T1,@X[($j+1)&15],#$sigma0[0] -	and	$t1,$f,$e -	ror	$T2,@X[($j+14)&15],#$sigma1[0] -	bic	$t2,$g,$e -	ror	$T0,$a,#$Sigma0[0] -	add	$h,$h,@X[$i&15]			// h+=X[i] -	eor	$t0,$t0,$e,ror#$Sigma1[1] -	eor	$T1,$T1,@X[($j+1)&15],ror#$sigma0[1] -	orr	$t1,$t1,$t2			// Ch(e,f,g) -	eor	$t2,$a,$b			// a^b, b^c in next round -	eor	$t0,$t0,$e,ror#$Sigma1[2]	// Sigma1(e) -	eor	$T0,$T0,$a,ror#$Sigma0[1] -	add	$h,$h,$t1			// h+=Ch(e,f,g) -	and	$t3,$t3,$t2			// (b^c)&=(a^b) -	eor	$T2,$T2,@X[($j+14)&15],ror#$sigma1[1] -	eor	$T1,$T1,@X[($j+1)&15],lsr#$sigma0[2]	// sigma0(X[i+1]) -	add	$h,$h,$t0			// h+=Sigma1(e) -	eor	$t3,$t3,$b			// Maj(a,b,c) -	eor	$t1,$T0,$a,ror#$Sigma0[2]	// Sigma0(a) -	eor	$T2,$T2,@X[($j+14)&15],lsr#$sigma1[2]	// sigma1(X[i+14]) -	add	@X[$j],@X[$j],@X[($j+9)&15] -	add	$d,$d,$h			// d+=h -	add	$h,$h,$t3			// h+=Maj(a,b,c) -	ldr	$t3,[$Ktbl],#$SZ		// *K++, $t2 in next round -	add	@X[$j],@X[$j],$T1 -	add	$h,$h,$t1			// h+=Sigma0(a) -	add	@X[$j],@X[$j],$T2 -___ -	($t2,$t3)=($t3,$t2); -} - -$code.=<<___; -#include "arm_arch.h" - -.text - -.globl	$func -.type	$func,%function -.align	6 -$func: -___ -$code.=<<___	if ($SZ==4); -	ldr	x16,.LOPENSSL_armcap_P -	adr	x17,.LOPENSSL_armcap_P -	add	x16,x16,x17 -	ldr	w16,[x16] -	tst	w16,#ARMV8_SHA256 -	b.ne	.Lv8_entry -___ -$code.=<<___; -	stp	x29,x30,[sp,#-128]! 
-	add	x29,sp,#0 - -	stp	x19,x20,[sp,#16] -	stp	x21,x22,[sp,#32] -	stp	x23,x24,[sp,#48] -	stp	x25,x26,[sp,#64] -	stp	x27,x28,[sp,#80] -	sub	sp,sp,#4*$SZ - -	ldp	$A,$B,[$ctx]				// load context -	ldp	$C,$D,[$ctx,#2*$SZ] -	ldp	$E,$F,[$ctx,#4*$SZ] -	add	$num,$inp,$num,lsl#`log(16*$SZ)/log(2)`	// end of input -	ldp	$G,$H,[$ctx,#6*$SZ] -	adr	$Ktbl,K$BITS -	stp	$ctx,$num,[x29,#96] - -.Loop: -	ldp	@X[0],@X[1],[$inp],#2*$SZ -	ldr	$t2,[$Ktbl],#$SZ			// *K++ -	eor	$t3,$B,$C				// magic seed -	str	$inp,[x29,#112] -___ -for ($i=0;$i<16;$i++)	{ &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } -$code.=".Loop_16_xx:\n"; -for (;$i<32;$i++)	{ &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; -	cbnz	$t2,.Loop_16_xx - -	ldp	$ctx,$num,[x29,#96] -	ldr	$inp,[x29,#112] -	sub	$Ktbl,$Ktbl,#`$SZ*($rounds+1)`		// rewind - -	ldp	@X[0],@X[1],[$ctx] -	ldp	@X[2],@X[3],[$ctx,#2*$SZ] -	add	$inp,$inp,#14*$SZ			// advance input pointer -	ldp	@X[4],@X[5],[$ctx,#4*$SZ] -	add	$A,$A,@X[0] -	ldp	@X[6],@X[7],[$ctx,#6*$SZ] -	add	$B,$B,@X[1] -	add	$C,$C,@X[2] -	add	$D,$D,@X[3] -	stp	$A,$B,[$ctx] -	add	$E,$E,@X[4] -	add	$F,$F,@X[5] -	stp	$C,$D,[$ctx,#2*$SZ] -	add	$G,$G,@X[6] -	add	$H,$H,@X[7] -	cmp	$inp,$num -	stp	$E,$F,[$ctx,#4*$SZ] -	stp	$G,$H,[$ctx,#6*$SZ] -	b.ne	.Loop - -	ldp	x19,x20,[x29,#16] -	add	sp,sp,#4*$SZ -	ldp	x21,x22,[x29,#32] -	ldp	x23,x24,[x29,#48] -	ldp	x25,x26,[x29,#64] -	ldp	x27,x28,[x29,#80] -	ldp	x29,x30,[sp],#128 -	ret -.size	$func,.-$func - -.align	6 -.type	K$BITS,%object -K$BITS: -___ -$code.=<<___ if ($SZ==8); -	.quad	0x428a2f98d728ae22,0x7137449123ef65cd -	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc -	.quad	0x3956c25bf348b538,0x59f111f1b605d019 -	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118 -	.quad	0xd807aa98a3030242,0x12835b0145706fbe -	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 -	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1 -	.quad	0x9bdc06a725c71235,0xc19bf174cf692694 -	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3 -	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 -	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483 -	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5 -	.quad	0x983e5152ee66dfab,0xa831c66d2db43210 -	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4 -	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725 -	.quad	0x06ca6351e003826f,0x142929670a0e6e70 -	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926 -	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df -	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8 -	.quad	0x81c2c92e47edaee6,0x92722c851482353b -	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001 -	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30 -	.quad	0xd192e819d6ef5218,0xd69906245565a910 -	.quad	0xf40e35855771202a,0x106aa07032bbd1b8 -	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53 -	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 -	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb -	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 -	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60 -	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec -	.quad	0x90befffa23631e28,0xa4506cebde82bde9 -	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b -	.quad	0xca273eceea26619c,0xd186b8c721c0c207 -	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 -	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6 -	.quad	0x113f9804bef90dae,0x1b710b35131c471b -	.quad	0x28db77f523047d84,0x32caab7b40c72493 -	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c -	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a -	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817 -	.quad	0	// terminator -___ -$code.=<<___ if ($SZ==4); -	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 -	.long	
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 -	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da -	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 -	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 -	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070 -	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 -	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -	.long	0	//terminator -___ -$code.=<<___; -.size	K$BITS,.-K$BITS -.align	3 -.LOPENSSL_armcap_P: -	.quad	OPENSSL_armcap_P-. -.asciz	"SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" -.align	2 -___ - -if ($SZ==4) { -my $Ktbl="x3"; - -my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2)); -my @MSG=map("v$_.16b",(4..7)); -my ($W0,$W1)=("v16.4s","v17.4s"); -my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b"); - -$code.=<<___; -.type	sha256_block_armv8,%function -.align	6 -sha256_block_armv8: -.Lv8_entry: -	stp		x29,x30,[sp,#-16]! -	add		x29,sp,#0 - -	ld1.32		{$ABCD,$EFGH},[$ctx] -	adr		$Ktbl,K256 - -.Loop_hw: -	ld1		{@MSG[0]-@MSG[3]},[$inp],#64 -	sub		$num,$num,#1 -	ld1.32		{$W0},[$Ktbl],#16 -	rev32		@MSG[0],@MSG[0] -	rev32		@MSG[1],@MSG[1] -	rev32		@MSG[2],@MSG[2] -	rev32		@MSG[3],@MSG[3] -	orr		$ABCD_SAVE,$ABCD,$ABCD		// offload -	orr		$EFGH_SAVE,$EFGH,$EFGH -___ -for($i=0;$i<12;$i++) { -$code.=<<___; -	ld1.32		{$W1},[$Ktbl],#16 -	add.i32		$W0,$W0,@MSG[0] -	sha256su0	@MSG[0],@MSG[1] -	orr		$abcd,$ABCD,$ABCD -	sha256h		$ABCD,$EFGH,$W0 -	sha256h2	$EFGH,$abcd,$W0 -	sha256su1	@MSG[0],@MSG[2],@MSG[3] -___ -	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG)); -} -$code.=<<___; -	ld1.32		{$W1},[$Ktbl],#16 -	add.i32		$W0,$W0,@MSG[0] -	orr		$abcd,$ABCD,$ABCD -	sha256h		$ABCD,$EFGH,$W0 -	sha256h2	$EFGH,$abcd,$W0 - -	ld1.32		{$W0},[$Ktbl],#16 -	add.i32		$W1,$W1,@MSG[1] -	orr		$abcd,$ABCD,$ABCD -	sha256h		$ABCD,$EFGH,$W1 -	sha256h2	$EFGH,$abcd,$W1 - -	ld1.32		{$W1},[$Ktbl] -	add.i32		$W0,$W0,@MSG[2] -	sub		$Ktbl,$Ktbl,#$rounds*$SZ-16	// rewind -	orr		$abcd,$ABCD,$ABCD -	sha256h		$ABCD,$EFGH,$W0 -	sha256h2	$EFGH,$abcd,$W0 - -	add.i32		$W1,$W1,@MSG[3] -	orr		$abcd,$ABCD,$ABCD -	sha256h		$ABCD,$EFGH,$W1 -	sha256h2	$EFGH,$abcd,$W1 - -	add.i32		$ABCD,$ABCD,$ABCD_SAVE -	add.i32		$EFGH,$EFGH,$EFGH_SAVE - -	cbnz		$num,.Loop_hw - -	st1.32		{$ABCD,$EFGH},[$ctx] - -	ldr		x29,[sp],#16 -	ret -.size	sha256_block_armv8,.-sha256_block_armv8 -___ -} - -$code.=<<___; -.comm	OPENSSL_armcap_P,4,4 -___ - -{   my  %opcode = ( -	"sha256h"	=> 0x5e004000,	"sha256h2"	=> 0x5e005000, -	"sha256su0"	=> 0x5e282800,	"sha256su1"	=> 0x5e006000	); - -    sub unsha256 { -	my ($mnemonic,$arg)=@_; - -	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o -	&& -	sprintf ".inst\t0x%08x\t//%s %s", -			$opcode{$mnemonic}|$1|($2<<5)|($3<<16), -			$mnemonic,$arg; -    } -} - -foreach(split("\n",$code)) { - -	s/\`([^\`]*)\`/eval($1)/geo; - -	s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo; - -	s/\.\w?32\b//o		and s/\.16b/\.4s/go; -	m/(ld|st)1[^\[]+\[0\]/o	and s/\.4s/\.s/go; - -	print $_,"\n"; -} - -close STDOUT; diff --git a/app/openssl/crypto/srp/srp_vfy.c b/app/openssl/crypto/srp/srp_vfy.c index fdca19ff..4a3d13ed 100644 --- a/app/openssl/crypto/srp/srp_vfy.c +++ b/app/openssl/crypto/srp/srp_vfy.c @@ -93,9 +93,6 @@ 
static int t_fromb64(unsigned char *a, const char *src)  		else a[i] = loc - b64table;  		++i;  		} -	/* if nothing valid to process we have a zero length response */ -	if (i == 0) -		return 0;  	size = i;  	i = size - 1;  	j = size; diff --git a/app/openssl/crypto/x509v3/v3_purp.c b/app/openssl/crypto/x509v3/v3_purp.c index f59bfc18..ad688657 100644 --- a/app/openssl/crypto/x509v3/v3_purp.c +++ b/app/openssl/crypto/x509v3/v3_purp.c @@ -389,8 +389,8 @@ static void x509v3_cache_extensions(X509 *x)  	/* Handle proxy certificates */  	if((pci=X509_get_ext_d2i(x, NID_proxyCertInfo, NULL, NULL))) {  		if (x->ex_flags & EXFLAG_CA -		    || X509_get_ext_by_NID(x, NID_subject_alt_name, -1) >= 0 -		    || X509_get_ext_by_NID(x, NID_issuer_alt_name, -1) >= 0) { +		    || X509_get_ext_by_NID(x, NID_subject_alt_name, 0) >= 0 +		    || X509_get_ext_by_NID(x, NID_issuer_alt_name, 0) >= 0) {  			x->ex_flags |= EXFLAG_INVALID;  		}  		if (pci->pcPathLengthConstraint) { @@ -670,7 +670,7 @@ static int check_purpose_timestamp_sign(const X509_PURPOSE *xp, const X509 *x,  		return 0;  	/* Extended Key Usage MUST be critical */ -	i_ext = X509_get_ext_by_NID((X509 *) x, NID_ext_key_usage, -1); +	i_ext = X509_get_ext_by_NID((X509 *) x, NID_ext_key_usage, 0);  	if (i_ext >= 0)  		{  		X509_EXTENSION *ext = X509_get_ext((X509 *) x, i_ext); diff --git a/app/openssl/import_openssl.sh b/app/openssl/import_openssl.sh index f16596bc..02d2ab1c 100755 --- a/app/openssl/import_openssl.sh +++ b/app/openssl/import_openssl.sh @@ -128,16 +128,7 @@ function default_asm_file () {  function gen_asm_arm () {    local OUT    OUT=$(default_asm_file "$@") -  $PERL_EXE "$1" void "$OUT" > "$OUT" -} - -# Generate an ARMv8 64-bit assembly file. -# $1: generator (perl script) -# $2: [optional] output file name -function gen_asm_arm64 () { -  local OUT -  OUT=$(default_asm_file "$@") -  $PERL_EXE "$1" linux64 "$OUT" > "$OUT" +  $PERL_EXE "$1" > "$OUT"  }  function gen_asm_mips () { @@ -186,54 +177,6 @@ function print_autogenerated_header() {    echo "#"  } -function run_verbose() { -  echo Running: $@ -  $@ -} - -function scan_opensslconf_for_flags() { -  for flag in "$@"; do -    awk "/^#define ${flag}$/ { print \$2 }" crypto/opensslconf.h -  done -} - -CRYPTO_CONF_FLAGS=( -OPENSSL_CPUID_OBJ -DES_LONG -DES_PTR -DES_RISC1 -DES_RISC2 -DES_UNROLL -RC4_INT -RC4_CHUNK -RC4_INDEX -) - -function check_asm_flags() { -  local arch="$1" -  local target="$2" -  local unsorted_flags -  local expected_flags -  local actual_flags -  local defines="OPENSSL_CRYPTO_DEFINES_$arch" - -  PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS $target - -  unsorted_flags="$(awk '/^CFLAG=/ { sub(/^CFLAG= .*-Wall /, ""); gsub(/-D/, ""); print; }' Makefile)" -  unsorted_flags="$unsorted_flags $(scan_opensslconf_for_flags "${CRYPTO_CONF_FLAGS[@]}")" - -  expected_flags="$(echo $unsorted_flags | tr ' ' '\n' | sort | tr '\n' ' ')" -  actual_flags="$(echo ${!defines} | tr ' ' '\n' | sort | tr '\n' ' ')" - -  if [[ $actual_flags != $expected_flags ]]; then -    echo ${defines} is wrong! 
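The v3_purp.c hunk above changes the final `lastpos` argument of `X509_get_ext_by_NID` from -1 back to 0. In OpenSSL's convention the search begins at `lastpos + 1`, so -1 scans the extension list from the top while 0 quietly skips the first entry. A minimal standalone sketch of that convention (the array and NID values here are made up for illustration; this is not the real X509 code):

```c
#include <stdio.h>

/* Sketch of the lastpos convention used by X509_get_ext_by_NID: the scan
 * starts at lastpos + 1, so -1 searches the whole list while 0 silently
 * skips the first entry. The NID values are hypothetical. */
static int find_by_nid(const int *nids, int count, int nid, int lastpos)
{
    for (int i = lastpos + 1; i < count; i++)
        if (nids[i] == nid)
            return i;   /* index of the match, like the real API */
    return -1;          /* not found */
}

int main(void)
{
    int exts[] = { 85, 86, 87 };  /* pretend extension NIDs */

    /* lastpos = -1 finds the extension at index 0 ... */
    printf("from -1: %d\n", find_by_nid(exts, 3, 85, -1));  /* 0 */
    /* ... but lastpos = 0 starts at index 1 and misses it. */
    printf("from  0: %d\n", find_by_nid(exts, 3, 85, 0));   /* -1 */
    return 0;
}
```

That one-character difference is why the two sides of the hunk disagree on whether a proxy certificate carrying an alternative name in slot 0 is flagged as invalid.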
-    echo "    $actual_flags" -    echo Please update to: -    echo "    $expected_flags" -    exit 1 -  fi -} -  # Run Configure and generate headers  # $1: 32 for 32-bit arch, 64 for 64-bit arch, trusty for Trusty  # $2: 1 if building for static version @@ -249,9 +192,9 @@ function generate_build_config_headers() {    fi    if [[ $1 == trusty ]] ; then -    PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS_TRUSTY +    PERL=/usr/bin/perl ./Configure $CONFIGURE_ARGS_TRUSTY    else -    PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS ${!configure_args_bits} ${!configure_args_stat} +    PERL=/usr/bin/perl ./Configure $CONFIGURE_ARGS ${!configure_args_bits} ${!configure_args_stat}    fi    rm -f apps/CA.pl.bak crypto/opensslconf.h.bak @@ -481,16 +424,8 @@ function import() {    declare -r OPENSSL_SOURCE=$1    untar $OPENSSL_SOURCE readonly    applypatches $OPENSSL_DIR -  convert_iso8859_to_utf8 $OPENSSL_DIR    cd $OPENSSL_DIR - -  # Check the ASM flags for each arch -  check_asm_flags arm linux-armv4 -  check_asm_flags arm64 linux-aarch64 -  check_asm_flags x86 linux-elf -  check_asm_flags x86_64 linux-x86_64 -    generate_build_config_mk    generate_opensslconf_h @@ -508,23 +443,14 @@ function import() {    # Generate arm asm    gen_asm_arm crypto/aes/asm/aes-armv4.pl -  gen_asm_arm crypto/aes/asm/aesv8-armx.pl    gen_asm_arm crypto/aes/asm/bsaes-armv7.pl    gen_asm_arm crypto/bn/asm/armv4-gf2m.pl    gen_asm_arm crypto/bn/asm/armv4-mont.pl    gen_asm_arm crypto/modes/asm/ghash-armv4.pl -  gen_asm_arm crypto/modes/asm/ghashv8-armx.pl    gen_asm_arm crypto/sha/asm/sha1-armv4-large.pl    gen_asm_arm crypto/sha/asm/sha256-armv4.pl    gen_asm_arm crypto/sha/asm/sha512-armv4.pl -  # Generate armv8 asm -  gen_asm_arm64 crypto/aes/asm/aesv8-armx.pl crypto/aes/asm/aesv8-armx-64.S -  gen_asm_arm64 crypto/modes/asm/ghashv8-armx.pl crypto/modes/asm/ghashv8-armx-64.S -  gen_asm_arm64 crypto/sha/asm/sha1-armv8.pl -  gen_asm_arm64 crypto/sha/asm/sha512-armv8.pl crypto/sha/asm/sha256-armv8.S -  gen_asm_arm64 crypto/sha/asm/sha512-armv8.pl -    # Generate mips asm    gen_asm_mips crypto/aes/asm/aes-mips.pl    gen_asm_mips crypto/bn/asm/mips.pl crypto/bn/asm/bn-mips.S @@ -659,6 +585,7 @@ function untar() {    # Process new source    tar -zxf $OPENSSL_SOURCE +  convert_iso8859_to_utf8 $OPENSSL_DIR    cp -RfP $OPENSSL_DIR $OPENSSL_DIR_ORIG    if [ ! -z $readonly ]; then      find $OPENSSL_DIR_ORIG -type f -print0 | xargs -0 chmod a-w @@ -683,13 +610,12 @@ function applypatches () {    cd $dir    # Apply appropriate patches -  patches=(../patches/[0-9][0-9][0-9][0-9]-*.patch) -  for i in "${patches[@]}"; do -    if [[ $skip_patch != ${i##*/} ]]; then +  for i in $OPENSSL_PATCHES; do +    if [ ! "$skip_patch" = "patches/$i" ]; then        echo "Applying patch $i" -      patch -p1 < $i || die "Could not apply $i. Fix source and run: $0 regenerate patches/${i##*/}" +      patch -p1 < ../patches/$i || die "Could not apply patches/$i. 
Fix source and run: $0 regenerate patches/$i"      else -      echo "Skiping patch ${i##*/}" +      echo "Skiping patch $i"      fi    done diff --git a/app/openssl/include/openssl/bio.h b/app/openssl/include/openssl/bio.h index d05fa22a..05699ab2 100644 --- a/app/openssl/include/openssl/bio.h +++ b/app/openssl/include/openssl/bio.h @@ -266,9 +266,6 @@ void BIO_clear_flags(BIO *b, int flags);  #define BIO_RR_CONNECT			0x02  /* Returned from the accept BIO when an accept would have blocked */  #define BIO_RR_ACCEPT			0x03 -/* Returned from the SSL bio when the channel id retrieval code cannot find the - * private key. */ -#define BIO_RR_SSL_CHANNEL_ID_LOOKUP	0x04  /* These are passed by the BIO callback */  #define BIO_CB_FREE	0x01 diff --git a/app/openssl/include/openssl/opensslconf-32.h b/app/openssl/include/openssl/opensslconf-32.h index caf6f1b8..d6625489 100644 --- a/app/openssl/include/openssl/opensslconf-32.h +++ b/app/openssl/include/openssl/opensslconf-32.h @@ -53,9 +53,6 @@  #ifndef OPENSSL_NO_RFC3779  # define OPENSSL_NO_RFC3779  #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif  #ifndef OPENSSL_NO_RSAX  # define OPENSSL_NO_RSAX  #endif @@ -140,9 +137,6 @@  # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)  #  define NO_RFC3779  # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -#  define NO_RIPEMD -# endif  # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)  #  define NO_RSAX  # endif diff --git a/app/openssl/include/openssl/opensslconf-64.h b/app/openssl/include/openssl/opensslconf-64.h index 88fb0419..70c5a2cb 100644 --- a/app/openssl/include/openssl/opensslconf-64.h +++ b/app/openssl/include/openssl/opensslconf-64.h @@ -53,9 +53,6 @@  #ifndef OPENSSL_NO_RFC3779  # define OPENSSL_NO_RFC3779  #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif  #ifndef OPENSSL_NO_RSAX  # define OPENSSL_NO_RSAX  #endif @@ -140,9 +137,6 @@  # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)  #  define NO_RFC3779  # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -#  define NO_RIPEMD -# endif  # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)  #  define NO_RSAX  # endif diff --git a/app/openssl/include/openssl/opensslconf-static-32.h b/app/openssl/include/openssl/opensslconf-static-32.h index caf6f1b8..d6625489 100644 --- a/app/openssl/include/openssl/opensslconf-static-32.h +++ b/app/openssl/include/openssl/opensslconf-static-32.h @@ -53,9 +53,6 @@  #ifndef OPENSSL_NO_RFC3779  # define OPENSSL_NO_RFC3779  #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif  #ifndef OPENSSL_NO_RSAX  # define OPENSSL_NO_RSAX  #endif @@ -140,9 +137,6 @@  # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)  #  define NO_RFC3779  # endif -# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) -#  define NO_RIPEMD -# endif  # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)  #  define NO_RSAX  # endif diff --git a/app/openssl/include/openssl/opensslconf-static-64.h b/app/openssl/include/openssl/opensslconf-static-64.h index 88fb0419..70c5a2cb 100644 --- a/app/openssl/include/openssl/opensslconf-static-64.h +++ b/app/openssl/include/openssl/opensslconf-static-64.h @@ -53,9 +53,6 @@  #ifndef OPENSSL_NO_RFC3779  # define OPENSSL_NO_RFC3779  #endif -#ifndef OPENSSL_NO_RIPEMD -# define OPENSSL_NO_RIPEMD -#endif  #ifndef OPENSSL_NO_RSAX  # define OPENSSL_NO_RSAX  #endif @@ -140,9 +137,6 @@  # if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)  #  define NO_RFC3779  # endif -# if defined(OPENSSL_NO_RIPEMD) && 
!defined(NO_RIPEMD) -#  define NO_RIPEMD -# endif  # if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)  #  define NO_RSAX  # endif diff --git a/app/openssl/include/openssl/opensslv.h b/app/openssl/include/openssl/opensslv.h index c3b6acec..ebe71807 100644 --- a/app/openssl/include/openssl/opensslv.h +++ b/app/openssl/include/openssl/opensslv.h @@ -25,11 +25,11 @@   * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for   *  major minor fix final patch/beta)   */ -#define OPENSSL_VERSION_NUMBER	0x1000108fL +#define OPENSSL_VERSION_NUMBER	0x1000107fL  #ifdef OPENSSL_FIPS -#define OPENSSL_VERSION_TEXT	"OpenSSL 1.0.1h-fips 5 Jun 2014" +#define OPENSSL_VERSION_TEXT	"OpenSSL 1.0.1g-fips 7 Apr 2014"  #else -#define OPENSSL_VERSION_TEXT	"OpenSSL 1.0.1h 5 Jun 2014" +#define OPENSSL_VERSION_TEXT	"OpenSSL 1.0.1g 7 Apr 2014"  #endif  #define OPENSSL_VERSION_PTEXT	" part of " OPENSSL_VERSION_TEXT diff --git a/app/openssl/include/openssl/pkcs7.h b/app/openssl/include/openssl/pkcs7.h index 04f60379..e4d44319 100644 --- a/app/openssl/include/openssl/pkcs7.h +++ b/app/openssl/include/openssl/pkcs7.h @@ -453,7 +453,6 @@ void ERR_load_PKCS7_strings(void);  #define PKCS7_R_ERROR_SETTING_CIPHER			 121  #define PKCS7_R_INVALID_MIME_TYPE			 131  #define PKCS7_R_INVALID_NULL_POINTER			 143 -#define PKCS7_R_INVALID_SIGNED_DATA_TYPE		 155  #define PKCS7_R_MIME_NO_CONTENT_TYPE			 132  #define PKCS7_R_MIME_PARSE_ERROR			 133  #define PKCS7_R_MIME_SIG_PARSE_ERROR			 134 diff --git a/app/openssl/include/openssl/ssl.h b/app/openssl/include/openssl/ssl.h index a85841b3..54b0eb6c 100644 --- a/app/openssl/include/openssl/ssl.h +++ b/app/openssl/include/openssl/ssl.h @@ -544,13 +544,6 @@ struct ssl_session_st  #ifndef OPENSSL_NO_SRP  	char *srp_username;  #endif - -	/* original_handshake_hash contains the handshake hash (either -	 * SHA-1+MD5 or SHA-2, depending on TLS version) for the original, full -	 * handshake that created a session. This is used by Channel IDs during -	 * resumption. */ -	unsigned char original_handshake_hash[EVP_MAX_MD_SIZE]; -	unsigned int original_handshake_hash_len;  	};  #endif @@ -560,7 +553,7 @@ struct ssl_session_st  /* Allow initial connection to servers that don't support RI */  #define SSL_OP_LEGACY_SERVER_CONNECT			0x00000004L  #define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG		0x00000008L -#define SSL_OP_TLSEXT_PADDING				0x00000010L +#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG		0x00000010L  #define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER		0x00000020L  #define SSL_OP_SAFARI_ECDHE_ECDSA_BUG			0x00000040L  #define SSL_OP_SSLEAY_080_CLIENT_DH_BUG			0x00000080L @@ -569,8 +562,6 @@ struct ssl_session_st  /* Hasn't done anything since OpenSSL 0.9.7h, retained for compatibility */  #define SSL_OP_MSIE_SSLV2_RSA_PADDING			0x0 -/* Refers to ancient SSLREF and SSLv2, retained for compatibility */ -#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG		0x0  /* SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS is vestigial. Previously it disabled the   * insertion of empty records in CBC mode, but the empty records were commonly @@ -657,14 +648,12 @@ struct ssl_session_st   * TLS only.)  "Released" buffers are put onto a free-list in the context   * or just freed (depending on the context's setting for freelist_max_len). */  #define SSL_MODE_RELEASE_BUFFERS 0x00000010L -  /* Send the current time in the Random fields of the ClientHello and   * ServerHello records for compatibility with hypothetical implementations   * that require it.   
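The opensslv.h hunk above swaps `0x1000108fL` for `0x1000107fL`. Per the header's MNNFFPPS note, the value packs major, minor, fix, patch letter, and release status into nibbles; a small decoder makes the g/h difference visible (standalone, no OpenSSL dependency):

```c
#include <stdio.h>

/* Decode the MNNFFPPS layout described in opensslv.h: one nibble of
 * major, two of minor, two of fix, two of patch letter, one of status.
 * 0x1000107fL is 1.0.1g (patch 7 -> 'g', status 0xf = release). */
static void decode(unsigned long v)
{
    unsigned major  = (v >> 28) & 0xf;
    unsigned minor  = (v >> 20) & 0xff;
    unsigned fix    = (v >> 12) & 0xff;
    unsigned patch  = (v >> 4)  & 0xff;
    unsigned status = v & 0xf;

    printf("%u.%u.%u%c (status 0x%x)\n",
           major, minor, fix,
           patch ? (char)('a' + patch - 1) : ' ', status);
}

int main(void)
{
    decode(0x1000107fUL);  /* 1.0.1g */
    decode(0x1000108fUL);  /* 1.0.1h */
    return 0;
}
```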
*/  #define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L  #define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L -  /* When set, clients may send application data before receipt of CCS   * and Finished.  This mode enables full-handshakes to 'complete' in   * one RTT. */ @@ -877,9 +866,6 @@ struct ssl_ctx_st  	/* get client cert callback */  	int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey); -	/* get channel id callback */ -	void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey); -      /* cookie generate callback */      int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie,           unsigned int *cookie_len); @@ -1042,10 +1028,6 @@ struct ssl_ctx_st  	/* If true, a client will advertise the Channel ID extension and a  	 * server will echo it. */  	char tlsext_channel_id_enabled; -	/* tlsext_channel_id_enabled_new is a hack to support both old and new -	 * ChannelID signatures. It indicates that a client should advertise the -	 * new ChannelID extension number. */ -	char tlsext_channel_id_enabled_new;  	/* The client's Channel ID private key. */  	EVP_PKEY *tlsext_channel_id_private;  #endif @@ -1104,8 +1086,6 @@ void SSL_CTX_set_info_callback(SSL_CTX *ctx, void (*cb)(const SSL *ssl,int type,  void (*SSL_CTX_get_info_callback(SSL_CTX *ctx))(const SSL *ssl,int type,int val);  void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey));  int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL *ssl, X509 **x509, EVP_PKEY **pkey); -void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey)); -void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL *ssl, EVP_PKEY **pkey);  #ifndef OPENSSL_NO_ENGINE  int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e);  #endif @@ -1182,14 +1162,12 @@ const char *SSL_get_psk_identity(const SSL *s);  #define SSL_WRITING	2  #define SSL_READING	3  #define SSL_X509_LOOKUP	4 -#define SSL_CHANNEL_ID_LOOKUP	5  /* These will only be used when doing non-blocking IO */  #define SSL_want_nothing(s)	(SSL_want(s) == SSL_NOTHING)  #define SSL_want_read(s)	(SSL_want(s) == SSL_READING)  #define SSL_want_write(s)	(SSL_want(s) == SSL_WRITING)  #define SSL_want_x509_lookup(s)	(SSL_want(s) == SSL_X509_LOOKUP) -#define SSL_want_channel_id_lookup(s)	(SSL_want(s) == SSL_CHANNEL_ID_LOOKUP)  #define SSL_MAC_FLAG_READ_MAC_STREAM 1  #define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 @@ -1624,7 +1602,6 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)  #define SSL_ERROR_ZERO_RETURN		6  #define SSL_ERROR_WANT_CONNECT		7  #define SSL_ERROR_WANT_ACCEPT		8 -#define SSL_ERROR_WANT_CHANNEL_ID_LOOKUP	9  #define SSL_CTRL_NEED_TMP_RSA			1  #define SSL_CTRL_SET_TMP_RSA			2 @@ -1762,11 +1739,10 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)  #define SSL_set_tmp_ecdh(ssl,ecdh) \  	SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh) -/* SSL_enable_tls_channel_id either configures a TLS server to accept TLS client - * IDs from clients, or configure a client to send TLS client IDs to server. - * Returns 1 on success. */ -#define SSL_enable_tls_channel_id(s) \ -	SSL_ctrl(s,SSL_CTRL_CHANNEL_ID,0,NULL) +/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client + * IDs from clients. Returns 1 on success. */ +#define SSL_enable_tls_channel_id(ctx) \ +	SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL)  /* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to   * compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on   * success. 
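The ssl.h hunks above drop the `SSL_CHANNEL_ID_LOOKUP` want-state and its `SSL_ERROR_WANT_CHANNEL_ID_LOOKUP` result code. Applications observe these states through the usual `SSL_get_error` dispatch after a failed `SSL_read`/`SSL_write`; a sketch of that loop under the post-revert API, assuming `ssl` already wraps a nonblocking socket:

```c
#include <stdio.h>
#include <openssl/ssl.h>
#include <openssl/err.h>

/* Sketch of the retry dispatch that the removed want-state plugged into.
 * Returns bytes read, 0 for "retry after the socket is ready", -1 on
 * close or fatal error. */
static int read_some(SSL *ssl, char *buf, int len)
{
    int n = SSL_read(ssl, buf, len);
    if (n > 0)
        return n;

    switch (SSL_get_error(ssl, n)) {
    case SSL_ERROR_WANT_READ:    /* poll for readability, then retry */
    case SSL_ERROR_WANT_WRITE:   /* renegotiation may need a write first */
        return 0;
    case SSL_ERROR_ZERO_RETURN:  /* clean close_notify from the peer */
        return -1;
    default:
        ERR_print_errors_fp(stderr);
        return -1;
    }
}
```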
*/ @@ -1816,7 +1792,7 @@ int	SSL_CIPHER_get_bits(const SSL_CIPHER *c,int *alg_bits);  char *	SSL_CIPHER_get_version(const SSL_CIPHER *c);  const char *	SSL_CIPHER_get_name(const SSL_CIPHER *c);  unsigned long 	SSL_CIPHER_get_id(const SSL_CIPHER *c); -const char *	SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); +const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher);  int	SSL_get_fd(const SSL *s);  int	SSL_get_rfd(const SSL *s); @@ -2731,6 +2707,7 @@ void ERR_load_SSL_strings(void);  #define SSL_R_WRONG_VERSION_NUMBER			 267  #define SSL_R_X509_LIB					 268  #define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS		 269 +#define SSL_R_UNEXPECTED_CCS				 388  #ifdef  __cplusplus  } diff --git a/app/openssl/include/openssl/ssl3.h b/app/openssl/include/openssl/ssl3.h index 83d59bff..f205f73d 100644 --- a/app/openssl/include/openssl/ssl3.h +++ b/app/openssl/include/openssl/ssl3.h @@ -388,6 +388,9 @@ typedef struct ssl3_buffer_st  #define TLS1_FLAGS_TLS_PADDING_BUG		0x0008  #define TLS1_FLAGS_SKIP_CERT_VERIFY		0x0010  #define TLS1_FLAGS_KEEP_HANDSHAKE		0x0020 +/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at + * this point in the handshake. If this flag is not set then received CCS + * records will cause a fatal error for the connection. */  #define SSL3_FLAGS_CCS_OK			0x0080  /* SSL3_FLAGS_SGC_RESTART_DONE is set when we @@ -555,11 +558,6 @@ typedef struct ssl3_state_st  	 * for Channel IDs and that tlsext_channel_id will be valid after the  	 * handshake. */  	char tlsext_channel_id_valid; -	/* tlsext_channel_id_new means that the updated Channel ID extension -	 * was negotiated. This is a temporary hack in the code to support both -	 * forms of Channel ID extension while we transition to the new format, -	 * which fixed a security issue. */ -	char tlsext_channel_id_new;  	/* For a server:  	 *     If |tlsext_channel_id_valid| is true, then this contains the  	 *     verified Channel ID from the client: a P256 point, (x,y), where @@ -680,11 +678,11 @@ typedef struct ssl3_state_st  #define SSL3_ST_SR_CERT_VRFY_B		(0x1A1|SSL_ST_ACCEPT)  #define SSL3_ST_SR_CHANGE_A		(0x1B0|SSL_ST_ACCEPT)  #define SSL3_ST_SR_CHANGE_B		(0x1B1|SSL_ST_ACCEPT) +#define SSL3_ST_SR_POST_CLIENT_CERT	(0x1BF|SSL_ST_ACCEPT)  #ifndef OPENSSL_NO_NEXTPROTONEG  #define SSL3_ST_SR_NEXT_PROTO_A		(0x210|SSL_ST_ACCEPT)  #define SSL3_ST_SR_NEXT_PROTO_B		(0x211|SSL_ST_ACCEPT)  #endif -#define SSL3_ST_SR_POST_CLIENT_CERT	(0x1BF|SSL_ST_ACCEPT)  #define SSL3_ST_SR_CHANNEL_ID_A		(0x220|SSL_ST_ACCEPT)  #define SSL3_ST_SR_CHANNEL_ID_B		(0x221|SSL_ST_ACCEPT)  #define SSL3_ST_SR_FINISHED_A		(0x1C0|SSL_ST_ACCEPT) diff --git a/app/openssl/include/openssl/tls1.h b/app/openssl/include/openssl/tls1.h index b9a0899e..ec8948d5 100644 --- a/app/openssl/include/openssl/tls1.h +++ b/app/openssl/include/openssl/tls1.h @@ -259,7 +259,6 @@ extern "C" {  /* This is not an IANA defined extension number */  #define TLSEXT_TYPE_channel_id			30031 -#define TLSEXT_TYPE_channel_id_new		30032  /* NameType value from RFC 3546 */  #define TLSEXT_NAMETYPE_host_name 0 @@ -532,11 +531,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb)  #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256        0x0300C031  #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384        0x0300C032 -/* ECDHE PSK ciphersuites from RFC5489 - * SHA-2 cipher suites are omitted because they cannot be used safely with - * SSLv3. 
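The ssl3.h hunk above documents `SSL3_FLAGS_CCS_OK` as a gate on ChangeCipherSpec records, and elsewhere in this patch the record layer's line that clears the flag after use is removed. A toy sketch of the one-shot gate pattern (the struct and names here are hypothetical, not the real SSL state):

```c
#include <stdio.h>

#define FLAGS_CCS_OK 0x0080  /* mirrors SSL3_FLAGS_CCS_OK above */

struct conn { unsigned flags; };

/* One-shot gate: the handshake state machine arms the flag just before
 * a CCS is legal, and the record layer disarms it on use so a second,
 * early CCS is rejected rather than silently accepted. */
static int on_ccs(struct conn *c)
{
    if (!(c->flags & FLAGS_CCS_OK)) {
        fprintf(stderr, "unexpected CCS\n");
        return 0;               /* fatal alert in the real code */
    }
    c->flags &= ~FLAGS_CCS_OK;  /* re-armed only by the state machine */
    return 1;
}

int main(void)
{
    struct conn c = { FLAGS_CCS_OK };
    printf("%d\n", on_ccs(&c));  /* 1: expected CCS accepted */
    printf("%d\n", on_ccs(&c));  /* 0: replayed CCS rejected */
    return 0;
}
```

Whether the flag is cleared on use is exactly the behavioral difference between the two sides of the s3_pkt.c hunk later in this patch.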
*/ -#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA          0x0300C035 -#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA          0x0300C036 +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256    0x0300C037 +#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384    0x0300C038  /* XXX   * Inconsistency alert: @@ -689,9 +686,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb)  #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256       "ECDH-RSA-AES128-GCM-SHA256"  #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384       "ECDH-RSA-AES256-GCM-SHA384" -/* ECDHE PSK ciphersuites from RFC5489 */ -#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA         "ECDHE-PSK-AES128-CBC-SHA" -#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA         "ECDHE-PSK-AES256-CBC-SHA" +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256  "ECDHE-PSK-WITH-AES-128-CBC-SHA256" +#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384  "ECDHE-PSK-WITH-AES-256-CBC-SHA384"  #define TLS_CT_RSA_SIGN			1  #define TLS_CT_DSS_SIGN			2 diff --git a/app/openssl/openssl.config b/app/openssl/openssl.config index 867711fe..aa028705 100644 --- a/app/openssl/openssl.config +++ b/app/openssl/openssl.config @@ -13,7 +13,6 @@ no-md2 \  no-mdc2 \  no-rc5 \  no-rdrand \ -no-ripemd \  no-rfc3779 \  no-rsax \  no-sctp \ @@ -54,7 +53,6 @@ crypto/cast \  crypto/idea \  crypto/md2 \  crypto/rc5 \ -crypto/ripemd \  crypto/seed \  crypto/whrlpool \  demos \ @@ -96,6 +94,7 @@ README.ASN1 \  README.ENGINE \  apps/CA.com \  apps/Makefile \ +apps/Makefile.save \  apps/install-apps.com \  apps/makeapps.com \  apps/openssl-vms.cnf \ @@ -105,10 +104,14 @@ apps/vms_decc_init.c \  config \  crypto/LPdir_vms.c \  crypto/Makefile \ +crypto/Makefile.save \  crypto/aes/Makefile \ +crypto/aes/Makefile.save \  crypto/asn1/Makefile \ +crypto/asn1/Makefile.save \  crypto/bf/INSTALL \  crypto/bf/Makefile \ +crypto/bf/Makefile.save \  crypto/bf/README \  crypto/bf/VERSION \  crypto/bf/asm/readme \ @@ -118,77 +121,117 @@ crypto/bf/bfs.cpp \  crypto/bf/bfspeed.c \  crypto/bf/bftest.c \  crypto/bio/Makefile \ +crypto/bio/Makefile.save \  crypto/bio/bss_rtcp.c \  crypto/bn/Makefile \ +crypto/bn/Makefile.save \  crypto/bn/asm/vms.mar \  crypto/bn/bn_x931p.c \  crypto/bn/vms-helper.c \  crypto/buffer/Makefile \ +crypto/buffer/Makefile.save \  crypto/cmac/Makefile \ +crypto/cmac/Makefile.save \  crypto/cms/Makefile \ +crypto/cms/Makefile.save \  crypto/comp/Makefile \ +crypto/comp/Makefile.save \  crypto/conf/Makefile \ +crypto/conf/Makefile.save \  crypto/crypto-lib.com \  crypto/des/Makefile \ +crypto/des/Makefile.save \  crypto/des/des-lib.com \  crypto/dh/Makefile \ +crypto/dh/Makefile.save \  crypto/dh/dh_prn.c \  crypto/dsa/Makefile \ +crypto/dsa/Makefile.save \  crypto/dso/Makefile \ +crypto/dso/Makefile.save \  crypto/dso/dso_beos.c \  crypto/dso/dso_vms.c \  crypto/dso/dso_win32.c \  crypto/ec/Makefile \ +crypto/ec/Makefile.save \  crypto/ec/ecp_nistp224.c \  crypto/ec/ecp_nistp256.c \  crypto/ec/ecp_nistp521.c \  crypto/ec/ecp_nistputil.c \  crypto/ecdh/Makefile \ +crypto/ecdh/Makefile.save \  crypto/ecdsa/Makefile \ +crypto/ecdsa/Makefile.save \  crypto/engine/Makefile \ +crypto/engine/Makefile.save \  crypto/engine/eng_rdrand.c \  crypto/engine/eng_rsax.c \  crypto/err/Makefile \ +crypto/err/Makefile.save \  crypto/evp/Makefile \ +crypto/evp/Makefile.save \  crypto/evp/evp_fips.c \  crypto/evp/m_md2.c \  crypto/evp/m_sha.c \  crypto/fips_err.h \  crypto/fips_ers.c 
\  crypto/hmac/Makefile \ +crypto/hmac/Makefile.save \  crypto/install-crypto.com \  crypto/jpake/Makefile \  crypto/krb5/Makefile \ +crypto/krb5/Makefile.save \  crypto/lhash/Makefile \ +crypto/lhash/Makefile.save \  crypto/md4/Makefile \ +crypto/md4/Makefile.save \  crypto/md5/Makefile \ +crypto/md5/Makefile.save \  crypto/mdc2/Makefile \ +crypto/mdc2/Makefile.save \  crypto/modes/Makefile \ +crypto/modes/Makefile.save \  crypto/modes/cts128.c \  crypto/modes/modes.h \  crypto/o_fips.c \  crypto/objects/Makefile \ +crypto/objects/Makefile.save \  crypto/ocsp/Makefile \ +crypto/ocsp/Makefile.save \  crypto/pem/Makefile \ +crypto/pem/Makefile.save \  crypto/pkcs12/Makefile \ +crypto/pkcs12/Makefile.save \  crypto/pkcs7/Makefile \ +crypto/pkcs7/Makefile.save \  crypto/pkcs7/bio_pk7.c \  crypto/ppccap.c \  crypto/pqueue/Makefile \ +crypto/pqueue/Makefile.save \  crypto/rand/Makefile \ +crypto/rand/Makefile.save \  crypto/rand/rand_vms.c \  crypto/rc2/Makefile \ +crypto/rc2/Makefile.save \  crypto/rc4/Makefile \ +crypto/rc4/Makefile.save \ +crypto/ripemd/Makefile \ +crypto/ripemd/Makefile.save \  crypto/rsa/Makefile \ +crypto/rsa/Makefile.save \  crypto/sha/Makefile \ +crypto/sha/Makefile.save \  crypto/sha/sha_one.c \  crypto/srp/Makefile \ +crypto/srp/Makefile.save \  crypto/srp/srptest.c \  crypto/stack/Makefile \ +crypto/stack/Makefile.save \  crypto/store/Makefile \  crypto/threads/pthreads-vms.com \  crypto/threads/win32.bat \  crypto/ts/Makefile \ +crypto/ts/Makefile.save \  crypto/ts/ts.h \  crypto/ts/ts_asn1.c \  crypto/ts/ts_conf.c \ @@ -201,10 +244,14 @@ crypto/ts/ts_rsp_utils.c \  crypto/ts/ts_rsp_verify.c \  crypto/ts/ts_verify_ctx.c \  crypto/txt_db/Makefile \ +crypto/txt_db/Makefile.save \  crypto/ui/Makefile \ +crypto/ui/Makefile.save \  crypto/vms_rms.h  crypto/x509/Makefile \ +crypto/x509/Makefile.save \  crypto/x509v3/Makefile \ +crypto/x509v3/Makefile.save \  include/openssl/camellia.h \  include/openssl/cast.h \  include/openssl/idea.h \ @@ -216,11 +263,11 @@ makevms.com \  openssl.doxy \  openssl.spec \  ssl/Makefile \ -ssl/heartbeat_test.c \ +ssl/Makefile.save \  ssl/install-ssl.com \  ssl/ssl-lib.com \  ssl/ssl_task.c \ -" +"   NEEDED_SOURCES="\  apps \ @@ -238,24 +285,19 @@ NO_WINDOWS_BRAINDEATH \  "  OPENSSL_CRYPTO_DEFINES_arm="\ -AES_ASM \ -BSAES_ASM \ -DES_UNROLL \ -GHASH_ASM \  OPENSSL_BN_ASM_GF2m \  OPENSSL_BN_ASM_MONT \  OPENSSL_CPUID_OBJ \ +GHASH_ASM \ +AES_ASM \ +BSAES_ASM \  SHA1_ASM \  SHA256_ASM \  SHA512_ASM \  "  OPENSSL_CRYPTO_DEFINES_arm64="\ -DES_UNROLL \ -OPENSSL_CPUID_OBJ \ -SHA1_ASM \ -SHA256_ASM \ -SHA512_ASM \ +OPENSSL_NO_ASM \  "  OPENSSL_CRYPTO_DEFINES_mips="\ @@ -266,40 +308,39 @@ SHA256_ASM \  "  OPENSSL_CRYPTO_DEFINES_x86="\ -AES_ASM \ -DES_PTR \ -DES_RISC1 \ -DES_UNROLL \ -GHASH_ASM \ -MD5_ASM \ +OPENSSL_IA32_SSE2 \  OPENSSL_BN_ASM_GF2m \  OPENSSL_BN_ASM_MONT \  OPENSSL_BN_ASM_PART_WORDS \ -OPENSSL_CPUID_OBJ \ -OPENSSL_IA32_SSE2 \ -RC4_INDEX \ -RMD160_ASM \ +AES_ASM \ +VPAES_ASM \ +GHASH_ASM \  SHA1_ASM \  SHA256_ASM \  SHA512_ASM \ -VPAES_ASM \ +MD5_ASM \ +DES_PTR \ +DES_RISC1 \ +DES_UNROLL \ +OPENSSL_CPUID_OBJ \  "  OPENSSL_CRYPTO_DEFINES_x86_64="\ -AES_ASM \ -BSAES_ASM \ -DES_UNROLL \ -GHASH_ASM \ -MD5_ASM \  OPENSSL_BN_ASM_GF2m \  OPENSSL_BN_ASM_MONT \  OPENSSL_BN_ASM_MONT5 \ -OPENSSL_CPUID_OBJ \ -OPENSSL_IA32_SSE2 \ +AES_ASM \ +VPAES_ASM \ +BSAES_ASM \ +GHASH_ASM \  SHA1_ASM \  SHA256_ASM \  SHA512_ASM \ -VPAES_ASM \ +MD5_ASM \ +DES_PTR \ +DES_RISC1 \ +DES_UNROLL \ +OPENSSL_CPUID_OBJ \  "  OPENSSL_CRYPTO_INCLUDES="\ @@ -635,6 +676,7 @@ 
crypto/evp/m_md4.c \  crypto/evp/m_md5.c \  crypto/evp/m_mdc2.c \  crypto/evp/m_null.c \ +crypto/evp/m_ripemd.c \  crypto/evp/m_sha1.c \  crypto/evp/m_sigver.c \  crypto/evp/m_wp.c \ @@ -733,6 +775,8 @@ crypto/rc2/rc2ofb64.c \  crypto/rc4/rc4_enc.c \  crypto/rc4/rc4_skey.c \  crypto/rc4/rc4_utl.c \ +crypto/ripemd/rmd_dgst.c \ +crypto/ripemd/rmd_one.c \  crypto/rsa/rsa_ameth.c \  crypto/rsa/rsa_asn1.c \  crypto/rsa/rsa_chk.c \ @@ -829,14 +873,12 @@ crypto/x509v3/v3err.c \  OPENSSL_CRYPTO_SOURCES_arm="\  crypto/aes/asm/aes-armv4.S \ -crypto/aes/asm/aesv8-armx.S \  crypto/aes/asm/bsaes-armv7.S \  crypto/armcap.c \  crypto/armv4cpuid.S \  crypto/bn/asm/armv4-gf2m.S \  crypto/bn/asm/armv4-mont.S \  crypto/modes/asm/ghash-armv4.S \ -crypto/modes/asm/ghashv8-armx.S \  crypto/sha/asm/sha1-armv4-large.S \  crypto/sha/asm/sha256-armv4.S \  crypto/sha/asm/sha512-armv4.S \ @@ -848,13 +890,6 @@ crypto/mem_clr.c \  "  OPENSSL_CRYPTO_SOURCES_arm64="\ -crypto/armcap.c \ -crypto/arm64cpuid.S \ -crypto/aes/asm/aesv8-armx-64.S \ -crypto/modes/asm/ghashv8-armx-64.S \ -crypto/sha/asm/sha1-armv8.S \ -crypto/sha/asm/sha256-armv8.S \ -crypto/sha/asm/sha512-armv8.S \  "  OPENSSL_CRYPTO_SOURCES_EXCLUDES_arm64="\ @@ -1047,4 +1082,23 @@ apps/version.c \  apps/x509.c \  " +OPENSSL_PATCHES="\ +progs.patch \ +handshake_cutthrough.patch \ +jsse.patch \ +channelid.patch \ +eng_dyn_dirs.patch \ +fix_clang_build.patch \ +tls12_digests.patch \ +alpn.patch \ +cbc_record_splitting.patch \ +dsa_nonce.patch \ +ecdhe_psk.patch \ +wincrypt.patch \ +tls_psk_hint.patch \ +arm_asm.patch \ +psk_client_callback_128_byte_id_bug.patch \ +early_ccs.patch \ +" +  source ./openssl.trusty.config diff --git a/app/openssl/openssl.version b/app/openssl/openssl.version index ab2e62bf..2e849911 100644 --- a/app/openssl/openssl.version +++ b/app/openssl/openssl.version @@ -1 +1 @@ -OPENSSL_VERSION=1.0.1h +OPENSSL_VERSION=1.0.1g diff --git a/app/openssl/patches/README b/app/openssl/patches/README index 13e9bd8b..2ff69282 100644 --- a/app/openssl/patches/README +++ b/app/openssl/patches/README @@ -53,19 +53,6 @@ ecdhe_psk.patch  Adds support for ECDHE Pre-Shared Key (PSK) TLS cipher suites. -ecdhe_psk_part2.patch - -Removes ECHDE-PSK cipher suites with SHA-2 because they cannot be used with -SSLv3 (and there's no way to express that in OpenSSL's configuration). Adds -SHA-1 based ECDHE-PSK AES-CBC cipher suites instead. 
- -arm_asm.patch - -Adds newer ARM assembly pack with BSAES for ARMv7 and acceleration for ARMv8 -Based on branch available at: -https://git.linaro.org/people/ard.biesheuvel/openssl.git/shortlog/refs/heads/openssl-1.0.1f-with-arm-patches -c7b582ef23eb6f4386664e841e6e406d984c38d3^..cb8b1ab03e5c179a719afe83f03fecb1c2c78730 -  tls_psk_hint.patch  Fixes issues with TLS-PSK identity hint implementation where diff --git a/app/openssl/ssl/bio_ssl.c b/app/openssl/ssl/bio_ssl.c index 06a13de4..e9552cae 100644 --- a/app/openssl/ssl/bio_ssl.c +++ b/app/openssl/ssl/bio_ssl.c @@ -206,10 +206,6 @@ static int ssl_read(BIO *b, char *out, int outl)  		BIO_set_retry_special(b);  		retry_reason=BIO_RR_SSL_X509_LOOKUP;  		break; -	case SSL_ERROR_WANT_CHANNEL_ID_LOOKUP: -		BIO_set_retry_special(b); -		retry_reason=BIO_RR_SSL_CHANNEL_ID_LOOKUP; -		break;  	case SSL_ERROR_WANT_ACCEPT:  		BIO_set_retry_special(b);  		retry_reason=BIO_RR_ACCEPT; @@ -284,10 +280,6 @@ static int ssl_write(BIO *b, const char *out, int outl)  		BIO_set_retry_special(b);  		retry_reason=BIO_RR_SSL_X509_LOOKUP;  		break; -	case SSL_ERROR_WANT_CHANNEL_ID_LOOKUP: -		BIO_set_retry_special(b); -		retry_reason=BIO_RR_SSL_CHANNEL_ID_LOOKUP; -		break;  	case SSL_ERROR_WANT_CONNECT:  		BIO_set_retry_special(b);  		retry_reason=BIO_RR_CONNECT; diff --git a/app/openssl/ssl/d1_both.c b/app/openssl/ssl/d1_both.c index 04aa2310..2e8cf681 100644 --- a/app/openssl/ssl/d1_both.c +++ b/app/openssl/ssl/d1_both.c @@ -627,16 +627,7 @@ dtls1_reassemble_fragment(SSL *s, struct hm_header_st* msg_hdr, int *ok)  		frag->msg_header.frag_off = 0;  		}  	else -		{  		frag = (hm_fragment*) item->data; -		if (frag->msg_header.msg_len != msg_hdr->msg_len) -			{ -			item = NULL; -			frag = NULL; -			goto err; -			} -		} -  	/* If message is already reassembled, this must be a  	 * retransmit and can be dropped. 
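The d1_both.c hunk above removes a consistency check from `dtls1_reassemble_fragment`: every DTLS fragment of one handshake message must claim the same total `msg_len` as the reassembly already in progress, or an attacker-chosen length could steer writes past the allocated buffer. A generic sketch of the validation (types and names are hypothetical, not the real hm_fragment):

```c
#include <stddef.h>
#include <string.h>

/* A partially reassembled handshake message: buf holds msg_len bytes. */
struct reassembly {
    size_t msg_len;
    unsigned char *buf;
};

/* Accept a fragment only if its claimed total length matches the
 * reassembly in progress and it fits inside the allocated buffer. */
static int add_fragment(struct reassembly *r, size_t claimed_len,
                        size_t off, const unsigned char *data, size_t len)
{
    if (claimed_len != r->msg_len)
        return 0;  /* mismatched total length: drop, don't trust it */
    if (off > r->msg_len || len > r->msg_len - off)
        return 0;  /* fragment would overflow the buffer */
    memcpy(r->buf + off, data, len);
    return 1;
}
```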
@@ -683,8 +674,8 @@ dtls1_reassemble_fragment(SSL *s, struct hm_header_st* msg_hdr, int *ok)  		item = pitem_new(seq64be, frag);  		if (item == NULL)  			{ -			i = -1;  			goto err; +			i = -1;  			}  		pqueue_insert(s->d1->buffered_messages, item); @@ -793,7 +784,6 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok)  	int i,al;  	struct hm_header_st msg_hdr; -	redo:  	/* see if we have the required fragment already */  	if ((frag_len = dtls1_retrieve_buffered_fragment(s,max,ok)) || *ok)  		{ @@ -852,7 +842,8 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok)  					s->msg_callback_arg);  			s->init_num = 0; -			goto redo; +			return dtls1_get_message_fragment(s, st1, stn, +				max, ok);  			}  		else /* Incorrectly formated Hello request */  			{ diff --git a/app/openssl/ssl/d1_lib.c b/app/openssl/ssl/d1_lib.c index 6bde16fa..106939f2 100644 --- a/app/openssl/ssl/d1_lib.c +++ b/app/openssl/ssl/d1_lib.c @@ -176,12 +176,9 @@ static void dtls1_clear_queues(SSL *s)  	while ( (item = pqueue_pop(s->d1->buffered_app_data.q)) != NULL)  		{ -		rdata = (DTLS1_RECORD_DATA *) item->data; -		if (rdata->rbuf.buf) -			{ -			OPENSSL_free(rdata->rbuf.buf); -			} -		OPENSSL_free(item->data); +		frag = (hm_fragment *)item->data; +		OPENSSL_free(frag->fragment); +		OPENSSL_free(frag);  		pitem_free(item);  		}  	} diff --git a/app/openssl/ssl/d1_pkt.c b/app/openssl/ssl/d1_pkt.c index 363fc8c8..5b84e97c 100644 --- a/app/openssl/ssl/d1_pkt.c +++ b/app/openssl/ssl/d1_pkt.c @@ -241,28 +241,27 @@ dtls1_buffer_record(SSL *s, record_pqueue *queue, unsigned char *priority)  	}  #endif -	s->packet = NULL; -	s->packet_length = 0; -	memset(&(s->s3->rbuf), 0, sizeof(SSL3_BUFFER)); -	memset(&(s->s3->rrec), 0, sizeof(SSL3_RECORD)); -	 -	if (!ssl3_setup_buffers(s)) +	/* insert should not fail, since duplicates are dropped */ +	if (pqueue_insert(queue->q, item) == NULL)  		{ -		SSLerr(SSL_F_DTLS1_BUFFER_RECORD, ERR_R_INTERNAL_ERROR);  		OPENSSL_free(rdata);  		pitem_free(item);  		return(0);  		} -	/* insert should not fail, since duplicates are dropped */ -	if (pqueue_insert(queue->q, item) == NULL) +	s->packet = NULL; +	s->packet_length = 0; +	memset(&(s->s3->rbuf), 0, sizeof(SSL3_BUFFER)); +	memset(&(s->s3->rrec), 0, sizeof(SSL3_RECORD)); +	 +	if (!ssl3_setup_buffers(s))  		{  		SSLerr(SSL_F_DTLS1_BUFFER_RECORD, ERR_R_INTERNAL_ERROR);  		OPENSSL_free(rdata);  		pitem_free(item);  		return(0);  		} - +	  	return(1);  	} diff --git a/app/openssl/ssl/d1_srvr.c b/app/openssl/ssl/d1_srvr.c index c181db6d..09f47627 100644 --- a/app/openssl/ssl/d1_srvr.c +++ b/app/openssl/ssl/d1_srvr.c @@ -1356,7 +1356,6 @@ int dtls1_send_server_key_exchange(SSL *s)  			    (unsigned char *)encodedPoint,   			    encodedlen);  			OPENSSL_free(encodedPoint); -			encodedPoint = NULL;  			p += encodedlen;  			}  #endif diff --git a/app/openssl/ssl/s3_both.c b/app/openssl/ssl/s3_both.c index 607990d0..d9e18a31 100644 --- a/app/openssl/ssl/s3_both.c +++ b/app/openssl/ssl/s3_both.c @@ -561,7 +561,7 @@ long ssl3_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok)  #endif  	/* Feed this message into MAC computation. 
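The d1_pkt.c hunk above reorders `dtls1_buffer_record`: one revision queues the copied record before detaching the connection's buffers, the other after, and each failure path must then release `rdata` and `item` exactly once or a double free results. A generic sketch of that ownership discipline (the types here are hypothetical):

```c
#include <stdlib.h>
#include <string.h>

struct item { void *data; };

/* Buffer a copy of pkt into queue. On any failure, everything allocated
 * so far is freed exactly once and the caller's state is untouched; on
 * success the queue owns both allocations from the moment of insertion. */
static int buffer_record(struct item **queue, size_t *n,
                         const void *pkt, size_t len)
{
    void *copy = malloc(len);
    struct item *it = malloc(sizeof *it);
    if (copy == NULL || it == NULL) {
        free(copy);  /* free(NULL) is a no-op, so this is safe */
        free(it);
        return 0;
    }
    memcpy(copy, pkt, len);
    it->data = copy;
    queue[(*n)++] = it;  /* ownership transfers here, not before */
    return 1;
}
```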
*/ -	if (*((unsigned char*) s->init_buf->data) != SSL3_MT_ENCRYPTED_EXTENSIONS) +	if (*(unsigned char*)s->init_buf->data != SSL3_MT_ENCRYPTED_EXTENSIONS)  		ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4);  	if (s->msg_callback)  		s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, s->init_buf->data, (size_t)s->init_num + 4, s, s->msg_callback_arg); diff --git a/app/openssl/ssl/s3_clnt.c b/app/openssl/ssl/s3_clnt.c index 486f538b..5e15b75c 100644 --- a/app/openssl/ssl/s3_clnt.c +++ b/app/openssl/ssl/s3_clnt.c @@ -215,12 +215,24 @@ int ssl3_connect(SSL *s)  		}  #endif +// BEGIN android-added +#if 0 +/* Send app data in separate packet, otherwise, some particular site + * (only one site so far) closes the socket. http://b/2511073 + * Note: there is a very small chance that two TCP packets + * could be arriving at server combined into a single TCP packet, + * then trigger that site to break. We haven't encounter that though. + */ +// END android-added  	if (SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH)  		{  		/* Send app data along with CCS/Finished */  		s->s3->flags |= SSL3_FLAGS_DELAY_CLIENT_FINISHED;  		} +// BEGIN android-added +#endif +// END android-added  	for (;;)  		{  		state=s->state; @@ -546,20 +558,7 @@ int ssl3_connect(SSL *s)  				}  			else  				{ -				/* This is a non-resumption handshake. If it -				 * involves ChannelID, then record the -				 * handshake hashes at this point in the -				 * session so that any resumption of this -				 * session with ChannelID can sign those -				 * hashes. */ -				if (s->s3->tlsext_channel_id_new) -					{ -					ret = tls1_record_handshake_hashes_for_channel_id(s); -					if (ret <= 0) -						goto end; -					} -				if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) -				    && ssl3_can_cutthrough(s) +				if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) && SSL_get_cipher_bits(s, NULL) >= 128  				    && s->s3->previous_server_finished_len == 0 /* no cutthrough on renegotiation (would complicate the state machine) */  				   )  					{ @@ -608,7 +607,6 @@ int ssl3_connect(SSL *s)  		case SSL3_ST_CR_FINISHED_A:  		case SSL3_ST_CR_FINISHED_B: -  			s->s3->flags |= SSL3_FLAGS_CCS_OK;  			ret=ssl3_get_finished(s,SSL3_ST_CR_FINISHED_A,  				SSL3_ST_CR_FINISHED_B); @@ -2304,7 +2302,7 @@ int ssl3_get_server_done(SSL *s)  int ssl3_send_client_key_exchange(SSL *s)  	{  	unsigned char *p,*d; -	int n = 0; +	int n;  	unsigned long alg_k;  	unsigned long alg_a;  #ifndef OPENSSL_NO_RSA @@ -2690,13 +2688,6 @@ int ssl3_send_client_key_exchange(SSL *s)  			unsigned int i;  #endif -			if (s->session->sess_cert == NULL)  -				{ -				ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_UNEXPECTED_MESSAGE); -				SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,SSL_R_UNEXPECTED_MESSAGE); -				goto err; -				} -  			/* Did we send out the client's  			 * ECDH share for use in premaster  			 * computation as part of client certificate? 
@@ -3036,7 +3027,7 @@ int ssl3_send_client_key_exchange(SSL *s)  				}  			}  #endif -		else if (!(alg_k & SSL_kPSK) || ((alg_k & SSL_kPSK) && !(alg_a & SSL_aPSK))) +		else if (!(alg_k & SSL_kPSK))  			{  			ssl3_send_alert(s, SSL3_AL_FATAL,  			    SSL_AD_HANDSHAKE_FAILURE); @@ -3500,29 +3491,10 @@ int ssl3_send_channel_id(SSL *s)  	if (s->state != SSL3_ST_CW_CHANNEL_ID_A)  		return ssl3_do_write(s, SSL3_RT_HANDSHAKE); -	if (!s->tlsext_channel_id_private && s->ctx->channel_id_cb) -		{ -		EVP_PKEY *key = NULL; -		s->ctx->channel_id_cb(s, &key); -		if (key != NULL) -			{ -			s->tlsext_channel_id_private = key; -			} -		} -	if (!s->tlsext_channel_id_private) -		{ -		s->rwstate=SSL_CHANNEL_ID_LOOKUP; -		return (-1); -		} -	s->rwstate=SSL_NOTHING; -  	d = (unsigned char *)s->init_buf->data;  	*(d++)=SSL3_MT_ENCRYPTED_EXTENSIONS;  	l2n3(2 + 2 + TLSEXT_CHANNEL_ID_SIZE, d); -	if (s->s3->tlsext_channel_id_new) -		s2n(TLSEXT_TYPE_channel_id_new, d); -	else -		s2n(TLSEXT_TYPE_channel_id, d); +	s2n(TLSEXT_TYPE_channel_id, d);  	s2n(TLSEXT_CHANNEL_ID_SIZE, d);  	EVP_MD_CTX_init(&md_ctx); @@ -3533,9 +3505,9 @@ int ssl3_send_channel_id(SSL *s)  		SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY);  		goto err;  		} -	/* i2d_PublicKey will produce an ANSI X9.62 public key which, for a -	 * P-256 key, is 0x04 (meaning uncompressed) followed by the x and y -	 * field elements as 32-byte, big-endian numbers. */ +	// i2d_PublicKey will produce an ANSI X9.62 public key which, for a +	// P-256 key, is 0x04 (meaning uncompressed) followed by the x and y +	// field elements as 32-byte, big-endian numbers.  	if (public_key_len != 65)  		{  		SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CHANNEL_ID_NOT_P256); @@ -3581,14 +3553,14 @@ int ssl3_send_channel_id(SSL *s)  		}  	derp = der_sig; -	sig = d2i_ECDSA_SIG(NULL, (const unsigned char**) &derp, sig_len); +	sig = d2i_ECDSA_SIG(NULL, (const unsigned char**)&derp, sig_len);  	if (sig == NULL)  		{  		SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_D2I_ECDSA_SIG);  		goto err;  		} -	/* The first byte of public_key will be 0x4, denoting an uncompressed key. */ +	// The first byte of public_key will be 0x4, denoting an uncompressed key.  	
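The `ssl3_send_channel_id` code above relies on `i2d_PublicKey` producing the 65-byte ANSI X9.62 uncompressed encoding for a P-256 key: a leading 0x04 followed by the 32-byte x and y coordinates it then copies out. A standalone sketch that checks exactly that shape (OpenSSL 1.0.x API; error handling trimmed for brevity):

```c
#include <openssl/ec.h>
#include <openssl/evp.h>
#include <openssl/obj_mac.h>

int main(void)
{
    /* Generate a throwaway P-256 key; error checks omitted in this sketch. */
    EC_KEY *ec = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1);
    EC_KEY_generate_key(ec);

    EVP_PKEY *pkey = EVP_PKEY_new();
    EVP_PKEY_assign_EC_KEY(pkey, ec);  /* pkey owns ec from here */

    unsigned char buf[65], *p = buf;
    int len = i2d_PublicKey(pkey, &p);

    /* Uncompressed X9.62 point: 65 bytes, first byte 0x04. */
    int ok = (len == 65 && buf[0] == 0x04);

    EVP_PKEY_free(pkey);
    return ok ? 0 : 1;
}
```

The `public_key + 1` in the memcpy above is skipping that 0x04 marker to extract the raw 64 coordinate bytes.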
memcpy(d, public_key + 1, 64);  	d += 64;  	memset(d, 0, 2 * 32); diff --git a/app/openssl/ssl/s3_enc.c b/app/openssl/ssl/s3_enc.c index 53b94b7c..90fbb180 100644 --- a/app/openssl/ssl/s3_enc.c +++ b/app/openssl/ssl/s3_enc.c @@ -728,7 +728,7 @@ int n_ssl3_mac(SSL *ssl, unsigned char *md, int send)  		}  	t=EVP_MD_CTX_size(hash); -	if (t < 0 || t > 20) +	if (t < 0)  		return -1;  	md_size=t;  	npad=(48/md_size)*md_size; diff --git a/app/openssl/ssl/s3_lib.c b/app/openssl/ssl/s3_lib.c index 896d1e19..f84da7f5 100644 --- a/app/openssl/ssl/s3_lib.c +++ b/app/openssl/ssl/s3_lib.c @@ -2828,34 +2828,35 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={  #ifndef OPENSSL_NO_PSK      /* ECDH PSK ciphersuites from RFC 5489 */ -	/* Cipher C035 */ + +	/* Cipher C037 */  	{  	1, -	TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA, -	TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA, +	TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256, +	TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256,  	SSL_kEECDH,  	SSL_aPSK,  	SSL_AES128, -	SSL_SHA1, +	SSL_SHA256,  	SSL_TLSV1, -	SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, -	SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, +	SSL_NOT_EXP|SSL_HIGH, +	SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA256,  	128,  	128,  	}, -	/* Cipher C036 */ +	/* Cipher C038 */  	{  	1, -	TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA, -	TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA, +	TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384, +	TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384,  	SSL_kEECDH,  	SSL_aPSK,  	SSL_AES256, -	SSL_SHA1, +	SSL_SHA384,  	SSL_TLSV1, -	SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, -	SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, +	SSL_NOT_EXP|SSL_HIGH, +	SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA384,  	256,  	256,  	}, @@ -3411,6 +3412,8 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg)  		break;  #endif  	case SSL_CTRL_CHANNEL_ID: +		if (!s->server) +			break;  		s->tlsext_channel_id_enabled = 1;  		ret = 1;  		break; @@ -3426,7 +3429,7 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg)  			}  		if (s->tlsext_channel_id_private)  			EVP_PKEY_free(s->tlsext_channel_id_private); -		s->tlsext_channel_id_private = EVP_PKEY_dup((EVP_PKEY*) parg); +		s->tlsext_channel_id_private = (EVP_PKEY*) parg;  		ret = 1;  		break; @@ -3741,7 +3744,7 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg)  			}  		if (ctx->tlsext_channel_id_private)  			EVP_PKEY_free(ctx->tlsext_channel_id_private); -		ctx->tlsext_channel_id_private = EVP_PKEY_dup((EVP_PKEY*) parg); +		ctx->tlsext_channel_id_private = (EVP_PKEY*) parg;  		break;  	default: diff --git a/app/openssl/ssl/s3_pkt.c b/app/openssl/ssl/s3_pkt.c index 60c4f1a4..75997ac2 100644 --- a/app/openssl/ssl/s3_pkt.c +++ b/app/openssl/ssl/s3_pkt.c @@ -110,7 +110,6 @@   */  #include <stdio.h> -#include <limits.h>  #include <errno.h>  #define USE_SOCKETS  #include "ssl_locl.h" @@ -581,11 +580,10 @@ int ssl3_do_compress(SSL *ssl)  int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)  	{  	const unsigned char *buf=buf_; -	unsigned int n,nw; -	int i,tot; +	unsigned int tot,n,nw; +	int i;  	s->rwstate=SSL_NOTHING; -	OPENSSL_assert(s->s3->wnum <= INT_MAX);  	tot=s->s3->wnum;  	s->s3->wnum=0; @@ -600,22 +598,6 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)  			}  		} -	/* ensure that if we end up with a smaller value of data to write  -	 * out than the the original len from a write which didn't complete  -	 * for non-blocking I/O and also somehow ended up avoiding  -	 * the check for this in ssl3_write_pending/SSL_R_BAD_WRITE_RETRY as -	 * it must never be possible to end up with (len-tot) as a large -	 
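The s3_enc.c hunk above drops the upper bound on the EVP digest size in `n_ssl3_mac`. The SSLv3 MAC pads to a 48-byte block scaled by `md_size`, an arithmetic that only fits MD5 (16) and SHA-1 (20) digests; anything larger must be rejected before it reaches the pad computation. A sketch of the arithmetic:

```c
#include <stdio.h>

/* SSLv3 MAC padding: pad to a 48-byte block in md_size-sized steps.
 * Only MD5 (16) and SHA-1 (20) make sense here; the bound removed in
 * the hunk above is what kept SHA-256-sized digests out. */
static int npad_for(int md_size)
{
    if (md_size <= 0 || md_size > 20)
        return -1;                 /* reject oversized digests */
    return (48 / md_size) * md_size;
}

int main(void)
{
    printf("md5:    %d\n", npad_for(16));  /* 48 */
    printf("sha1:   %d\n", npad_for(20));  /* 40 */
    printf("sha256: %d\n", npad_for(32));  /* -1 */
    return 0;
}
```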
* number that will then promptly send beyond the end of the users -	 * buffer ... so we trap and report the error in a way the user -	 * will notice -	 */ -	if (len < tot) -		{ -		SSLerr(SSL_F_SSL3_WRITE_BYTES,SSL_R_BAD_LENGTH); -		return(-1); -		} - -  	n=(len-tot);  	for (;;)  		{ @@ -686,6 +668,9 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,  	SSL3_BUFFER *wb=&(s->s3->wbuf);  	SSL_SESSION *sess; + 	if (wb->buf == NULL) +		if (!ssl3_setup_write_buffer(s)) +			return -1;  	/* first check if there is a SSL3_BUFFER still being written  	 * out.  This will happen with non blocking IO */ @@ -701,10 +686,6 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,  		/* if it went, fall through and send more stuff */  		} - 	if (wb->buf == NULL) -		if (!ssl3_setup_write_buffer(s)) -			return -1; -  	if (len == 0)  		return 0; @@ -1086,7 +1067,7 @@ start:  				{  				s->rstate=SSL_ST_READ_HEADER;  				rr->off=0; -				if (s->mode & SSL_MODE_RELEASE_BUFFERS && s->s3->rbuf.left == 0) +				if (s->mode & SSL_MODE_RELEASE_BUFFERS)  					ssl3_release_read_buffer(s);  				}  			} @@ -1331,12 +1312,10 @@ start:  		if (!(s->s3->flags & SSL3_FLAGS_CCS_OK))  			{  			al=SSL_AD_UNEXPECTED_MESSAGE; -			SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_CCS_RECEIVED_EARLY); +			SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_UNEXPECTED_CCS);  			goto f_err;  			} -		s->s3->flags &= ~SSL3_FLAGS_CCS_OK; -  		rr->length=0;  		if (s->msg_callback) @@ -1471,7 +1450,12 @@ int ssl3_do_change_cipher_spec(SSL *s)  	if (s->s3->tmp.key_block == NULL)  		{ -		if (s->session == NULL || s->session->master_key_length == 0) +		if (s->session->master_key_length == 0) +			{ +			SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_UNEXPECTED_CCS); +			return (0); +			} +		if (s->session == NULL)  			{  			/* might happen if dtls1_read_bytes() calls this */  			SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_CCS_RECEIVED_EARLY); diff --git a/app/openssl/ssl/s3_srvr.c b/app/openssl/ssl/s3_srvr.c index f83c9366..1976efa7 100644 --- a/app/openssl/ssl/s3_srvr.c +++ b/app/openssl/ssl/s3_srvr.c @@ -675,8 +675,8 @@ int ssl3_accept(SSL *s)  		case SSL3_ST_SR_CERT_VRFY_A:  		case SSL3_ST_SR_CERT_VRFY_B: -			s->s3->flags |= SSL3_FLAGS_CCS_OK;  			/* we should decide if we expected this one */ +			s->s3->flags |= SSL3_FLAGS_CCS_OK;  			ret=ssl3_get_cert_verify(s);  			if (ret <= 0) goto end; @@ -694,6 +694,7 @@ int ssl3_accept(SSL *s)  			channel_id = s->s3->tlsext_channel_id_valid;  #endif +			s->s3->flags |= SSL3_FLAGS_CCS_OK;  			if (next_proto_neg)  				s->state=SSL3_ST_SR_NEXT_PROTO_A;  			else if (channel_id) @@ -728,7 +729,6 @@ int ssl3_accept(SSL *s)  		case SSL3_ST_SR_FINISHED_A:  		case SSL3_ST_SR_FINISHED_B: -			s->s3->flags |= SSL3_FLAGS_CCS_OK;  			ret=ssl3_get_finished(s,SSL3_ST_SR_FINISHED_A,  				SSL3_ST_SR_FINISHED_B);  			if (ret <= 0) goto end; @@ -740,15 +740,6 @@ int ssl3_accept(SSL *s)  #endif  			else  				s->state=SSL3_ST_SW_CHANGE_A; -			/* If this is a full handshake with ChannelID then -			 * record the hashshake hashes in |s->session| in case -			 * we need them to verify a ChannelID signature on a -			 * resumption of this session in the future. 
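Earlier in the s3_pkt.c diff above, the `len < tot` guard in `ssl3_write_bytes` is removed along with the comment explaining it: on a retried nonblocking write, `tot` bytes were already consumed by an earlier call, so a caller-supplied `len` smaller than `tot` would make `(len - tot)` wrap to a huge unsigned count and write past the user's buffer. A standalone sketch of the guard (the state struct is hypothetical):

```c
#include <stdio.h>

struct wstate { unsigned int wnum; };  /* bytes written on a prior retry */

/* Sketch of the removed guard: reject a retry whose buffer is shorter
 * than what was already consumed, instead of letting len - tot wrap. */
static int write_bytes(struct wstate *s, const char *buf, int len)
{
    int tot = (int)s->wnum;
    s->wnum = 0;

    if (len < tot)
        return -1;  /* SSL_R_BAD_LENGTH in the real code */

    unsigned int n = (unsigned int)(len - tot);  /* safe: len >= tot */
    (void)buf;      /* a real implementation writes buf[tot..len) */
    return (int)n;
}

int main(void)
{
    struct wstate s = { 100 };
    printf("%d\n", write_bytes(&s, "x", 50));  /* -1: shrunk buffer caught */
    return 0;
}
```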
*/ -			if (!s->hit && s->s3->tlsext_channel_id_new) -				{ -				ret = tls1_record_handshake_hashes_for_channel_id(s); -				if (ret <= 0) goto end; -				}  			s->init_num=0;  			break; @@ -1477,22 +1468,6 @@ int ssl3_send_server_hello(SSL *s)  	if (s->state == SSL3_ST_SW_SRVR_HELLO_A)  		{ -		/* We only accept ChannelIDs on connections with ECDHE in order -		 * to avoid a known attack while we fix ChannelID itself. */ -		if (s->s3 && -		    s->s3->tlsext_channel_id_valid && -		    (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kEECDH) == 0) -			s->s3->tlsext_channel_id_valid = 0; - -		/* If this is a resumption and the original handshake didn't -		 * support ChannelID then we didn't record the original -		 * handshake hashes in the session and so cannot resume with -		 * ChannelIDs. */ -		if (s->hit && -		    s->s3->tlsext_channel_id_new && -		    s->session->original_handshake_hash_len == 0) -			s->s3->tlsext_channel_id_valid = 0; -  		buf=(unsigned char *)s->init_buf->data;  #ifdef OPENSSL_NO_TLSEXT  		p=s->s3->server_random; @@ -2168,11 +2143,6 @@ int ssl3_send_certificate_request(SSL *s)  		s->init_num=n+4;  		s->init_off=0;  #ifdef NETSCAPE_HANG_BUG -		if (!BUF_MEM_grow_clean(buf, s->init_num + 4)) -			{ -			SSLerr(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST,ERR_R_BUF_LIB); -			goto err; -			}  		p=(unsigned char *)s->init_buf->data + s->init_num;  		/* do the header */ @@ -2915,8 +2885,6 @@ int ssl3_get_client_key_exchange(SSL *s)  		unsigned char premaster_secret[32], *start;  		size_t outlen=32, inlen;  		unsigned long alg_a; -		int Ttag, Tclass; -		long Tlen;  		/* Get our certificate private key*/  		alg_a = s->s3->tmp.new_cipher->algorithm_auth; @@ -2938,16 +2906,28 @@ int ssl3_get_client_key_exchange(SSL *s)  				ERR_clear_error();  			}  		/* Decrypt session key */ -		if (ASN1_get_object((const unsigned char **)&p, &Tlen, &Ttag, &Tclass, n) != V_ASN1_CONSTRUCTED || -			Ttag != V_ASN1_SEQUENCE || -			Tclass != V_ASN1_UNIVERSAL) +		if ((*p!=( V_ASN1_SEQUENCE| V_ASN1_CONSTRUCTED)))  +			{ +			SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); +			goto gerr; +			} +		if (p[1] == 0x81) +			{ +			start = p+3; +			inlen = p[2]; +			} +		else if (p[1] < 0x80) +			{ +			start = p+2; +			inlen = p[1]; +			} +		else  			{  			SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);  			goto gerr;  			} -		start = p; -		inlen = Tlen;  		if (EVP_PKEY_decrypt(pkey_ctx,premaster_secret,&outlen,start,inlen) <=0)  +  			{  			SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);  			goto gerr; @@ -3695,7 +3675,6 @@ int ssl3_get_channel_id(SSL *s)  	EC_POINT* point = NULL;  	ECDSA_SIG sig;  	BIGNUM x, y; -	unsigned short expected_extension_type;  	if (s->state == SSL3_ST_SR_CHANNEL_ID_A && s->init_num == 0)  		{ @@ -3753,11 +3732,7 @@ int ssl3_get_channel_id(SSL *s)  	n2s(p, extension_type);  	n2s(p, extension_len); -	expected_extension_type = TLSEXT_TYPE_channel_id; -	if (s->s3->tlsext_channel_id_new) -		expected_extension_type = TLSEXT_TYPE_channel_id_new; - -	if (extension_type != expected_extension_type || +	if (extension_type != TLSEXT_TYPE_channel_id ||  	    extension_len != TLSEXT_CHANNEL_ID_SIZE)  		{  		SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_INVALID_MESSAGE); diff --git a/app/openssl/ssl/ssl.h b/app/openssl/ssl/ssl.h index a85841b3..54b0eb6c 100644 --- a/app/openssl/ssl/ssl.h +++ b/app/openssl/ssl/ssl.h @@ -544,13 +544,6 @@ struct ssl_session_st  #ifndef OPENSSL_NO_SRP  	char *srp_username;  #endif - -	/* original_handshake_hash contains the handshake 
hash (either -	 * SHA-1+MD5 or SHA-2, depending on TLS version) for the original, full -	 * handshake that created a session. This is used by Channel IDs during -	 * resumption. */ -	unsigned char original_handshake_hash[EVP_MAX_MD_SIZE]; -	unsigned int original_handshake_hash_len;  	};  #endif @@ -560,7 +553,7 @@ struct ssl_session_st  /* Allow initial connection to servers that don't support RI */  #define SSL_OP_LEGACY_SERVER_CONNECT			0x00000004L  #define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG		0x00000008L -#define SSL_OP_TLSEXT_PADDING				0x00000010L +#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG		0x00000010L  #define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER		0x00000020L  #define SSL_OP_SAFARI_ECDHE_ECDSA_BUG			0x00000040L  #define SSL_OP_SSLEAY_080_CLIENT_DH_BUG			0x00000080L @@ -569,8 +562,6 @@ struct ssl_session_st  /* Hasn't done anything since OpenSSL 0.9.7h, retained for compatibility */  #define SSL_OP_MSIE_SSLV2_RSA_PADDING			0x0 -/* Refers to ancient SSLREF and SSLv2, retained for compatibility */ -#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG		0x0  /* SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS is vestigial. Previously it disabled the   * insertion of empty records in CBC mode, but the empty records were commonly @@ -657,14 +648,12 @@ struct ssl_session_st   * TLS only.)  "Released" buffers are put onto a free-list in the context   * or just freed (depending on the context's setting for freelist_max_len). */  #define SSL_MODE_RELEASE_BUFFERS 0x00000010L -  /* Send the current time in the Random fields of the ClientHello and   * ServerHello records for compatibility with hypothetical implementations   * that require it.   */  #define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L  #define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L -  /* When set, clients may send application data before receipt of CCS   * and Finished.  This mode enables full-handshakes to 'complete' in   * one RTT. */ @@ -877,9 +866,6 @@ struct ssl_ctx_st  	/* get client cert callback */  	int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey); -	/* get channel id callback */ -	void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey); -      /* cookie generate callback */      int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie,           unsigned int *cookie_len); @@ -1042,10 +1028,6 @@ struct ssl_ctx_st  	/* If true, a client will advertise the Channel ID extension and a  	 * server will echo it. */  	char tlsext_channel_id_enabled; -	/* tlsext_channel_id_enabled_new is a hack to support both old and new -	 * ChannelID signatures. It indicates that a client should advertise the -	 * new ChannelID extension number. */ -	char tlsext_channel_id_enabled_new;  	/* The client's Channel ID private key. 
*/  	EVP_PKEY *tlsext_channel_id_private;  #endif @@ -1104,8 +1086,6 @@ void SSL_CTX_set_info_callback(SSL_CTX *ctx, void (*cb)(const SSL *ssl,int type,  void (*SSL_CTX_get_info_callback(SSL_CTX *ctx))(const SSL *ssl,int type,int val);  void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey));  int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL *ssl, X509 **x509, EVP_PKEY **pkey); -void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey)); -void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL *ssl, EVP_PKEY **pkey);  #ifndef OPENSSL_NO_ENGINE  int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e);  #endif @@ -1182,14 +1162,12 @@ const char *SSL_get_psk_identity(const SSL *s);  #define SSL_WRITING	2  #define SSL_READING	3  #define SSL_X509_LOOKUP	4 -#define SSL_CHANNEL_ID_LOOKUP	5  /* These will only be used when doing non-blocking IO */  #define SSL_want_nothing(s)	(SSL_want(s) == SSL_NOTHING)  #define SSL_want_read(s)	(SSL_want(s) == SSL_READING)  #define SSL_want_write(s)	(SSL_want(s) == SSL_WRITING)  #define SSL_want_x509_lookup(s)	(SSL_want(s) == SSL_X509_LOOKUP) -#define SSL_want_channel_id_lookup(s)	(SSL_want(s) == SSL_CHANNEL_ID_LOOKUP)  #define SSL_MAC_FLAG_READ_MAC_STREAM 1  #define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 @@ -1624,7 +1602,6 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)  #define SSL_ERROR_ZERO_RETURN		6  #define SSL_ERROR_WANT_CONNECT		7  #define SSL_ERROR_WANT_ACCEPT		8 -#define SSL_ERROR_WANT_CHANNEL_ID_LOOKUP	9  #define SSL_CTRL_NEED_TMP_RSA			1  #define SSL_CTRL_SET_TMP_RSA			2 @@ -1762,11 +1739,10 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)  #define SSL_set_tmp_ecdh(ssl,ecdh) \  	SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh) -/* SSL_enable_tls_channel_id either configures a TLS server to accept TLS client - * IDs from clients, or configure a client to send TLS client IDs to server. - * Returns 1 on success. */ -#define SSL_enable_tls_channel_id(s) \ -	SSL_ctrl(s,SSL_CTRL_CHANNEL_ID,0,NULL) +/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client + * IDs from clients. Returns 1 on success. */ +#define SSL_enable_tls_channel_id(ctx) \ +	SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL)  /* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to   * compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on   * success. 
*/ @@ -1816,7 +1792,7 @@ int	SSL_CIPHER_get_bits(const SSL_CIPHER *c,int *alg_bits);  char *	SSL_CIPHER_get_version(const SSL_CIPHER *c);  const char *	SSL_CIPHER_get_name(const SSL_CIPHER *c);  unsigned long 	SSL_CIPHER_get_id(const SSL_CIPHER *c); -const char *	SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); +const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher);  int	SSL_get_fd(const SSL *s);  int	SSL_get_rfd(const SSL *s); @@ -2731,6 +2707,7 @@ void ERR_load_SSL_strings(void);  #define SSL_R_WRONG_VERSION_NUMBER			 267  #define SSL_R_X509_LIB					 268  #define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS		 269 +#define SSL_R_UNEXPECTED_CCS				 388  #ifdef  __cplusplus  } diff --git a/app/openssl/ssl/ssl3.h b/app/openssl/ssl/ssl3.h index 83d59bff..f205f73d 100644 --- a/app/openssl/ssl/ssl3.h +++ b/app/openssl/ssl/ssl3.h @@ -388,6 +388,9 @@ typedef struct ssl3_buffer_st  #define TLS1_FLAGS_TLS_PADDING_BUG		0x0008  #define TLS1_FLAGS_SKIP_CERT_VERIFY		0x0010  #define TLS1_FLAGS_KEEP_HANDSHAKE		0x0020 +/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at + * this point in the handshake. If this flag is not set then received CCS + * records will cause a fatal error for the connection. */  #define SSL3_FLAGS_CCS_OK			0x0080  /* SSL3_FLAGS_SGC_RESTART_DONE is set when we @@ -555,11 +558,6 @@ typedef struct ssl3_state_st  	 * for Channel IDs and that tlsext_channel_id will be valid after the  	 * handshake. */  	char tlsext_channel_id_valid; -	/* tlsext_channel_id_new means that the updated Channel ID extension -	 * was negotiated. This is a temporary hack in the code to support both -	 * forms of Channel ID extension while we transition to the new format, -	 * which fixed a security issue. */ -	char tlsext_channel_id_new;  	/* For a server:  	 *     If |tlsext_channel_id_valid| is true, then this contains the  	 *     verified Channel ID from the client: a P256 point, (x,y), where @@ -680,11 +678,11 @@ typedef struct ssl3_state_st  #define SSL3_ST_SR_CERT_VRFY_B		(0x1A1|SSL_ST_ACCEPT)  #define SSL3_ST_SR_CHANGE_A		(0x1B0|SSL_ST_ACCEPT)  #define SSL3_ST_SR_CHANGE_B		(0x1B1|SSL_ST_ACCEPT) +#define SSL3_ST_SR_POST_CLIENT_CERT	(0x1BF|SSL_ST_ACCEPT)  #ifndef OPENSSL_NO_NEXTPROTONEG  #define SSL3_ST_SR_NEXT_PROTO_A		(0x210|SSL_ST_ACCEPT)  #define SSL3_ST_SR_NEXT_PROTO_B		(0x211|SSL_ST_ACCEPT)  #endif -#define SSL3_ST_SR_POST_CLIENT_CERT	(0x1BF|SSL_ST_ACCEPT)  #define SSL3_ST_SR_CHANNEL_ID_A		(0x220|SSL_ST_ACCEPT)  #define SSL3_ST_SR_CHANNEL_ID_B		(0x221|SSL_ST_ACCEPT)  #define SSL3_ST_SR_FINISHED_A		(0x1C0|SSL_ST_ACCEPT) diff --git a/app/openssl/ssl/ssl_asn1.c b/app/openssl/ssl/ssl_asn1.c index f83e18f8..38540be1 100644 --- a/app/openssl/ssl/ssl_asn1.c +++ b/app/openssl/ssl/ssl_asn1.c @@ -117,13 +117,12 @@ typedef struct ssl_session_asn1_st  #ifndef OPENSSL_NO_SRP  	ASN1_OCTET_STRING srp_username;  #endif /* OPENSSL_NO_SRP */ -	ASN1_OCTET_STRING original_handshake_hash;  	} SSL_SESSION_ASN1;  int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp)  	{  #define LSIZE2 (sizeof(long)*2) -	int v1=0,v2=0,v3=0,v4=0,v5=0,v7=0,v8=0,v14=0; +	int v1=0,v2=0,v3=0,v4=0,v5=0,v7=0,v8=0;  	unsigned char buf[4],ibuf1[LSIZE2],ibuf2[LSIZE2];  	unsigned char ibuf3[LSIZE2],ibuf4[LSIZE2],ibuf5[LSIZE2];  #ifndef OPENSSL_NO_TLSEXT @@ -273,13 +272,6 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp)  		a.psk_identity.type=V_ASN1_OCTET_STRING;  		a.psk_identity.data=(unsigned char *)(in->psk_identity);  		} - -	if (in->original_handshake_hash_len > 0) -		{ -		
a.original_handshake_hash.length = in->original_handshake_hash_len; -		a.original_handshake_hash.type = V_ASN1_OCTET_STRING; -		a.original_handshake_hash.data = in->original_handshake_hash; -		}  #endif /* OPENSSL_NO_PSK */  #ifndef OPENSSL_NO_SRP  	if (in->srp_username) @@ -333,8 +325,6 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp)  	if (in->srp_username)          	M_ASN1_I2D_len_EXP_opt(&(a.srp_username), i2d_ASN1_OCTET_STRING,12,v12);  #endif /* OPENSSL_NO_SRP */ -	if (in->original_handshake_hash_len > 0) -		M_ASN1_I2D_len_EXP_opt(&(a.original_handshake_hash),i2d_ASN1_OCTET_STRING,14,v14);  	M_ASN1_I2D_seq_total(); @@ -383,8 +373,6 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp)  	if (in->srp_username)  		M_ASN1_I2D_put_EXP_opt(&(a.srp_username), i2d_ASN1_OCTET_STRING,12,v12);  #endif /* OPENSSL_NO_SRP */ -	if (in->original_handshake_hash_len > 0) -		M_ASN1_I2D_put_EXP_opt(&(a.original_handshake_hash),i2d_ASN1_OCTET_STRING,14,v14);  	M_ASN1_I2D_finish();  	} @@ -420,7 +408,6 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp,  		if (os.length != 3)  			{  			c.error=SSL_R_CIPHER_CODE_WRONG_LENGTH; -			c.line=__LINE__;  			goto err;  			}  		id=0x02000000L| @@ -433,7 +420,6 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp,  		if (os.length != 2)  			{  			c.error=SSL_R_CIPHER_CODE_WRONG_LENGTH; -			c.line=__LINE__;  			goto err;  			}  		id=0x03000000L| @@ -443,7 +429,6 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp,  	else  		{  		c.error=SSL_R_UNKNOWN_SSL_VERSION; -		c.line=__LINE__;  		goto err;  		} @@ -536,7 +521,6 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp,  	    if (os.length > SSL_MAX_SID_CTX_LENGTH)  		{  		c.error=SSL_R_BAD_LENGTH; -		c.line=__LINE__;  		goto err;  		}  	    else @@ -654,16 +638,5 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp,  		ret->srp_username=NULL;  #endif /* OPENSSL_NO_SRP */ -	os.length=0; -	os.data=NULL; -	M_ASN1_D2I_get_EXP_opt(osp,d2i_ASN1_OCTET_STRING,14); -	if (os.data && os.length < (int)sizeof(ret->original_handshake_hash)) -		{ -		memcpy(ret->original_handshake_hash, os.data, os.length); -		ret->original_handshake_hash_len = os.length; -		OPENSSL_free(os.data); -		os.data = NULL; -		} -  	M_ASN1_D2I_Finish(a,SSL_SESSION_free,SSL_F_D2I_SSL_SESSION);  	} diff --git a/app/openssl/ssl/ssl_err.c b/app/openssl/ssl/ssl_err.c index ac0aad9b..bddd7949 100644 --- a/app/openssl/ssl/ssl_err.c +++ b/app/openssl/ssl/ssl_err.c @@ -553,7 +553,7 @@ static ERR_STRING_DATA SSL_str_reasons[]=  {ERR_REASON(SSL_R_TLSV1_UNRECOGNIZED_NAME),"tlsv1 unrecognized name"},  {ERR_REASON(SSL_R_TLSV1_UNSUPPORTED_EXTENSION),"tlsv1 unsupported extension"},  {ERR_REASON(SSL_R_TLS_CLIENT_CERT_REQ_WITH_ANON_CIPHER),"tls client cert req with anon cipher"}, -{ERR_REASON(SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT),"peer does not accept heartbeats"}, +{ERR_REASON(SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT),"peer does not accept heartbeats"},  {ERR_REASON(SSL_R_TLS_HEARTBEAT_PENDING) ,"heartbeat request already pending"},  {ERR_REASON(SSL_R_TLS_ILLEGAL_EXPORTER_LABEL),"tls illegal exporter label"},  {ERR_REASON(SSL_R_TLS_INVALID_ECPOINTFORMAT_LIST),"tls invalid ecpointformat list"}, @@ -604,6 +604,7 @@ static ERR_STRING_DATA SSL_str_reasons[]=  {ERR_REASON(SSL_R_WRONG_VERSION_NUMBER)  ,"wrong version number"},  {ERR_REASON(SSL_R_X509_LIB)              ,"x509 lib"},  {ERR_REASON(SSL_R_X509_VERIFICATION_SETUP_PROBLEMS),"x509 verification 
setup problems"}, +{ERR_REASON(SSL_R_UNEXPECTED_CCS),"unexpected CCS"},  {0,NULL}  	}; diff --git a/app/openssl/ssl/ssl_lib.c b/app/openssl/ssl/ssl_lib.c index 3de68a78..8d2c3a76 100644 --- a/app/openssl/ssl/ssl_lib.c +++ b/app/openssl/ssl/ssl_lib.c @@ -1403,10 +1403,6 @@ char *SSL_get_shared_ciphers(const SSL *s,char *buf,int len)  	p=buf;  	sk=s->session->ciphers; - -	if (sk_SSL_CIPHER_num(sk) == 0) -		return NULL; -  	for (i=0; i<sk_SSL_CIPHER_num(sk); i++)  		{  		int n; @@ -2675,10 +2671,6 @@ int SSL_get_error(const SSL *s,int i)  		{  		return(SSL_ERROR_WANT_X509_LOOKUP);  		} -	if ((i < 0) && SSL_want_channel_id_lookup(s)) -		{ -		return(SSL_ERROR_WANT_CHANNEL_ID_LOOKUP); -		}  	if (i == 0)  		{ @@ -3427,41 +3419,12 @@ int SSL_cutthrough_complete(const SSL *s)  		s->version >= SSL3_VERSION &&  		s->s3->in_read_app_data == 0 &&   /* cutthrough only applies to write() */  		(SSL_get_mode((SSL*)s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) &&  /* cutthrough enabled */ -		ssl3_can_cutthrough(s) &&                                   /* cutthrough allowed */ +		SSL_get_cipher_bits(s, NULL) >= 128 &&                      /* strong cipher chosen */  		s->s3->previous_server_finished_len == 0 &&                 /* not a renegotiation handshake */  		(s->state == SSL3_ST_CR_SESSION_TICKET_A ||                 /* ready to write app-data */  			s->state == SSL3_ST_CR_FINISHED_A));  	} -int ssl3_can_cutthrough(const SSL *s) -	{ -	const SSL_CIPHER *c; - -	/* require a strong enough cipher */ -	if (SSL_get_cipher_bits(s, NULL) < 128) -		return 0; - -	/* require ALPN or NPN extension */ -	if (!s->s3->alpn_selected -#ifndef OPENSSL_NO_NEXTPROTONEG -		&& !s->s3->next_proto_neg_seen -#endif -	) -		{ -		return 0; -		} - -	/* require a forward-secret cipher */ -	c = SSL_get_current_cipher(s); -	if (!c || (c->algorithm_mkey != SSL_kEDH && -			c->algorithm_mkey != SSL_kEECDH)) -		{ -		return 0; -		} - -	return 1; -	} -  /* Allocates new EVP_MD_CTX and sets pointer to it into given pointer   * variable, freeing EVP_MD_CTX previously stored in that variable, if   * any. 
If EVP_MD pointer is passed, initializes ctx with this md diff --git a/app/openssl/ssl/ssl_locl.h b/app/openssl/ssl/ssl_locl.h index 6b7731a4..f79ab009 100644 --- a/app/openssl/ssl/ssl_locl.h +++ b/app/openssl/ssl/ssl_locl.h @@ -1070,7 +1070,6 @@ void ssl_free_wbio_buffer(SSL *s);  int tls1_change_cipher_state(SSL *s, int which);  int tls1_setup_key_block(SSL *s);  int tls1_enc(SSL *s, int snd); -int tls1_handshake_digest(SSL *s, unsigned char *out, size_t out_len);  int tls1_final_finish_mac(SSL *s,  	const char *str, int slen, unsigned char *p);  int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *p); @@ -1127,10 +1126,8 @@ int tls12_get_sigid(const EVP_PKEY *pk);  const EVP_MD *tls12_get_hash(unsigned char hash_alg);  int tls1_channel_id_hash(EVP_MD_CTX *ctx, SSL *s); -int tls1_record_handshake_hashes_for_channel_id(SSL *s);  #endif -int ssl3_can_cutthrough(const SSL *s);  EVP_MD_CTX* ssl_replace_hash(EVP_MD_CTX **hash,const EVP_MD *md) ;  void ssl_clear_hash_ctx(EVP_MD_CTX **hash);  int ssl_add_serverhello_renegotiate_ext(SSL *s, unsigned char *p, int *len, diff --git a/app/openssl/ssl/ssl_sess.c b/app/openssl/ssl/ssl_sess.c index 7d170852..ec088404 100644 --- a/app/openssl/ssl/ssl_sess.c +++ b/app/openssl/ssl/ssl_sess.c @@ -1144,17 +1144,6 @@ int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL * ssl, X509 ** x509 , EVP_PK  	return ctx->client_cert_cb;  	} -void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, -	void (*cb)(SSL *ssl, EVP_PKEY **pkey)) -	{ -	ctx->channel_id_cb=cb; -	} - -void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL * ssl, EVP_PKEY **pkey) -	{ -	return ctx->channel_id_cb; -	} -  #ifndef OPENSSL_NO_ENGINE  int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e)  	{ diff --git a/app/openssl/ssl/t1_enc.c b/app/openssl/ssl/t1_enc.c index 22dd3cab..2ed2e076 100644 --- a/app/openssl/ssl/t1_enc.c +++ b/app/openssl/ssl/t1_enc.c @@ -895,79 +895,54 @@ int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *out)  	return((int)ret);  	} -/* tls1_handshake_digest calculates the current handshake hash and writes it to - * |out|, which has space for |out_len| bytes. It returns the number of bytes - * written or -1 in the event of an error. This function works on a copy of the - * underlying digests so can be called multiple times and prior to the final - * update etc. 
*/ -int tls1_handshake_digest(SSL *s, unsigned char *out, size_t out_len) -	{ -	const EVP_MD *md; -	EVP_MD_CTX ctx; -	int i, err = 0, len = 0; -	long mask; - -	EVP_MD_CTX_init(&ctx); - -	for (i = 0; ssl_get_handshake_digest(i, &mask, &md); i++) -		{ -		int hash_size; -		unsigned int digest_len; -		EVP_MD_CTX *hdgst = s->s3->handshake_dgst[i]; - -		if ((mask & ssl_get_algorithm2(s)) == 0) -			continue; - -		hash_size = EVP_MD_size(md); -		if (!hdgst || hash_size < 0 || (size_t)hash_size > out_len) -			{ -			err = 1; -			break; -			} - -		if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) || -		    !EVP_DigestFinal_ex(&ctx, out, &digest_len) || -		    digest_len != (unsigned int)hash_size) /* internal error */ -			{ -			err = 1; -			break; -			} -		out += digest_len; -		out_len -= digest_len; -		len += digest_len; -		} - -	EVP_MD_CTX_cleanup(&ctx); - -	if (err != 0) -		return -1; -	return len; -	} -  int tls1_final_finish_mac(SSL *s,  	     const char *str, int slen, unsigned char *out)  	{ +	unsigned int i; +	EVP_MD_CTX ctx;  	unsigned char buf[2*EVP_MAX_MD_SIZE]; -	unsigned char buf2[12]; +	unsigned char *q,buf2[12]; +	int idx; +	long mask;  	int err=0; +	const EVP_MD *md;  -	if (s->s3->handshake_buffer) +	q=buf; + +	if (s->s3->handshake_buffer)   		if (!ssl3_digest_cached_records(s))  			return 0; -	digests_len = tls1_handshake_digest(s, buf, sizeof(buf)); -	if (digests_len < 0) +	EVP_MD_CTX_init(&ctx); + +	for (idx=0;ssl_get_handshake_digest(idx,&mask,&md);idx++)  		{ -		err = 1; -		digests_len = 0; +		if (mask & ssl_get_algorithm2(s)) +			{ +			int hashsize = EVP_MD_size(md); +			EVP_MD_CTX *hdgst = s->s3->handshake_dgst[idx]; +			if (!hdgst || hashsize < 0 || hashsize > (int)(sizeof buf - (size_t)(q-buf))) +				{ +				/* internal error: 'buf' is too small for this ciphersuite! 
*/ +				err = 1; +				} +			else +				{ +				if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) || +					!EVP_DigestFinal_ex(&ctx,q,&i) || +					(i != (unsigned int)hashsize)) +					err = 1; +				q+=hashsize; +				} +			}  		} - +		  	if (!tls1_PRF(ssl_get_algorithm2(s), -			str,slen, buf, digests_len, NULL,0, NULL,0, NULL,0, +			str,slen, buf,(int)(q-buf), NULL,0, NULL,0, NULL,0,  			s->session->master_key,s->session->master_key_length,  			out,buf2,sizeof buf2))  		err = 1; +	EVP_MD_CTX_cleanup(&ctx);  	if (err)  		return 0; @@ -1073,10 +1048,14 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send)  	if (!stream_mac)  		EVP_MD_CTX_cleanup(&hmac);  #ifdef TLS_DEBUG +printf("sec="); +{unsigned int z; for (z=0; z<md_size; z++) printf("%02X ",mac_sec[z]); printf("\n"); }  printf("seq=");  {int z; for (z=0; z<8; z++) printf("%02X ",seq[z]); printf("\n"); } +printf("buf="); +{int z; for (z=0; z<5; z++) printf("%02X ",buf[z]); printf("\n"); }  printf("rec="); -{unsigned int z; for (z=0; z<rec->length; z++) printf("%02X ",rec->data[z]); printf("\n"); } +{unsigned int z; for (z=0; z<rec->length; z++) printf("%02X ",buf[z]); printf("\n"); }  #endif  	if (ssl->version != DTLS1_VERSION && ssl->version != DTLS1_BAD_VER) @@ -1206,7 +1185,7 @@ int tls1_export_keying_material(SSL *s, unsigned char *out, size_t olen,  	if (memcmp(val, TLS_MD_KEY_EXPANSION_CONST,  		 TLS_MD_KEY_EXPANSION_CONST_SIZE) == 0) goto err1; -	rv = tls1_PRF(ssl_get_algorithm2(s), +	rv = tls1_PRF(s->s3->tmp.new_cipher->algorithm2,  		      val, vallen,  		      NULL, 0,  		      NULL, 0, diff --git a/app/openssl/ssl/t1_lib.c b/app/openssl/ssl/t1_lib.c index 122a25f5..369e09f4 100644 --- a/app/openssl/ssl/t1_lib.c +++ b/app/openssl/ssl/t1_lib.c @@ -617,8 +617,6 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha  #ifndef OPENSSL_NO_HEARTBEATS  	/* Add Heartbeat extension */ -	if ((limit - ret - 4 - 1) < 0) -		return NULL;  	s2n(TLSEXT_TYPE_heartbeat,ret);  	s2n(1,ret);  	/* Set mode: @@ -649,10 +647,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha  		 * support for Channel ID. */  		if (limit - ret - 4 < 0)  			return NULL; -		if (s->ctx->tlsext_channel_id_enabled_new) -			s2n(TLSEXT_TYPE_channel_id_new,ret); -		else -			s2n(TLSEXT_TYPE_channel_id,ret); +		s2n(TLSEXT_TYPE_channel_id,ret);  		s2n(0,ret);  		} @@ -688,35 +683,36 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha                  ret += el;                  }  #endif + +#ifdef TLSEXT_TYPE_padding  	/* Add padding to workaround bugs in F5 terminators.  	 * See https://tools.ietf.org/html/draft-agl-tls-padding-03  	 *  	 * NB: because this code works out the length of all existing  	 * extensions it MUST always appear last.  	 */ -	if (s->options & SSL_OP_TLSEXT_PADDING) +	{ +	int hlen = ret - (unsigned char *)s->init_buf->data; +	/* The code in s23_clnt.c to build ClientHello messages includes the +	 * 5-byte record header in the buffer, while the code in s3_clnt.c does +	 * not. */ +	if (s->state == SSL23_ST_CW_CLNT_HELLO_A) +		hlen -= 5; +	if (hlen > 0xff && hlen < 0x200)  		{ -		int hlen = ret - (unsigned char *)s->init_buf->data; -		/* The code in s23_clnt.c to build ClientHello messages -		 * includes the 5-byte record header in the buffer, while -		 * the code in s3_clnt.c does not. 
-		 */ -		if (s->state == SSL23_ST_CW_CLNT_HELLO_A) -			hlen -= 5; -		if (hlen > 0xff && hlen < 0x200) -			{ -			hlen = 0x200 - hlen; -			if (hlen >= 4) -				hlen -= 4; -			else -				hlen = 0; +		hlen = 0x200 - hlen; +		if (hlen >= 4) +			hlen -= 4; +		else +			hlen = 0; -			s2n(TLSEXT_TYPE_padding, ret); -			s2n(hlen, ret); -			memset(ret, 0, hlen); -			ret += hlen; -			} +		s2n(TLSEXT_TYPE_padding, ret); +		s2n(hlen, ret); +		memset(ret, 0, hlen); +		ret += hlen;  		} +	} +#endif  	if ((extdatalen = ret-p-2)== 0)   		return p; @@ -871,8 +867,6 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha  	/* Add Heartbeat extension if we've received one */  	if (s->tlsext_heartbeat & SSL_TLSEXT_HB_ENABLED)  		{ -		if ((limit - ret - 4 - 1) < 0) -			return NULL;  		s2n(TLSEXT_TYPE_heartbeat,ret);  		s2n(1,ret);  		/* Set mode: @@ -915,10 +909,7 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha  		{  		if (limit - ret - 4 < 0)  			return NULL; -		if (s->s3->tlsext_channel_id_new) -			s2n(TLSEXT_TYPE_channel_id_new,ret); -		else -			s2n(TLSEXT_TYPE_channel_id,ret); +		s2n(TLSEXT_TYPE_channel_id,ret);  		s2n(0,ret);  		} @@ -1581,13 +1572,6 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in  		else if (type == TLSEXT_TYPE_channel_id && s->tlsext_channel_id_enabled)  			s->s3->tlsext_channel_id_valid = 1; -		else if (type == TLSEXT_TYPE_channel_id_new && -			 s->tlsext_channel_id_enabled) -			{ -			s->s3->tlsext_channel_id_valid = 1; -			s->s3->tlsext_channel_id_new = 1; -			} -  		else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation &&  			 s->ctx->alpn_select_cb &&  			 s->s3->tmp.finish_md_len == 0) @@ -1837,12 +1821,6 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in  		else if (type == TLSEXT_TYPE_channel_id)  			s->s3->tlsext_channel_id_valid = 1; -		else if (type == TLSEXT_TYPE_channel_id_new) -			{ -			s->s3->tlsext_channel_id_valid = 1; -			s->s3->tlsext_channel_id_new = 1; -			} -  		else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation)  			{  			unsigned len; @@ -2930,17 +2908,6 @@ tls1_channel_id_hash(EVP_MD_CTX *md, SSL *s)  	EVP_DigestUpdate(md, kClientIDMagic, sizeof(kClientIDMagic)); -	if (s->hit && s->s3->tlsext_channel_id_new) -		{ -		static const char kResumptionMagic[] = "Resumption"; -		EVP_DigestUpdate(md, kResumptionMagic, -				 sizeof(kResumptionMagic)); -		if (s->session->original_handshake_hash_len == 0) -			return 0; -		EVP_DigestUpdate(md, s->session->original_handshake_hash, -				 s->session->original_handshake_hash_len); -		} -  	EVP_MD_CTX_init(&ctx);  	for (i = 0; i < SSL_MAX_DIGEST; i++)  		{ @@ -2955,29 +2922,3 @@ tls1_channel_id_hash(EVP_MD_CTX *md, SSL *s)  	return 1;  	}  #endif - -/* tls1_record_handshake_hashes_for_channel_id records the current handshake - * hashes in |s->session| so that Channel ID resumptions can sign that data. */ -int tls1_record_handshake_hashes_for_channel_id(SSL *s) -	{ -	int digest_len; -	/* This function should never be called for a resumed session because -	 * the handshake hashes that we wish to record are for the original, -	 * full handshake. */ -	if (s->hit) -		return -1; -	/* It only makes sense to call this function if Channel IDs have been -	 * negotiated. 
*/ -	if (!s->s3->tlsext_channel_id_new) -		return -1; - -	digest_len = tls1_handshake_digest( -		s, s->session->original_handshake_hash, -		sizeof(s->session->original_handshake_hash)); -	if (digest_len < 0) -		return -1; - -	s->session->original_handshake_hash_len = digest_len; - -	return 1; -	} diff --git a/app/openssl/ssl/tls1.h b/app/openssl/ssl/tls1.h index b9a0899e..ec8948d5 100644 --- a/app/openssl/ssl/tls1.h +++ b/app/openssl/ssl/tls1.h @@ -259,7 +259,6 @@ extern "C" {  /* This is not an IANA defined extension number */  #define TLSEXT_TYPE_channel_id			30031 -#define TLSEXT_TYPE_channel_id_new		30032  /* NameType value from RFC 3546 */  #define TLSEXT_NAMETYPE_host_name 0 @@ -532,11 +531,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb)  #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256        0x0300C031  #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384        0x0300C032 -/* ECDHE PSK ciphersuites from RFC5489 - * SHA-2 cipher suites are omitted because they cannot be used safely with - * SSLv3. */ -#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA          0x0300C035 -#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA          0x0300C036 +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256    0x0300C037 +#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384    0x0300C038  /* XXX   * Inconsistency alert: @@ -689,9 +686,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb)  #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256       "ECDH-RSA-AES128-GCM-SHA256"  #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384       "ECDH-RSA-AES256-GCM-SHA384" -/* ECDHE PSK ciphersuites from RFC5489 */ -#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA         "ECDHE-PSK-AES128-CBC-SHA" -#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA         "ECDHE-PSK-AES256-CBC-SHA" +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256  "ECDHE-PSK-WITH-AES-128-CBC-SHA256" +#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384  "ECDHE-PSK-WITH-AES-256-CBC-SHA384"  #define TLS_CT_RSA_SIGN			1  #define TLS_CT_DSS_SIGN			2
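A few of the hunks above change behavior in ways that are easy to miss, so short illustrative sketches follow. All of them are editor-supplied examples written against this tree's headers, not code from the patch itself. First, the ssl.h and ssl_sess.c hunks drop the asynchronous Channel ID path (SSL_CTX_set_channel_id_cb and SSL_ERROR_WANT_CHANNEL_ID_LOOKUP are removed), so a client supplies its key up front through the SSL_set1_tls_channel_id macro documented above. A minimal sketch, assuming the Android-patched headers and "set1" (reference-taking) semantics:

#include <openssl/ssl.h>
#include <openssl/ec.h>
#include <openssl/evp.h>
#include <openssl/obj_mac.h>

/* Attach a fresh P-256 Channel ID key to a client SSL; returns 1 on
 * success. Error paths are compressed for brevity. */
static int attach_channel_id(SSL *ssl)
	{
	int ok = 0;
	EC_KEY *ec = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1);
	EVP_PKEY *pkey = EVP_PKEY_new();
	if (ec != NULL && pkey != NULL &&
	    EC_KEY_generate_key(ec) &&
	    EVP_PKEY_set1_EC_KEY(pkey, ec))
		ok = SSL_set1_tls_channel_id(ssl, pkey); /* 1 on success */
	EC_KEY_free(ec);     /* pkey holds its own reference */
	EVP_PKEY_free(pkey); /* the SSL keeps its own reference ("set1") */
	return ok;
	}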
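Second, the SSL3_FLAGS_CCS_OK comment added in ssl3.h and the new SSL_R_UNEXPECTED_CCS reason code document the early-ChangeCipherSpec hardening (CVE-2014-0224): a CCS record is fatal unless the handshake state machine has explicitly permitted one. A schematic of how the record layer applies the flag; the enclosing function is hypothetical, and because it touches internal fields it only compiles inside the library next to ssl_locl.h. The real check lives in s3_pkt.c:

/* Hypothetical dispatcher sketch: reject a ChangeCipherSpec record
 * unless the state machine has set SSL3_FLAGS_CCS_OK. */
static int on_change_cipher_spec(SSL *s)
	{
	if (!(s->s3->flags & SSL3_FLAGS_CCS_OK))
		{
		/* A CCS at an unexpected handshake point is fatal. */
		SSLerr(SSL_F_SSL3_READ_BYTES, SSL_R_UNEXPECTED_CCS);
		ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_UNEXPECTED_MESSAGE);
		return -1;
		}
	s->s3->flags &= ~SSL3_FLAGS_CCS_OK; /* the permission is one-shot */
	return 1;
	}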
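Third, SSL_get_shared_ciphers() loses its empty-list guard in the ssl_lib.c hunk. The restored 1.0.1g body ends by writing p[-1]='\0', which lands one byte before the caller's buffer whenever the session's cipher list is empty, so callers on this tree should check the list themselves. A defensive sketch; the SSL_SESSION struct layout is public in 1.0.1, so the ciphers field is reachable:

#include <stdio.h>
#include <openssl/ssl.h>

/* Log the ciphers shared with the client, guarding against the empty
 * list that the removed check used to reject. */
static void log_shared_ciphers(SSL *s)
	{
	char shared[1024];
	SSL_SESSION *sess = SSL_get_session(s);
	if (sess == NULL || sess->ciphers == NULL ||
	    sk_SSL_CIPHER_num(sess->ciphers) == 0)
		return; /* nothing negotiated; avoid the buf[-1] write */
	if (SSL_get_shared_ciphers(s, shared, sizeof(shared)) != NULL)
		fprintf(stderr, "shared ciphers: %s\n", shared);
	}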
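Fourth, SSL_cutthrough_complete() now gates cutthrough on a single >=128-bit check, where the deleted ssl3_can_cutthrough() also required ALPN or NPN and a forward-secret key exchange (SSL_kEDH or SSL_kEECDH). An application that enables the mode can restore that margin through its own cipher policy. A sketch, assuming the Android-specific SSL_MODE_HANDSHAKE_CUTTHROUGH flag; the cipher string is an example policy, not a mandate:

#include <openssl/ssl.h>

/* Build a client context that opts into cutthrough but pins the cipher
 * list to forward-secret AEAD suites, making the relaxed library check
 * harmless in practice. */
static SSL_CTX *make_cutthrough_ctx(void)
	{
	SSL_CTX *ctx = SSL_CTX_new(SSLv23_client_method());
	if (ctx == NULL)
		return NULL;
	SSL_CTX_set_mode(ctx, SSL_MODE_HANDSHAKE_CUTTHROUGH);
	if (!SSL_CTX_set_cipher_list(ctx,
	    "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256"))
		{
		SSL_CTX_free(ctx);
		return NULL;
		}
	return ctx;
	}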
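Fifth, the t1_enc.c hunk folds tls1_handshake_digest() back into tls1_final_finish_mac(), but both shapes of the code lean on the same idiom: a running handshake digest is never finalized in place; it is cloned with EVP_MD_CTX_copy_ex() and the clone is finalized, so the Finished value can be computed mid-handshake without disturbing the transcript. The concatenated snapshots (MD5||SHA-1 up to TLS 1.1, a single hash for TLS 1.2) then feed the PRF with the "client finished" or "server finished" label to yield the 12-byte verify_data. The idiom in isolation, as a self-contained helper:

#include <openssl/evp.h>

/* Finalize a *copy* of a running digest so the original can keep
 * absorbing handshake messages. Returns 1 on success, 0 on error. */
static int snapshot_digest(const EVP_MD_CTX *running,
			   unsigned char *out, unsigned int *out_len)
	{
	EVP_MD_CTX ctx;
	int ok;
	EVP_MD_CTX_init(&ctx);
	ok = EVP_MD_CTX_copy_ex(&ctx, running) &&
	     EVP_DigestFinal_ex(&ctx, out, out_len);
	EVP_MD_CTX_cleanup(&ctx);
	return ok;
	}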
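Finally, the rewritten padding block in t1_lib.c implements draft-agl-tls-padding: a ClientHello whose length falls strictly between 0xff and 0x200 is padded up to exactly 0x200 bytes, with 4 bytes reserved for the padding extension's own type and length header. The arithmetic as a standalone helper (the function name is illustrative):

/* Number of zero bytes to place in a TLSEXT_TYPE_padding body for a
 * ClientHello of hlen bytes, or -1 when no padding should be sent. */
static int tls_padding_body_len(int hlen)
	{
	int pad;
	if (hlen <= 0xff || hlen >= 0x200)
		return -1;	/* only the 256..511 window misbehaves */
	pad = 0x200 - hlen;
	if (pad >= 4)
		pad -= 4;	/* the 4-byte extension header counts too */
	else
		pad = 0;	/* the header alone crosses the boundary */
	return pad;
	}

For hlen = 0x100 the helper returns 252, and the padded hello is 0x100 + 4 + 252 = 0x200 bytes, just clearing the affected window.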
